summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2011-06-04 12:13:06 +0200
committerIngo Molnar <mingo@elte.hu>2011-06-04 12:13:06 +0200
commit710054ba25c0d1f8f41c22ce13ba336503fb5318 (patch)
treef9b09b722bf511841539173d946f90a20fc2e59a /kernel
parent74c355fbdfedd3820046dba4f537876cea54c207 (diff)
parentb273fa9716aa1564bee88ceee62f9042981cdc81 (diff)
downloadlwn-710054ba25c0d1f8f41c22ce13ba336503fb5318.tar.gz
lwn-710054ba25c0d1f8f41c22ce13ba336503fb5318.zip
Merge branch 'perf/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpuset.c4
-rw-r--r--kernel/events/core.c8
-rw-r--r--kernel/fork.c42
-rw-r--r--kernel/jump_label.c18
-rw-r--r--kernel/kthread.c4
-rw-r--r--kernel/pm_qos_params.c37
-rw-r--r--kernel/rcutree.c208
-rw-r--r--kernel/rcutree.h30
-rw-r--r--kernel/rcutree_plugin.h33
-rw-r--r--kernel/rcutree_trace.c12
-rw-r--r--kernel/sched.c56
-rw-r--r--kernel/sched_fair.c5
-rw-r--r--kernel/sched_rt.c10
-rw-r--r--kernel/sched_stats.h4
-rw-r--r--kernel/trace/ftrace.c31
-rw-r--r--kernel/trace/ring_buffer.c10
-rw-r--r--kernel/trace/trace.h15
-rw-r--r--kernel/trace/trace_events.c7
-rw-r--r--kernel/trace/trace_output.c27
-rw-r--r--kernel/watchdog.c9
20 files changed, 337 insertions, 233 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1ceeb049c827..9c9b7545c810 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2190,7 +2190,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
rcu_read_lock();
cs = task_cs(tsk);
if (cs)
- cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed);
+ do_set_cpus_allowed(tsk, cs->cpus_allowed);
rcu_read_unlock();
/*
@@ -2217,7 +2217,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
* Like above we can temporary set any mask and rely on
* set_cpus_allowed_ptr() as synchronization point.
*/
- cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask);
+ do_set_cpus_allowed(tsk, cpu_possible_mask);
cpu = cpumask_any(cpu_active_mask);
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8a15944bf9d2..9efe7108ccaf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5028,6 +5028,14 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
else
perf_event_output(event, nmi, data, regs);
+ if (event->fasync && event->pending_kill) {
+ if (nmi) {
+ event->pending_wakeup = 1;
+ irq_work_queue(&event->pending);
+ } else
+ perf_event_wakeup(event);
+ }
+
return ret;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index ca406d916713..0276c30401a0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -484,20 +484,6 @@ static void mm_init_aio(struct mm_struct *mm)
#endif
}
-int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm)
-{
-#ifdef CONFIG_CPUMASK_OFFSTACK
- if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL))
- return -ENOMEM;
-
- if (oldmm)
- cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm));
- else
- memset(mm_cpumask(mm), 0, cpumask_size());
-#endif
- return 0;
-}
-
static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
{
atomic_set(&mm->mm_users, 1);
@@ -538,17 +524,8 @@ struct mm_struct * mm_alloc(void)
return NULL;
memset(mm, 0, sizeof(*mm));
- mm = mm_init(mm, current);
- if (!mm)
- return NULL;
-
- if (mm_init_cpumask(mm, NULL)) {
- mm_free_pgd(mm);
- free_mm(mm);
- return NULL;
- }
-
- return mm;
+ mm_init_cpumask(mm);
+ return mm_init(mm, current);
}
/*
@@ -559,7 +536,6 @@ struct mm_struct * mm_alloc(void)
void __mmdrop(struct mm_struct *mm)
{
BUG_ON(mm == &init_mm);
- free_cpumask_var(mm->cpu_vm_mask_var);
mm_free_pgd(mm);
destroy_context(mm);
mmu_notifier_mm_destroy(mm);
@@ -753,6 +729,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
goto fail_nomem;
memcpy(mm, oldmm, sizeof(*mm));
+ mm_init_cpumask(mm);
/* Initializing for Swap token stuff */
mm->token_priority = 0;
@@ -765,9 +742,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
if (!mm_init(mm, tsk))
goto fail_nomem;
- if (mm_init_cpumask(mm, oldmm))
- goto fail_nocpumask;
-
if (init_new_context(tsk, mm))
goto fail_nocontext;
@@ -794,9 +768,6 @@ fail_nomem:
return NULL;
fail_nocontext:
- free_cpumask_var(mm->cpu_vm_mask_var);
-
-fail_nocpumask:
/*
* If init_new_context() failed, we cannot use mmput() to free the mm
* because it calls destroy_context()
@@ -1591,6 +1562,13 @@ void __init proc_caches_init(void)
fs_cachep = kmem_cache_create("fs_cache",
sizeof(struct fs_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
+ /*
+ * FIXME! The "sizeof(struct mm_struct)" currently includes the
+ * whole struct cpumask for the OFFSTACK case. We could change
+ * this to *only* allocate as much of it as required by the
+ * maximum number of CPU's we can ever have. The cpumask_allocation
+ * is at the end of the structure, exactly for that reason.
+ */
mm_cachep = kmem_cache_create("mm_struct",
sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 74d1c099fbd1..fa27e750dbc0 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -105,9 +105,12 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start,
}
static void __jump_label_update(struct jump_label_key *key,
- struct jump_entry *entry, int enable)
+ struct jump_entry *entry,
+ struct jump_entry *stop, int enable)
{
- for (; entry->key == (jump_label_t)(unsigned long)key; entry++) {
+ for (; (entry < stop) &&
+ (entry->key == (jump_label_t)(unsigned long)key);
+ entry++) {
/*
* entry->code set to 0 invalidates module init text sections
* kernel_text_address() verifies we are not in core kernel
@@ -181,7 +184,11 @@ static void __jump_label_mod_update(struct jump_label_key *key, int enable)
struct jump_label_mod *mod = key->next;
while (mod) {
- __jump_label_update(key, mod->entries, enable);
+ struct module *m = mod->mod;
+
+ __jump_label_update(key, mod->entries,
+ m->jump_entries + m->num_jump_entries,
+ enable);
mod = mod->next;
}
}
@@ -245,7 +252,8 @@ static int jump_label_add_module(struct module *mod)
key->next = jlm;
if (jump_label_enabled(key))
- __jump_label_update(key, iter, JUMP_LABEL_ENABLE);
+ __jump_label_update(key, iter, iter_stop,
+ JUMP_LABEL_ENABLE);
}
return 0;
@@ -371,7 +379,7 @@ static void jump_label_update(struct jump_label_key *key, int enable)
/* if there are no users, entry can be NULL */
if (entry)
- __jump_label_update(key, entry, enable);
+ __jump_label_update(key, entry, __stop___jump_table, enable);
#ifdef CONFIG_MODULES
__jump_label_mod_update(key, enable);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 3b34d2732bce..4ba7cccb4994 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -202,8 +202,8 @@ void kthread_bind(struct task_struct *p, unsigned int cpu)
return;
}
- p->cpus_allowed = cpumask_of_cpu(cpu);
- p->rt.nr_cpus_allowed = 1;
+ /* It's safe because the task is inactive. */
+ do_set_cpus_allowed(p, cpumask_of(cpu));
p->flags |= PF_THREAD_BOUND;
}
EXPORT_SYMBOL(kthread_bind);
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index fd8d1e035df9..6824ca7d4d0c 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -54,11 +54,17 @@ enum pm_qos_type {
PM_QOS_MIN /* return the smallest value */
};
+/*
+ * Note: The lockless read path depends on the CPU accessing
+ * target_value atomically. Atomic access is only guaranteed on all CPU
+ * types linux supports for 32 bit quantites
+ */
struct pm_qos_object {
struct plist_head requests;
struct blocking_notifier_head *notifiers;
struct miscdevice pm_qos_power_miscdev;
char *name;
+ s32 target_value; /* Do not change to 64 bit */
s32 default_value;
enum pm_qos_type type;
};
@@ -71,7 +77,8 @@ static struct pm_qos_object cpu_dma_pm_qos = {
.requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock),
.notifiers = &cpu_dma_lat_notifier,
.name = "cpu_dma_latency",
- .default_value = 2000 * USEC_PER_SEC,
+ .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
+ .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
.type = PM_QOS_MIN,
};
@@ -80,7 +87,8 @@ static struct pm_qos_object network_lat_pm_qos = {
.requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock),
.notifiers = &network_lat_notifier,
.name = "network_latency",
- .default_value = 2000 * USEC_PER_SEC,
+ .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
+ .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
.type = PM_QOS_MIN
};
@@ -90,7 +98,8 @@ static struct pm_qos_object network_throughput_pm_qos = {
.requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock),
.notifiers = &network_throughput_notifier,
.name = "network_throughput",
- .default_value = 0,
+ .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
+ .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
.type = PM_QOS_MAX,
};
@@ -136,6 +145,16 @@ static inline int pm_qos_get_value(struct pm_qos_object *o)
}
}
+static inline s32 pm_qos_read_value(struct pm_qos_object *o)
+{
+ return o->target_value;
+}
+
+static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value)
+{
+ o->target_value = value;
+}
+
static void update_target(struct pm_qos_object *o, struct plist_node *node,
int del, int value)
{
@@ -160,6 +179,7 @@ static void update_target(struct pm_qos_object *o, struct plist_node *node,
plist_add(node, &o->requests);
}
curr_value = pm_qos_get_value(o);
+ pm_qos_set_value(o, curr_value);
spin_unlock_irqrestore(&pm_qos_lock, flags);
if (prev_value != curr_value)
@@ -194,18 +214,11 @@ static int find_pm_qos_object_by_minor(int minor)
* pm_qos_request - returns current system wide qos expectation
* @pm_qos_class: identification of which qos value is requested
*
- * This function returns the current target value in an atomic manner.
+ * This function returns the current target value.
*/
int pm_qos_request(int pm_qos_class)
{
- unsigned long flags;
- int value;
-
- spin_lock_irqsave(&pm_qos_lock, flags);
- value = pm_qos_get_value(pm_qos_array[pm_qos_class]);
- spin_unlock_irqrestore(&pm_qos_lock, flags);
-
- return value;
+ return pm_qos_read_value(pm_qos_array[pm_qos_class]);
}
EXPORT_SYMBOL_GPL(pm_qos_request);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index f07d2f03181a..89419ff92e99 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -36,7 +36,7 @@
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/nmi.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/completion.h>
@@ -95,7 +95,6 @@ static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
-static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq);
DEFINE_PER_CPU(char, rcu_cpu_has_work);
static char rcu_kthreads_spawnable;
@@ -163,7 +162,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
#ifdef CONFIG_NO_HZ
DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
.dynticks_nesting = 1,
- .dynticks = 1,
+ .dynticks = ATOMIC_INIT(1),
};
#endif /* #ifdef CONFIG_NO_HZ */
@@ -322,13 +321,25 @@ void rcu_enter_nohz(void)
unsigned long flags;
struct rcu_dynticks *rdtp;
- smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
local_irq_save(flags);
rdtp = &__get_cpu_var(rcu_dynticks);
- rdtp->dynticks++;
- rdtp->dynticks_nesting--;
- WARN_ON_ONCE(rdtp->dynticks & 0x1);
+ if (--rdtp->dynticks_nesting) {
+ local_irq_restore(flags);
+ return;
+ }
+ /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+ smp_mb__before_atomic_inc(); /* See above. */
+ atomic_inc(&rdtp->dynticks);
+ smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
+ WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
local_irq_restore(flags);
+
+ /* If the interrupt queued a callback, get out of dyntick mode. */
+ if (in_irq() &&
+ (__get_cpu_var(rcu_sched_data).nxtlist ||
+ __get_cpu_var(rcu_bh_data).nxtlist ||
+ rcu_preempt_needs_cpu(smp_processor_id())))
+ set_need_resched();
}
/*
@@ -344,11 +355,16 @@ void rcu_exit_nohz(void)
local_irq_save(flags);
rdtp = &__get_cpu_var(rcu_dynticks);
- rdtp->dynticks++;
- rdtp->dynticks_nesting++;
- WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
+ if (rdtp->dynticks_nesting++) {
+ local_irq_restore(flags);
+ return;
+ }
+ smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */
+ atomic_inc(&rdtp->dynticks);
+ /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
+ smp_mb__after_atomic_inc(); /* See above. */
+ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
local_irq_restore(flags);
- smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
}
/**
@@ -362,11 +378,15 @@ void rcu_nmi_enter(void)
{
struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
- if (rdtp->dynticks & 0x1)
+ if (rdtp->dynticks_nmi_nesting == 0 &&
+ (atomic_read(&rdtp->dynticks) & 0x1))
return;
- rdtp->dynticks_nmi++;
- WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1));
- smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
+ rdtp->dynticks_nmi_nesting++;
+ smp_mb__before_atomic_inc(); /* Force delay from prior write. */
+ atomic_inc(&rdtp->dynticks);
+ /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
+ smp_mb__after_atomic_inc(); /* See above. */
+ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
}
/**
@@ -380,11 +400,14 @@ void rcu_nmi_exit(void)
{
struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
- if (rdtp->dynticks & 0x1)
+ if (rdtp->dynticks_nmi_nesting == 0 ||
+ --rdtp->dynticks_nmi_nesting != 0)
return;
- smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
- rdtp->dynticks_nmi++;
- WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1);
+ /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+ smp_mb__before_atomic_inc(); /* See above. */
+ atomic_inc(&rdtp->dynticks);
+ smp_mb__after_atomic_inc(); /* Force delay to next write. */
+ WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
}
/**
@@ -395,13 +418,7 @@ void rcu_nmi_exit(void)
*/
void rcu_irq_enter(void)
{
- struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
-
- if (rdtp->dynticks_nesting++)
- return;
- rdtp->dynticks++;
- WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
- smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
+ rcu_exit_nohz();
}
/**
@@ -413,18 +430,7 @@ void rcu_irq_enter(void)
*/
void rcu_irq_exit(void)
{
- struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
-
- if (--rdtp->dynticks_nesting)
- return;
- smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
- rdtp->dynticks++;
- WARN_ON_ONCE(rdtp->dynticks & 0x1);
-
- /* If the interrupt queued a callback, get out of dyntick mode. */
- if (__this_cpu_read(rcu_sched_data.nxtlist) ||
- __this_cpu_read(rcu_bh_data.nxtlist))
- set_need_resched();
+ rcu_enter_nohz();
}
#ifdef CONFIG_SMP
@@ -436,19 +442,8 @@ void rcu_irq_exit(void)
*/
static int dyntick_save_progress_counter(struct rcu_data *rdp)
{
- int ret;
- int snap;
- int snap_nmi;
-
- snap = rdp->dynticks->dynticks;
- snap_nmi = rdp->dynticks->dynticks_nmi;
- smp_mb(); /* Order sampling of snap with end of grace period. */
- rdp->dynticks_snap = snap;
- rdp->dynticks_nmi_snap = snap_nmi;
- ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0);
- if (ret)
- rdp->dynticks_fqs++;
- return ret;
+ rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
+ return 0;
}
/*
@@ -459,16 +454,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
*/
static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
{
- long curr;
- long curr_nmi;
- long snap;
- long snap_nmi;
+ unsigned long curr;
+ unsigned long snap;
- curr = rdp->dynticks->dynticks;
- snap = rdp->dynticks_snap;
- curr_nmi = rdp->dynticks->dynticks_nmi;
- snap_nmi = rdp->dynticks_nmi_snap;
- smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
+ curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks);
+ snap = (unsigned long)rdp->dynticks_snap;
/*
* If the CPU passed through or entered a dynticks idle phase with
@@ -478,8 +468,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
* read-side critical section that started before the beginning
* of the current RCU grace period.
*/
- if ((curr != snap || (curr & 0x1) == 0) &&
- (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) {
+ if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) {
rdp->dynticks_fqs++;
return 1;
}
@@ -908,6 +897,12 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
unsigned long gp_duration;
WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
+
+ /*
+ * Ensure that all grace-period and pre-grace-period activity
+ * is seen before the assignment to rsp->completed.
+ */
+ smp_mb(); /* See above block comment. */
gp_duration = jiffies - rsp->gp_start;
if (gp_duration > rsp->gp_max)
rsp->gp_max = gp_duration;
@@ -1455,25 +1450,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
*/
static void rcu_process_callbacks(void)
{
- /*
- * Memory references from any prior RCU read-side critical sections
- * executed by the interrupted code must be seen before any RCU
- * grace-period manipulations below.
- */
- smp_mb(); /* See above block comment. */
-
__rcu_process_callbacks(&rcu_sched_state,
&__get_cpu_var(rcu_sched_data));
__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
rcu_preempt_process_callbacks();
- /*
- * Memory references from any later RCU read-side critical sections
- * executed by the interrupted code must be seen after any RCU
- * grace-period manipulations above.
- */
- smp_mb(); /* See above block comment. */
-
/* If we are last CPU on way to dyntick-idle mode, accelerate it. */
rcu_needs_cpu_flush();
}
@@ -1494,7 +1475,7 @@ static void invoke_rcu_cpu_kthread(void)
local_irq_restore(flags);
return;
}
- wake_up(&__get_cpu_var(rcu_cpu_wq));
+ wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
local_irq_restore(flags);
}
@@ -1544,13 +1525,10 @@ static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
*/
static void rcu_cpu_kthread_timer(unsigned long arg)
{
- unsigned long flags;
struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
struct rcu_node *rnp = rdp->mynode;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- rnp->wakemask |= rdp->grpmask;
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ atomic_or(rdp->grpmask, &rnp->wakemask);
invoke_rcu_node_kthread(rnp);
}
@@ -1617,14 +1595,12 @@ static int rcu_cpu_kthread(void *arg)
unsigned long flags;
int spincnt = 0;
unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
- wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu);
char work;
char *workp = &per_cpu(rcu_cpu_has_work, cpu);
for (;;) {
*statusp = RCU_KTHREAD_WAITING;
- wait_event_interruptible(*wqp,
- *workp != 0 || kthread_should_stop());
+ rcu_wait(*workp != 0 || kthread_should_stop());
local_bh_disable();
if (rcu_cpu_kthread_should_stop(cpu)) {
local_bh_enable();
@@ -1675,7 +1651,6 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
per_cpu(rcu_cpu_kthread_task, cpu) = t;
- wake_up_process(t);
sp.sched_priority = RCU_KTHREAD_PRIO;
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
return 0;
@@ -1698,11 +1673,10 @@ static int rcu_node_kthread(void *arg)
for (;;) {
rnp->node_kthread_status = RCU_KTHREAD_WAITING;
- wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0);
+ rcu_wait(atomic_read(&rnp->wakemask) != 0);
rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
raw_spin_lock_irqsave(&rnp->lock, flags);
- mask = rnp->wakemask;
- rnp->wakemask = 0;
+ mask = atomic_xchg(&rnp->wakemask, 0);
rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
if ((mask & 0x1) == 0)
@@ -1783,13 +1757,14 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
raw_spin_lock_irqsave(&rnp->lock, flags);
rnp->node_kthread_task = t;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
- wake_up_process(t);
sp.sched_priority = 99;
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
}
return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
}
+static void rcu_wake_one_boost_kthread(struct rcu_node *rnp);
+
/*
* Spawn all kthreads -- called as soon as the scheduler is running.
*/
@@ -1797,24 +1772,31 @@ static int __init rcu_spawn_kthreads(void)
{
int cpu;
struct rcu_node *rnp;
+ struct task_struct *t;
rcu_kthreads_spawnable = 1;
for_each_possible_cpu(cpu) {
- init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu));
per_cpu(rcu_cpu_has_work, cpu) = 0;
- if (cpu_online(cpu))
+ if (cpu_online(cpu)) {
(void)rcu_spawn_one_cpu_kthread(cpu);
+ t = per_cpu(rcu_cpu_kthread_task, cpu);
+ if (t)
+ wake_up_process(t);
+ }
}
rnp = rcu_get_root(rcu_state);
- init_waitqueue_head(&rnp->node_wq);
- rcu_init_boost_waitqueue(rnp);
(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
- if (NUM_RCU_NODES > 1)
+ if (rnp->node_kthread_task)
+ wake_up_process(rnp->node_kthread_task);
+ if (NUM_RCU_NODES > 1) {
rcu_for_each_leaf_node(rcu_state, rnp) {
- init_waitqueue_head(&rnp->node_wq);
- rcu_init_boost_waitqueue(rnp);
(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+ t = rnp->node_kthread_task;
+ if (t)
+ wake_up_process(t);
+ rcu_wake_one_boost_kthread(rnp);
}
+ }
return 0;
}
early_initcall(rcu_spawn_kthreads);
@@ -2218,14 +2200,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
}
-static void __cpuinit rcu_online_cpu(int cpu)
+static void __cpuinit rcu_prepare_cpu(int cpu)
{
rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
rcu_preempt_init_percpu_data(cpu);
}
-static void __cpuinit rcu_online_kthreads(int cpu)
+static void __cpuinit rcu_prepare_kthreads(int cpu)
{
struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
struct rcu_node *rnp = rdp->mynode;
@@ -2239,6 +2221,31 @@ static void __cpuinit rcu_online_kthreads(int cpu)
}
/*
+ * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state,
+ * but the RCU threads are woken on demand, and if demand is low this
+ * could be a while triggering the hung task watchdog.
+ *
+ * In order to avoid this, poke all tasks once the CPU is fully
+ * up and running.
+ */
+static void __cpuinit rcu_online_kthreads(int cpu)
+{
+ struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
+ struct rcu_node *rnp = rdp->mynode;
+ struct task_struct *t;
+
+ t = per_cpu(rcu_cpu_kthread_task, cpu);
+ if (t)
+ wake_up_process(t);
+
+ t = rnp->node_kthread_task;
+ if (t)
+ wake_up_process(t);
+
+ rcu_wake_one_boost_kthread(rnp);
+}
+
+/*
* Handle CPU online/offline notification events.
*/
static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
@@ -2251,10 +2258,11 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- rcu_online_cpu(cpu);
- rcu_online_kthreads(cpu);
+ rcu_prepare_cpu(cpu);
+ rcu_prepare_kthreads(cpu);
break;
case CPU_ONLINE:
+ rcu_online_kthreads(cpu);
case CPU_DOWN_FAILED:
rcu_node_kthread_setaffinity(rnp, -1);
rcu_cpu_kthread_setrt(cpu, 1);
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 257664815d5d..7b9a08b4aaea 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -84,11 +84,9 @@
* Dynticks per-CPU state.
*/
struct rcu_dynticks {
- int dynticks_nesting; /* Track nesting level, sort of. */
- int dynticks; /* Even value for dynticks-idle, else odd. */
- int dynticks_nmi; /* Even value for either dynticks-idle or */
- /* not in nmi handler, else odd. So this */
- /* remains even for nmi from irq handler. */
+ int dynticks_nesting; /* Track irq/process nesting level. */
+ int dynticks_nmi_nesting; /* Track NMI nesting level. */
+ atomic_t dynticks; /* Even value for dynticks-idle, else odd. */
};
/* RCU's kthread states for tracing. */
@@ -121,7 +119,9 @@ struct rcu_node {
/* elements that need to drain to allow the */
/* current expedited grace period to */
/* complete (only for TREE_PREEMPT_RCU). */
- unsigned long wakemask; /* CPUs whose kthread needs to be awakened. */
+ atomic_t wakemask; /* CPUs whose kthread needs to be awakened. */
+ /* Since this has meaning only for leaf */
+ /* rcu_node structures, 32 bits suffices. */
unsigned long qsmaskinit;
/* Per-GP initial value for qsmask & expmask. */
unsigned long grpmask; /* Mask to apply to parent qsmask. */
@@ -159,9 +159,6 @@ struct rcu_node {
struct task_struct *boost_kthread_task;
/* kthread that takes care of priority */
/* boosting for this rcu_node structure. */
- wait_queue_head_t boost_wq;
- /* Wait queue on which to park the boost */
- /* kthread. */
unsigned int boost_kthread_status;
/* State of boost_kthread_task for tracing. */
unsigned long n_tasks_boosted;
@@ -188,9 +185,6 @@ struct rcu_node {
/* kthread that takes care of this rcu_node */
/* structure, for example, awakening the */
/* per-CPU kthreads as needed. */
- wait_queue_head_t node_wq;
- /* Wait queue on which to park the per-node */
- /* kthread. */
unsigned int node_kthread_status;
/* State of node_kthread_task for tracing. */
} ____cacheline_internodealigned_in_smp;
@@ -284,7 +278,6 @@ struct rcu_data {
/* 3) dynticks interface. */
struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */
int dynticks_snap; /* Per-GP tracking for dynticks. */
- int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */
#endif /* #ifdef CONFIG_NO_HZ */
/* 4) reasons this CPU needed to be kicked by force_quiescent_state */
@@ -337,6 +330,16 @@ struct rcu_data {
/* scheduling clock irq */
/* before ratting on them. */
+#define rcu_wait(cond) \
+do { \
+ for (;;) { \
+ set_current_state(TASK_INTERRUPTIBLE); \
+ if (cond) \
+ break; \
+ schedule(); \
+ } \
+ __set_current_state(TASK_RUNNING); \
+} while (0)
/*
* RCU global state, including node hierarchy. This hierarchy is
@@ -446,7 +449,6 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
static void rcu_preempt_send_cbs_to_online(void);
static void __init __rcu_init_preempt(void);
static void rcu_needs_cpu_flush(void);
-static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp);
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
cpumask_var_t cm);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 3f6559a5f5cd..c8bff3099a89 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1196,8 +1196,7 @@ static int rcu_boost_kthread(void *arg)
for (;;) {
rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
- wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks ||
- rnp->exp_tasks);
+ rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
more2boost = rcu_boost(rnp);
if (more2boost)
@@ -1275,14 +1274,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
}
/*
- * Initialize the RCU-boost waitqueue.
- */
-static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
-{
- init_waitqueue_head(&rnp->boost_wq);
-}
-
-/*
* Create an RCU-boost kthread for the specified node if one does not
* already exist. We only create this kthread for preemptible RCU.
* Returns zero if all is well, a negated errno otherwise.
@@ -1306,12 +1297,17 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
raw_spin_lock_irqsave(&rnp->lock, flags);
rnp->boost_kthread_task = t;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
- wake_up_process(t);
sp.sched_priority = RCU_KTHREAD_PRIO;
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
return 0;
}
+static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp)
+{
+ if (rnp->boost_kthread_task)
+ wake_up_process(rnp->boost_kthread_task);
+}
+
#else /* #ifdef CONFIG_RCU_BOOST */
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
@@ -1328,10 +1324,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
}
-static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
-{
-}
-
static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
struct rcu_node *rnp,
int rnp_index)
@@ -1339,6 +1331,10 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
return 0;
}
+static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp)
+{
+}
+
#endif /* #else #ifdef CONFIG_RCU_BOOST */
#ifndef CONFIG_SMP
@@ -1520,7 +1516,6 @@ int rcu_needs_cpu(int cpu)
{
int c = 0;
int snap;
- int snap_nmi;
int thatcpu;
/* Check for being in the holdoff period. */
@@ -1531,10 +1526,10 @@ int rcu_needs_cpu(int cpu)
for_each_online_cpu(thatcpu) {
if (thatcpu == cpu)
continue;
- snap = per_cpu(rcu_dynticks, thatcpu).dynticks;
- snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi;
+ snap = atomic_add_return(0, &per_cpu(rcu_dynticks,
+ thatcpu).dynticks);
smp_mb(); /* Order sampling of snap with end of grace period. */
- if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) {
+ if ((snap & 0x1) != 0) {
per_cpu(rcu_dyntick_drain, cpu) = 0;
per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
return rcu_needs_cpu_quick_check(cpu);
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index aa0fd72b4bc7..9678cc3650f5 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -69,10 +69,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
rdp->passed_quiesc, rdp->passed_quiesc_completed,
rdp->qs_pending);
#ifdef CONFIG_NO_HZ
- seq_printf(m, " dt=%d/%d dn=%d df=%lu",
- rdp->dynticks->dynticks,
+ seq_printf(m, " dt=%d/%d/%d df=%lu",
+ atomic_read(&rdp->dynticks->dynticks),
rdp->dynticks->dynticks_nesting,
- rdp->dynticks->dynticks_nmi,
+ rdp->dynticks->dynticks_nmi_nesting,
rdp->dynticks_fqs);
#endif /* #ifdef CONFIG_NO_HZ */
seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
@@ -141,9 +141,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
rdp->qs_pending);
#ifdef CONFIG_NO_HZ
seq_printf(m, ",%d,%d,%d,%lu",
- rdp->dynticks->dynticks,
+ atomic_read(&rdp->dynticks->dynticks),
rdp->dynticks->dynticks_nesting,
- rdp->dynticks->dynticks_nmi,
+ rdp->dynticks->dynticks_nmi_nesting,
rdp->dynticks_fqs);
#endif /* #ifdef CONFIG_NO_HZ */
seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
@@ -167,7 +167,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)
{
seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",");
#ifdef CONFIG_NO_HZ
- seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
+ seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
#endif /* #ifdef CONFIG_NO_HZ */
seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n");
#ifdef CONFIG_TREE_PREEMPT_RCU
diff --git a/kernel/sched.c b/kernel/sched.c
index 5e43e9dc65d1..cbb3a0eee58e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2573,7 +2573,26 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu)
if (!next)
smp_send_reschedule(cpu);
}
-#endif
+
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+static int ttwu_activate_remote(struct task_struct *p, int wake_flags)
+{
+ struct rq *rq;
+ int ret = 0;
+
+ rq = __task_rq_lock(p);
+ if (p->on_cpu) {
+ ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+ ttwu_do_wakeup(rq, p, wake_flags);
+ ret = 1;
+ }
+ __task_rq_unlock(rq);
+
+ return ret;
+
+}
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
+#endif /* CONFIG_SMP */
static void ttwu_queue(struct task_struct *p, int cpu)
{
@@ -2631,17 +2650,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
while (p->on_cpu) {
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
/*
- * If called from interrupt context we could have landed in the
- * middle of schedule(), in this case we should take care not
- * to spin on ->on_cpu if p is current, since that would
- * deadlock.
+ * In case the architecture enables interrupts in
+ * context_switch(), we cannot busy wait, since that
+ * would lead to deadlocks when an interrupt hits and
+ * tries to wake up @prev. So bail and do a complete
+ * remote wakeup.
*/
- if (p == current) {
- ttwu_queue(p, cpu);
+ if (ttwu_activate_remote(p, wake_flags))
goto stat;
- }
-#endif
+#else
cpu_relax();
+#endif
}
/*
* Pairs with the smp_wmb() in finish_lock_switch().
@@ -5841,7 +5860,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
idle->state = TASK_RUNNING;
idle->se.exec_start = sched_clock();
- cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
+ do_set_cpus_allowed(idle, cpumask_of(cpu));
/*
* We're having a chicken and egg problem, even though we are
* holding rq->lock, the cpu isn't yet set to this cpu so the
@@ -5929,6 +5948,16 @@ static inline void sched_init_granularity(void)
}
#ifdef CONFIG_SMP
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+ if (p->sched_class && p->sched_class->set_cpus_allowed)
+ p->sched_class->set_cpus_allowed(p, new_mask);
+ else {
+ cpumask_copy(&p->cpus_allowed, new_mask);
+ p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
+ }
+}
+
/*
* This is how migration works:
*
@@ -5974,12 +6003,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
goto out;
}
- if (p->sched_class->set_cpus_allowed)
- p->sched_class->set_cpus_allowed(p, new_mask);
- else {
- cpumask_copy(&p->cpus_allowed, new_mask);
- p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
- }
+ do_set_cpus_allowed(p, new_mask);
/* Can the task run on the task's current CPU? If so, we're done */
if (cpumask_test_cpu(task_cpu(p), new_mask))
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index e32a9b70ee9c..433491c2dc8f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1076,8 +1076,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
se->on_rq = 0;
update_cfs_load(cfs_rq, 0);
account_entity_dequeue(cfs_rq, se);
- update_min_vruntime(cfs_rq);
- update_cfs_shares(cfs_rq);
/*
* Normalize the entity after updating the min_vruntime because the
@@ -1086,6 +1084,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*/
if (!(flags & DEQUEUE_SLEEP))
se->vruntime -= cfs_rq->min_vruntime;
+
+ update_min_vruntime(cfs_rq);
+ update_cfs_shares(cfs_rq);
}
/*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 64b2a37c07d0..88725c939e0b 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1263,6 +1263,7 @@ static int find_lowest_rq(struct task_struct *task)
if (!cpumask_test_cpu(this_cpu, lowest_mask))
this_cpu = -1; /* Skip this_cpu opt if not among lowest */
+ rcu_read_lock();
for_each_domain(cpu, sd) {
if (sd->flags & SD_WAKE_AFFINE) {
int best_cpu;
@@ -1272,15 +1273,20 @@ static int find_lowest_rq(struct task_struct *task)
* remote processor.
*/
if (this_cpu != -1 &&
- cpumask_test_cpu(this_cpu, sched_domain_span(sd)))
+ cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
+ rcu_read_unlock();
return this_cpu;
+ }
best_cpu = cpumask_first_and(lowest_mask,
sched_domain_span(sd));
- if (best_cpu < nr_cpu_ids)
+ if (best_cpu < nr_cpu_ids) {
+ rcu_read_unlock();
return best_cpu;
+ }
}
}
+ rcu_read_unlock();
/*
* And finally, if there were no matches within the domains
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 48ddf431db0e..331e01bcd026 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -37,7 +37,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
#ifdef CONFIG_SMP
/* domain-specific stats */
- preempt_disable();
+ rcu_read_lock();
for_each_domain(cpu, sd) {
enum cpu_idle_type itype;
@@ -64,7 +64,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
sd->ttwu_wake_remote, sd->ttwu_move_affine,
sd->ttwu_move_balance);
}
- preempt_enable();
+ rcu_read_unlock();
#endif
}
kfree(mask_str);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index d017c2c82c44..1ee417fcbfa5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -109,12 +109,18 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
static void ftrace_global_list_func(unsigned long ip,
unsigned long parent_ip)
{
- struct ftrace_ops *op = rcu_dereference_raw(ftrace_global_list); /*see above*/
+ struct ftrace_ops *op;
+
+ if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
+ return;
+ trace_recursion_set(TRACE_GLOBAL_BIT);
+ op = rcu_dereference_raw(ftrace_global_list); /*see above*/
while (op != &ftrace_list_end) {
op->func(ip, parent_ip);
op = rcu_dereference_raw(op->next); /*see above*/
};
+ trace_recursion_clear(TRACE_GLOBAL_BIT);
}
static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip)
@@ -1638,12 +1644,12 @@ static void ftrace_startup_enable(int command)
ftrace_run_update_code(command);
}
-static void ftrace_startup(struct ftrace_ops *ops, int command)
+static int ftrace_startup(struct ftrace_ops *ops, int command)
{
bool hash_enable = true;
if (unlikely(ftrace_disabled))
- return;
+ return -ENODEV;
ftrace_start_up++;
command |= FTRACE_ENABLE_CALLS;
@@ -1662,6 +1668,8 @@ static void ftrace_startup(struct ftrace_ops *ops, int command)
ftrace_hash_rec_enable(ops, 1);
ftrace_startup_enable(command);
+
+ return 0;
}
static void ftrace_shutdown(struct ftrace_ops *ops, int command)
@@ -2501,7 +2509,7 @@ static void __enable_ftrace_function_probe(void)
ret = __register_ftrace_function(&trace_probe_ops);
if (!ret)
- ftrace_startup(&trace_probe_ops, 0);
+ ret = ftrace_startup(&trace_probe_ops, 0);
ftrace_probe_registered = 1;
}
@@ -3466,7 +3474,11 @@ device_initcall(ftrace_nodyn_init);
static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
static inline void ftrace_startup_enable(int command) { }
/* Keep as macros so we do not need to define the commands */
-# define ftrace_startup(ops, command) do { } while (0)
+# define ftrace_startup(ops, command) \
+ ({ \
+ (ops)->flags |= FTRACE_OPS_FL_ENABLED; \
+ 0; \
+ })
# define ftrace_shutdown(ops, command) do { } while (0)
# define ftrace_startup_sysctl() do { } while (0)
# define ftrace_shutdown_sysctl() do { } while (0)
@@ -3484,6 +3496,10 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
{
struct ftrace_ops *op;
+ if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT)))
+ return;
+
+ trace_recursion_set(TRACE_INTERNAL_BIT);
/*
* Some of the ops may be dynamically allocated,
* they must be freed after a synchronize_sched().
@@ -3496,6 +3512,7 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
op = rcu_dereference_raw(op->next);
};
preempt_enable_notrace();
+ trace_recursion_clear(TRACE_INTERNAL_BIT);
}
static void clear_ftrace_swapper(void)
@@ -3799,7 +3816,7 @@ int register_ftrace_function(struct ftrace_ops *ops)
ret = __register_ftrace_function(ops);
if (!ret)
- ftrace_startup(ops, 0);
+ ret = ftrace_startup(ops, 0);
out_unlock:
@@ -4045,7 +4062,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
ftrace_graph_return = retfunc;
ftrace_graph_entry = entryfunc;
- ftrace_startup(&global_ops, FTRACE_START_FUNC_RET);
+ ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET);
out:
mutex_unlock(&ftrace_lock);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0ef7b4b2a1f7..b0c7aa407943 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2216,7 +2216,7 @@ static noinline void trace_recursive_fail(void)
printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
"HC[%lu]:SC[%lu]:NMI[%lu]\n",
- current->trace_recursion,
+ trace_recursion_buffer(),
hardirq_count() >> HARDIRQ_SHIFT,
softirq_count() >> SOFTIRQ_SHIFT,
in_nmi());
@@ -2226,9 +2226,9 @@ static noinline void trace_recursive_fail(void)
static inline int trace_recursive_lock(void)
{
- current->trace_recursion++;
+ trace_recursion_inc();
- if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
+ if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
return 0;
trace_recursive_fail();
@@ -2238,9 +2238,9 @@ static inline int trace_recursive_lock(void)
static inline void trace_recursive_unlock(void)
{
- WARN_ON_ONCE(!current->trace_recursion);
+ WARN_ON_ONCE(!trace_recursion_buffer());
- current->trace_recursion--;
+ trace_recursion_dec();
}
#else
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6b69c4bd306f..229f8591f61d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -784,4 +784,19 @@ extern const char *__stop___trace_bprintk_fmt[];
FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
#include "trace_entries.h"
+/* Only current can touch trace_recursion */
+#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
+#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
+
+/* Ring buffer has the 10 LSB bits to count */
+#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
+
+/* for function tracing recursion */
+#define TRACE_INTERNAL_BIT (1<<11)
+#define TRACE_GLOBAL_BIT (1<<12)
+
+#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0)
+#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0)
+#define trace_recursion_test(bit) ((current)->trace_recursion & (bit))
+
#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 2fe110341359..686ec399f2a8 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1657,7 +1657,12 @@ static struct ftrace_ops trace_ops __initdata =
static __init void event_trace_self_test_with_function(void)
{
- register_ftrace_function(&trace_ops);
+ int ret;
+ ret = register_ftrace_function(&trace_ops);
+ if (WARN_ON(ret < 0)) {
+ pr_info("Failed to enable function tracer for event tests\n");
+ return;
+ }
pr_info("Running tests again, along with the function tracer\n");
event_trace_self_tests();
unregister_ftrace_function(&trace_ops);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index cf535ccedc86..e37de492a9e1 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -353,6 +353,33 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
}
EXPORT_SYMBOL(ftrace_print_symbols_seq);
+#if BITS_PER_LONG == 32
+const char *
+ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
+ const struct trace_print_flags_u64 *symbol_array)
+{
+ int i;
+ const char *ret = p->buffer + p->len;
+
+ for (i = 0; symbol_array[i].name; i++) {
+
+ if (val != symbol_array[i].mask)
+ continue;
+
+ trace_seq_puts(p, symbol_array[i].name);
+ break;
+ }
+
+ if (!p->len)
+ trace_seq_printf(p, "0x%llx", val);
+
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+EXPORT_SYMBOL(ftrace_print_symbols_seq_u64);
+#endif
+
const char *
ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
{
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 7daa4b072e9f..3d0c56ad4792 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -415,15 +415,13 @@ static void watchdog_nmi_disable(int cpu) { return; }
#endif /* CONFIG_HARDLOCKUP_DETECTOR */
/* prepare/enable/disable routines */
-static int watchdog_prepare_cpu(int cpu)
+static void watchdog_prepare_cpu(int cpu)
{
struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
WARN_ON(per_cpu(softlockup_watchdog, cpu));
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = watchdog_timer_fn;
-
- return 0;
}
static int watchdog_enable(int cpu)
@@ -542,17 +540,16 @@ static int __cpuinit
cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
int hotcpu = (unsigned long)hcpu;
- int err = 0;
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- err = watchdog_prepare_cpu(hotcpu);
+ watchdog_prepare_cpu(hotcpu);
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
if (watchdog_enabled)
- err = watchdog_enable(hotcpu);
+ watchdog_enable(hotcpu);
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED: