summaryrefslogtreecommitdiff
path: root/kernel/sched.c
diff options
context:
space:
mode:
authorPeter Zijlstra <a.p.zijlstra@chello.nl>2010-02-15 14:45:54 +0100
committerThomas Gleixner <tglx@linutronix.de>2010-02-16 15:13:59 +0100
commit0970d2992dfd7d5ec2c787417cf464f01eeaf42a (patch)
tree5f0077dcafda733ef3f1e41d218a473978ce6a9f /kernel/sched.c
parent9000f05c6d1607f79c0deacf42b09693be673f4c (diff)
downloadlwn-0970d2992dfd7d5ec2c787417cf464f01eeaf42a.tar.gz
lwn-0970d2992dfd7d5ec2c787417cf464f01eeaf42a.zip
sched: Fix race between ttwu() and task_rq_lock()
Thomas found that due to ttwu() changing a task's cpu without holding the rq->lock, task_rq_lock() might end up locking the wrong rq. Avoid this by serializing against TASK_WAKING. Reported-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <1266241712.15770.420.camel@laptop> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--kernel/sched.c73
1 files changed, 46 insertions, 27 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 4d78aef4559d..404e2017c0cf 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -941,16 +941,33 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
/*
+ * Check whether the task is waking, we use this to synchronize against
+ * ttwu() so that task_cpu() reports a stable number.
+ *
+ * We need to make an exception for PF_STARTING tasks because the fork
+ * path might require task_rq_lock() to work, eg. it can call
+ * set_cpus_allowed_ptr() from the cpuset clone_ns code.
+ */
+static inline int task_is_waking(struct task_struct *p)
+{
+ return unlikely((p->state == TASK_WAKING) && !(p->flags & PF_STARTING));
+}
+
+/*
* __task_rq_lock - lock the runqueue a given task resides on.
* Must be called interrupts disabled.
*/
static inline struct rq *__task_rq_lock(struct task_struct *p)
__acquires(rq->lock)
{
+ struct rq *rq;
+
for (;;) {
- struct rq *rq = task_rq(p);
+ while (task_is_waking(p))
+ cpu_relax();
+ rq = task_rq(p);
raw_spin_lock(&rq->lock);
- if (likely(rq == task_rq(p)))
+ if (likely(rq == task_rq(p) && !task_is_waking(p)))
return rq;
raw_spin_unlock(&rq->lock);
}
@@ -967,10 +984,12 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
struct rq *rq;
for (;;) {
+ while (task_is_waking(p))
+ cpu_relax();
local_irq_save(*flags);
rq = task_rq(p);
raw_spin_lock(&rq->lock);
- if (likely(rq == task_rq(p)))
+ if (likely(rq == task_rq(p) && !task_is_waking(p)))
return rq;
raw_spin_unlock_irqrestore(&rq->lock, *flags);
}
@@ -2408,14 +2427,27 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
__task_rq_unlock(rq);
cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
- if (cpu != orig_cpu)
+ if (cpu != orig_cpu) {
+ /*
+ * Since we migrate the task without holding any rq->lock,
+ * we need to be careful with task_rq_lock(), since that
+ * might end up locking an invalid rq.
+ */
set_task_cpu(p, cpu);
+ }
- rq = __task_rq_lock(p);
+ rq = cpu_rq(cpu);
+ raw_spin_lock(&rq->lock);
update_rq_clock(rq);
+ /*
+ * We migrated the task without holding either rq->lock, however
+ * since the task is not on the task list itself, nobody else
+ * will try and migrate the task, hence the rq should match the
+ * cpu we just moved it to.
+ */
+ WARN_ON(task_cpu(p) != cpu);
WARN_ON(p->state != TASK_WAKING);
- cpu = task_cpu(p);
#ifdef CONFIG_SCHEDSTATS
schedstat_inc(rq, ttwu_count);
@@ -2647,7 +2679,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
{
unsigned long flags;
struct rq *rq;
- int cpu __maybe_unused = get_cpu();
+ int cpu = get_cpu();
#ifdef CONFIG_SMP
/*
@@ -2663,7 +2695,13 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
set_task_cpu(p, cpu);
#endif
- rq = task_rq_lock(p, &flags);
+ /*
+ * Since the task is not on the rq and we still have TASK_WAKING set
+ * nobody else will migrate this task.
+ */
+ rq = cpu_rq(cpu);
+ raw_spin_lock_irqsave(&rq->lock, flags);
+
BUG_ON(p->state != TASK_WAKING);
p->state = TASK_RUNNING;
update_rq_clock(rq);
@@ -7156,27 +7194,8 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
struct rq *rq;
int ret = 0;
- /*
- * Since we rely on wake-ups to migrate sleeping tasks, don't change
- * the ->cpus_allowed mask from under waking tasks, which would be
- * possible when we change rq->lock in ttwu(), so synchronize against
- * TASK_WAKING to avoid that.
- *
- * Make an exception for freshly cloned tasks, since cpuset namespaces
- * might move the task about, we have to validate the target in
- * wake_up_new_task() anyway since the cpu might have gone away.
- */
-again:
- while (p->state == TASK_WAKING && !(p->flags & PF_STARTING))
- cpu_relax();
-
rq = task_rq_lock(p, &flags);
- if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) {
- task_rq_unlock(rq, &flags);
- goto again;
- }
-
if (!cpumask_intersects(new_mask, cpu_active_mask)) {
ret = -EINVAL;
goto out;