summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Thomas Gleixner <tglx@linutronix.de> 2008-09-29 15:47:42 +0200
committer Thomas Gleixner <tglx@linutronix.de> 2008-09-29 17:09:14 +0200
commit ccc7dadf736639da86f3e0c86832c11a66fc8221 (patch)
tree 3f1c0df0a291fec4a10d72cc1ab278ad81e697c5
parent b00c1a99e7758f794923c61e5cd55268d61c9469 (diff)
download lwn-ccc7dadf736639da86f3e0c86832c11a66fc8221.tar.gz
lwn-ccc7dadf736639da86f3e0c86832c11a66fc8221.zip
hrtimer: prevent migration of per CPU hrtimers
Impact: per CPU hrtimers can be migrated from a dead CPU. The hrtimer code has no knowledge about per CPU timers, but we need to prevent the migration of such timers and warn when such a timer is active at migration time. Explicitly mark the timers as per CPU and use a more understandable mode descriptor for the interrupt safe unlocked callback mode, which is used by hrtimer_sleeper and the scheduler code. Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- include/linux/hrtimer.h 14
-rw-r--r-- kernel/hrtimer.c 37
-rw-r--r-- kernel/sched.c 4
-rw-r--r-- kernel/time/tick-sched.c 2
-rw-r--r-- kernel/trace/trace_sysprof.c 2
5 files changed, 40 insertions, 19 deletions
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index bdd88df1b4e5..2f245fe63bda 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -47,14 +47,22 @@ enum hrtimer_restart {
* HRTIMER_CB_IRQSAFE: Callback may run in hardirq context
* HRTIMER_CB_IRQSAFE_NO_RESTART: Callback may run in hardirq context and
* does not restart the timer
- * HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: Callback must run in hardirq context
- * Special mode for tick emultation
+ * HRTIMER_CB_IRQSAFE_PERCPU: Callback must run in hardirq context
+ * Special mode for tick emulation and
+ * scheduler timer. Such timers are per
+ * cpu and not allowed to be migrated on
+ * cpu unplug.
+ * HRTIMER_CB_IRQSAFE_UNLOCKED: Callback should run in hardirq context
+ * with timer->base lock unlocked
+ * used for timers which call wakeup to
+ * avoid lock order problems with rq->lock
*/
enum hrtimer_cb_mode {
HRTIMER_CB_SOFTIRQ,
HRTIMER_CB_IRQSAFE,
HRTIMER_CB_IRQSAFE_NO_RESTART,
- HRTIMER_CB_IRQSAFE_NO_SOFTIRQ,
+ HRTIMER_CB_IRQSAFE_PERCPU,
+ HRTIMER_CB_IRQSAFE_UNLOCKED,
};
/*
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ace723dd1e52..cdec83e722fa 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -672,13 +672,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
*/
BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
return 1;
- case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:
+ case HRTIMER_CB_IRQSAFE_PERCPU:
+ case HRTIMER_CB_IRQSAFE_UNLOCKED:
/*
* This is solely for the sched tick emulation with
* dynamic tick support to ensure that we do not
* restart the tick right on the edge and end up with
* the tick timer in the softirq ! The calling site
- * takes care of this.
+ * takes care of this. Also used for hrtimer sleeper !
*/
debug_hrtimer_deactivate(timer);
return 1;
@@ -1245,7 +1246,8 @@ static void __run_hrtimer(struct hrtimer *timer)
timer_stats_account_hrtimer(timer);
fn = timer->function;
- if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
+ if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
+ timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) {
/*
* Used for scheduler timers, avoid lock inversion with
* rq->lock and tasklist_lock.
@@ -1452,7 +1454,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
sl->timer.function = hrtimer_wakeup;
sl->task = task;
#ifdef CONFIG_HIGH_RES_TIMERS
- sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+ sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
#endif
}
@@ -1592,7 +1594,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
#ifdef CONFIG_HOTPLUG_CPU
static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
- struct hrtimer_clock_base *new_base)
+ struct hrtimer_clock_base *new_base, int dcpu)
{
struct hrtimer *timer;
struct rb_node *node;
@@ -1604,6 +1606,18 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
debug_hrtimer_deactivate(timer);
/*
+ * Should not happen. Per CPU timers should be
+ * canceled _before_ the migration code is called
+ */
+ if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) {
+ __remove_hrtimer(timer, old_base,
+ HRTIMER_STATE_INACTIVE, 0);
+ WARN(1, "hrtimer (%p %p)active but cpu %d dead\n",
+ timer, timer->function, dcpu);
+ continue;
+ }
+
+ /*
* Mark it as STATE_MIGRATE not INACTIVE otherwise the
* timer could be seen as !active and just vanish away
* under us on another CPU
@@ -1619,12 +1633,11 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
/*
* Happens with high res enabled when the timer was
* already expired and the callback mode is
- * HRTIMER_CB_IRQSAFE_NO_SOFTIRQ
- * (hrtimer_sleeper). The enqueue code does not move
- * them to the soft irq pending list for
- * performance/latency reasons, but in the migration
- * state, we need to do that otherwise we end up with
- * a stale timer.
+ * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The
+ * enqueue code does not move them to the soft irq
+ * pending list for performance/latency reasons, but
+ * in the migration state, we need to do that
+ * otherwise we end up with a stale timer.
*/
if (timer->state == HRTIMER_STATE_MIGRATE) {
timer->state = HRTIMER_STATE_PENDING;
@@ -1682,7 +1695,7 @@ static void migrate_hrtimers(int cpu)
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
if (migrate_hrtimer_list(&old_base->clock_base[i],
- &new_base->clock_base[i]))
+ &new_base->clock_base[i], cpu))
raise = 1;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index 13dd2db9fb2d..ad1962dc0aa2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -201,7 +201,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
hrtimer_init(&rt_b->rt_period_timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
rt_b->rt_period_timer.function = sched_rt_period_timer;
- rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+ rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
}
static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
@@ -1119,7 +1119,7 @@ static void init_rq_hrtick(struct rq *rq)
hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
rq->hrtick_timer.function = hrtick;
- rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+ rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
}
#else
static inline void hrtick_clear(struct rq *rq)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 39019b3f7621..cb02324bdb88 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -625,7 +625,7 @@ void tick_setup_sched_timer(void)
*/
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
ts->sched_timer.function = tick_sched_timer;
- ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+ ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
/* Get the next period (per cpu) */
ts->sched_timer.expires = tick_init_jiffy_update();
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index bb948e52ce20..db58fb66a135 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -202,7 +202,7 @@ static void start_stack_timer(int cpu)
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = stack_trace_timer_fn;
- hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+ hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
}