From b845b517b5e3706a3729f6ea83b88ab85f0725b0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Aug 2008 21:47:09 +0200 Subject: printk: robustify printk Avoid deadlocks against rq->lock and xtime_lock by deferring the klogd wakeup by polling from the timer tick. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 825b4c00fe44..c13d4f182370 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -255,7 +255,7 @@ void tick_nohz_stop_sched_tick(int inidle) next_jiffies = get_next_timer_interrupt(last_jiffies); delta_jiffies = next_jiffies - last_jiffies; - if (rcu_needs_cpu(cpu)) + if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu)) delta_jiffies = 1; /* * Do not stop the tick, if we are only one off -- cgit v1.2.3 From 6441402b1f173fa38e561d3cee7c01c32e5281ad Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Sep 2008 18:46:37 +0200 Subject: clockevents: prevent cpu online to interfere with nohz Impact: rare hang which can be triggered on CPU online. tick_do_timer_cpu keeps track of the CPU which updates jiffies via do_timer. The value -1 is used to signal, that currently no CPU is doing this. There are two cases, where the variable can have this state: boot: necessary for systems where the boot cpu id can be != 0 nohz long idle sleep: When the CPU which did the jiffies update last goes into a long idle sleep it drops the update jiffies duty so another CPU which is not idle can pick it up and keep jiffies going. Using the same value for both situations is wrong, as the CPU online code can see the -1 state when the timer of the newly onlined CPU is setup. The setup for a newly onlined CPU goes through periodic mode and can pick up the do_timer duty without being aware of the nohz / highres mode of the already running system. Use two separate states and make them constants to avoid magic numbers confusion. Signed-off-by: Thomas Gleixner --- kernel/time/tick-common.c | 7 ++++--- kernel/time/tick-internal.h | 4 ++++ kernel/time/tick-sched.c | 8 ++++---- 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 019315ebf9de..b523d095decf 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device); */ ktime_t tick_next_period; ktime_t tick_period; -int tick_do_timer_cpu __read_mostly = -1; +int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; DEFINE_SPINLOCK(tick_device_lock); /* @@ -148,7 +148,7 @@ static void tick_setup_device(struct tick_device *td, * If no cpu took the do_timer update, assign it to * this cpu: */ - if (tick_do_timer_cpu == -1) { + if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { tick_do_timer_cpu = cpu; tick_next_period = ktime_get(); tick_period = ktime_set(0, NSEC_PER_SEC / HZ); @@ -300,7 +300,8 @@ static void tick_shutdown(unsigned int *cpup) if (*cpup == tick_do_timer_cpu) { int cpu = first_cpu(cpu_online_map); - tick_do_timer_cpu = (cpu != NR_CPUS) ? cpu : -1; + tick_do_timer_cpu = (cpu != NR_CPUS) ? cpu : + TICK_DO_TIMER_NONE; } spin_unlock_irqrestore(&tick_device_lock, flags); } diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 6e9db9734aa6..e18014fadf95 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -1,6 +1,10 @@ /* * tick internal variable and functions used by low/high res code */ + +#define TICK_DO_TIMER_NONE -1 +#define TICK_DO_TIMER_BOOT -2 + DECLARE_PER_CPU(struct tick_device, tick_cpu_device); extern spinlock_t tick_device_lock; extern ktime_t tick_next_period; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a87b0468568b..31a14e8caac1 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -221,7 +221,7 @@ void tick_nohz_stop_sched_tick(int inidle) */ if (unlikely(!cpu_online(cpu))) { if (cpu == tick_do_timer_cpu) - tick_do_timer_cpu = -1; + tick_do_timer_cpu = TICK_DO_TIMER_NONE; } if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) @@ -303,7 +303,7 @@ void tick_nohz_stop_sched_tick(int inidle) * invoked. */ if (cpu == tick_do_timer_cpu) - tick_do_timer_cpu = -1; + tick_do_timer_cpu = TICK_DO_TIMER_NONE; ts->idle_sleeps++; @@ -468,7 +468,7 @@ static void tick_nohz_handler(struct clock_event_device *dev) * this duty, then the jiffies update is still serialized by * xtime_lock. */ - if (unlikely(tick_do_timer_cpu == -1)) + if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) tick_do_timer_cpu = cpu; /* Check, if the jiffies need an update */ @@ -570,7 +570,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) * this duty, then the jiffies update is still serialized by * xtime_lock. */ - if (unlikely(tick_do_timer_cpu == -1)) + if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) tick_do_timer_cpu = cpu; #endif -- cgit v1.2.3 From 49d670fb8dd62d3ed4e3ed2513538ea65b051aed Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Sep 2008 18:56:01 +0200 Subject: clockevents: prevent stale tick_next_period for onlining CPUs Impact: possible hang on CPU onlining in timer one shot mode. The tick_next_period variable is only used during boot on nohz/highres enabled systems, but for CPU onlining it needs to be maintained when the per cpu clock events device operates in one shot mode. Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 31a14e8caac1..39019b3f7621 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -75,6 +75,9 @@ static void tick_do_update_jiffies64(ktime_t now) incr * ticks); } do_timer(++ticks); + + /* Keep the tick_next_period variable up to date */ + tick_next_period = ktime_add(last_jiffies_update, tick_period); } write_sequnlock(&xtime_lock); } -- cgit v1.2.3 From ccc7dadf736639da86f3e0c86832c11a66fc8221 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 29 Sep 2008 15:47:42 +0200 Subject: hrtimer: prevent migration of per CPU hrtimers Impact: per CPU hrtimers can be migrated from a dead CPU The hrtimer code has no knowledge about per CPU timers, but we need to prevent the migration of such timers and warn when such a timer is active at migration time. Explicitely mark the timers as per CPU and use a more understandable mode descriptor for the interrupts safe unlocked callback mode, which is used by hrtimer_sleeper and the scheduler code. Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 14 +++++++++++--- kernel/hrtimer.c | 37 +++++++++++++++++++++++++------------ kernel/sched.c | 4 ++-- kernel/time/tick-sched.c | 2 +- kernel/trace/trace_sysprof.c | 2 +- 5 files changed, 40 insertions(+), 19 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index bdd88df1b4e5..2f245fe63bda 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -47,14 +47,22 @@ enum hrtimer_restart { * HRTIMER_CB_IRQSAFE: Callback may run in hardirq context * HRTIMER_CB_IRQSAFE_NO_RESTART: Callback may run in hardirq context and * does not restart the timer - * HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: Callback must run in hardirq context - * Special mode for tick emultation + * HRTIMER_CB_IRQSAFE_PERCPU: Callback must run in hardirq context + * Special mode for tick emulation and + * scheduler timer. Such timers are per + * cpu and not allowed to be migrated on + * cpu unplug. + * HRTIMER_CB_IRQSAFE_UNLOCKED: Callback should run in hardirq context + * with timer->base lock unlocked + * used for timers which call wakeup to + * avoid lock order problems with rq->lock */ enum hrtimer_cb_mode { HRTIMER_CB_SOFTIRQ, HRTIMER_CB_IRQSAFE, HRTIMER_CB_IRQSAFE_NO_RESTART, - HRTIMER_CB_IRQSAFE_NO_SOFTIRQ, + HRTIMER_CB_IRQSAFE_PERCPU, + HRTIMER_CB_IRQSAFE_UNLOCKED, }; /* diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index ace723dd1e52..cdec83e722fa 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -672,13 +672,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, */ BUG_ON(timer->function(timer) != HRTIMER_NORESTART); return 1; - case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: + case HRTIMER_CB_IRQSAFE_PERCPU: + case HRTIMER_CB_IRQSAFE_UNLOCKED: /* * This is solely for the sched tick emulation with * dynamic tick support to ensure that we do not * restart the tick right on the edge and end up with * the tick timer in the softirq ! The calling site - * takes care of this. + * takes care of this. Also used for hrtimer sleeper ! */ debug_hrtimer_deactivate(timer); return 1; @@ -1245,7 +1246,8 @@ static void __run_hrtimer(struct hrtimer *timer) timer_stats_account_hrtimer(timer); fn = timer->function; - if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) { + if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU || + timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) { /* * Used for scheduler timers, avoid lock inversion with * rq->lock and tasklist_lock. @@ -1452,7 +1454,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) sl->timer.function = hrtimer_wakeup; sl->task = task; #ifdef CONFIG_HIGH_RES_TIMERS - sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; + sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; #endif } @@ -1592,7 +1594,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu) #ifdef CONFIG_HOTPLUG_CPU static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base, - struct hrtimer_clock_base *new_base) + struct hrtimer_clock_base *new_base, int dcpu) { struct hrtimer *timer; struct rb_node *node; @@ -1603,6 +1605,18 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base, BUG_ON(hrtimer_callback_running(timer)); debug_hrtimer_deactivate(timer); + /* + * Should not happen. Per CPU timers should be + * canceled _before_ the migration code is called + */ + if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) { + __remove_hrtimer(timer, old_base, + HRTIMER_STATE_INACTIVE, 0); + WARN(1, "hrtimer (%p %p)active but cpu %d dead\n", + timer, timer->function, dcpu); + continue; + } + /* * Mark it as STATE_MIGRATE not INACTIVE otherwise the * timer could be seen as !active and just vanish away @@ -1619,12 +1633,11 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base, /* * Happens with high res enabled when the timer was * already expired and the callback mode is - * HRTIMER_CB_IRQSAFE_NO_SOFTIRQ - * (hrtimer_sleeper). The enqueue code does not move - * them to the soft irq pending list for - * performance/latency reasons, but in the migration - * state, we need to do that otherwise we end up with - * a stale timer. + * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The + * enqueue code does not move them to the soft irq + * pending list for performance/latency reasons, but + * in the migration state, we need to do that + * otherwise we end up with a stale timer. */ if (timer->state == HRTIMER_STATE_MIGRATE) { timer->state = HRTIMER_STATE_PENDING; @@ -1682,7 +1695,7 @@ static void migrate_hrtimers(int cpu) for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { if (migrate_hrtimer_list(&old_base->clock_base[i], - &new_base->clock_base[i])) + &new_base->clock_base[i], cpu)) raise = 1; } diff --git a/kernel/sched.c b/kernel/sched.c index 13dd2db9fb2d..ad1962dc0aa2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -201,7 +201,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rt_b->rt_period_timer.function = sched_rt_period_timer; - rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; + rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; } static void start_rt_bandwidth(struct rt_bandwidth *rt_b) @@ -1119,7 +1119,7 @@ static void init_rq_hrtick(struct rq *rq) hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rq->hrtick_timer.function = hrtick; - rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; + rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; } #else static inline void hrtick_clear(struct rq *rq) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 39019b3f7621..cb02324bdb88 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -625,7 +625,7 @@ void tick_setup_sched_timer(void) */ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); ts->sched_timer.function = tick_sched_timer; - ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; + ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; /* Get the next period (per cpu) */ ts->sched_timer.expires = tick_init_jiffy_update(); diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index bb948e52ce20..db58fb66a135 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -202,7 +202,7 @@ static void start_stack_timer(int cpu) hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = stack_trace_timer_fn; - hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; + hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); } -- cgit v1.2.3 From 8083e4ad970e4eb567e31037060cdd4ba346f0c0 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Mon, 4 Aug 2008 11:59:11 -0700 Subject: [CPUFREQ][5/6] cpufreq: Changes to get_cpu_idle_time_us(), used by ondemand governor export get_cpu_idle_time_us() for it to be used in ondemand governor. Last update time can be current time when the CPU is currently non-idle, accounting for the busy time since last idle. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- include/linux/tick.h | 2 +- kernel/time/tick-sched.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/include/linux/tick.h b/include/linux/tick.h index 8cf8cfe2cc97..98921a3e1aa8 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -126,7 +126,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void) return len; } static inline void tick_nohz_stop_idle(int cpu) { } -static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return 0; } +static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } # endif /* !NO_HZ */ #endif diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index cb02324bdb88..a4d219398167 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -190,9 +191,17 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); - *last_update_time = ktime_to_us(ts->idle_lastupdate); + if (!tick_nohz_enabled) + return -1; + + if (ts->idle_active) + *last_update_time = ktime_to_us(ts->idle_lastupdate); + else + *last_update_time = ktime_to_us(ktime_get()); + return ktime_to_us(ts->idle_sleeptime); } +EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); /** * tick_nohz_stop_sched_tick - stop the idle tick from the idle task -- cgit v1.2.3