diff options
Diffstat (limited to 'kernel/time/hrtimer.c')
| -rw-r--r-- | kernel/time/hrtimer.c | 1381 |
1 files changed, 797 insertions, 584 deletions
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 80fe3749d2db..5bd6efe598f0 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> + * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org> * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner * @@ -50,6 +50,36 @@ #include "tick-internal.h" /* + * Constants to set the queued state of the timer (INACTIVE, ENQUEUED) + * + * The callback state is kept separate in the CPU base because having it in + * the timer would required touching the timer after the callback, which + * makes it impossible to free the timer from the callback function. + * + * Therefore we track the callback state in: + * + * timer->base->cpu_base->running == timer + * + * On SMP it is possible to have a "callback function running and enqueued" + * status. It happens for example when a posix timer expired and the callback + * queued a signal. Between dropping the lock which protects the posix timer + * and reacquiring the base lock of the hrtimer, another CPU can deliver the + * signal and rearm the timer. + * + * All state transitions are protected by cpu_base->lock. + */ +#define HRTIMER_STATE_INACTIVE false +#define HRTIMER_STATE_ENQUEUED true + +/* + * The resolution of the clocks. The resolution value is returned in + * the clock_getres() system call to give application programmers an + * idea of the (in)accuracy of timers. Timer values are rounded up to + * this resolution values. 
+ */ +#define HIGH_RES_NSEC 1 + +/* * Masks for selecting the soft and hard context timers from * cpu_base->active */ @@ -58,6 +88,9 @@ #define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT) #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) +static void retrigger_next_event(void *arg); +static ktime_t __hrtimer_cb_get_time(clockid_t clock_id); + /* * The timer bases: * @@ -66,90 +99,75 @@ * to reach a base using a clockid, hrtimer_clockid_to_base() * is used to convert from clockid to the proper hrtimer_base_type. */ + +#define BASE_INIT(idx, cid) \ + [idx] = { .index = idx, .clockid = cid } + DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = { .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), - .clock_base = - { - { - .index = HRTIMER_BASE_MONOTONIC, - .clockid = CLOCK_MONOTONIC, - .get_time = &ktime_get, - }, - { - .index = HRTIMER_BASE_REALTIME, - .clockid = CLOCK_REALTIME, - .get_time = &ktime_get_real, - }, - { - .index = HRTIMER_BASE_BOOTTIME, - .clockid = CLOCK_BOOTTIME, - .get_time = &ktime_get_boottime, - }, - { - .index = HRTIMER_BASE_TAI, - .clockid = CLOCK_TAI, - .get_time = &ktime_get_clocktai, - }, - { - .index = HRTIMER_BASE_MONOTONIC_SOFT, - .clockid = CLOCK_MONOTONIC, - .get_time = &ktime_get, - }, - { - .index = HRTIMER_BASE_REALTIME_SOFT, - .clockid = CLOCK_REALTIME, - .get_time = &ktime_get_real, - }, - { - .index = HRTIMER_BASE_BOOTTIME_SOFT, - .clockid = CLOCK_BOOTTIME, - .get_time = &ktime_get_boottime, - }, - { - .index = HRTIMER_BASE_TAI_SOFT, - .clockid = CLOCK_TAI, - .get_time = &ktime_get_clocktai, - }, - } + .clock_base = { + BASE_INIT(HRTIMER_BASE_MONOTONIC, CLOCK_MONOTONIC), + BASE_INIT(HRTIMER_BASE_REALTIME, CLOCK_REALTIME), + BASE_INIT(HRTIMER_BASE_BOOTTIME, CLOCK_BOOTTIME), + BASE_INIT(HRTIMER_BASE_TAI, CLOCK_TAI), + BASE_INIT(HRTIMER_BASE_MONOTONIC_SOFT, CLOCK_MONOTONIC), + BASE_INIT(HRTIMER_BASE_REALTIME_SOFT, CLOCK_REALTIME), + BASE_INIT(HRTIMER_BASE_BOOTTIME_SOFT, CLOCK_BOOTTIME), + 
BASE_INIT(HRTIMER_BASE_TAI_SOFT, CLOCK_TAI), + }, + .csd = CSD_INIT(retrigger_next_event, NULL) }; -static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { - /* Make sure we catch unsupported clockids */ - [0 ... MAX_CLOCKS - 1] = HRTIMER_MAX_CLOCK_BASES, +static inline bool hrtimer_base_is_online(struct hrtimer_cpu_base *base) +{ + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return true; + else + return likely(base->online); +} - [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, - [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, - [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, - [CLOCK_TAI] = HRTIMER_BASE_TAI, -}; +#ifdef CONFIG_HIGH_RES_TIMERS +DEFINE_STATIC_KEY_FALSE(hrtimer_highres_enabled_key); + +static void hrtimer_hres_workfn(struct work_struct *work) +{ + static_branch_enable(&hrtimer_highres_enabled_key); +} + +static DECLARE_WORK(hrtimer_hres_work, hrtimer_hres_workfn); + +static inline void hrtimer_schedule_hres_work(void) +{ + if (!hrtimer_highres_enabled()) + schedule_work(&hrtimer_hres_work); +} +#else +static inline void hrtimer_schedule_hres_work(void) { } +#endif /* * Functions and macros which are different for UP/SMP systems are kept in a * single place */ #ifdef CONFIG_SMP - /* * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base() * such that hrtimer_callback_running() can unconditionally dereference * timer->base->cpu_base */ static struct hrtimer_cpu_base migration_cpu_base = { - .clock_base = { { - .cpu_base = &migration_cpu_base, - .seq = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq, - &migration_cpu_base.lock), - }, }, + .clock_base = { + [0] = { + .cpu_base = &migration_cpu_base, + .seq = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq, + &migration_cpu_base.lock), + }, + }, }; #define migration_base migration_cpu_base.clock_base[0] -static inline bool is_migration_base(struct hrtimer_clock_base *base) -{ - return base == &migration_base; -} - /* * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock * means that 
all timers which are tied to this base via timer->base are @@ -162,15 +180,13 @@ static inline bool is_migration_base(struct hrtimer_clock_base *base) * possible to set timer->base = &migration_base and drop the lock: the timer * remains locked. */ -static -struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, - unsigned long *flags) +static struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, + unsigned long *flags) __acquires(&timer->base->lock) { - struct hrtimer_clock_base *base; - for (;;) { - base = READ_ONCE(timer->base); + struct hrtimer_clock_base *base = READ_ONCE(timer->base); + if (likely(base != &migration_base)) { raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); if (likely(base == timer->base)) @@ -183,27 +199,54 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, } /* - * We do not migrate the timer when it is expiring before the next - * event on the target cpu. When high resolution is enabled, we cannot - * reprogram the target cpu hardware and we would cause it to fire - * late. To keep it simple, we handle the high resolution enabled and - * disabled case similar. + * Check if the elected target is suitable considering its next + * event and the hotplug state of the current CPU. + * + * If the elected target is remote and its next event is after the timer + * to queue, then a remote reprogram is necessary. However there is no + * guarantee the IPI handling the operation would arrive in time to meet + * the high resolution deadline. In this case the local CPU becomes a + * preferred target, unless it is offline. + * + * High and low resolution modes are handled the same way for simplicity. * * Called with cpu_base->lock of target cpu held. 
*/ -static int -hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) +static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base, + struct hrtimer_cpu_base *new_cpu_base, + struct hrtimer_cpu_base *this_cpu_base) { ktime_t expires; + /* + * The local CPU clockevent can be reprogrammed. Also get_target_base() + * guarantees it is online. + */ + if (new_cpu_base == this_cpu_base) + return true; + + /* + * The offline local CPU can't be the default target if the + * next remote target event is after this timer. Keep the + * elected new base. An IPI will be issued to reprogram + * it as a last resort. + */ + if (!hrtimer_base_is_online(this_cpu_base)) + return true; + expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); - return expires < new_base->cpu_base->expires_next; + + return expires >= new_base->cpu_base->expires_next; } -static inline -struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, - int pinned) +static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, bool pinned) { + if (!hrtimer_base_is_online(base)) { + int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER)); + + return &per_cpu(hrtimer_bases, cpu); + } + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) if (static_branch_likely(&timers_migration_enabled) && !pinned) return &per_cpu(hrtimer_bases, get_nohz_timer_target()); @@ -224,8 +267,7 @@ struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, * the timer callback is currently running. 
*/ static inline struct hrtimer_clock_base * -switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, - int pinned) +switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, bool pinned) { struct hrtimer_cpu_base *new_cpu_base, *this_cpu_base; struct hrtimer_clock_base *new_base; @@ -238,13 +280,12 @@ again: if (base != new_base) { /* - * We are trying to move timer to new_base. - * However we can't change timer's base while it is running, - * so we keep it on the same CPU. No hassle vs. reprogramming - * the event source in the high resolution case. The softirq - * code will take care of this when the timer function has - * completed. There is no conflict as we hold the lock until - * the timer is enqueued. + * We are trying to move timer to new_base. However we can't + * change timer's base while it is running, so we keep it on + * the same CPU. No hassle vs. reprogramming the event source + * in the high resolution case. The remote CPU will take care + * of this when the timer function has completed. There is no + * conflict as we hold the lock until the timer is enqueued. 
*/ if (unlikely(hrtimer_callback_running(timer))) return base; @@ -254,8 +295,7 @@ again: raw_spin_unlock(&base->cpu_base->lock); raw_spin_lock(&new_base->cpu_base->lock); - if (new_cpu_base != this_cpu_base && - hrtimer_check_target(timer, new_base)) { + if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, this_cpu_base)) { raw_spin_unlock(&new_base->cpu_base->lock); raw_spin_lock(&base->cpu_base->lock); new_cpu_base = this_cpu_base; @@ -264,8 +304,7 @@ again: } WRITE_ONCE(timer->base, new_base); } else { - if (new_cpu_base != this_cpu_base && - hrtimer_check_target(timer, new_base)) { + if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, this_cpu_base)) { new_cpu_base = this_cpu_base; goto again; } @@ -275,19 +314,13 @@ again: #else /* CONFIG_SMP */ -static inline bool is_migration_base(struct hrtimer_clock_base *base) -{ - return false; -} - -static inline struct hrtimer_clock_base * -lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) +static inline struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, + unsigned long *flags) __acquires(&timer->base->cpu_base->lock) { struct hrtimer_clock_base *base = timer->base; raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); - return base; } @@ -322,7 +355,7 @@ s64 __ktime_divns(const ktime_t kt, s64 div) return dclc < 0 ? -tmp : tmp; } EXPORT_SYMBOL_GPL(__ktime_divns); -#endif /* BITS_PER_LONG >= 64 */ +#endif /* BITS_PER_LONG < 64 */ /* * Add two ktime values and do a safety check for overflow: @@ -349,7 +382,7 @@ static const struct debug_obj_descr hrtimer_debug_descr; static void *hrtimer_debug_hint(void *addr) { - return ((struct hrtimer *) addr)->function; + return ACCESS_PRIVATE((struct hrtimer *)addr, function); } /* @@ -404,12 +437,37 @@ static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state) } } +/* Stub timer callback for improperly used timers. 
*/ +static enum hrtimer_restart stub_timer(struct hrtimer *unused) +{ + WARN_ON_ONCE(1); + return HRTIMER_NORESTART; +} + +/* + * hrtimer_fixup_assert_init is called when: + * - an untracked/uninit-ed object is found + */ +static bool hrtimer_fixup_assert_init(void *addr, enum debug_obj_state state) +{ + struct hrtimer *timer = addr; + + switch (state) { + case ODEBUG_STATE_NOTAVAILABLE: + hrtimer_setup(timer, stub_timer, CLOCK_MONOTONIC, 0); + return true; + default: + return false; + } +} + static const struct debug_obj_descr hrtimer_debug_descr = { - .name = "hrtimer", - .debug_hint = hrtimer_debug_hint, - .fixup_init = hrtimer_fixup_init, - .fixup_activate = hrtimer_fixup_activate, - .fixup_free = hrtimer_fixup_free, + .name = "hrtimer", + .debug_hint = hrtimer_debug_hint, + .fixup_init = hrtimer_fixup_init, + .fixup_activate = hrtimer_fixup_activate, + .fixup_free = hrtimer_fixup_free, + .fixup_assert_init = hrtimer_fixup_assert_init, }; static inline void debug_hrtimer_init(struct hrtimer *timer) @@ -422,8 +480,7 @@ static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer) debug_object_init_on_stack(timer, &hrtimer_debug_descr); } -static inline void debug_hrtimer_activate(struct hrtimer *timer, - enum hrtimer_mode mode) +static inline void debug_hrtimer_activate(struct hrtimer *timer, enum hrtimer_mode mode) { debug_object_activate(timer, &hrtimer_debug_descr); } @@ -433,6 +490,11 @@ static inline void debug_hrtimer_deactivate(struct hrtimer *timer) debug_object_deactivate(timer, &hrtimer_debug_descr); } +static inline void debug_hrtimer_assert_init(struct hrtimer *timer) +{ + debug_object_assert_init(timer, &hrtimer_debug_descr); +} + void destroy_hrtimer_on_stack(struct hrtimer *timer) { debug_object_free(timer, &hrtimer_debug_descr); @@ -443,100 +505,98 @@ EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack); static inline void debug_hrtimer_init(struct hrtimer *timer) { } static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer) { } 
-static inline void debug_hrtimer_activate(struct hrtimer *timer, - enum hrtimer_mode mode) { } +static inline void debug_hrtimer_activate(struct hrtimer *timer, enum hrtimer_mode mode) { } static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } +static inline void debug_hrtimer_assert_init(struct hrtimer *timer) { } #endif -static inline void -debug_init(struct hrtimer *timer, clockid_t clockid, - enum hrtimer_mode mode) +static inline void debug_setup(struct hrtimer *timer, clockid_t clockid, enum hrtimer_mode mode) { debug_hrtimer_init(timer); - trace_hrtimer_init(timer, clockid, mode); + trace_hrtimer_setup(timer, clockid, mode); } -static inline void debug_init_on_stack(struct hrtimer *timer, clockid_t clockid, - enum hrtimer_mode mode) +static inline void debug_setup_on_stack(struct hrtimer *timer, clockid_t clockid, + enum hrtimer_mode mode) { debug_hrtimer_init_on_stack(timer); - trace_hrtimer_init(timer, clockid, mode); + trace_hrtimer_setup(timer, clockid, mode); } -static inline void debug_activate(struct hrtimer *timer, - enum hrtimer_mode mode) +static inline void debug_activate(struct hrtimer *timer, enum hrtimer_mode mode, bool was_armed) { debug_hrtimer_activate(timer, mode); - trace_hrtimer_start(timer, mode); + trace_hrtimer_start(timer, mode, was_armed); } -static inline void debug_deactivate(struct hrtimer *timer) -{ - debug_hrtimer_deactivate(timer); - trace_hrtimer_cancel(timer); -} +#define for_each_active_base(base, cpu_base, active) \ + for (unsigned int idx = ffs(active); idx--; idx = ffs((active))) \ + for (bool done = false; !done; active &= ~(1U << idx)) \ + for (base = &cpu_base->clock_base[idx]; !done; done = true) + +#define hrtimer_from_timerqueue_node(_n) container_of_const(_n, struct hrtimer, node) -static struct hrtimer_clock_base * -__next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) +#if defined(CONFIG_NO_HZ_COMMON) +/* + * Same as hrtimer_bases_next_event() below, but skips the excluded timer 
and + * does not update cpu_base->next_timer/expires. + */ +static ktime_t hrtimer_bases_next_event_without(struct hrtimer_cpu_base *cpu_base, + const struct hrtimer *exclude, + unsigned int active, ktime_t expires_next) { - unsigned int idx; + struct hrtimer_clock_base *base; + ktime_t expires; + + lockdep_assert_held(&cpu_base->lock); - if (!*active) - return NULL; + for_each_active_base(base, cpu_base, active) { + expires = ktime_sub(base->expires_next, base->offset); + if (expires >= expires_next) + continue; - idx = __ffs(*active); - *active &= ~(1U << idx); + /* + * If the excluded timer is the first on this base evaluate the + * next timer. + */ + struct timerqueue_linked_node *node = timerqueue_linked_first(&base->active); - return &cpu_base->clock_base[idx]; + if (unlikely(&exclude->node == node)) { + node = timerqueue_linked_next(node); + if (!node) + continue; + expires = ktime_sub(node->expires, base->offset); + if (expires >= expires_next) + continue; + } + expires_next = expires; + } + /* If base->offset changed, the result might be negative */ + return max(expires_next, 0); } +#endif -#define for_each_active_base(base, cpu_base, active) \ - while ((base = __next_base((cpu_base), &(active)))) +static __always_inline struct hrtimer *clock_base_next_timer(struct hrtimer_clock_base *base) +{ + struct timerqueue_linked_node *next = timerqueue_linked_first(&base->active); -static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, - const struct hrtimer *exclude, - unsigned int active, - ktime_t expires_next) + return hrtimer_from_timerqueue_node(next); +} + +/* Find the base with the earliest expiry */ +static void hrtimer_bases_first(struct hrtimer_cpu_base *cpu_base,unsigned int active, + ktime_t *expires_next, struct hrtimer **next_timer) { struct hrtimer_clock_base *base; ktime_t expires; for_each_active_base(base, cpu_base, active) { - struct timerqueue_node *next; - struct hrtimer *timer; - - next = 
timerqueue_getnext(&base->active); - timer = container_of(next, struct hrtimer, node); - if (timer == exclude) { - /* Get to the next timer in the queue. */ - next = timerqueue_iterate_next(next); - if (!next) - continue; - - timer = container_of(next, struct hrtimer, node); - } - expires = ktime_sub(hrtimer_get_expires(timer), base->offset); - if (expires < expires_next) { - expires_next = expires; - - /* Skip cpu_base update if a timer is being excluded. */ - if (exclude) - continue; - - if (timer->is_soft) - cpu_base->softirq_next_timer = timer; - else - cpu_base->next_timer = timer; + expires = ktime_sub(base->expires_next, base->offset); + if (expires < *expires_next) { + *expires_next = expires; + *next_timer = clock_base_next_timer(base); } } - /* - * clock_was_set() might have changed base->offset of any of - * the clock bases so the result might be negative. Fix it up - * to prevent a false positive in clockevents_program_event(). - */ - if (expires_next < 0) - expires_next = 0; - return expires_next; } /* @@ -559,30 +619,28 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, * - HRTIMER_ACTIVE_SOFT, or * - HRTIMER_ACTIVE_HARD. 
*/ -static ktime_t -__hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask) +static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask) { - unsigned int active; struct hrtimer *next_timer = NULL; ktime_t expires_next = KTIME_MAX; + unsigned int active; + + lockdep_assert_held(&cpu_base->lock); if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) { active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; - cpu_base->softirq_next_timer = NULL; - expires_next = __hrtimer_next_event_base(cpu_base, NULL, - active, KTIME_MAX); - - next_timer = cpu_base->softirq_next_timer; + if (active) + hrtimer_bases_first(cpu_base, active, &expires_next, &next_timer); + cpu_base->softirq_next_timer = next_timer; } if (active_mask & HRTIMER_ACTIVE_HARD) { active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; + if (active) + hrtimer_bases_first(cpu_base, active, &expires_next, &next_timer); cpu_base->next_timer = next_timer; - expires_next = __hrtimer_next_event_base(cpu_base, NULL, active, - expires_next); } - - return expires_next; + return max(expires_next, 0); } static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base) @@ -622,8 +680,8 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; - ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, - offs_real, offs_boot, offs_tai); + ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, offs_real, + offs_boot, offs_tai); base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real; base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot; @@ -633,7 +691,9 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) } /* - * Is the high resolution mode active ? + * Is the high resolution mode active in the CPU base. 
This cannot use the + * static key as the CPUs are switched to high resolution mode + * asynchronously. */ static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) { @@ -641,8 +701,13 @@ static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) cpu_base->hres_active : 0; } -static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, - struct hrtimer *next_timer, +static inline void hrtimer_rearm_event(ktime_t expires_next, bool deferred) +{ + trace_hrtimer_rearm(expires_next, deferred); + tick_program_event(expires_next, 1); +} + +static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, struct hrtimer *next_timer, ktime_t expires_next) { cpu_base->expires_next = expires_next; @@ -667,20 +732,13 @@ static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, if (!hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) return; - tick_program_event(expires_next, 1); + hrtimer_rearm_event(expires_next, false); } -/* - * Reprogram the event source with checking both queues for the - * next event - * Called with interrupts disabled and base->lock held - */ -static void -hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) +/* Reprogram the event source with a evaluation of all clock bases */ +static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, bool skip_equal) { - ktime_t expires_next; - - expires_next = hrtimer_update_next_event(cpu_base); + ktime_t expires_next = hrtimer_update_next_event(cpu_base); if (skip_equal && expires_next == cpu_base->expires_next) return; @@ -691,59 +749,49 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS -/* - * High resolution timer enabled ? - */ +/* High resolution timer enabled ? 
*/ static bool hrtimer_hres_enabled __read_mostly = true; unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; EXPORT_SYMBOL_GPL(hrtimer_resolution); -/* - * Enable / Disable high resolution mode - */ +/* Enable / Disable high resolution mode */ static int __init setup_hrtimer_hres(char *str) { return (kstrtobool(str, &hrtimer_hres_enabled) == 0); } - __setup("highres=", setup_hrtimer_hres); -/* - * hrtimer_high_res_enabled - query, if the highres mode is enabled - */ -static inline int hrtimer_is_hres_enabled(void) +/* hrtimer_high_res_enabled - query, if the highres mode is enabled */ +static inline bool hrtimer_is_hres_enabled(void) { return hrtimer_hres_enabled; } -static void retrigger_next_event(void *arg); - -/* - * Switch to high resolution mode - */ +/* Switch to high resolution mode */ static void hrtimer_switch_to_hres(void) { struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); if (tick_init_highres()) { - pr_warn("Could not switch to high resolution mode on CPU %u\n", - base->cpu); + pr_warn("Could not switch to high resolution mode on CPU %u\n", base->cpu); return; } - base->hres_active = 1; + base->hres_active = true; hrtimer_resolution = HIGH_RES_NSEC; tick_setup_sched_timer(true); /* "Retrigger" the interrupt to get things going */ retrigger_next_event(NULL); + hrtimer_schedule_hres_work(); } #else -static inline int hrtimer_is_hres_enabled(void) { return 0; } +static inline bool hrtimer_is_hres_enabled(void) { return 0; } static inline void hrtimer_switch_to_hres(void) { } #endif /* CONFIG_HIGH_RES_TIMERS */ + /* * Retrigger next event is called after clock was set with interrupts * disabled through an SMP function call or directly from low level @@ -774,17 +822,16 @@ static void retrigger_next_event(void *arg) * of the next expiring timer is enough. The return from the SMP * function call will take care of the reprogramming in case the * CPU was in a NOHZ idle sleep. 
+ * + * In periodic low resolution mode, the next softirq expiration + * must also be updated. */ - if (!hrtimer_hres_active(base) && !tick_nohz_active) - return; - - raw_spin_lock(&base->lock); + guard(raw_spinlock)(&base->lock); hrtimer_update_base(base); if (hrtimer_hres_active(base)) - hrtimer_force_reprogram(base, 0); + hrtimer_force_reprogram(base, /* skip_equal */ false); else hrtimer_update_next_event(base); - raw_spin_unlock(&base->lock); } /* @@ -798,10 +845,11 @@ static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); struct hrtimer_clock_base *base = timer->base; - ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); + ktime_t expires = hrtimer_get_expires(timer); - WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); + WARN_ON_ONCE(expires < 0); + expires = ktime_sub(expires, base->offset); /* * CLOCK_REALTIME timer might be requested with an absolute * expiry time which is less than base->offset. Set it to 0. @@ -828,8 +876,7 @@ static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram) timer_cpu_base->softirq_next_timer = timer; timer_cpu_base->softirq_expires_next = expires; - if (!ktime_before(expires, timer_cpu_base->expires_next) || - !reprogram) + if (!ktime_before(expires, timer_cpu_base->expires_next) || !reprogram) return; } @@ -843,11 +890,8 @@ static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram) if (expires >= cpu_base->expires_next) return; - /* - * If the hrtimer interrupt is running, then it will reevaluate the - * clock bases and reprogram the clock event device. 
- */ - if (cpu_base->in_hrtirq) + /* If a deferred rearm is pending skip reprogramming the device */ + if (cpu_base->deferred_rearm) return; cpu_base->next_timer = timer; @@ -855,8 +899,7 @@ static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram) __hrtimer_reprogram(cpu_base, timer, expires); } -static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base, - unsigned int active) +static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base, unsigned int active) { struct hrtimer_clock_base *base; unsigned int seq; @@ -882,13 +925,11 @@ static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base, if (seq == cpu_base->clock_was_set_seq) return false; - /* - * If the remote CPU is currently handling an hrtimer interrupt, it - * will reevaluate the first expiring timer of all clock bases - * before reprogramming. Nothing to do here. - */ - if (cpu_base->in_hrtirq) + /* If a deferred rearm is pending the remote CPU will take care of it */ + if (cpu_base->deferred_rearm) { + cpu_base->deferred_needs_update = true; return false; + } /* * Walk the affected clock bases and check whether the first expiring @@ -899,15 +940,15 @@ static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base, active &= cpu_base->active_bases; for_each_active_base(base, cpu_base, active) { - struct timerqueue_node *next; + struct timerqueue_linked_node *next; - next = timerqueue_getnext(&base->active); + next = timerqueue_linked_first(&base->active); expires = ktime_sub(next->expires, base->offset); if (expires < cpu_base->expires_next) return true; /* Extra check for softirq clock bases */ - if (base->clockid < HRTIMER_BASE_MONOTONIC_SOFT) + if (base->index < HRTIMER_BASE_MONOTONIC_SOFT) continue; if (cpu_base->softirq_activated) continue; @@ -933,11 +974,9 @@ static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base, */ void clock_was_set(unsigned int bases) { - struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases); cpumask_var_t mask; - int cpu; - if 
(!hrtimer_hres_active(cpu_base) && !tick_nohz_active) + if (!hrtimer_highres_enabled() && !tick_nohz_is_active()) goto out_timerfd; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { @@ -946,23 +985,19 @@ void clock_was_set(unsigned int bases) } /* Avoid interrupting CPUs if possible */ - cpus_read_lock(); - for_each_online_cpu(cpu) { - unsigned long flags; - - cpu_base = &per_cpu(hrtimer_bases, cpu); - raw_spin_lock_irqsave(&cpu_base->lock, flags); + scoped_guard(cpus_read_lock) { + int cpu; - if (update_needs_ipi(cpu_base, bases)) - cpumask_set_cpu(cpu, mask); + for_each_online_cpu(cpu) { + struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); - raw_spin_unlock_irqrestore(&cpu_base->lock, flags); + guard(raw_spinlock_irqsave)(&cpu_base->lock); + if (update_needs_ipi(cpu_base, bases)) + cpumask_set_cpu(cpu, mask); + } + scoped_guard(preempt) + smp_call_function_many(mask, retrigger_next_event, NULL, 1); } - - preempt_disable(); - smp_call_function_many(mask, retrigger_next_event, NULL, 1); - preempt_enable(); - cpus_read_unlock(); free_cpumask_var(mask); out_timerfd: @@ -997,11 +1032,8 @@ void hrtimers_resume_local(void) retrigger_next_event(NULL); } -/* - * Counterpart to lock_hrtimer_base above: - */ -static inline -void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) +/* Counterpart to lock_hrtimer_base above */ +static inline void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) __releases(&timer->base->cpu_base->lock) { raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); @@ -1018,7 +1050,7 @@ void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) * .. note:: * This only updates the timer expiry value and does not requeue the timer. * - * There is also a variant of the function hrtimer_forward_now(). + * There is also a variant of this function: hrtimer_forward_now(). * * Context: Can be safely called from the callback function of @timer. 
If called * from other contexts @timer must neither be enqueued nor running the @@ -1028,15 +1060,15 @@ void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) */ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) { - u64 orun = 1; ktime_t delta; + u64 orun = 1; delta = ktime_sub(now, hrtimer_get_expires(timer)); if (delta < 0) return 0; - if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED)) + if (WARN_ON(timer->is_queued)) return 0; if (interval < hrtimer_resolution) @@ -1047,7 +1079,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) orun = ktime_divns(delta, incr); hrtimer_add_expires_ns(timer, incr * orun); - if (hrtimer_get_expires_tv64(timer) > now) + if (hrtimer_get_expires(timer) > now) return orun; /* * This (and the ktime_add() below) is the @@ -1065,74 +1097,98 @@ EXPORT_SYMBOL_GPL(hrtimer_forward); * enqueue_hrtimer - internal function to (re)start a timer * * The timer is inserted in expiry order. Insertion into the - * red black tree is O(log(n)). Must hold the base lock. + * red black tree is O(log(n)). * - * Returns 1 when the new timer is the leftmost timer in the tree. + * Returns true when the new timer is the leftmost timer in the tree. 
*/ -static int enqueue_hrtimer(struct hrtimer *timer, - struct hrtimer_clock_base *base, - enum hrtimer_mode mode) +static bool enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, + enum hrtimer_mode mode, bool was_armed) { - debug_activate(timer, mode); + lockdep_assert_held(&base->cpu_base->lock); + + debug_activate(timer, mode, was_armed); WARN_ON_ONCE(!base->cpu_base->online); base->cpu_base->active_bases |= 1 << base->index; /* Pairs with the lockless read in hrtimer_is_queued() */ - WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED); + WRITE_ONCE(timer->is_queued, HRTIMER_STATE_ENQUEUED); + + if (!timerqueue_linked_add(&base->active, &timer->node)) + return false; + + base->expires_next = hrtimer_get_expires(timer); + return true; +} - return timerqueue_add(&base->active, &timer->node); +static inline void base_update_next_timer(struct hrtimer_clock_base *base) +{ + struct timerqueue_linked_node *next = timerqueue_linked_first(&base->active); + + base->expires_next = next ? next->expires : KTIME_MAX; } /* * __remove_hrtimer - internal function to remove a timer * - * Caller must hold the base lock. - * * High resolution timer mode reprograms the clock event device when the * timer is the one which expires next. The caller can disable this by setting * reprogram to zero. This is useful, when the context does a reprogramming * anyway (e.g. 
timer interrupt) */ -static void __remove_hrtimer(struct hrtimer *timer, - struct hrtimer_clock_base *base, - u8 newstate, int reprogram) +static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, + bool newstate, bool reprogram) { struct hrtimer_cpu_base *cpu_base = base->cpu_base; - u8 state = timer->state; + bool was_first; - /* Pairs with the lockless read in hrtimer_is_queued() */ - WRITE_ONCE(timer->state, newstate); - if (!(state & HRTIMER_STATE_ENQUEUED)) + lockdep_assert_held(&cpu_base->lock); + + if (!timer->is_queued) return; - if (!timerqueue_del(&base->active, &timer->node)) + /* Pairs with the lockless read in hrtimer_is_queued() */ + WRITE_ONCE(timer->is_queued, newstate); + + was_first = !timerqueue_linked_prev(&timer->node); + + if (!timerqueue_linked_del(&base->active, &timer->node)) cpu_base->active_bases &= ~(1 << base->index); + /* Nothing to update if this was not the first timer in the base */ + if (!was_first) + return; + + base_update_next_timer(base); + /* - * Note: If reprogram is false we do not update - * cpu_base->next_timer. This happens when we remove the first - * timer on a remote cpu. No harm as we never dereference - * cpu_base->next_timer. So the worst thing what can happen is - * an superfluous call to hrtimer_force_reprogram() on the - * remote cpu later on if the same timer gets enqueued again. + * If reprogram is false don't update cpu_base->next_timer and do not + * touch the clock event device. + * + * This happens when removing the first timer on a remote CPU, which + * will be handled by the remote CPU's interrupt. It also happens when + * a local timer is removed to be immediately restarted. That's handled + * at the call site. 
*/ - if (reprogram && timer == cpu_base->next_timer) - hrtimer_force_reprogram(cpu_base, 1); + if (!reprogram || timer != cpu_base->next_timer || timer->is_lazy) + return; + + if (cpu_base->deferred_rearm) + cpu_base->deferred_needs_update = true; + else + hrtimer_force_reprogram(cpu_base, /* skip_equal */ true); } -/* - * remove hrtimer, called with base lock held - */ -static inline int -remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, - bool restart, bool keep_local) +static inline bool remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, + bool newstate) { - u8 state = timer->state; + lockdep_assert_held(&base->cpu_base->lock); - if (state & HRTIMER_STATE_ENQUEUED) { + if (timer->is_queued) { bool reprogram; + debug_hrtimer_deactivate(timer); + /* * Remove the timer and force reprogramming when high * resolution mode is active and the timer is on the current @@ -1141,24 +1197,81 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, * reprogramming happens in the interrupt handler. This is a * rare case and less expensive than a smp call. */ - debug_deactivate(timer); reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases); - /* - * If the timer is not restarted then reprogramming is - * required if the timer is local. If it is local and about - * to be restarted, avoid programming it twice (on removal - * and a moment later when it's requeued). - */ - if (!restart) - state = HRTIMER_STATE_INACTIVE; - else - reprogram &= !keep_local; + __remove_hrtimer(timer, base, newstate, reprogram); + return true; + } + return false; +} + +/* + * Update in place has to retrieve the expiry times of the neighbour nodes + * if they exist. That is cache line neutral because the dequeue/enqueue + * operation is going to need the same cache lines. But there is a big win + * when the dequeue/enqueue can be avoided because the RB tree does not + * have to be rebalanced twice. 
+ */ +static inline bool +hrtimer_can_update_in_place(struct hrtimer *timer, struct hrtimer_clock_base *base, ktime_t expires) +{ + struct timerqueue_linked_node *next = timerqueue_linked_next(&timer->node); + struct timerqueue_linked_node *prev = timerqueue_linked_prev(&timer->node); + + /* If the new expiry goes behind the next timer, requeue is required */ + if (next && expires > next->expires) + return false; - __remove_hrtimer(timer, base, state, reprogram); - return 1; + /* If this is the first timer, update in place */ + if (!prev) + return true; + + /* Update in place when it does not go ahead of the previous one */ + return expires >= prev->expires; +} + +static inline bool +remove_and_enqueue_same_base(struct hrtimer *timer, struct hrtimer_clock_base *base, + const enum hrtimer_mode mode, ktime_t expires, u64 delta_ns) +{ + bool was_first = false; + + /* Remove it from the timer queue if active */ + if (timer->is_queued) { + was_first = !timerqueue_linked_prev(&timer->node); + + /* Try to update in place to avoid the de/enqueue dance */ + if (hrtimer_can_update_in_place(timer, base, expires)) { + hrtimer_set_expires_range_ns(timer, expires, delta_ns); + trace_hrtimer_start(timer, mode, true); + if (was_first) + base->expires_next = expires; + return was_first; + } + + debug_hrtimer_deactivate(timer); + timerqueue_linked_del(&base->active, &timer->node); } - return 0; + + /* Set the new expiry time */ + hrtimer_set_expires_range_ns(timer, expires, delta_ns); + + debug_activate(timer, mode, timer->is_queued); + base->cpu_base->active_bases |= 1 << base->index; + + /* Pairs with the lockless read in hrtimer_is_queued() */ + WRITE_ONCE(timer->is_queued, HRTIMER_STATE_ENQUEUED); + + /* If it's the first expiring timer now or again, update base */ + if (timerqueue_linked_add(&base->active, &timer->node)) { + base->expires_next = expires; + return true; + } + + if (was_first) + base_update_next_timer(base); + + return false; } static inline ktime_t 
hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, @@ -1177,48 +1290,93 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, return tim; } -static void -hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram) +static void hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram) { - ktime_t expires; - - /* - * Find the next SOFT expiration. - */ - expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); + ktime_t expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); /* - * reprogramming needs to be triggered, even if the next soft - * hrtimer expires at the same time than the next hard + * Reprogramming needs to be triggered, even if the next soft + * hrtimer expires at the same time as the next hard * hrtimer. cpu_base->softirq_expires_next needs to be updated! */ if (expires == KTIME_MAX) return; /* - * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event() - * cpu_base->*expires_next is only set by hrtimer_reprogram() + * cpu_base->next_timer is recomputed by __hrtimer_get_next_event() + * cpu_base->expires_next is only set by hrtimer_reprogram() */ hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram); } -static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, - u64 delta_ns, const enum hrtimer_mode mode, - struct hrtimer_clock_base *base) +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +static __always_inline bool hrtimer_prefer_local(bool is_local, bool is_first, bool is_pinned) { - struct hrtimer_clock_base *new_base; - bool force_local, first; + if (static_branch_likely(&timers_migration_enabled)) { + /* + * If it is local and the first expiring timer keep it on the local + * CPU to optimize reprogramming of the clockevent device. Also + * avoid switch_hrtimer_base() overhead when local and pinned. 
+ */ + if (!is_local) + return false; + if (is_first || is_pinned) + return true; + + /* Honour the NOHZ full restrictions */ + if (!housekeeping_cpu(smp_processor_id(), HK_TYPE_KERNEL_NOISE)) + return false; + + /* + * If the tick is not stopped or need_resched() is set, then + * there is no point in moving the timer somewhere else. + */ + return !tick_nohz_tick_stopped() || need_resched(); + } + return is_local; +} +#else +static __always_inline bool hrtimer_prefer_local(bool is_local, bool is_first, bool is_pinned) +{ + return is_local; +} +#endif + +static inline bool hrtimer_keep_base(struct hrtimer *timer, bool is_local, bool is_first, + bool is_pinned) +{ + /* If the timer is running the callback it has to stay on its CPU base. */ + if (unlikely(timer->base->running == timer)) + return true; + + return hrtimer_prefer_local(is_local, is_first, is_pinned); +} + +static bool __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, + const enum hrtimer_mode mode, struct hrtimer_clock_base *base) +{ + struct hrtimer_cpu_base *this_cpu_base = this_cpu_ptr(&hrtimer_bases); + bool is_pinned, first, was_first, keep_base = false; + struct hrtimer_cpu_base *cpu_base = base->cpu_base; + + was_first = cpu_base->next_timer == timer; + is_pinned = !!(mode & HRTIMER_MODE_PINNED); /* - * If the timer is on the local cpu base and is the first expiring - * timer then this might end up reprogramming the hardware twice - * (on removal and on enqueue). To avoid that by prevent the - * reprogram on removal, keep the timer local to the current CPU - * and enforce reprogramming after it is queued no matter whether - * it is the new first expiring timer again or not. + * Don't keep it local if this enqueue happens on an unplugged CPU + * after hrtimer_cpu_dying() has been invoked. 
*/ - force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases); - force_local &= base->cpu_base->next_timer == timer; + if (likely(this_cpu_base->online)) { + bool is_local = cpu_base == this_cpu_base; + + keep_base = hrtimer_keep_base(timer, is_local, was_first, is_pinned); + } + + /* Calculate absolute expiry time for relative timers */ + if (mode & HRTIMER_MODE_REL) + tim = ktime_add_safe(tim, __hrtimer_cb_get_time(base->clockid)); + /* Compensate for low resolution granularity */ + tim = hrtimer_update_lowres(timer, tim, mode); /* * Remove an active timer from the queue. In case it is not queued @@ -1230,35 +1388,75 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * reprogramming later if it was the first expiring timer. This * avoids programming the underlying clock event twice (once at * removal and once after enqueue). + * + * @keep_base is also true if the timer callback is running on a + * remote CPU and for local pinned timers. */ - remove_hrtimer(timer, base, true, force_local); + if (likely(keep_base)) { + first = remove_and_enqueue_same_base(timer, base, mode, tim, delta_ns); + } else { + /* Keep the ENQUEUED state in case it is queued */ + bool was_armed = remove_hrtimer(timer, base, HRTIMER_STATE_ENQUEUED); - if (mode & HRTIMER_MODE_REL) - tim = ktime_add_safe(tim, base->get_time()); + hrtimer_set_expires_range_ns(timer, tim, delta_ns); - tim = hrtimer_update_lowres(timer, tim, mode); + /* Switch the timer base, if necessary: */ + base = switch_hrtimer_base(timer, base, is_pinned); + cpu_base = base->cpu_base; - hrtimer_set_expires_range_ns(timer, tim, delta_ns); + first = enqueue_hrtimer(timer, base, mode, was_armed); + } - /* Switch the timer base, if necessary: */ - if (!force_local) { - new_base = switch_hrtimer_base(timer, base, - mode & HRTIMER_MODE_PINNED); - } else { - new_base = base; + /* If a deferred rearm is pending skip reprogramming the device */ + if (cpu_base->deferred_rearm) { + 
cpu_base->deferred_needs_update = true; + return false; } - first = enqueue_hrtimer(timer, new_base, mode); - if (!force_local) - return first; + if (!was_first || cpu_base != this_cpu_base) { + /* + * If the current CPU base is online, then the timer is never + * queued on a remote CPU if it would be the first expiring + * timer there unless the timer callback is currently executed + * on the remote CPU. In the latter case the remote CPU will + * re-evaluate the first expiring timer after completing the + * callbacks. + */ + if (likely(hrtimer_base_is_online(this_cpu_base))) + return first; + + /* + * Timer was enqueued remote because the current base is + * already offline. If the timer is the first to expire, + * kick the remote CPU to reprogram the clock event. + */ + if (first) + smp_call_function_single_async(cpu_base->cpu, &cpu_base->csd); + return false; + } /* - * Timer was forced to stay on the current CPU to avoid - * reprogramming on removal and enqueue. Force reprogram the - * hardware by evaluating the new first expiring timer. + * Special case for the HRTICK timer. It is frequently rearmed and most + * of the time moves the expiry into the future. That's expensive in + * virtual machines and it's better to take the pointless already armed + * interrupt than reprogramming the hardware on every context switch. + * + * If the new expiry is before the armed time, then reprogramming is + * required. */ - hrtimer_force_reprogram(new_base->cpu_base, 1); - return 0; + if (timer->is_lazy) { + if (cpu_base->expires_next <= hrtimer_get_expires(timer)) + return false; + } + + /* + * Timer was the first expiring timer and forced to stay on the + * current CPU to avoid reprogramming on removal and enqueue. Force + * reprogram the hardware by evaluating the new first expiring + * timer. 
+ */ + hrtimer_force_reprogram(cpu_base, /* skip_equal */ true); + return false; } /** @@ -1270,14 +1468,14 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); * softirq based mode is considered for debug purpose only! */ -void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, - u64 delta_ns, const enum hrtimer_mode mode) +void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, + const enum hrtimer_mode mode) { struct hrtimer_clock_base *base; unsigned long flags; - if (WARN_ON_ONCE(!timer->function)) - return; + debug_hrtimer_assert_init(timer); + /* * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard @@ -1325,8 +1523,11 @@ int hrtimer_try_to_cancel(struct hrtimer *timer) base = lock_hrtimer_base(timer, &flags); - if (!hrtimer_callback_running(timer)) - ret = remove_hrtimer(timer, base, false, false); + if (!hrtimer_callback_running(timer)) { + ret = remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE); + if (ret) + trace_hrtimer_cancel(timer); + } unlock_hrtimer_base(timer, &flags); @@ -1360,8 +1561,7 @@ static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) * the timer callback to finish. Drop expiry_lock and reacquire it. That * allows the waiter to acquire the lock and make progress. 
*/ -static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, - unsigned long flags) +static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, unsigned long flags) { if (atomic_read(&cpu_base->timer_waiters)) { raw_spin_unlock_irqrestore(&cpu_base->lock, flags); @@ -1371,13 +1571,25 @@ static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, } } +#ifdef CONFIG_SMP +static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) +{ + return base == &migration_base; +} +#else +static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) +{ + return false; +} +#endif + /* * This function is called on PREEMPT_RT kernels when the fast path * deletion of a timer failed because the timer callback function was * running. * * This prevents priority inversion: if the soft irq thread is preempted - * in the middle of a timer callback, then calling del_timer_sync() can + * in the middle of a timer callback, then calling hrtimer_cancel() can * lead to two issues: * * - If the caller is on a remote CPU then it has to spin wait for the timer @@ -1414,14 +1626,10 @@ void hrtimer_cancel_wait_running(const struct hrtimer *timer) spin_unlock_bh(&base->cpu_base->softirq_expiry_lock); } #else -static inline void -hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { } -static inline void -hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { } -static inline void -hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { } -static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base, - unsigned long flags) { } +static inline void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { } +static inline void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { } +static inline void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { } +static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base, unsigned long fl) { } 
#endif /** @@ -1477,15 +1685,11 @@ u64 hrtimer_get_next_event(void) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); u64 expires = KTIME_MAX; - unsigned long flags; - - raw_spin_lock_irqsave(&cpu_base->lock, flags); + guard(raw_spinlock_irqsave)(&cpu_base->lock); if (!hrtimer_hres_active(cpu_base)) expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); - raw_spin_unlock_irqrestore(&cpu_base->lock, flags); - return expires; } @@ -1500,48 +1704,65 @@ u64 hrtimer_next_event_without(const struct hrtimer *exclude) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); u64 expires = KTIME_MAX; - unsigned long flags; - - raw_spin_lock_irqsave(&cpu_base->lock, flags); - - if (hrtimer_hres_active(cpu_base)) { - unsigned int active; + unsigned int active; - if (!cpu_base->softirq_activated) { - active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; - expires = __hrtimer_next_event_base(cpu_base, exclude, - active, KTIME_MAX); - } - active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; - expires = __hrtimer_next_event_base(cpu_base, exclude, active, - expires); - } + guard(raw_spinlock_irqsave)(&cpu_base->lock); + if (!hrtimer_hres_active(cpu_base)) + return expires; - raw_spin_unlock_irqrestore(&cpu_base->lock, flags); + active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; + if (active && !cpu_base->softirq_activated) + expires = hrtimer_bases_next_event_without(cpu_base, exclude, active, KTIME_MAX); - return expires; + active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; + if (!active) + return expires; + return hrtimer_bases_next_event_without(cpu_base, exclude, active, expires); } #endif static inline int hrtimer_clockid_to_base(clockid_t clock_id) { - if (likely(clock_id < MAX_CLOCKS)) { - int base = hrtimer_clock_to_base_table[clock_id]; + switch (clock_id) { + case CLOCK_MONOTONIC: + return HRTIMER_BASE_MONOTONIC; + case CLOCK_REALTIME: + return HRTIMER_BASE_REALTIME; + case CLOCK_BOOTTIME: + return HRTIMER_BASE_BOOTTIME; 
+ case CLOCK_TAI: + return HRTIMER_BASE_TAI; + default: + WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id); + return HRTIMER_BASE_MONOTONIC; + } +} - if (likely(base != HRTIMER_MAX_CLOCK_BASES)) - return base; +static ktime_t __hrtimer_cb_get_time(clockid_t clock_id) +{ + switch (clock_id) { + case CLOCK_MONOTONIC: + return ktime_get(); + case CLOCK_REALTIME: + return ktime_get_real(); + case CLOCK_BOOTTIME: + return ktime_get_boottime(); + case CLOCK_TAI: + return ktime_get_clocktai(); + default: + WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id); + return ktime_get(); } - WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id); - return HRTIMER_BASE_MONOTONIC; } -static enum hrtimer_restart hrtimer_dummy_timeout(struct hrtimer *unused) +ktime_t hrtimer_cb_get_time(const struct hrtimer *timer) { - return HRTIMER_NORESTART; + return __hrtimer_cb_get_time(timer->base->clockid); } +EXPORT_SYMBOL_GPL(hrtimer_cb_get_time); -static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, - enum hrtimer_mode mode) +static void __hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*fn)(struct hrtimer *), + clockid_t clock_id, enum hrtimer_mode mode) { bool softtimer = !!(mode & HRTIMER_MODE_SOFT); struct hrtimer_cpu_base *cpu_base; @@ -1572,43 +1793,17 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, base += hrtimer_clockid_to_base(clock_id); timer->is_soft = softtimer; timer->is_hard = !!(mode & HRTIMER_MODE_HARD); + timer->is_lazy = !!(mode & HRTIMER_MODE_LAZY_REARM); timer->base = &cpu_base->clock_base[base]; - timerqueue_init(&timer->node); -} - -static void __hrtimer_setup(struct hrtimer *timer, - enum hrtimer_restart (*function)(struct hrtimer *), - clockid_t clock_id, enum hrtimer_mode mode) -{ - __hrtimer_init(timer, clock_id, mode); + timerqueue_linked_init(&timer->node); - if (WARN_ON_ONCE(!function)) - timer->function = hrtimer_dummy_timeout; + if (WARN_ON_ONCE(!fn)) + ACCESS_PRIVATE(timer, function) = 
hrtimer_dummy_timeout; else - timer->function = function; + ACCESS_PRIVATE(timer, function) = fn; } /** - * hrtimer_init - initialize a timer to the given clock - * @timer: the timer to be initialized - * @clock_id: the clock to be used - * @mode: The modes which are relevant for initialization: - * HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT, - * HRTIMER_MODE_REL_SOFT - * - * The PINNED variants of the above can be handed in, - * but the PINNED bit is ignored as pinning happens - * when the hrtimer is started - */ -void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, - enum hrtimer_mode mode) -{ - debug_init(timer, clock_id, mode); - __hrtimer_init(timer, clock_id, mode); -} -EXPORT_SYMBOL_GPL(hrtimer_init); - -/** * hrtimer_setup - initialize a timer to the given clock * @timer: the timer to be initialized * @function: the callback function @@ -1624,7 +1819,7 @@ EXPORT_SYMBOL_GPL(hrtimer_init); void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), clockid_t clock_id, enum hrtimer_mode mode) { - debug_init(timer, clock_id, mode); + debug_setup(timer, clock_id, mode); __hrtimer_setup(timer, function, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_setup); @@ -1643,7 +1838,7 @@ void hrtimer_setup_on_stack(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), clockid_t clock_id, enum hrtimer_mode mode) { - debug_init_on_stack(timer, clock_id, mode); + debug_setup_on_stack(timer, clock_id, mode); __hrtimer_setup(timer, function, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_setup_on_stack); @@ -1664,12 +1859,10 @@ bool hrtimer_active(const struct hrtimer *timer) base = READ_ONCE(timer->base); seq = raw_read_seqcount_begin(&base->seq); - if (timer->state != HRTIMER_STATE_INACTIVE || - base->running == timer) + if (timer->is_queued || base->running == timer) return true; - } while (read_seqcount_retry(&base->seq, seq) || - base != READ_ONCE(timer->base)); + } while 
(read_seqcount_retry(&base->seq, seq) || base != READ_ONCE(timer->base)); return false; } @@ -1683,7 +1876,7 @@ EXPORT_SYMBOL_GPL(hrtimer_active); * - callback: the timer is being ran * - post: the timer is inactive or (re)queued * - * On the read side we ensure we observe timer->state and cpu_base->running + * On the read side we ensure we observe timer->is_queued and cpu_base->running * from the same section, if anything changed while we looked at it, we retry. * This includes timer->base changing because sequence numbers alone are * insufficient for that. @@ -1692,11 +1885,9 @@ EXPORT_SYMBOL_GPL(hrtimer_active); * a false negative if the read side got smeared over multiple consecutive * __run_hrtimer() invocations. */ - -static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, - struct hrtimer_clock_base *base, - struct hrtimer *timer, ktime_t *now, - unsigned long flags) __must_hold(&cpu_base->lock) +static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, struct hrtimer_clock_base *base, + struct hrtimer *timer, ktime_t now, unsigned long flags) + __must_hold(&cpu_base->lock) { enum hrtimer_restart (*fn)(struct hrtimer *); bool expires_in_hardirq; @@ -1704,20 +1895,20 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, lockdep_assert_held(&cpu_base->lock); - debug_deactivate(timer); + debug_hrtimer_deactivate(timer); base->running = timer; /* - * Separate the ->running assignment from the ->state assignment. + * Separate the ->running assignment from the ->is_queued assignment. * * As with a regular write barrier, this ensures the read side in * hrtimer_active() cannot observe base->running == NULL && - * timer->state == INACTIVE. + * timer->is_queued == INACTIVE. 
*/ raw_write_seqcount_barrier(&base->seq); - __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); - fn = timer->function; + __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, false); + fn = ACCESS_PRIVATE(timer, function); /* * Clear the 'is relative' flag for the TIME_LOW_RES case. If the @@ -1751,16 +1942,15 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, * hrtimer_start_range_ns() can have popped in and enqueued the timer * for us already. */ - if (restart != HRTIMER_NORESTART && - !(timer->state & HRTIMER_STATE_ENQUEUED)) - enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS); + if (restart == HRTIMER_RESTART && !timer->is_queued) + enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS, false); /* - * Separate the ->running assignment from the ->state assignment. + * Separate the ->running assignment from the ->is_queued assignment. * * As with a regular write barrier, this ensures the read side in * hrtimer_active() cannot observe base->running.timer == NULL && - * timer->state == INACTIVE. + * timer->is_queued == INACTIVE. */ raw_write_seqcount_barrier(&base->seq); @@ -1768,23 +1958,24 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, base->running = NULL; } +static __always_inline struct hrtimer *clock_base_next_timer_safe(struct hrtimer_clock_base *base) +{ + struct timerqueue_linked_node *next = timerqueue_linked_first(&base->active); + + return next ? 
hrtimer_from_timerqueue_node(next) : NULL; +} + static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, unsigned long flags, unsigned int active_mask) { - struct hrtimer_clock_base *base; unsigned int active = cpu_base->active_bases & active_mask; + struct hrtimer_clock_base *base; for_each_active_base(base, cpu_base, active) { - struct timerqueue_node *node; - ktime_t basenow; - - basenow = ktime_add(now, base->offset); - - while ((node = timerqueue_getnext(&base->active))) { - struct hrtimer *timer; - - timer = container_of(node, struct hrtimer, node); + ktime_t basenow = ktime_add(now, base->offset); + struct hrtimer *timer; + while ((timer = clock_base_next_timer(base))) { /* * The immediate goal for using the softexpires is * minimizing wakeups, not running timers at the @@ -1797,10 +1988,10 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, * are right-of a not yet expired timer, because that * timer will have to trigger a wakeup anyway. */ - if (basenow < hrtimer_get_softexpires_tv64(timer)) + if (basenow < hrtimer_get_softexpires(timer)) break; - __run_hrtimer(cpu_base, base, timer, &basenow, flags); + __run_hrtimer(cpu_base, base, timer, basenow, flags); if (active_mask == HRTIMER_ACTIVE_SOFT) hrtimer_sync_wait_running(cpu_base, flags); } @@ -1819,7 +2010,7 @@ static __latent_entropy void hrtimer_run_softirq(void) now = hrtimer_update_base(cpu_base); __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT); - cpu_base->softirq_activated = 0; + cpu_base->softirq_activated = false; hrtimer_update_softirq_timer(cpu_base, true); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); @@ -1829,6 +2020,63 @@ static __latent_entropy void hrtimer_run_softirq(void) #ifdef CONFIG_HIGH_RES_TIMERS /* + * Very similar to hrtimer_force_reprogram(), except it deals with + * deferred_rearm and hang_detected. 
+ */ +static void hrtimer_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t expires_next, bool deferred) +{ + cpu_base->expires_next = expires_next; + cpu_base->deferred_rearm = false; + + if (unlikely(cpu_base->hang_detected)) { + /* + * Give the system a chance to do something else than looping + * on hrtimer interrupts. + */ + expires_next = ktime_add_ns(ktime_get(), + min(100 * NSEC_PER_MSEC, cpu_base->max_hang_time)); + } + hrtimer_rearm_event(expires_next, deferred); +} + +#ifdef CONFIG_HRTIMER_REARM_DEFERRED +void __hrtimer_rearm_deferred(void) +{ + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + ktime_t expires_next; + + if (!cpu_base->deferred_rearm) + return; + + guard(raw_spinlock)(&cpu_base->lock); + if (cpu_base->deferred_needs_update) { + hrtimer_update_base(cpu_base); + expires_next = hrtimer_update_next_event(cpu_base); + } else { + /* No timer added/removed. Use the cached value */ + expires_next = cpu_base->deferred_expires_next; + } + hrtimer_rearm(cpu_base, expires_next, true); +} + +static __always_inline void +hrtimer_interrupt_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t expires_next) +{ + /* hrtimer_interrupt() just re-evaluated the first expiring timer */ + cpu_base->deferred_needs_update = false; + /* Cache the expiry time */ + cpu_base->deferred_expires_next = expires_next; + set_thread_flag(TIF_HRTIMER_REARM); +} +#else /* CONFIG_HRTIMER_REARM_DEFERRED */ +static __always_inline void +hrtimer_interrupt_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t expires_next) +{ + hrtimer_rearm(cpu_base, expires_next, false); +} +#endif /* !CONFIG_HRTIMER_REARM_DEFERRED */ + +/* * High resolution timer interrupt * Called with interrupts disabled */ @@ -1842,86 +2090,55 @@ void hrtimer_interrupt(struct clock_event_device *dev) BUG_ON(!cpu_base->hres_active); cpu_base->nr_events++; dev->next_event = KTIME_MAX; + dev->next_event_forced = 0; raw_spin_lock_irqsave(&cpu_base->lock, flags); entry_time = now = 
hrtimer_update_base(cpu_base); retry: - cpu_base->in_hrtirq = 1; + cpu_base->deferred_rearm = true; /* - * We set expires_next to KTIME_MAX here with cpu_base->lock - * held to prevent that a timer is enqueued in our queue via - * the migration code. This does not affect enqueueing of - * timers which run their callback and need to be requeued on - * this CPU. + * Set expires_next to KTIME_MAX, which prevents that remote CPUs queue + * timers while __hrtimer_run_queues() is expiring the clock bases. + * Timers which are re/enqueued on the local CPU are not affected by + * this. */ cpu_base->expires_next = KTIME_MAX; if (!ktime_before(now, cpu_base->softirq_expires_next)) { cpu_base->softirq_expires_next = KTIME_MAX; - cpu_base->softirq_activated = 1; + cpu_base->softirq_activated = true; raise_timer_softirq(HRTIMER_SOFTIRQ); } __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); - /* Reevaluate the clock bases for the [soft] next expiry */ - expires_next = hrtimer_update_next_event(cpu_base); - /* - * Store the new expiry value so the migration code can verify - * against it. - */ - cpu_base->expires_next = expires_next; - cpu_base->in_hrtirq = 0; - raw_spin_unlock_irqrestore(&cpu_base->lock, flags); - - /* Reprogramming necessary ? */ - if (!tick_program_event(expires_next, 0)) { - cpu_base->hang_detected = 0; - return; - } - /* * The next timer was already expired due to: * - tracing * - long lasting callbacks * - being scheduled away when running in a VM * - * We need to prevent that we loop forever in the hrtimer - * interrupt routine. We give it 3 attempts to avoid - * overreacting on some spurious event. - * - * Acquire base lock for updating the offsets and retrieving - * the current time. + * We need to prevent that we loop forever in the hrtimer interrupt + * routine. We give it 3 attempts to avoid overreacting on some + * spurious event. 
*/ - raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); - cpu_base->nr_retries++; - if (++retries < 3) - goto retry; - /* - * Give the system a chance to do something else than looping - * here. We stored the entry time, so we know exactly how long - * we spent here. We schedule the next event this amount of - * time away. - */ - cpu_base->nr_hangs++; - cpu_base->hang_detected = 1; - raw_spin_unlock_irqrestore(&cpu_base->lock, flags); + expires_next = hrtimer_update_next_event(cpu_base); + cpu_base->hang_detected = false; + if (expires_next < now) { + if (++retries < 3) + goto retry; + + delta = ktime_sub(now, entry_time); + cpu_base->max_hang_time = max_t(unsigned int, cpu_base->max_hang_time, delta); + cpu_base->nr_hangs++; + cpu_base->hang_detected = true; + } - delta = ktime_sub(now, entry_time); - if ((unsigned int)delta > cpu_base->max_hang_time) - cpu_base->max_hang_time = (unsigned int) delta; - /* - * Limit it to a sensible value as we enforce a longer - * delay. Give the CPU at least 100ms to catch up. 
- */ - if (delta > 100 * NSEC_PER_MSEC) - expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC); - else - expires_next = ktime_add(now, delta); - tick_program_event(expires_next, 1); - pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta)); + hrtimer_interrupt_rearm(cpu_base, expires_next); + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); } + #endif /* !CONFIG_HIGH_RES_TIMERS */ /* @@ -1953,7 +2170,7 @@ void hrtimer_run_queues(void) if (!ktime_before(now, cpu_base->softirq_expires_next)) { cpu_base->softirq_expires_next = KTIME_MAX; - cpu_base->softirq_activated = 1; + cpu_base->softirq_activated = true; raise_timer_softirq(HRTIMER_SOFTIRQ); } @@ -1966,8 +2183,7 @@ void hrtimer_run_queues(void) */ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) { - struct hrtimer_sleeper *t = - container_of(timer, struct hrtimer_sleeper, timer); + struct hrtimer_sleeper *t = container_of(timer, struct hrtimer_sleeper, timer); struct task_struct *task = t->task; t->task = NULL; @@ -1985,14 +2201,13 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context) */ -void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, - enum hrtimer_mode mode) +void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, enum hrtimer_mode mode) { /* * Make the enqueue delivery mode check work on RT. If the sleeper * was initialized for hard interrupt delivery, force the mode bit. * This is a special case for hrtimer_sleepers because - * __hrtimer_init_sleeper() determines the delivery mode on RT so the + * __hrtimer_setup_sleeper() determines the delivery mode on RT so the * fiddling with this decision is avoided at the call sites. 
*/ if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard) @@ -2002,8 +2217,8 @@ void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, } EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires); -static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, - clockid_t clock_id, enum hrtimer_mode mode) +static void __hrtimer_setup_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, + enum hrtimer_mode mode) { /* * On PREEMPT_RT enabled kernels hrtimers which are not explicitly @@ -2029,8 +2244,7 @@ static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, mode |= HRTIMER_MODE_HARD; } - __hrtimer_init(&sl->timer, clock_id, mode); - sl->timer.function = hrtimer_wakeup; + __hrtimer_setup(&sl->timer, hrtimer_wakeup, clock_id, mode); sl->task = current; } @@ -2040,11 +2254,11 @@ static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, * @clock_id: the clock to be used * @mode: timer mode abs/rel */ -void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl, - clockid_t clock_id, enum hrtimer_mode mode) +void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, + enum hrtimer_mode mode) { - debug_init_on_stack(&sl->timer, clock_id, mode); - __hrtimer_init_sleeper(sl, clock_id, mode); + debug_setup_on_stack(&sl->timer, clock_id, mode); + __hrtimer_setup_sleeper(sl, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_setup_sleeper_on_stack); @@ -2108,18 +2322,17 @@ static long __sched hrtimer_nanosleep_restart(struct restart_block *restart) int ret; hrtimer_setup_sleeper_on_stack(&t, restart->nanosleep.clockid, HRTIMER_MODE_ABS); - hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); + hrtimer_set_expires(&t.timer, restart->nanosleep.expires); ret = do_nanosleep(&t, HRTIMER_MODE_ABS); destroy_hrtimer_on_stack(&t.timer); return ret; } -long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, - const clockid_t clockid) +long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, const clockid_t clockid) { 
struct restart_block *restart; struct hrtimer_sleeper t; - int ret = 0; + int ret; hrtimer_setup_sleeper_on_stack(&t, clockid, mode); hrtimer_set_expires_range_ns(&t.timer, rqtp, current->timer_slack_ns); @@ -2135,7 +2348,7 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, restart = &current->restart_block; restart->nanosleep.clockid = t.timer.base->clockid; - restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); + restart->nanosleep.expires = hrtimer_get_expires(&t.timer); set_restart_fn(restart, hrtimer_nanosleep_restart); out: destroy_hrtimer_on_stack(&t.timer); @@ -2158,8 +2371,7 @@ SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp, current->restart_block.fn = do_no_restart_syscall; current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; current->restart_block.nanosleep.rmtp = rmtp; - return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, - CLOCK_MONOTONIC); + return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, CLOCK_MONOTONIC); } #endif @@ -2167,7 +2379,7 @@ SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp, #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp, - struct old_timespec32 __user *, rmtp) + struct old_timespec32 __user *, rmtp) { struct timespec64 tu; @@ -2180,8 +2392,7 @@ SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp, current->restart_block.fn = do_no_restart_syscall; current->restart_block.nanosleep.type = rmtp ? 
TT_COMPAT : TT_NONE; current->restart_block.nanosleep.compat_rmtp = rmtp; - return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, - CLOCK_MONOTONIC); + return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, CLOCK_MONOTONIC); } #endif @@ -2191,26 +2402,34 @@ SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp, int hrtimers_prepare_cpu(unsigned int cpu) { struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); - int i; - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { + for (int i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i]; clock_b->cpu_base = cpu_base; seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock); - timerqueue_init_head(&clock_b->active); + timerqueue_linked_init_head(&clock_b->active); } cpu_base->cpu = cpu; + hrtimer_cpu_base_init_expiry_lock(cpu_base); + return 0; +} + +int hrtimers_cpu_starting(unsigned int cpu) +{ + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + + /* Clear out any left over state from a CPU down operation */ cpu_base->active_bases = 0; - cpu_base->hres_active = 0; - cpu_base->hang_detected = 0; + cpu_base->hres_active = false; + cpu_base->hang_detected = false; cpu_base->next_timer = NULL; cpu_base->softirq_next_timer = NULL; cpu_base->expires_next = KTIME_MAX; cpu_base->softirq_expires_next = KTIME_MAX; - cpu_base->online = 1; - hrtimer_cpu_base_init_expiry_lock(cpu_base); + cpu_base->softirq_activated = false; + cpu_base->online = true; return 0; } @@ -2219,20 +2438,20 @@ int hrtimers_prepare_cpu(unsigned int cpu) static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, struct hrtimer_clock_base *new_base) { + struct timerqueue_linked_node *node; struct hrtimer *timer; - struct timerqueue_node *node; - while ((node = timerqueue_getnext(&old_base->active))) { - timer = container_of(node, struct hrtimer, node); + while ((node = timerqueue_linked_first(&old_base->active))) { + timer = 
hrtimer_from_timerqueue_node(node); BUG_ON(hrtimer_callback_running(timer)); - debug_deactivate(timer); + debug_hrtimer_deactivate(timer); /* * Mark it as ENQUEUED not INACTIVE otherwise the * timer could be seen as !active and just vanish away * under us on another CPU */ - __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0); + __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, false); timer->base = new_base; /* * Enqueue the timers on the new cpu. This does not @@ -2242,13 +2461,13 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, * sort out already expired timers and reprogram the * event device. */ - enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS); + enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS, true); } } int hrtimers_cpu_dying(unsigned int dying_cpu) { - int i, ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER)); + int ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER)); struct hrtimer_cpu_base *old_base, *new_base; old_base = this_cpu_ptr(&hrtimer_bases); @@ -2261,21 +2480,14 @@ int hrtimers_cpu_dying(unsigned int dying_cpu) raw_spin_lock(&old_base->lock); raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING); - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { - migrate_hrtimer_list(&old_base->clock_base[i], - &new_base->clock_base[i]); - } + for (int i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) + migrate_hrtimer_list(&old_base->clock_base[i], &new_base->clock_base[i]); - /* - * The migration might have changed the first expiring softirq - * timer on this CPU. Update it. 
- */ - __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT); /* Tell the other CPU to retrigger the next event */ smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); raw_spin_unlock(&new_base->lock); - old_base->online = 0; + old_base->online = false; raw_spin_unlock(&old_base->lock); return 0; @@ -2286,5 +2498,6 @@ int hrtimers_cpu_dying(unsigned int dying_cpu) void __init hrtimers_init(void) { hrtimers_prepare_cpu(smp_processor_id()); + hrtimers_cpu_starting(smp_processor_id()); open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq); } |
