From e1292ba164742e3a236e407148e00300b7196906 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 18 Mar 2010 20:19:27 -0700 Subject: ntp: Make time_adjust static Now that no arches are accessing time_adjust directly, make it static. Signed-off-by: John Stultz LKML-Reference: <1268968769-19209-1-git-send-email-johnstul@us.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/timex.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/timex.h b/include/linux/timex.h index 7a082b32d8e1..e2de51eedf05 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -238,7 +238,6 @@ extern int tickadj; /* amount of adjustment per tick */ * phase-lock loop variables */ extern int time_status; /* clock synchronization status bits */ -extern long time_adjust; /* The amount of adjtime left */ extern void ntp_init(void); extern void ntp_clear(void); -- cgit v1.2.3 From 3d0205bd1383aa3cac93c209b7c7d03b27930195 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 18 Mar 2010 20:19:28 -0700 Subject: ntp: Remove tickadj There are zero users of tickadj. So remove it. Signed-off-by: John Stultz LKML-Reference: <1268968769-19209-2-git-send-email-johnstul@us.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/timex.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/timex.h b/include/linux/timex.h index e2de51eedf05..32d852f8cbe4 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -232,7 +232,6 @@ struct timex { */ extern unsigned long tick_usec; /* USER_HZ period (usec) */ extern unsigned long tick_nsec; /* ACTHZ period (nsec) */ -extern int tickadj; /* amount of adjustment per tick */ /* * phase-lock loop variables @@ -270,9 +269,6 @@ extern void second_overflow(void); extern void update_ntp_one_tick(void); extern int do_adjtimex(struct timex *); -/* Don't use! Compatibility define for existing users. */ -#define tickadj (500/HZ ? : 1) - int read_current_timer(unsigned long *timer_val); /* The clock frequency of the i8253/i8254 PIT */ -- cgit v1.2.3 From 3bbb9ec946428b96657126768f65487a48dd090c Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 11 Mar 2010 14:04:36 -0800 Subject: timers: Introduce the concept of timer slack for legacy timers While HR timers have had the concept of timer slack for quite some time now, the legacy timers lacked this concept, and had to make do with round_jiffies() and friends. Timer slack is important for power management; grouping timers reduces the number of wakeups which in turn reduces power consumption. This patch introduces timer slack to the legacy timers using the following pieces: * A slack field in the timer struct * An api (set_timer_slack) that callers can use to set explicit timer slack * A default slack of 0.4% of the requested delay for callers that do not set any explicit slack * Rounding code that is part of mod_timer() that tries to group timers around jiffies values every 'power of two' (so quick timers will group around every 2, but longer timers will group around every 4, 8, 16, 32 etc) Signed-off-by: Arjan van de Ven Cc: johnstul@us.ibm.com Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 10 ++++++++- kernel/timer.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index a2d1eb6cb3f0..ea965b857a50 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -10,13 +10,19 @@ struct tvec_base; struct timer_list { + /* + * All fields that change during normal runtime grouped to the + * same cacheline + */ struct list_head entry; unsigned long expires; + struct tvec_base *base; void (*function)(unsigned long); unsigned long data; - struct tvec_base *base; + int slack; + #ifdef CONFIG_TIMER_STATS void *start_site; char start_comm[16]; @@ -165,6 +171,8 @@ extern int mod_timer(struct timer_list *timer, unsigned long expires); extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); +extern void set_timer_slack(struct timer_list *time, int slack_hz); + #define TIMER_NOT_PINNED 0 #define TIMER_PINNED 1 /* diff --git a/kernel/timer.c b/kernel/timer.c index 7e12e7bc7ce6..49773f38c9bc 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -318,6 +318,24 @@ unsigned long round_jiffies_up_relative(unsigned long j) } EXPORT_SYMBOL_GPL(round_jiffies_up_relative); +/** + * set_timer_slack - set the allowed slack for a timer + * @slack_hz: the amount of time (in jiffies) allowed for rounding + * + * Set the amount of time, in jiffies, that a certain timer has + * in terms of slack. By setting this value, the timer subsystem + * will schedule the actual timer somewhere between + * the time mod_timer() asks for, and that time plus the slack. + * + * By setting the slack to -1, a percentage of the delay is used + * instead. + */ +void set_timer_slack(struct timer_list *timer, int slack_hz) +{ + timer->slack = slack_hz; +} +EXPORT_SYMBOL_GPL(set_timer_slack); + static inline void set_running_timer(struct tvec_base *base, struct timer_list *timer) @@ -549,6 +567,7 @@ static void __init_timer(struct timer_list *timer, { timer->entry.next = NULL; timer->base = __raw_get_cpu_var(tvec_bases); + timer->slack = -1; #ifdef CONFIG_TIMER_STATS timer->start_site = NULL; timer->start_pid = -1; @@ -714,6 +733,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires) } EXPORT_SYMBOL(mod_timer_pending); +/* + * Decide where to put the timer while taking the slack into account + * + * Algorithm: + * 1) calculate the maximum (absolute) time + * 2) calculate the highest bit where the expires and new max are different + * 3) use this bit to make a mask + * 4) use the bitmask to round down the maximum time, so that all last + * bits are zeros + */ +static inline +unsigned long apply_slack(struct timer_list *timer, unsigned long expires) +{ + unsigned long expires_limit, mask; + int bit; + + expires_limit = expires + timer->slack; + + if (timer->slack < 0) /* auto slack: use 0.4% */ + expires_limit = expires + (expires - jiffies)/256; + + mask = expires ^ expires_limit; + + if (mask == 0) + return expires; + + bit = find_last_bit(&mask, BITS_PER_LONG); + + mask = (1 << bit) - 1; + + expires_limit = expires_limit & ~(mask); + + return expires_limit; +} + /** * mod_timer - modify a timer's timeout * @timer: the timer to be modified @@ -744,6 +798,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires) if (timer_pending(timer) && timer->expires == expires) return 1; + expires = apply_slack(timer, expires); + return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); } EXPORT_SYMBOL(mod_timer); -- cgit v1.2.3 From 351b3f7a21e413a9b14d0393171497d2373bd702 Mon Sep 17 00:00:00 2001 From: Carsten Emde Date: Fri, 2 Apr 2010 22:40:19 +0200 Subject: hrtimers: Provide schedule_hrtimeout for CLOCK_REALTIME The current version of schedule_hrtimeout() always uses the monotonic clock. Some system calls such as mq_timedsend() and mq_timedreceive(), however, require the use of the wall clock due to the definition of the system call. This patch provides the infrastructure to use schedule_hrtimeout() with a CLOCK_REALTIME timer. Signed-off-by: Carsten Emde Tested-by: Pradyumna Sampath Cc: Andrew Morton Cc: Arjan van de Veen LKML-Reference: <20100402204331.167439615@osadl.org> Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 2 ++ kernel/hrtimer.c | 67 ++++++++++++++++++++++++++++++------------------- 2 files changed, 43 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 5d86fb2309d2..fd0c1b857d3d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -422,6 +422,8 @@ extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, const enum hrtimer_mode mode); +extern int schedule_hrtimeout_range_clock(ktime_t *expires, + unsigned long delta, const enum hrtimer_mode mode, int clock); extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); /* Soft interrupt function to run the hrtimer queues: */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 0086628b6e97..b9b134b35088 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1749,35 +1749,15 @@ void __init hrtimers_init(void) } /** - * schedule_hrtimeout_range - sleep until timeout + * schedule_hrtimeout_range_clock - sleep until timeout * @expires: timeout value (ktime_t) * @delta: slack in expires timeout (ktime_t) * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL - * - * Make the current task sleep until the given expiry time has - * elapsed. The routine will return immediately unless - * the current task state has been set (see set_current_state()). - * - * The @delta argument gives the kernel the freedom to schedule the - * actual wakeup to a time that is both power and performance friendly. - * The kernel give the normal best effort behavior for "@expires+@delta", - * but may decide to fire the timer earlier, but no earlier than @expires. - * - * You can set the task state as follows - - * - * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to - * pass before the routine returns. - * - * %TASK_INTERRUPTIBLE - the routine may return early if a signal is - * delivered to the current task. - * - * The current task state is guaranteed to be TASK_RUNNING when this - * routine returns. - * - * Returns 0 when the timer has expired otherwise -EINTR + * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME */ -int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, - const enum hrtimer_mode mode) +int __sched +schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta, + const enum hrtimer_mode mode, int clock) { struct hrtimer_sleeper t; @@ -1799,7 +1779,7 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, return -EINTR; } - hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); + hrtimer_init_on_stack(&t.timer, clock, mode); hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_init_sleeper(&t, current); @@ -1818,6 +1798,41 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, return !t.task ? 0 : -EINTR; } + +/** + * schedule_hrtimeout_range - sleep until timeout + * @expires: timeout value (ktime_t) + * @delta: slack in expires timeout (ktime_t) + * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL + * + * Make the current task sleep until the given expiry time has + * elapsed. The routine will return immediately unless + * the current task state has been set (see set_current_state()). + * + * The @delta argument gives the kernel the freedom to schedule the + * actual wakeup to a time that is both power and performance friendly. + * The kernel give the normal best effort behavior for "@expires+@delta", + * but may decide to fire the timer earlier, but no earlier than @expires. + * + * You can set the task state as follows - + * + * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to + * pass before the routine returns. + * + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is + * delivered to the current task. + * + * The current task state is guaranteed to be TASK_RUNNING when this + * routine returns. + * + * Returns 0 when the timer has expired otherwise -EINTR + */ +int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, + const enum hrtimer_mode mode) +{ + return schedule_hrtimeout_range_clock(expires, delta, mode, + CLOCK_MONOTONIC); +} EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); /** -- cgit v1.2.3 From 6a867a395558a7f882d041783e4cdea6744ca2bf Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 6 Apr 2010 14:30:51 -0700 Subject: time: Remove xtime_cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the earlier logarithmic time accumulation patch, xtime will now always be within one "tick" of the current time, instead of possibly half a second off. This removes the need for the xtime_cache value, which always stored the time at the last interrupt, so this patch cleans that up removing the xtime_cache related code. This patch also addresses an issue with an earlier version of this change, where xtime_cache was normalizing xtime, which could in some cases be not valid (ie: tv_nsec == NSEC_PER_SEC). This is fixed by handling the edge case in update_wall_time(). Signed-off-by: John Stultz Cc: Petr Titěra LKML-Reference: <1270589451-30773-1-git-send-email-johnstul@us.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/time.h | 1 - kernel/time/timekeeping.c | 35 ++++++++++++++++------------------- 2 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/time.h b/include/linux/time.h index 6e026e45a179..ea3559f0b3f2 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -150,7 +150,6 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran); extern int timekeeping_valid_for_hres(void); extern u64 timekeeping_max_deferment(void); extern void update_wall_time(void); -extern void update_xtime_cache(u64 nsec); extern void timekeeping_leap_insert(int leapsecond); struct tms; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 16736379a9ca..1137f245a4ba 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -165,13 +165,6 @@ struct timespec raw_time; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; -static struct timespec xtime_cache __attribute__ ((aligned (16))); -void update_xtime_cache(u64 nsec) -{ - xtime_cache = xtime; - timespec_add_ns(&xtime_cache, nsec); -} - /* must hold xtime_lock */ void timekeeping_leap_insert(int leapsecond) { @@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv) xtime = *tv; - update_xtime_cache(0); - timekeeper.ntp_error = 0; ntp_clear(); @@ -559,7 +550,6 @@ void __init timekeeping_init(void) } set_normalized_timespec(&wall_to_monotonic, -boot.tv_sec, -boot.tv_nsec); - update_xtime_cache(0); total_sleep_time.tv_sec = 0; total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -593,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev) wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); total_sleep_time = timespec_add_safe(total_sleep_time, ts); } - update_xtime_cache(0); /* re-base the last cycle value */ timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); timekeeper.ntp_error = 0; @@ -788,7 +777,6 @@ void update_wall_time(void) { struct clocksource *clock; cycle_t offset; - u64 nsecs; int shift = 0, maxshift; /* Make sure we're fully resumed: */ @@ -846,7 +834,9 @@ void update_wall_time(void) timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; } - /* store full nanoseconds into xtime after rounding it up and + + /* + * Store full nanoseconds into xtime after rounding it up and * add the remainder to the error difference. */ xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1; @@ -854,8 +844,15 @@ void update_wall_time(void) timekeeper.ntp_error += timekeeper.xtime_nsec << timekeeper.ntp_error_shift; - nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); - update_xtime_cache(nsecs); + /* + * Finally, make sure that after the rounding + * xtime.tv_nsec isn't larger then NSEC_PER_SEC + */ + if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) { + xtime.tv_nsec -= NSEC_PER_SEC; + xtime.tv_sec++; + second_overflow(); + } /* check to see if there is a new clocksource to use */ update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); @@ -895,13 +892,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased); unsigned long get_seconds(void) { - return xtime_cache.tv_sec; + return xtime.tv_sec; } EXPORT_SYMBOL(get_seconds); struct timespec __current_kernel_time(void) { - return xtime_cache; + return xtime; } struct timespec current_kernel_time(void) @@ -912,7 +909,7 @@ struct timespec current_kernel_time(void) do { seq = read_seqbegin(&xtime_lock); - now = xtime_cache; + now = xtime; } while (read_seqretry(&xtime_lock, seq)); return now; @@ -927,7 +924,7 @@ struct timespec get_monotonic_coarse(void) do { seq = read_seqbegin(&xtime_lock); - now = xtime_cache; + now = xtime; mono = wall_to_monotonic; } while (read_seqretry(&xtime_lock, seq)); -- cgit v1.2.3 From d7e81c269db899b800e0963dc4aceece1f82a680 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 7 May 2010 18:07:38 -0700 Subject: clocksource: Add clocksource_register_hz/khz interface How to pick good mult/shift pairs has always been difficult to describe to folks writing clocksource drivers, since it requires careful tradeoffs in adjustment accuracy vs overflow limits. Now, with the clocks_calc_mult_shift function, its much easier. However, not many clocksources have converted to using that function, and there is still the issue of the max interval length assumption being made by each clocksource driver independently. So this patch simplifies the registration process by having clocksources be registered with a hz/khz value and the registration function taking care of setting mult/shift. This should take most of the confusion out of writing a clocksource driver. Additionally it also keeps the shift size tradeoff (more accuracy vs longer possible nohz times) centralized so the timekeeping core can keep track of the assumptions being made. [ tglx: Coding style and comments fixed ] Signed-off-by: John Stultz LKML-Reference: <1273280858-30143-1-git-send-email-johnstul@us.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 19 +++++++++++++++++- kernel/time/clocksource.c | 48 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 4bca8b60cdf7..5ea3c60c160c 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -273,7 +273,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift) } -/* used to install a new clocksource */ extern int clocksource_register(struct clocksource*); extern void clocksource_unregister(struct clocksource*); extern void clocksource_touch_watchdog(void); @@ -287,6 +286,24 @@ extern void clocksource_mark_unstable(struct clocksource *cs); extern void clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); +/* + * Don't call __clocksource_register_scale directly, use + * clocksource_register_hz/khz + */ +extern int +__clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq); + +static inline int clocksource_register_hz(struct clocksource *cs, u32 hz) +{ + return __clocksource_register_scale(cs, 1, hz); +} + +static inline int clocksource_register_khz(struct clocksource *cs, u32 khz) +{ + return __clocksource_register_scale(cs, 1000, khz); +} + + static inline void clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec) { diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1f5dde637457..f08e99c1d561 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -625,6 +625,54 @@ static void clocksource_enqueue(struct clocksource *cs) list_add(&cs->list, entry); } + +/* + * Maximum time we expect to go between ticks. This includes idle + * tickless time. It provides the trade off between selecting a + * mult/shift pair that is very precise but can only handle a short + * period of time, vs. a mult/shift pair that can handle long periods + * of time but isn't as precise. + * + * This is a subsystem constant, and actual hardware limitations + * may override it (ie: clocksources that wrap every 3 seconds). + */ +#define MAX_UPDATE_LENGTH 5 /* Seconds */ + +/** + * __clocksource_register_scale - Used to install new clocksources + * @t: clocksource to be registered + * @scale: Scale factor multiplied against freq to get clocksource hz + * @freq: clocksource frequency (cycles per second) divided by scale + * + * Returns -EBUSY if registration fails, zero otherwise. + * + * This *SHOULD NOT* be called directly! Please use the + * clocksource_register_hz() or clocksource_register_khz helper functions. + */ +int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) +{ + + /* + * Ideally we want to use some of the limits used in + * clocksource_max_deferment, to provide a more informed + * MAX_UPDATE_LENGTH. But for now this just gets the + * register interface working properly. + */ + clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, + NSEC_PER_SEC/scale, + MAX_UPDATE_LENGTH*scale); + cs->max_idle_ns = clocksource_max_deferment(cs); + + mutex_lock(&clocksource_mutex); + clocksource_enqueue(cs); + clocksource_select(); + clocksource_enqueue_watchdog(cs); + mutex_unlock(&clocksource_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(__clocksource_register_scale); + + /** * clocksource_register - Used to install new clocksources * @t: clocksource to be registered -- cgit v1.2.3