diff options
author | Heiko Carstens <heiko.carstens@de.ibm.com> | 2008-04-17 07:46:25 +0200 |
---|---|---|
committer | Heiko Carstens <heiko.carstens@de.ibm.com> | 2008-04-17 07:47:05 +0200 |
commit | 5a62b192196af9a798e2f2f4c6a1324e7edf2f4b (patch) | |
tree | 0aa96ba3153b257000be22e49befbde2b5bd6917 /arch/s390 | |
parent | d7b906897e9caae452947e33674df0a2d6f7e10f (diff) | |
download | lwn-5a62b192196af9a798e2f2f4c6a1324e7edf2f4b.tar.gz lwn-5a62b192196af9a798e2f2f4c6a1324e7edf2f4b.zip |
[S390] Convert s390 to GENERIC_CLOCKEVENTS.
This way we get rid of s390's NO_IDLE_HZ and use the generic dynticks
variant instead. In addition we get high resolution timers for free.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Diffstat (limited to 'arch/s390')
-rw-r--r-- | arch/s390/Kconfig | 24 | ||||
-rw-r--r-- | arch/s390/kernel/process.c | 4 | ||||
-rw-r--r-- | arch/s390/kernel/s390_ext.c | 9 | ||||
-rw-r--r-- | arch/s390/kernel/setup.c | 2 | ||||
-rw-r--r-- | arch/s390/kernel/time.c | 256 | ||||
-rw-r--r-- | arch/s390/lib/delay.c | 14 |
6 files changed, 92 insertions, 217 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index da6ea64cc34d..f6a68e178fc5 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -43,6 +43,9 @@ config GENERIC_HWEIGHT config GENERIC_TIME def_bool y +config GENERIC_CLOCKEVENTS + def_bool y + config GENERIC_BUG bool depends on BUG @@ -73,6 +76,8 @@ menu "Base setup" comment "Processor type and features" +source "kernel/time/Kconfig" + config 64BIT bool "64 bit kernel" help @@ -487,25 +492,6 @@ config APPLDATA_NET_SUM source kernel/Kconfig.hz -config NO_IDLE_HZ - bool "No HZ timer ticks in idle" - help - Switches the regular HZ timer off when the system is going idle. - This helps z/VM to detect that the Linux system is idle. VM can - then "swap-out" this guest which reduces memory usage. It also - reduces the overhead of idle systems. - - The HZ timer can be switched on/off via /proc/sys/kernel/hz_timer. - hz_timer=0 means HZ timer is disabled. hz_timer=1 means HZ - timer is active. - -config NO_IDLE_HZ_INIT - bool "HZ timer in idle off by default" - depends on NO_IDLE_HZ - help - The HZ timer is switched off in idle by default. That means the - HZ timer is already disabled at boot time. - config S390_HYPFS_FS bool "s390 hypervisor file system support" select SYS_HYPERVISOR diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index eb768ce88672..df033249f6b1 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -36,6 +36,7 @@ #include <linux/module.h> #include <linux/notifier.h> #include <linux/utsname.h> +#include <linux/tick.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -167,9 +168,10 @@ static void default_idle(void) void cpu_idle(void) { for (;;) { + tick_nohz_stop_sched_tick(); while (!need_resched()) default_idle(); - + tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c index 3a8772d3baea..947d8c74403b 100644 --- a/arch/s390/kernel/s390_ext.c +++ b/arch/s390/kernel/s390_ext.c @@ -120,12 +120,9 @@ void do_extint(struct pt_regs *regs, unsigned short code) old_regs = set_irq_regs(regs); irq_enter(); s390_idle_check(); - if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer) - /** - * Make sure that the i/o interrupt did not "overtake" - * the last HZ timer interrupt. - */ - account_ticks(S390_lowcore.int_clock); + if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) + /* Serve timer interrupts first. */ + clock_comparator_work(); kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++; index = ext_hash(code); for (p = ext_int_hash[index]; p; p = p->next) { diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 22040d087d8a..7141147e6b63 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -428,7 +428,7 @@ setup_lowcore(void) lc->io_new_psw.mask = psw_kernel_bits; lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; lc->ipl_device = S390_lowcore.ipl_device; - lc->jiffy_timer = -1LL; + lc->clock_comparator = -1ULL; lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE; lc->async_stack = (unsigned long) __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 925f9dc0b0a0..17c4de9e1b6b 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -30,7 +30,7 @@ #include <linux/timex.h> #include <linux/notifier.h> #include <linux/clocksource.h> - +#include <linux/clockchips.h> #include <asm/uaccess.h> #include <asm/delay.h> #include <asm/s390_ext.h> @@ -57,9 +57,9 @@ static ext_int_info_t ext_int_info_cc; static ext_int_info_t ext_int_etr_cc; -static u64 init_timer_cc; static u64 jiffies_timer_cc; -static u64 xtime_cc; + +static DEFINE_PER_CPU(struct clock_event_device, comparators); /* * Scheduler clock - returns current time in nanosec units. @@ -95,162 +95,40 @@ void tod_to_timeval(__u64 todval, struct timespec *xtime) #define s390_do_profile() do { ; } while(0) #endif /* CONFIG_PROFILING */ -/* - * Advance the per cpu tick counter up to the time given with the - * "time" argument. The per cpu update consists of accounting - * the virtual cpu time, calling update_process_times and calling - * the profiling hook. If xtime is before time it is advanced as well. - */ -void account_ticks(u64 time) +void clock_comparator_work(void) { - __u32 ticks; - __u64 tmp; - - /* Calculate how many ticks have passed. */ - if (time < S390_lowcore.jiffy_timer) - return; - tmp = time - S390_lowcore.jiffy_timer; - if (tmp >= 2*CLK_TICKS_PER_JIFFY) { /* more than two ticks ? */ - ticks = __div(tmp, CLK_TICKS_PER_JIFFY) + 1; - S390_lowcore.jiffy_timer += - CLK_TICKS_PER_JIFFY * (__u64) ticks; - } else if (tmp >= CLK_TICKS_PER_JIFFY) { - ticks = 2; - S390_lowcore.jiffy_timer += 2*CLK_TICKS_PER_JIFFY; - } else { - ticks = 1; - S390_lowcore.jiffy_timer += CLK_TICKS_PER_JIFFY; - } - -#ifdef CONFIG_SMP - /* - * Do not rely on the boot cpu to do the calls to do_timer. - * Spread it over all cpus instead. - */ - write_seqlock(&xtime_lock); - if (S390_lowcore.jiffy_timer > xtime_cc) { - __u32 xticks; - tmp = S390_lowcore.jiffy_timer - xtime_cc; - if (tmp >= 2*CLK_TICKS_PER_JIFFY) { - xticks = __div(tmp, CLK_TICKS_PER_JIFFY); - xtime_cc += (__u64) xticks * CLK_TICKS_PER_JIFFY; - } else { - xticks = 1; - xtime_cc += CLK_TICKS_PER_JIFFY; - } - do_timer(xticks); - } - write_sequnlock(&xtime_lock); -#else - do_timer(ticks); -#endif - - while (ticks--) - update_process_times(user_mode(get_irq_regs())); + struct clock_event_device *cd; + S390_lowcore.clock_comparator = -1ULL; + set_clock_comparator(S390_lowcore.clock_comparator); + cd = &__get_cpu_var(comparators); + cd->event_handler(cd); s390_do_profile(); } -#ifdef CONFIG_NO_IDLE_HZ - -#ifdef CONFIG_NO_IDLE_HZ_INIT -int sysctl_hz_timer = 0; -#else -int sysctl_hz_timer = 1; -#endif - -/* - * Stop the HZ tick on the current CPU. - * Only cpu_idle may call this function. - */ -static void stop_hz_timer(void) -{ - unsigned long flags; - unsigned long seq, next; - __u64 timer, todval; - int cpu = smp_processor_id(); - - if (sysctl_hz_timer != 0) - return; - - cpu_set(cpu, nohz_cpu_mask); - - /* - * Leave the clock comparator set up for the next timer - * tick if either rcu or a softirq is pending. - */ - if (rcu_needs_cpu(cpu) || local_softirq_pending()) { - cpu_clear(cpu, nohz_cpu_mask); - return; - } - - /* - * This cpu is going really idle. Set up the clock comparator - * for the next event. - */ - next = next_timer_interrupt(); - do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); - timer = ((__u64) next) - ((__u64) jiffies) + jiffies_64; - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); - todval = -1ULL; - /* Be careful about overflows. */ - if (timer < (-1ULL / CLK_TICKS_PER_JIFFY)) { - timer = jiffies_timer_cc + timer * CLK_TICKS_PER_JIFFY; - if (timer >= jiffies_timer_cc) - todval = timer; - } - set_clock_comparator(todval); -} - /* - * Start the HZ tick on the current CPU. - * Only cpu_idle may call this function. + * Fixup the clock comparator. */ -static void start_hz_timer(void) +static void fixup_clock_comparator(unsigned long long delta) { - if (!cpu_isset(smp_processor_id(), nohz_cpu_mask)) + /* If nobody is waiting there's nothing to fix. */ + if (S390_lowcore.clock_comparator == -1ULL) return; - account_ticks(get_clock()); - set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION); - cpu_clear(smp_processor_id(), nohz_cpu_mask); -} - -static int nohz_idle_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - switch (action) { - case S390_CPU_IDLE: - stop_hz_timer(); - break; - case S390_CPU_NOT_IDLE: - start_hz_timer(); - break; - } - return NOTIFY_OK; + S390_lowcore.clock_comparator += delta; + set_clock_comparator(S390_lowcore.clock_comparator); } -static struct notifier_block nohz_idle_nb = { - .notifier_call = nohz_idle_notify, -}; - -static void __init nohz_init(void) +static int s390_next_event(unsigned long delta, + struct clock_event_device *evt) { - if (register_idle_notifier(&nohz_idle_nb)) - panic("Couldn't register idle notifier"); + S390_lowcore.clock_comparator = get_clock() + delta; + set_clock_comparator(S390_lowcore.clock_comparator); + return 0; } -#endif - -/* - * Set up per cpu jiffy timer and set the clock comparator. - */ -static void setup_jiffy_timer(void) +static void s390_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) { - /* Set up clock comparator to next jiffy. */ - S390_lowcore.jiffy_timer = - jiffies_timer_cc + (jiffies_64 + 1) * CLK_TICKS_PER_JIFFY; - set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION); } /* @@ -259,7 +137,26 @@ static void setup_jiffy_timer(void) */ void init_cpu_timer(void) { - setup_jiffy_timer(); + struct clock_event_device *cd; + int cpu; + + S390_lowcore.clock_comparator = -1ULL; + set_clock_comparator(S390_lowcore.clock_comparator); + + cpu = smp_processor_id(); + cd = &per_cpu(comparators, cpu); + cd->name = "comparator"; + cd->features = CLOCK_EVT_FEAT_ONESHOT; + cd->mult = 16777; + cd->shift = 12; + cd->min_delta_ns = 1; + cd->max_delta_ns = LONG_MAX; + cd->rating = 400; + cd->cpumask = cpumask_of_cpu(cpu); + cd->set_next_event = s390_next_event; + cd->set_mode = s390_set_mode; + + clockevents_register_device(cd); /* Enable clock comparator timer interrupt. */ __ctl_set_bit(0,11); @@ -270,8 +167,6 @@ void init_cpu_timer(void) static void clock_comparator_interrupt(__u16 code) { - /* set clock comparator for next tick */ - set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION); } static void etr_reset(void); @@ -316,8 +211,9 @@ static struct clocksource clocksource_tod = { */ void __init time_init(void) { + u64 init_timer_cc; + init_timer_cc = reset_tod_clock(); - xtime_cc = init_timer_cc + CLK_TICKS_PER_JIFFY; jiffies_timer_cc = init_timer_cc - jiffies_64 * CLK_TICKS_PER_JIFFY; /* set xtime */ @@ -342,10 +238,6 @@ void __init time_init(void) /* Enable TOD clock interrupts on the boot cpu. */ init_cpu_timer(); -#ifdef CONFIG_NO_IDLE_HZ - nohz_init(); -#endif - #ifdef CONFIG_VIRT_TIMER vtime_init(); #endif @@ -699,53 +591,49 @@ static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p) } /* - * The time is "clock". xtime is what we think the time is. + * The time is "clock". old is what we think the time is. * Adjust the value by a multiple of jiffies and add the delta to ntp. * "delay" is an approximation how long the synchronization took. If * the time correction is positive, then "delay" is subtracted from * the time difference and only the remaining part is passed to ntp. */ -static void etr_adjust_time(unsigned long long clock, unsigned long long delay) +static unsigned long long etr_adjust_time(unsigned long long old, + unsigned long long clock, + unsigned long long delay) { unsigned long long delta, ticks; struct timex adjust; - /* - * We don't have to take the xtime lock because the cpu - * executing etr_adjust_time is running disabled in - * tasklet context and all other cpus are looping in - * etr_sync_cpu_start. - */ - if (clock > xtime_cc) { + if (clock > old) { /* It is later than we thought. */ - delta = ticks = clock - xtime_cc; + delta = ticks = clock - old; delta = ticks = (delta < delay) ? 0 : delta - delay; delta -= do_div(ticks, CLK_TICKS_PER_JIFFY); - init_timer_cc = init_timer_cc + delta; - jiffies_timer_cc = jiffies_timer_cc + delta; - xtime_cc = xtime_cc + delta; adjust.offset = ticks * (1000000 / HZ); } else { /* It is earlier than we thought. */ - delta = ticks = xtime_cc - clock; + delta = ticks = old - clock; delta -= do_div(ticks, CLK_TICKS_PER_JIFFY); - init_timer_cc = init_timer_cc - delta; - jiffies_timer_cc = jiffies_timer_cc - delta; - xtime_cc = xtime_cc - delta; + delta = -delta; adjust.offset = -ticks * (1000000 / HZ); } + jiffies_timer_cc += delta; if (adjust.offset != 0) { printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n", adjust.offset); adjust.modes = ADJ_OFFSET_SINGLESHOT; do_adjtimex(&adjust); } + return delta; } +static struct { + int in_sync; + unsigned long long fixup_cc; +} etr_sync; + static void etr_sync_cpu_start(void *dummy) { - int *in_sync = dummy; - etr_enable_sync_clock(); /* * This looks like a busy wait loop but it isn't. etr_sync_cpus @@ -753,7 +641,7 @@ static void etr_sync_cpu_start(void *dummy) * __udelay will stop the cpu on an enabled wait psw until the * TOD is running again. */ - while (*in_sync == 0) { + while (etr_sync.in_sync == 0) { __udelay(1); /* * A different cpu changes *in_sync. Therefore use @@ -761,14 +649,14 @@ static void etr_sync_cpu_start(void *dummy) */ barrier(); } - if (*in_sync != 1) + if (etr_sync.in_sync != 1) /* Didn't work. Clear per-cpu in sync bit again. */ etr_disable_sync_clock(NULL); /* * This round of TOD syncing is done. Set the clock comparator * to the next tick and let the processor continue. */ - setup_jiffy_timer(); + fixup_clock_comparator(etr_sync.fixup_cc); } static void etr_sync_cpu_end(void *dummy) @@ -783,8 +671,8 @@ static void etr_sync_cpu_end(void *dummy) static int etr_sync_clock(struct etr_aib *aib, int port) { struct etr_aib *sync_port; - unsigned long long clock, delay; - int in_sync, follows; + unsigned long long clock, old_clock, delay, delta; + int follows; int rc; /* Check if the current aib is adjacent to the sync port aib. */ @@ -799,9 +687,9 @@ static int etr_sync_clock(struct etr_aib *aib, int port) * successfully synced the clock. smp_call_function will * return after all other cpus are in etr_sync_cpu_start. */ - in_sync = 0; + memset(&etr_sync, 0, sizeof(etr_sync)); preempt_disable(); - smp_call_function(etr_sync_cpu_start,&in_sync,0,0); + smp_call_function(etr_sync_cpu_start, NULL, 0, 0); local_irq_disable(); etr_enable_sync_clock(); @@ -809,6 +697,7 @@ static int etr_sync_clock(struct etr_aib *aib, int port) __ctl_set_bit(14, 21); __ctl_set_bit(0, 29); clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32; + old_clock = get_clock(); if (set_clock(clock) == 0) { __udelay(1); /* Wait for the clock to start. */ __ctl_clear_bit(0, 29); @@ -817,16 +706,17 @@ static int etr_sync_clock(struct etr_aib *aib, int port) /* Adjust Linux timing variables. */ delay = (unsigned long long) (aib->edf2.etv - sync_port->edf2.etv) << 32; - etr_adjust_time(clock, delay); - setup_jiffy_timer(); + delta = etr_adjust_time(old_clock, clock, delay); + etr_sync.fixup_cc = delta; + fixup_clock_comparator(delta); /* Verify that the clock is properly set. */ if (!etr_aib_follows(sync_port, aib, port)) { /* Didn't work. */ etr_disable_sync_clock(NULL); - in_sync = -EAGAIN; + etr_sync.in_sync = -EAGAIN; rc = -EAGAIN; } else { - in_sync = 1; + etr_sync.in_sync = 1; rc = 0; } } else { @@ -834,7 +724,7 @@ static int etr_sync_clock(struct etr_aib *aib, int port) __ctl_clear_bit(0, 29); __ctl_clear_bit(14, 21); etr_disable_sync_clock(NULL); - in_sync = -EAGAIN; + etr_sync.in_sync = -EAGAIN; rc = -EAGAIN; } local_irq_enable(); diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index 70f2a862b670..eae21a8ac72d 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -34,7 +34,7 @@ void __delay(unsigned long loops) */ void __udelay(unsigned long usecs) { - u64 end, time, jiffy_timer = 0; + u64 end, time, old_cc = 0; unsigned long flags, cr0, mask, dummy; int irq_context; @@ -43,8 +43,8 @@ void __udelay(unsigned long usecs) local_bh_disable(); local_irq_save(flags); if (raw_irqs_disabled_flags(flags)) { - jiffy_timer = S390_lowcore.jiffy_timer; - S390_lowcore.jiffy_timer = -1ULL - (4096 << 12); + old_cc = S390_lowcore.clock_comparator; + S390_lowcore.clock_comparator = -1ULL; __ctl_store(cr0, 0, 0); dummy = (cr0 & 0xffff00e0) | 0x00000800; __ctl_load(dummy , 0, 0); @@ -55,8 +55,8 @@ void __udelay(unsigned long usecs) end = get_clock() + ((u64) usecs << 12); do { - time = end < S390_lowcore.jiffy_timer ? - end : S390_lowcore.jiffy_timer; + time = end < S390_lowcore.clock_comparator ? + end : S390_lowcore.clock_comparator; set_clock_comparator(time); trace_hardirqs_on(); __load_psw_mask(mask); @@ -65,10 +65,10 @@ void __udelay(unsigned long usecs) if (raw_irqs_disabled_flags(flags)) { __ctl_load(cr0, 0, 0); - S390_lowcore.jiffy_timer = jiffy_timer; + S390_lowcore.clock_comparator = old_cc; } if (!irq_context) _local_bh_enable(); - set_clock_comparator(S390_lowcore.jiffy_timer); + set_clock_comparator(S390_lowcore.clock_comparator); local_irq_restore(flags); } |