diff options
author | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2005-11-11 21:15:21 +1100 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2005-11-11 22:25:39 +1100 |
commit | a7f290dad32ee34d931561b7943c858fe2aae503 (patch) | |
tree | 850f04ed9ffba8aef6e151fa9c9e8a0c667bb795 | |
parent | 6761c4a07378e19e3710bb69cea65795774529b1 (diff) | |
download | lwn-a7f290dad32ee34d931561b7943c858fe2aae503.tar.gz lwn-a7f290dad32ee34d931561b7943c858fe2aae503.zip |
[PATCH] powerpc: Merge vdso's and add vdso support to 32 bits kernel
This patch moves the vdso's to arch/powerpc, adds support for the 32
bits vdso to the 32 bits kernel, rename systemcfg (finally !), and adds
some new (still untested) routines to both vdso's: clock_gettime() with
support for CLOCK_REALTIME and CLOCK_MONOTONIC, clock_getres() (same
clocks) and get_tbfreq() for glibc to retreive the timebase frequency.
Tom,Steve: The implementation of get_tbfreq() I've done for 32 bits
returns a long long (r3, r4) not a long. This is such that if we ever
add support for >4Ghz timebases on ppc32, the userland interface won't
have to change.
I have tested gettimeofday() using some glibc patches in both ppc32 and
ppc64 kernels using 32 bits userland (I haven't had a chance to test a
64 bits userland yet, but the implementation didn't change and was
tested earlier). I haven't tested yet the new functions.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
41 files changed, 1541 insertions, 430 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index c68eace12a9d..9a74b7ab03a4 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -12,11 +12,13 @@ CFLAGS_btext.o += -fPIC endif obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ - irq.o signal_32.o pmc.o + irq.o signal_32.o pmc.o vdso.o +obj-y += vdso32/ obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \ signal_64.o ptrace32.o systbl.o \ paca.o ioctl32.o cpu_setup_power4.o \ firmware.o sysfs.o udbg.o +obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o obj-$(CONFIG_POWER4) += idle_power4.o obj-$(CONFIG_PPC_OF) += of_device.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 8793102711a8..4550eb4f4fbd 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -37,12 +37,12 @@ #include <asm/cputable.h> #include <asm/thread_info.h> #include <asm/rtas.h> +#include <asm/vdso_datapage.h> #ifdef CONFIG_PPC64 #include <asm/paca.h> #include <asm/lppaca.h> #include <asm/iseries/hv_lp_event.h> #include <asm/cache.h> -#include <asm/systemcfg.h> #include <asm/compat.h> #endif @@ -251,25 +251,42 @@ int main(void) DEFINE(TASK_SIZE, TASK_SIZE); DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28); -#else /* CONFIG_PPC64 */ - /* systemcfg offsets for use by vdso */ - DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct systemcfg, tb_orig_stamp)); - DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct systemcfg, tb_ticks_per_sec)); - DEFINE(CFG_TB_TO_XS, offsetof(struct systemcfg, tb_to_xs)); - DEFINE(CFG_STAMP_XSEC, offsetof(struct systemcfg, stamp_xsec)); - DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct systemcfg, tb_update_count)); - DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct systemcfg, tz_minuteswest)); - DEFINE(CFG_TZ_DSTTIME, offsetof(struct systemcfg, tz_dsttime)); - DEFINE(CFG_SYSCALL_MAP32, offsetof(struct systemcfg, syscall_map_32)); - DEFINE(CFG_SYSCALL_MAP64, offsetof(struct systemcfg, syscall_map_64)); +#endif /* ! CONFIG_PPC64 */ - /* timeval/timezone offsets for use by vdso */ + /* datapage offsets for use by vdso */ + DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp)); + DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec)); + DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs)); + DEFINE(CFG_STAMP_XSEC, offsetof(struct vdso_data, stamp_xsec)); + DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count)); + DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest)); + DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); + DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32)); + DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec)); + DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); +#ifdef CONFIG_PPC64 + DEFINE(CFG_SYSCALL_MAP64, offsetof(struct vdso_data, syscall_map_64)); DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec)); DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec)); DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec)); DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec)); + DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec)); + DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec)); +#else + DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec)); + DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec)); + DEFINE(TSPEC32_TV_SEC, offsetof(struct timespec, tv_sec)); + DEFINE(TSPEC32_TV_NSEC, offsetof(struct timespec, tv_nsec)); +#endif + /* timeval/timezone offsets for use by vdso */ DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); -#endif /* CONFIG_PPC64 */ + + /* Other bits used by the vdso */ + DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); + DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); + DEFINE(CLOCK_REALTIME_RES, TICK_NSEC); + return 0; } diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 3cf2517c5f91..a7b68f911eb1 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -15,17 +15,10 @@ #include <asm/processor.h> #include <asm/ptrace.h> #include <asm/page.h> -#include <asm/systemcfg.h> #include <asm/lppaca.h> #include <asm/iseries/it_lp_queue.h> #include <asm/paca.h> -static union { - struct systemcfg data; - u8 page[PAGE_SIZE]; -} systemcfg_store __attribute__((__section__(".data.page.aligned"))); -struct systemcfg *_systemcfg = &systemcfg_store.data; - /* This symbol is provided by the linker - let it fill in the paca * field correctly */ diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_ppc64.c index a1c19502fe8b..7ba42a405f41 100644 --- a/arch/powerpc/kernel/proc_ppc64.c +++ b/arch/powerpc/kernel/proc_ppc64.c @@ -23,7 +23,7 @@ #include <linux/slab.h> #include <linux/kernel.h> -#include <asm/systemcfg.h> +#include <asm/vdso_datapage.h> #include <asm/rtas.h> #include <asm/uaccess.h> #include <asm/prom.h> @@ -72,7 +72,7 @@ static int __init proc_ppc64_init(void) if (!pde) return 1; pde->nlink = 1; - pde->data = _systemcfg; + pde->data = vdso_data; pde->size = PAGE_SIZE; pde->proc_fops = &page_map_fops; diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index ae1a36449ccd..7a95b8a28354 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -32,7 +32,6 @@ #include <asm/rtas.h> #include <asm/machdep.h> /* for ppc_md */ #include <asm/time.h> -#include <asm/systemcfg.h> /* Token for Sensors */ #define KEY_SWITCH 0x0001 diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index a754de63450f..33e7f2c7f194 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -33,7 +33,7 @@ #include <asm/io.h> #include <asm/prom.h> #include <asm/processor.h> -#include <asm/systemcfg.h> +#include <asm/vdso_datapage.h> #include <asm/pgtable.h> #include <asm/smp.h> #include <asm/elf.h> @@ -564,7 +564,7 @@ void __init smp_setup_cpu_maps(void) cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]); } - _systemcfg->processorCount = num_present_cpus(); + vdso_data->processorCount = num_present_cpus(); #endif /* CONFIG_PPC64 */ } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6791668213e7..fdbd9f9122f2 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -57,7 +57,6 @@ #include <asm/lmb.h> #include <asm/iseries/it_lp_naca.h> #include <asm/firmware.h> -#include <asm/systemcfg.h> #include <asm/xmon.h> #include <asm/udbg.h> @@ -375,9 +374,8 @@ static void __init initialize_cache_info(void) DBG("Argh, can't find dcache properties ! " "sizep: %p, lsizep: %p\n", sizep, lsizep); - _systemcfg->dcache_size = ppc64_caches.dsize = size; - _systemcfg->dcache_line_size = - ppc64_caches.dline_size = lsize; + ppc64_caches.dsize = size; + ppc64_caches.dline_size = lsize; ppc64_caches.log_dline_size = __ilog2(lsize); ppc64_caches.dlines_per_page = PAGE_SIZE / lsize; @@ -393,22 +391,13 @@ static void __init initialize_cache_info(void) DBG("Argh, can't find icache properties ! " "sizep: %p, lsizep: %p\n", sizep, lsizep); - _systemcfg->icache_size = ppc64_caches.isize = size; - _systemcfg->icache_line_size = - ppc64_caches.iline_size = lsize; + ppc64_caches.isize = size; + ppc64_caches.iline_size = lsize; ppc64_caches.log_iline_size = __ilog2(lsize); ppc64_caches.ilines_per_page = PAGE_SIZE / lsize; } } - /* Add an eye catcher and the systemcfg layout version number */ - strcpy(_systemcfg->eye_catcher, "SYSTEMCFG:PPC64"); - _systemcfg->version.major = SYSTEMCFG_MAJOR; - _systemcfg->version.minor = SYSTEMCFG_MINOR; - _systemcfg->processor = mfspr(SPRN_PVR); - _systemcfg->platform = _machine; - _systemcfg->physicalMemorySize = lmb_phys_mem_size(); - DBG(" <- initialize_cache_info()\n"); } @@ -495,15 +484,14 @@ void __init setup_system(void) printk("-----------------------------------------------------\n"); printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size); - printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller); - printk("systemcfg = 0x%p\n", _systemcfg); - printk("systemcfg->platform = 0x%x\n", _systemcfg->platform); - printk("systemcfg->processorCount = 0x%lx\n", _systemcfg->processorCount); - printk("systemcfg->physicalMemorySize = 0x%lx\n", _systemcfg->physicalMemorySize); + printk("ppc64_interrupt_controller = 0x%ld\n", + ppc64_interrupt_controller); + printk("platform = 0x%x\n", _machine); + printk("physicalMemorySize = 0x%lx\n", lmb_phys_mem_size()); printk("ppc64_caches.dcache_line_size = 0x%x\n", - ppc64_caches.dline_size); + ppc64_caches.dline_size); printk("ppc64_caches.icache_line_size = 0x%x\n", - ppc64_caches.iline_size); + ppc64_caches.iline_size); printk("htab_address = 0x%p\n", htab_address); printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); printk("-----------------------------------------------------\n"); @@ -568,33 +556,6 @@ static void __init emergency_stack_init(void) } /* - * Called from setup_arch to initialize the bitmap of available - * syscalls in the systemcfg page - */ -void __init setup_syscall_map(void) -{ - unsigned int i, count64 = 0, count32 = 0; - extern unsigned long *sys_call_table; - extern unsigned long sys_ni_syscall; - - - for (i = 0; i < __NR_syscalls; i++) { - if (sys_call_table[i*2] != sys_ni_syscall) { - count64++; - _systemcfg->syscall_map_64[i >> 5] |= - 0x80000000UL >> (i & 0x1f); - } - if (sys_call_table[i*2+1] != sys_ni_syscall) { - count32++; - _systemcfg->syscall_map_32[i >> 5] |= - 0x80000000UL >> (i & 0x1f); - } - } - printk(KERN_INFO "Syscall map setup, %d 32-bit and %d 64-bit syscalls\n", - count32, count64); -} - -/* * Called into from start_kernel, after lock_kernel has been called. * Initializes bootmem, which is unsed to manage page allocation until * mem_init is called. @@ -635,9 +596,6 @@ void __init setup_arch(char **cmdline_p) do_init_bootmem(); sparse_init(); - /* initialize the syscall map in systemcfg */ - setup_syscall_map(); - #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index a7c4515f320f..8bdf95b7e420 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -43,10 +43,10 @@ #include <asm/uaccess.h> #include <asm/cacheflush.h> #include <asm/sigcontext.h> +#include <asm/vdso.h> #ifdef CONFIG_PPC64 #include "ppc32.h" #include <asm/unistd.h> -#include <asm/vdso.h> #else #include <asm/ucontext.h> #include <asm/pgtable.h> @@ -809,14 +809,11 @@ static int handle_rt_signal(unsigned long sig, struct k_sigaction *ka, /* Save user registers on the stack */ frame = &rt_sf->uc.uc_mcontext; -#ifdef CONFIG_PPC64 if (vdso32_rt_sigtramp && current->thread.vdso_base) { if (save_user_regs(regs, frame, 0)) goto badframe; regs->link = current->thread.vdso_base + vdso32_rt_sigtramp; - } else -#endif - { + } else { if (save_user_regs(regs, frame, __NR_rt_sigreturn)) goto badframe; regs->link = (unsigned long) frame->tramp; @@ -1090,14 +1087,11 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka, || __put_user(sig, &sc->signal)) goto badframe; -#ifdef CONFIG_PPC64 if (vdso32_sigtramp && current->thread.vdso_base) { if (save_user_regs(regs, &frame->mctx, 0)) goto badframe; regs->link = current->thread.vdso_base + vdso32_sigtramp; - } else -#endif - { + } else { if (save_user_regs(regs, &frame->mctx, __NR_sigreturn)) goto badframe; regs->link = (unsigned long) frame->mctx.tramp; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index e28a139c29d0..62dfc5b8d765 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -44,7 +44,7 @@ #include <asm/cputable.h> #include <asm/system.h> #include <asm/mpic.h> -#include <asm/systemcfg.h> +#include <asm/vdso_datapage.h> #ifdef CONFIG_PPC64 #include <asm/paca.h> #endif @@ -371,7 +371,7 @@ int generic_cpu_disable(void) cpu_clear(cpu, cpu_online_map); #ifdef CONFIG_PPC64 - _systemcfg->processorCount--; + vdso_data->processorCount--; fixup_irqs(cpu_online_map); #endif return 0; diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 850af198fb5f..0f0c3a9ae2e5 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -16,7 +16,6 @@ #include <asm/firmware.h> #include <asm/hvcall.h> #include <asm/prom.h> -#include <asm/systemcfg.h> #include <asm/paca.h> #include <asm/lppaca.h> #include <asm/machdep.h> diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 260b6ecd26a9..070b4b458aaf 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -62,8 +62,8 @@ #include <asm/irq.h> #include <asm/div64.h> #include <asm/smp.h> +#include <asm/vdso_datapage.h> #ifdef CONFIG_PPC64 -#include <asm/systemcfg.h> #include <asm/firmware.h> #endif #ifdef CONFIG_PPC_ISERIES @@ -261,7 +261,6 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec, do_gtod.varp = temp_varp; do_gtod.var_idx = temp_idx; -#ifdef CONFIG_PPC64 /* * tb_update_count is used to allow the userspace gettimeofday code * to assure itself that it sees a consistent view of the tb_to_xs and @@ -271,14 +270,15 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec, * tb_to_xs and stamp_xsec values are consistent. If not, then it * loops back and reads them again until this criteria is met. */ - ++(_systemcfg->tb_update_count); + ++(vdso_data->tb_update_count); smp_wmb(); - _systemcfg->tb_orig_stamp = new_tb_stamp; - _systemcfg->stamp_xsec = new_stamp_xsec; - _systemcfg->tb_to_xs = new_tb_to_xs; + vdso_data->tb_orig_stamp = new_tb_stamp; + vdso_data->stamp_xsec = new_stamp_xsec; + vdso_data->tb_to_xs = new_tb_to_xs; + vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec; + vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec; smp_wmb(); - ++(_systemcfg->tb_update_count); -#endif + ++(vdso_data->tb_update_count); } /* @@ -357,9 +357,8 @@ static void iSeries_tb_recal(void) do_gtod.tb_ticks_per_sec = tb_ticks_per_sec; tb_to_xs = divres.result_low; do_gtod.varp->tb_to_xs = tb_to_xs; - _systemcfg->tb_ticks_per_sec = - tb_ticks_per_sec; - _systemcfg->tb_to_xs = tb_to_xs; + vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; + vdso_data->tb_to_xs = tb_to_xs; } else { printk( "Titan recalibrate: FAILED (difference > 4 percent)\n" @@ -561,10 +560,8 @@ int do_settimeofday(struct timespec *tv) new_xsec += (u64)new_sec * XSEC_PER_SEC - tb_delta_xs; update_gtod(tb_last_jiffy, new_xsec, do_gtod.varp->tb_to_xs); -#ifdef CONFIG_PPC64 - _systemcfg->tz_minuteswest = sys_tz.tz_minuteswest; - _systemcfg->tz_dsttime = sys_tz.tz_dsttime; -#endif + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; write_sequnlock_irqrestore(&xtime_lock, flags); clock_was_set(); @@ -713,13 +710,12 @@ void __init time_init(void) do_gtod.tb_ticks_per_sec = tb_ticks_per_sec; do_gtod.varp->tb_to_xs = tb_to_xs; do_gtod.tb_to_us = tb_to_us; -#ifdef CONFIG_PPC64 - _systemcfg->tb_orig_stamp = tb_last_jiffy; - _systemcfg->tb_update_count = 0; - _systemcfg->tb_ticks_per_sec = tb_ticks_per_sec; - _systemcfg->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC; - _systemcfg->tb_to_xs = tb_to_xs; -#endif + + vdso_data->tb_orig_stamp = tb_last_jiffy; + vdso_data->tb_update_count = 0; + vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; + vdso_data->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC; + vdso_data->tb_to_xs = tb_to_xs; time_freq = 0; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 2020bb7648fb..1511454c4690 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -49,7 +49,6 @@ #ifdef CONFIG_PPC64 #include <asm/firmware.h> #include <asm/processor.h> -#include <asm/systemcfg.h> #endif #ifdef CONFIG_PPC64 /* XXX */ diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c new file mode 100644 index 000000000000..0d4d8bec0df4 --- /dev/null +++ b/arch/powerpc/kernel/vdso.c @@ -0,0 +1,746 @@ +/* + * linux/arch/ppc64/kernel/vdso.c + * + * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/elf.h> +#include <linux/security.h> +#include <linux/bootmem.h> + +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/processor.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/lmb.h> +#include <asm/machdep.h> +#include <asm/cputable.h> +#include <asm/sections.h> +#include <asm/vdso.h> +#include <asm/vdso_datapage.h> + +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif + +/* Max supported size for symbol names */ +#define MAX_SYMNAME 64 + +extern char vdso32_start, vdso32_end; +static void *vdso32_kbase = &vdso32_start; +unsigned int vdso32_pages; +unsigned long vdso32_sigtramp; +unsigned long vdso32_rt_sigtramp; + +#ifdef CONFIG_PPC64 +extern char vdso64_start, vdso64_end; +static void *vdso64_kbase = &vdso64_start; +unsigned int vdso64_pages; +unsigned long vdso64_rt_sigtramp; +#endif /* CONFIG_PPC64 */ + +/* + * The vdso data page (aka. systemcfg for old ppc64 fans) is here. + * Once the early boot kernel code no longer needs to muck around + * with it, it will become dynamically allocated + */ +static union { + struct vdso_data data; + u8 page[PAGE_SIZE]; +} vdso_data_store __attribute__((__section__(".data.page_aligned"))); +struct vdso_data *vdso_data = &vdso_data_store.data; + +/* Format of the patch table */ +struct vdso_patch_def +{ + unsigned long ftr_mask, ftr_value; + const char *gen_name; + const char *fix_name; +}; + +/* Table of functions to patch based on the CPU type/revision + * + * Currently, we only change sync_dicache to do nothing on processors + * with a coherent icache + */ +static struct vdso_patch_def vdso_patches[] = { + { + CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE, + "__kernel_sync_dicache", "__kernel_sync_dicache_p5" + }, + { + CPU_FTR_USE_TB, 0, + "__kernel_gettimeofday", NULL + }, +}; + +/* + * Some infos carried around for each of them during parsing at + * boot time. + */ +struct lib32_elfinfo +{ + Elf32_Ehdr *hdr; /* ptr to ELF */ + Elf32_Sym *dynsym; /* ptr to .dynsym section */ + unsigned long dynsymsize; /* size of .dynsym section */ + char *dynstr; /* ptr to .dynstr section */ + unsigned long text; /* offset of .text section in .so */ +}; + +struct lib64_elfinfo +{ + Elf64_Ehdr *hdr; + Elf64_Sym *dynsym; + unsigned long dynsymsize; + char *dynstr; + unsigned long text; +}; + + +#ifdef __DEBUG +static void dump_one_vdso_page(struct page *pg, struct page *upg) +{ + printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT), + page_count(pg), + pg->flags); + if (upg/* && pg != upg*/) { + printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg) + << PAGE_SHIFT), + page_count(upg), + upg->flags); + } + printk("\n"); +} + +static void dump_vdso_pages(struct vm_area_struct * vma) +{ + int i; + + if (!vma || test_thread_flag(TIF_32BIT)) { + printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase); + for (i=0; i<vdso32_pages; i++) { + struct page *pg = virt_to_page(vdso32_kbase + + i*PAGE_SIZE); + struct page *upg = (vma && vma->vm_mm) ? + follow_page(vma->vm_mm, vma->vm_start + + i*PAGE_SIZE, 0) + : NULL; + dump_one_vdso_page(pg, upg); + } + } + if (!vma || !test_thread_flag(TIF_32BIT)) { + printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase); + for (i=0; i<vdso64_pages; i++) { + struct page *pg = virt_to_page(vdso64_kbase + + i*PAGE_SIZE); + struct page *upg = (vma && vma->vm_mm) ? + follow_page(vma->vm_mm, vma->vm_start + + i*PAGE_SIZE, 0) + : NULL; + dump_one_vdso_page(pg, upg); + } + } +} +#endif /* DEBUG */ + +/* + * Keep a dummy vma_close for now, it will prevent VMA merging. + */ +static void vdso_vma_close(struct vm_area_struct * vma) +{ +} + +/* + * Our nopage() function, maps in the actual vDSO kernel pages, they will + * be mapped read-only by do_no_page(), and eventually COW'ed, either + * right away for an initial write access, or by do_wp_page(). + */ +static struct page * vdso_vma_nopage(struct vm_area_struct * vma, + unsigned long address, int *type) +{ + unsigned long offset = address - vma->vm_start; + struct page *pg; +#ifdef CONFIG_PPC64 + void *vbase = test_thread_flag(TIF_32BIT) ? + vdso32_kbase : vdso64_kbase; +#else + void *vbase = vdso32_kbase; +#endif + + DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n", + current->comm, address, offset); + + if (address < vma->vm_start || address > vma->vm_end) + return NOPAGE_SIGBUS; + + /* + * Last page is systemcfg. + */ + if ((vma->vm_end - address) <= PAGE_SIZE) + pg = virt_to_page(vdso_data); + else + pg = virt_to_page(vbase + offset); + + get_page(pg); + DBG(" ->page count: %d\n", page_count(pg)); + + return pg; +} + +static struct vm_operations_struct vdso_vmops = { + .close = vdso_vma_close, + .nopage = vdso_vma_nopage, +}; + +/* + * This is called from binfmt_elf, we create the special vma for the + * vDSO and insert it into the mm struct tree + */ +int arch_setup_additional_pages(struct linux_binprm *bprm, + int executable_stack) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long vdso_pages; + unsigned long vdso_base; + +#ifdef CONFIG_PPC64 + if (test_thread_flag(TIF_32BIT)) { + vdso_pages = vdso32_pages; + vdso_base = VDSO32_MBASE; + } else { + vdso_pages = vdso64_pages; + vdso_base = VDSO64_MBASE; + } +#else + vdso_pages = vdso32_pages; + vdso_base = VDSO32_MBASE; +#endif + + current->thread.vdso_base = 0; + + /* vDSO has a problem and was disabled, just don't "enable" it for the + * process + */ + if (vdso_pages == 0) + return 0; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (vma == NULL) + return -ENOMEM; + + memset(vma, 0, sizeof(*vma)); + + /* Add a page to the vdso size for the data page */ + vdso_pages ++; + + /* + * pick a base address for the vDSO in process space. We try to put it + * at vdso_base which is the "natural" base for it, but we might fail + * and end up putting it elsewhere. + */ + vdso_base = get_unmapped_area(NULL, vdso_base, + vdso_pages << PAGE_SHIFT, 0, 0); + if (vdso_base & ~PAGE_MASK) { + kmem_cache_free(vm_area_cachep, vma); + return (int)vdso_base; + } + + current->thread.vdso_base = vdso_base; + + vma->vm_mm = mm; + vma->vm_start = current->thread.vdso_base; + vma->vm_end = vma->vm_start + (vdso_pages << PAGE_SHIFT); + + /* + * our vma flags don't have VM_WRITE so by default, the process isn't + * allowed to write those pages. + * gdb can break that with ptrace interface, and thus trigger COW on + * those pages but it's then your responsibility to never do that on + * the "data" page of the vDSO or you'll stop getting kernel updates + * and your nice userland gettimeofday will be totally dead. + * It's fine to use that for setting breakpoints in the vDSO code + * pages though + */ + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | + VM_MAYEXEC | VM_RESERVED; + vma->vm_flags |= mm->def_flags; + vma->vm_page_prot = protection_map[vma->vm_flags & 0x7]; + vma->vm_ops = &vdso_vmops; + + down_write(&mm->mmap_sem); + if (insert_vm_struct(mm, vma)) { + up_write(&mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return -ENOMEM; + } + mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + up_write(&mm->mmap_sem); + + return 0; +} + +static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname, + unsigned long *size) +{ + Elf32_Shdr *sechdrs; + unsigned int i; + char *secnames; + + /* Grab section headers and strings so we can tell who is who */ + sechdrs = (void *)ehdr + ehdr->e_shoff; + secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; + + /* Find the section they want */ + for (i = 1; i < ehdr->e_shnum; i++) { + if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { + if (size) + *size = sechdrs[i].sh_size; + return (void *)ehdr + sechdrs[i].sh_offset; + } + } + *size = 0; + return NULL; +} + +static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, + const char *symname) +{ + unsigned int i; + char name[MAX_SYMNAME], *c; + + for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) { + if (lib->dynsym[i].st_name == 0) + continue; + strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, + MAX_SYMNAME); + c = strchr(name, '@'); + if (c) + *c = 0; + if (strcmp(symname, name) == 0) + return &lib->dynsym[i]; + } + return NULL; +} + +/* Note that we assume the section is .text and the symbol is relative to + * the library base + */ +static unsigned long __init find_function32(struct lib32_elfinfo *lib, + const char *symname) +{ + Elf32_Sym *sym = find_symbol32(lib, symname); + + if (sym == NULL) { + printk(KERN_WARNING "vDSO32: function %s not found !\n", + symname); + return 0; + } + return sym->st_value - VDSO32_LBASE; +} + +static int vdso_do_func_patch32(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64, + const char *orig, const char *fix) +{ + Elf32_Sym *sym32_gen, *sym32_fix; + + sym32_gen = find_symbol32(v32, orig); + if (sym32_gen == NULL) { + printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig); + return -1; + } + if (fix == NULL) { + sym32_gen->st_name = 0; + return 0; + } + sym32_fix = find_symbol32(v32, fix); + if (sym32_fix == NULL) { + printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix); + return -1; + } + sym32_gen->st_value = sym32_fix->st_value; + sym32_gen->st_size = sym32_fix->st_size; + sym32_gen->st_info = sym32_fix->st_info; + sym32_gen->st_other = sym32_fix->st_other; + sym32_gen->st_shndx = sym32_fix->st_shndx; + + return 0; +} + + +#ifdef CONFIG_PPC64 + +static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname, + unsigned long *size) +{ + Elf64_Shdr *sechdrs; + unsigned int i; + char *secnames; + + /* Grab section headers and strings so we can tell who is who */ + sechdrs = (void *)ehdr + ehdr->e_shoff; + secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; + + /* Find the section they want */ + for (i = 1; i < ehdr->e_shnum; i++) { + if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { + if (size) + *size = sechdrs[i].sh_size; + return (void *)ehdr + sechdrs[i].sh_offset; + } + } + if (size) + *size = 0; + return NULL; +} + +static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, + const char *symname) +{ + unsigned int i; + char name[MAX_SYMNAME], *c; + + for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) { + if (lib->dynsym[i].st_name == 0) + continue; + strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, + MAX_SYMNAME); + c = strchr(name, '@'); + if (c) + *c = 0; + if (strcmp(symname, name) == 0) + return &lib->dynsym[i]; + } + return NULL; +} + +/* Note that we assume the section is .text and the symbol is relative to + * the library base + */ +static unsigned long __init find_function64(struct lib64_elfinfo *lib, + const char *symname) +{ + Elf64_Sym *sym = find_symbol64(lib, symname); + + if (sym == NULL) { + printk(KERN_WARNING "vDSO64: function %s not found !\n", + symname); + return 0; + } +#ifdef VDS64_HAS_DESCRIPTORS + return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) - + VDSO64_LBASE; +#else + return sym->st_value - VDSO64_LBASE; +#endif +} + +static int vdso_do_func_patch64(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64, + const char *orig, const char *fix) +{ + Elf64_Sym *sym64_gen, *sym64_fix; + + sym64_gen = find_symbol64(v64, orig); + if (sym64_gen == NULL) { + printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig); + return -1; + } + if (fix == NULL) { + sym64_gen->st_name = 0; + return 0; + } + sym64_fix = find_symbol64(v64, fix); + if (sym64_fix == NULL) { + printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix); + return -1; + } + sym64_gen->st_value = sym64_fix->st_value; + sym64_gen->st_size = sym64_fix->st_size; + sym64_gen->st_info = sym64_fix->st_info; + sym64_gen->st_other = sym64_fix->st_other; + sym64_gen->st_shndx = sym64_fix->st_shndx; + + return 0; +} + +#endif /* CONFIG_PPC64 */ + + +static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + void *sect; + + /* + * Locate symbol tables & text section + */ + + v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize); + v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL); + if (v32->dynsym == NULL || v32->dynstr == NULL) { + printk(KERN_ERR "vDSO32: required symbol section not found\n"); + return -1; + } + sect = find_section32(v32->hdr, ".text", NULL); + if (sect == NULL) { + printk(KERN_ERR "vDSO32: the .text section was not found\n"); + return -1; + } + v32->text = sect - vdso32_kbase; + +#ifdef CONFIG_PPC64 + v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize); + v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL); + if (v64->dynsym == NULL || v64->dynstr == NULL) { + printk(KERN_ERR "vDSO64: required symbol section not found\n"); + return -1; + } + sect = find_section64(v64->hdr, ".text", NULL); + if (sect == NULL) { + printk(KERN_ERR "vDSO64: the .text section was not found\n"); + return -1; + } + v64->text = sect - vdso64_kbase; +#endif /* CONFIG_PPC64 */ + + return 0; +} + +static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + /* + * Find signal trampolines + */ + +#ifdef CONFIG_PPC64 + vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64"); +#endif + vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32"); + vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32"); +} + +static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + Elf32_Sym *sym32; +#ifdef CONFIG_PPC64 + Elf64_Sym *sym64; + + sym64 = find_symbol64(v64, "__kernel_datapage_offset"); + if (sym64 == NULL) { + printk(KERN_ERR "vDSO64: Can't find symbol " + "__kernel_datapage_offset !\n"); + return -1; + } + *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) = + (vdso64_pages << PAGE_SHIFT) - + (sym64->st_value - VDSO64_LBASE); +#endif /* CONFIG_PPC64 */ + + sym32 = find_symbol32(v32, "__kernel_datapage_offset"); + if (sym32 == NULL) { + printk(KERN_ERR "vDSO32: Can't find symbol " + "__kernel_datapage_offset !\n"); + return -1; + } + *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) = + (vdso32_pages << PAGE_SHIFT) - + (sym32->st_value - VDSO32_LBASE); + + return 0; +} + +static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) { + struct vdso_patch_def *patch = &vdso_patches[i]; + int match = (cur_cpu_spec->cpu_features & patch->ftr_mask) + == patch->ftr_value; + if (!match) + continue; + + DBG("replacing %s with %s...\n", patch->gen_name, + patch->fix_name ? "NONE" : patch->fix_name); + + /* + * Patch the 32 bits and 64 bits symbols. Note that we do not + * patch the "." symbol on 64 bits. + * It would be easy to do, but doesn't seem to be necessary, + * patching the OPD symbol is enough. + */ + vdso_do_func_patch32(v32, v64, patch->gen_name, + patch->fix_name); +#ifdef CONFIG_PPC64 + vdso_do_func_patch64(v32, v64, patch->gen_name, + patch->fix_name); +#endif /* CONFIG_PPC64 */ + } + + return 0; +} + + +static __init int vdso_setup(void) +{ + struct lib32_elfinfo v32; + struct lib64_elfinfo v64; + + v32.hdr = vdso32_kbase; +#ifdef CONFIG_PPC64 + v64.hdr = vdso64_kbase; +#endif + if (vdso_do_find_sections(&v32, &v64)) + return -1; + + if (vdso_fixup_datapage(&v32, &v64)) + return -1; + + if (vdso_fixup_alt_funcs(&v32, &v64)) + return -1; + + vdso_setup_trampolines(&v32, &v64); + + return 0; +} + +/* + * Called from setup_arch to initialize the bitmap of available + * syscalls in the systemcfg page + */ +static void __init vdso_setup_syscall_map(void) +{ + unsigned int i; + extern unsigned long *sys_call_table; + extern unsigned long sys_ni_syscall; + + + for (i = 0; i < __NR_syscalls; i++) { +#ifdef CONFIG_PPC64 + if (sys_call_table[i*2] != sys_ni_syscall) + vdso_data->syscall_map_64[i >> 5] |= + 0x80000000UL >> (i & 0x1f); + if (sys_call_table[i*2+1] != sys_ni_syscall) + vdso_data->syscall_map_32[i >> 5] |= + 0x80000000UL >> (i & 0x1f); +#else /* CONFIG_PPC64 */ + if (sys_call_table[i] != sys_ni_syscall) + vdso_data->syscall_map_32[i >> 5] |= + 0x80000000UL >> (i & 0x1f); +#endif /* CONFIG_PPC64 */ + } +} + + +void __init vdso_init(void) +{ + int i; + +#ifdef CONFIG_PPC64 + /* + * Fill up the "systemcfg" stuff for backward compatiblity + */ + strcpy(vdso_data->eye_catcher, "SYSTEMCFG:PPC64"); + vdso_data->version.major = SYSTEMCFG_MAJOR; + vdso_data->version.minor = SYSTEMCFG_MINOR; + vdso_data->processor = mfspr(SPRN_PVR); + vdso_data->platform = _machine; + vdso_data->physicalMemorySize = lmb_phys_mem_size(); + vdso_data->dcache_size = ppc64_caches.dsize; + vdso_data->dcache_line_size = ppc64_caches.dline_size; + vdso_data->icache_size = ppc64_caches.isize; + vdso_data->icache_line_size = ppc64_caches.iline_size; + + /* + * Calculate the size of the 64 bits vDSO + */ + vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT; + DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages); +#endif /* CONFIG_PPC64 */ + + + /* + * Calculate the size of the 32 bits vDSO + */ + vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT; + DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages); + + + /* + * Setup the syscall map in the vDOS + */ + vdso_setup_syscall_map(); + /* + * Initialize the vDSO images in memory, that is do necessary + * fixups of vDSO symbols, locate trampolines, etc... + */ + if (vdso_setup()) { + printk(KERN_ERR "vDSO setup failure, not enabled !\n"); + vdso32_pages = 0; +#ifdef CONFIG_PPC64 + vdso64_pages = 0; +#endif + return; + } + + /* Make sure pages are in the correct state */ + for (i = 0; i < vdso32_pages; i++) { + struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + + } +#ifdef CONFIG_PPC64 + for (i = 0; i < vdso64_pages; i++) { + struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + } +#endif /* CONFIG_PPC64 */ + + get_page(virt_to_page(vdso_data)); +} + +int in_gate_area_no_task(unsigned long addr) +{ + return 0; +} + +int in_gate_area(struct task_struct *task, unsigned long addr) +{ + return 0; +} + +struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +{ + return NULL; +} + diff --git a/arch/ppc64/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 0b1b0df973eb..758331d4d1a5 100644 --- a/arch/ppc64/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -5,6 +5,10 @@ obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o # Build rules +ifeq ($(CONFIG_PPC32),y) +CROSS32CC := $(CC) +endif + targets := $(obj-vdso32) vdso32.so obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) diff --git a/arch/ppc64/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S index c8db993574ee..c8db993574ee 100644 --- a/arch/ppc64/kernel/vdso32/cacheflush.S +++ b/arch/powerpc/kernel/vdso32/cacheflush.S diff --git a/arch/ppc64/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index 4f4eb0be3992..a08c26e87835 100644 --- a/arch/ppc64/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -66,3 +66,19 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) blr .cfi_endproc V_FUNCTION_END(__kernel_get_syscall_map) + +/* + * void unsigned long long __kernel_get_tbfreq(void); + * + * returns the timebase frequency in HZ + */ +V_FUNCTION_BEGIN(__kernel_get_tbfreq) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + bl __get_datapage@local + lwz r3,CFG_TB_TICKS_PER_SEC(r3) + lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) + mtlr r12 + .cfi_endproc +V_FUNCTION_END(__kernel_get_tbfreq) diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S new file mode 100644 index 000000000000..aeb5fc9b87b3 --- /dev/null +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -0,0 +1,315 @@ +/* + * Userland implementation of gettimeofday() for 32 bits processes in a + * ppc64 kernel for use in the vDSO + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org, + * IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text +/* + * Exact prototype of gettimeofday + * + * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); + * + */ +V_FUNCTION_BEGIN(__kernel_gettimeofday) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + + mr r10,r3 /* r10 saves tv */ + mr r11,r4 /* r11 saves tz */ + bl __get_datapage@local /* get data page */ + mr r9, r3 /* datapage ptr in r9 */ + bl __do_get_xsec@local /* get xsec from tb & kernel */ + bne- 2f /* out of line -> do syscall */ + + /* seconds are xsec >> 20 */ + rlwinm r5,r4,12,20,31 + rlwimi r5,r3,12,0,19 + stw r5,TVAL32_TV_SEC(r10) + + /* get remaining xsec and convert to usec. we scale + * up remaining xsec by 12 bits and get the top 32 bits + * of the multiplication + */ + rlwinm r5,r4,12,0,19 + lis r6,1000000@h + ori r6,r6,1000000@l + mulhwu r5,r5,r6 + stw r5,TVAL32_TV_USEC(r10) + + cmpli cr0,r11,0 /* check if tz is NULL */ + beq 1f + lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */ + lwz r5,CFG_TZ_DSTTIME(r9) + stw r4,TZONE_TZ_MINWEST(r11) + stw r5,TZONE_TZ_DSTTIME(r11) + +1: mtlr r12 + li r3,0 + blr + +2: + mtlr r12 + mr r3,r10 + mr r4,r11 + li r0,__NR_gettimeofday + sc + blr + .cfi_endproc +V_FUNCTION_END(__kernel_gettimeofday) + +/* + * Exact prototype of clock_gettime() + * + * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); + * + */ +V_FUNCTION_BEGIN(__kernel_clock_gettime) + .cfi_startproc + /* Check for supported clock IDs */ + cmpli cr0,r3,CLOCK_REALTIME + cmpli cr1,r3,CLOCK_MONOTONIC + cror cr0,cr0,cr1 + bne cr0,99f + + mflr r12 /* r12 saves lr */ + .cfi_register lr,r12 + mr r10,r3 /* r10 saves id */ + mr r11,r4 /* r11 saves tp */ + bl __get_datapage@local /* get data page */ + mr r9, r3 /* datapage ptr in r9 */ + beq cr1,50f /* if monotonic -> jump there */ + + /* + * CLOCK_REALTIME + */ + + bl __do_get_xsec@local /* get xsec from tb & kernel */ + bne- 98f /* out of line -> do syscall */ + + /* seconds are xsec >> 20 */ + rlwinm r5,r4,12,20,31 + rlwimi r5,r3,12,0,19 + stw r5,TSPC32_TV_SEC(r11) + + /* get remaining xsec and convert to nsec. we scale + * up remaining xsec by 12 bits and get the top 32 bits + * of the multiplication, then we multiply by 1000 + */ + rlwinm r5,r4,12,0,19 + lis r6,1000000@h + ori r6,r6,1000000@l + mulhwu r5,r5,r6 + mulli r5,r5,1000 + stw r5,TSPC32_TV_NSEC(r11) + mtlr r12 + li r3,0 + blr + + /* + * CLOCK_MONOTONIC + */ + +50: bl __do_get_xsec@local /* get xsec from tb & kernel */ + bne- 98f /* out of line -> do syscall */ + + /* seconds are xsec >> 20 */ + rlwinm r6,r4,12,20,31 + rlwimi r6,r3,12,0,19 + + /* get remaining xsec and convert to nsec. we scale + * up remaining xsec by 12 bits and get the top 32 bits + * of the multiplication, then we multiply by 1000 + */ + rlwinm r7,r4,12,0,19 + lis r5,1000000@h + ori r5,r5,1000000@l + mulhwu r7,r7,r5 + mulli r7,r7,1000 + + /* now we must fixup using wall to monotonic. We need to snapshot + * that value and do the counter trick again. Fortunately, we still + * have the counter value in r8 that was returned by __do_get_xsec. + * At this point, r6,r7 contain our sec/nsec values, r3,r4 and r5 + * can be used + */ + + lwz r3,WTOM_CLOCK_SEC(r9) + lwz r4,WTOM_CLOCK_NSEC(r9) + + /* We now have our result in r3,r4. We create a fake dependency + * on that result and re-check the counter + */ + or r5,r4,r3 + xor r0,r5,r5 + add r9,r9,r0 +#ifdef CONFIG_PPC64 + lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9) +#else + lwz r0,(CFG_TB_UPDATE_COUNT)(r9) +#endif + cmpl cr0,r8,r0 /* check if updated */ + bne- 50b + + /* Calculate and store result. Note that this mimmics the C code, + * which may cause funny results if nsec goes negative... is that + * possible at all ? + */ + add r3,r3,r6 + add r4,r4,r7 + lis r5,NSEC_PER_SEC@h + ori r5,r5,NSEC_PER_SEC@l + cmpli cr0,r4,r5 + blt 1f + subf r4,r5,r4 + addi r3,r3,1 +1: stw r3,TSPC32_TV_SEC(r11) + stw r4,TSPC32_TV_NSEC(r11) + + mtlr r12 + li r3,0 + blr + + /* + * syscall fallback + */ +98: + mtlr r12 + mr r3,r10 + mr r4,r11 +99: + li r0,__NR_clock_gettime + sc + blr + .cfi_endproc +V_FUNCTION_END(__kernel_clock_gettime) + + +/* + * Exact prototype of clock_getres() + * + * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); + * + */ +V_FUNCTION_BEGIN(__kernel_clock_getres) + .cfi_startproc + /* Check for supported clock IDs */ + cmpwi cr0,r3,CLOCK_REALTIME + cmpwi cr1,r3,CLOCK_MONOTONIC + cror cr0,cr0,cr1 + bne cr0,99f + + li r3,0 + cmpli cr0,r4,0 + beqlr + lis r5,CLOCK_REALTIME_RES@h + ori r5,r5,CLOCK_REALTIME_RES@l + stw r3,TSPC32_TV_SEC(r4) + stw r5,TSPC32_TV_NSEC(r4) + blr + + /* + * syscall fallback + */ +99: + li r0,__NR_clock_getres + sc + blr + .cfi_endproc +V_FUNCTION_END(__kernel_clock_getres) + + +/* + * This is the core of gettimeofday() & friends, it returns the xsec + * value in r3 & r4 and expects the datapage ptr (non clobbered) + * in r9. clobbers r0,r4,r5,r6,r7,r8. + * When returning, r8 contains the counter value that can be reused + * by the monotonic clock implementation + */ +__do_get_xsec: + .cfi_startproc + /* Check for update count & load values. We use the low + * order 32 bits of the update count + */ +#ifdef CONFIG_PPC64 +1: lwz r8,(CFG_TB_UPDATE_COUNT+4)(r9) +#else +1: lwz r8,(CFG_TB_UPDATE_COUNT)(r9) +#endif + andi. r0,r8,1 /* pending update ? loop */ + bne- 1b + xor r0,r8,r8 /* create dependency */ + add r9,r9,r0 + + /* Load orig stamp (offset to TB) */ + lwz r5,CFG_TB_ORIG_STAMP(r9) + lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) + + /* Get a stable TB value */ +2: mftbu r3 + mftbl r4 + mftbu r0 + cmpl cr0,r3,r0 + bne- 2b + + /* Substract tb orig stamp. If the high part is non-zero, we jump to + * the slow path which call the syscall. + * If it's ok, then we have our 32 bits tb_ticks value in r7 + */ + subfc r7,r6,r4 + subfe. r0,r5,r3 + bne- 3f + + /* Load scale factor & do multiplication */ + lwz r5,CFG_TB_TO_XS(r9) /* load values */ + lwz r6,(CFG_TB_TO_XS+4)(r9) + mulhwu r4,r7,r5 + mulhwu r6,r7,r6 + mullw r0,r7,r5 + addc r6,r6,r0 + + /* At this point, we have the scaled xsec value in r4 + XER:CA + * we load & add the stamp since epoch + */ + lwz r5,CFG_STAMP_XSEC(r9) + lwz r6,(CFG_STAMP_XSEC+4)(r9) + adde r4,r4,r6 + addze r3,r5 + + /* We now have our result in r3,r4. We create a fake dependency + * on that result and re-check the counter + */ + or r6,r4,r3 + xor r0,r6,r6 + add r9,r9,r0 +#ifdef CONFIG_PPC64 + lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9) +#else + lwz r0,(CFG_TB_UPDATE_COUNT)(r9) +#endif + cmpl cr0,r8,r0 /* check if updated */ + bne- 1b + + /* Warning ! The caller expects CR:EQ to be set to indicate a + * successful calculation (so it won't fallback to the syscall + * method). We have overriden that CR bit in the counter check, + * but fortunately, the loop exit condition _is_ CR:EQ set, so + * we can exit safely here. If you change this code, be careful + * of that side effect. + */ +3: blr + .cfi_endproc diff --git a/arch/ppc64/kernel/vdso32/note.S b/arch/powerpc/kernel/vdso32/note.S index d4b5be4f3d5f..d4b5be4f3d5f 100644 --- a/arch/ppc64/kernel/vdso32/note.S +++ b/arch/powerpc/kernel/vdso32/note.S diff --git a/arch/ppc64/kernel/vdso32/sigtramp.S b/arch/powerpc/kernel/vdso32/sigtramp.S index e04642781917..e04642781917 100644 --- a/arch/ppc64/kernel/vdso32/sigtramp.S +++ b/arch/powerpc/kernel/vdso32/sigtramp.S diff --git a/arch/ppc64/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 6f87a916a394..f4bad720cb0a 100644 --- a/arch/ppc64/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -102,9 +102,12 @@ VERSION { VDSO_VERSION_STRING { global: - __kernel_datapage_offset; /* Has to be there for the kernel to find it */ + __kernel_datapage_offset; /* Has to be there for the kernel to find */ __kernel_get_syscall_map; __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + __kernel_get_tbfreq; __kernel_sync_dicache; __kernel_sync_dicache_p5; __kernel_sigtramp32; diff --git a/arch/ppc64/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S index 76ca28e09d29..556f0caa5d84 100644 --- a/arch/ppc64/kernel/vdso32/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S @@ -6,7 +6,7 @@ .globl vdso32_start, vdso32_end .balign PAGE_SIZE vdso32_start: - .incbin "arch/ppc64/kernel/vdso32/vdso32.so" + .incbin "arch/powerpc/kernel/vdso32/vdso32.so" .balign PAGE_SIZE vdso32_end: diff --git a/arch/ppc64/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index ab39988452cc..ab39988452cc 100644 --- a/arch/ppc64/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile diff --git a/arch/ppc64/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S index d4a0ad28d534..d4a0ad28d534 100644 --- a/arch/ppc64/kernel/vdso64/cacheflush.S +++ b/arch/powerpc/kernel/vdso64/cacheflush.S diff --git a/arch/ppc64/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index ed6e599ae824..e67eda0f8cda 100644 --- a/arch/ppc64/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -66,3 +66,19 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) blr .cfi_endproc V_FUNCTION_END(__kernel_get_syscall_map) + + +/* + * void unsigned long __kernel_get_tbfreq(void); + * + * returns the timebase frequency in HZ + */ +V_FUNCTION_BEGIN(__kernel_get_tbfreq) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + bl V_LOCAL_FUNC(__get_datapage) + ld r3,CFG_TB_TICKS_PER_SEC(r3) + mtlr r12 + .cfi_endproc +V_FUNCTION_END(__kernel_get_tbfreq) diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S new file mode 100644 index 000000000000..d371c02a8c0e --- /dev/null +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -0,0 +1,242 @@ +/* + * Userland implementation of gettimeofday() for 64 bits processes in a + * ppc64 kernel for use in the vDSO + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), + * IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text +/* + * Exact prototype of gettimeofday + * + * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); + * + */ +V_FUNCTION_BEGIN(__kernel_gettimeofday) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + + mr r11,r3 /* r11 holds tv */ + mr r10,r4 /* r10 holds tz */ + bl V_LOCAL_FUNC(__get_datapage) /* get data page */ + bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ + lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */ + ori r7,r7,16960 + rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ + rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ + std r5,TVAL64_TV_SEC(r11) /* store sec in tv */ + subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ + mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) / + * XSEC_PER_SEC + */ + rldicl r0,r0,44,20 + cmpldi cr0,r10,0 /* check if tz is NULL */ + std r0,TVAL64_TV_USEC(r11) /* store usec in tv */ + beq 1f + lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */ + lwz r5,CFG_TZ_DSTTIME(r3) + stw r4,TZONE_TZ_MINWEST(r10) + stw r5,TZONE_TZ_DSTTIME(r10) +1: mtlr r12 + li r3,0 /* always success */ + blr + .cfi_endproc +V_FUNCTION_END(__kernel_gettimeofday) + + +/* + * Exact prototype of clock_gettime() + * + * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); + * + */ +V_FUNCTION_BEGIN(__kernel_clock_gettime) + .cfi_startproc + /* Check for supported clock IDs */ + cmpwi cr0,r3,CLOCK_REALTIME + cmpwi cr1,r3,CLOCK_MONOTONIC + cror cr0,cr0,cr1 + bne cr0,99f + + mflr r12 /* r12 saves lr */ + .cfi_register lr,r12 + mr r10,r3 /* r10 saves id */ + mr r11,r4 /* r11 saves tp */ + bl V_LOCAL_FUNC(__get_datapage) /* get data page */ + beq cr1,50f /* if monotonic -> jump there */ + + /* + * CLOCK_REALTIME + */ + + bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ + + lis r7,0x3b9a /* r7 = 1000000000 = NSEC_PER_SEC */ + ori r7,r7,0xca00 + rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ + rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ + std r5,TSPC64_TV_SEC(r11) /* store sec in tv */ + subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ + mulld r0,r0,r7 /* nsec = (xsec * NSEC_PER_SEC) / + * XSEC_PER_SEC + */ + rldicl r0,r0,44,20 + std r0,TSPC64_TV_NSEC(r11) /* store nsec in tp */ + + mtlr r12 + li r3,0 + blr + + /* + * CLOCK_MONOTONIC + */ + +50: bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ + + lis r7,0x3b9a /* r7 = 1000000000 = NSEC_PER_SEC */ + ori r7,r7,0xca00 + rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ + rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ + subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ + mulld r0,r0,r7 /* nsec = (xsec * NSEC_PER_SEC) / + * XSEC_PER_SEC + */ + rldicl r6,r0,44,20 + + /* now we must fixup using wall to monotonic. We need to snapshot + * that value and do the counter trick again. Fortunately, we still + * have the counter value in r8 that was returned by __do_get_xsec. + * At this point, r5,r6 contain our sec/nsec values. + * can be used + */ + + lwz r4,WTOM_CLOCK_SEC(r9) + lwz r7,WTOM_CLOCK_NSEC(r9) + + /* We now have our result in r4,r7. We create a fake dependency + * on that result and re-check the counter + */ + or r9,r4,r7 + xor r0,r9,r9 + add r3,r3,r0 + ld r0,CFG_TB_UPDATE_COUNT(r3) + cmpld cr0,r0,r8 /* check if updated */ + bne- 50b + + /* Calculate and store result. Note that this mimmics the C code, + * which may cause funny results if nsec goes negative... is that + * possible at all ? + */ + add r4,r4,r5 + add r7,r7,r6 + lis r9,NSEC_PER_SEC@h + ori r9,r9,NSEC_PER_SEC@l + cmpli cr0,r7,r9 + blt 1f + subf r7,r9,r7 + addi r4,r4,1 +1: std r4,TSPC64_TV_SEC(r11) + std r7,TSPC64_TV_NSEC(r11) + + mtlr r12 + li r3,0 + blr + + /* + * syscall fallback + */ +98: + mtlr r12 + mr r3,r10 + mr r4,r11 +99: + li r0,__NR_clock_gettime + sc + blr + .cfi_endproc +V_FUNCTION_END(__kernel_clock_gettime) + + +/* + * Exact prototype of clock_getres() + * + * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); + * + */ +V_FUNCTION_BEGIN(__kernel_clock_getres) + .cfi_startproc + /* Check for supported clock IDs */ + cmpwi cr0,r3,CLOCK_REALTIME + cmpwi cr1,r3,CLOCK_MONOTONIC + cror cr0,cr0,cr1 + bne cr0,99f + + li r3,0 + cmpli cr0,r4,0 + beqlr + lis r5,CLOCK_REALTIME_RES@h + ori r5,r5,CLOCK_REALTIME_RES@l + std r3,TSPC64_TV_SEC(r4) + std r5,TSPC64_TV_NSEC(r4) + blr + + /* + * syscall fallback + */ +99: + li r0,__NR_clock_getres + sc + blr + .cfi_endproc +V_FUNCTION_END(__kernel_clock_getres) + + +/* + * This is the core of gettimeofday(), it returns the xsec + * value in r4 and expects the datapage ptr (non clobbered) + * in r3. clobbers r0,r4,r5,r6,r7,r8 + * When returning, r8 contains the counter value that can be reused + */ +V_FUNCTION_BEGIN(__do_get_xsec) + .cfi_startproc + /* check for update count & load values */ +1: ld r8,CFG_TB_UPDATE_COUNT(r3) + andi. r0,r4,1 /* pending update ? loop */ + bne- 1b + xor r0,r4,r4 /* create dependency */ + add r3,r3,r0 + + /* Get TB & offset it */ + mftb r7 + ld r9,CFG_TB_ORIG_STAMP(r3) + subf r7,r9,r7 + + /* Scale result */ + ld r5,CFG_TB_TO_XS(r3) + mulhdu r7,r7,r5 + + /* Add stamp since epoch */ + ld r6,CFG_STAMP_XSEC(r3) + add r4,r6,r7 + + xor r0,r4,r4 + add r3,r3,r0 + ld r0,CFG_TB_UPDATE_COUNT(r3) + cmpld cr0,r0,r8 /* check if updated */ + bne- 1b + blr + .cfi_endproc +V_FUNCTION_END(__do_get_xsec) diff --git a/arch/ppc64/kernel/vdso64/note.S b/arch/powerpc/kernel/vdso64/note.S index dc2a509f7e8a..dc2a509f7e8a 100644 --- a/arch/ppc64/kernel/vdso64/note.S +++ b/arch/powerpc/kernel/vdso64/note.S diff --git a/arch/ppc64/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S index 31b604ab56de..31b604ab56de 100644 --- a/arch/ppc64/kernel/vdso64/sigtramp.S +++ b/arch/powerpc/kernel/vdso64/sigtramp.S diff --git a/arch/ppc64/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 9cb28181da80..4bdf224464ab 100644 --- a/arch/ppc64/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -102,9 +102,12 @@ VERSION { VDSO_VERSION_STRING { global: - __kernel_datapage_offset; /* Has to be there for the kernel to find it */ + __kernel_datapage_offset; /* Has to be there for the kernel to find */ __kernel_get_syscall_map; __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + __kernel_get_tbfreq; __kernel_sync_dicache; __kernel_sync_dicache_p5; __kernel_sigtramp_rt64; diff --git a/arch/ppc64/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S index 771c2741c492..0529cb9e3b97 100644 --- a/arch/ppc64/kernel/vdso64/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S @@ -6,7 +6,7 @@ .globl vdso64_start, vdso64_end .balign PAGE_SIZE vdso64_start: - .incbin "arch/ppc64/kernel/vdso64/vdso64.so" + .incbin "arch/powerpc/kernel/vdso64/vdso64.so" .balign PAGE_SIZE vdso64_end: diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1dd3cc69a490..e2c95fcb8055 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -46,9 +46,7 @@ #include <asm/prom.h> #include <asm/lmb.h> #include <asm/sections.h> -#ifdef CONFIG_PPC64 #include <asm/vdso.h> -#endif #include "mmu_decl.h" @@ -397,10 +395,8 @@ void __init mem_init(void) mem_init_done = 1; -#ifdef CONFIG_PPC64 /* Initialize the vDSO */ vdso_init(); -#endif } /* diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c index e3a024e324b6..a3401b46f3ba 100644 --- a/arch/powerpc/oprofile/op_model_power4.c +++ b/arch/powerpc/oprofile/op_model_power4.c @@ -14,7 +14,6 @@ #include <asm/system.h> #include <asm/processor.h> #include <asm/cputable.h> -#include <asm/systemcfg.h> #include <asm/rtas.h> #include <asm/oprofile_impl.h> #include <asm/reg.h> diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index 7bf1a6f6f401..dac4cc20fa93 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -14,8 +14,7 @@ endif obj-y += idle.o dma.o \ align.o \ rtc.o \ - iommu.o vdso.o -obj-y += vdso32/ vdso64/ + iommu.o pci-obj-$(CONFIG_PPC_MULTIPLATFORM) += pci_dn.o pci_direct_iommu.o diff --git a/arch/ppc64/kernel/vdso32/gettimeofday.S b/arch/ppc64/kernel/vdso32/gettimeofday.S deleted file mode 100644 index e243c1d24af7..000000000000 --- a/arch/ppc64/kernel/vdso32/gettimeofday.S +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Userland implementation of gettimeofday() for 32 bits processes in a - * ppc64 kernel for use in the vDSO - * - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include <linux/config.h> -#include <asm/processor.h> -#include <asm/ppc_asm.h> -#include <asm/vdso.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> - - .text -/* - * Exact prototype of gettimeofday - * - * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); - * - */ -V_FUNCTION_BEGIN(__kernel_gettimeofday) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r10,r3 /* r10 saves tv */ - mr r11,r4 /* r11 saves tz */ - bl __get_datapage@local /* get data page */ - mr r9, r3 /* datapage ptr in r9 */ - bl __do_get_xsec@local /* get xsec from tb & kernel */ - bne- 2f /* out of line -> do syscall */ - - /* seconds are xsec >> 20 */ - rlwinm r5,r4,12,20,31 - rlwimi r5,r3,12,0,19 - stw r5,TVAL32_TV_SEC(r10) - - /* get remaining xsec and convert to usec. we scale - * up remaining xsec by 12 bits and get the top 32 bits - * of the multiplication - */ - rlwinm r5,r4,12,0,19 - lis r6,1000000@h - ori r6,r6,1000000@l - mulhwu r5,r5,r6 - stw r5,TVAL32_TV_USEC(r10) - - cmpli cr0,r11,0 /* check if tz is NULL */ - beq 1f - lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */ - lwz r5,CFG_TZ_DSTTIME(r9) - stw r4,TZONE_TZ_MINWEST(r11) - stw r5,TZONE_TZ_DSTTIME(r11) - -1: mtlr r12 - li r3,0 - blr - -2: mr r3,r10 - mr r4,r11 - li r0,__NR_gettimeofday - sc - b 1b - .cfi_endproc -V_FUNCTION_END(__kernel_gettimeofday) - -/* - * This is the core of gettimeofday(), it returns the xsec - * value in r3 & r4 and expects the datapage ptr (non clobbered) - * in r9. clobbers r0,r4,r5,r6,r7,r8 -*/ -__do_get_xsec: - .cfi_startproc - /* Check for update count & load values. We use the low - * order 32 bits of the update count - */ -1: lwz r8,(CFG_TB_UPDATE_COUNT+4)(r9) - andi. r0,r8,1 /* pending update ? loop */ - bne- 1b - xor r0,r8,r8 /* create dependency */ - add r9,r9,r0 - - /* Load orig stamp (offset to TB) */ - lwz r5,CFG_TB_ORIG_STAMP(r9) - lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) - - /* Get a stable TB value */ -2: mftbu r3 - mftbl r4 - mftbu r0 - cmpl cr0,r3,r0 - bne- 2b - - /* Substract tb orig stamp. If the high part is non-zero, we jump to the - * slow path which call the syscall. If it's ok, then we have our 32 bits - * tb_ticks value in r7 - */ - subfc r7,r6,r4 - subfe. r0,r5,r3 - bne- 3f - - /* Load scale factor & do multiplication */ - lwz r5,CFG_TB_TO_XS(r9) /* load values */ - lwz r6,(CFG_TB_TO_XS+4)(r9) - mulhwu r4,r7,r5 - mulhwu r6,r7,r6 - mullw r0,r7,r5 - addc r6,r6,r0 - - /* At this point, we have the scaled xsec value in r4 + XER:CA - * we load & add the stamp since epoch - */ - lwz r5,CFG_STAMP_XSEC(r9) - lwz r6,(CFG_STAMP_XSEC+4)(r9) - adde r4,r4,r6 - addze r3,r5 - - /* We now have our result in r3,r4. We create a fake dependency - * on that result and re-check the counter - */ - xor r0,r4,r4 - add r9,r9,r0 - lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9) - cmpl cr0,r8,r0 /* check if updated */ - bne- 1b - - /* Warning ! The caller expects CR:EQ to be set to indicate a - * successful calculation (so it won't fallback to the syscall - * method). We have overriden that CR bit in the counter check, - * but fortunately, the loop exit condition _is_ CR:EQ set, so - * we can exit safely here. If you change this code, be careful - * of that side effect. - */ -3: blr - .cfi_endproc diff --git a/arch/ppc64/kernel/vdso64/gettimeofday.S b/arch/ppc64/kernel/vdso64/gettimeofday.S deleted file mode 100644 index f6df8028570a..000000000000 --- a/arch/ppc64/kernel/vdso64/gettimeofday.S +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Userland implementation of gettimeofday() for 64 bits processes in a - * ppc64 kernel for use in the vDSO - * - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), - * IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include <linux/config.h> -#include <asm/processor.h> -#include <asm/ppc_asm.h> -#include <asm/vdso.h> -#include <asm/asm-offsets.h> - - .text -/* - * Exact prototype of gettimeofday - * - * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); - * - */ -V_FUNCTION_BEGIN(__kernel_gettimeofday) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r11,r3 /* r11 holds tv */ - mr r10,r4 /* r10 holds tz */ - bl V_LOCAL_FUNC(__get_datapage) /* get data page */ - bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ - lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */ - ori r7,r7,16960 - rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ - rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ - std r5,TVAL64_TV_SEC(r11) /* store sec in tv */ - subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ - mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) / XSEC_PER_SEC */ - rldicl r0,r0,44,20 - cmpldi cr0,r10,0 /* check if tz is NULL */ - std r0,TVAL64_TV_USEC(r11) /* store usec in tv */ - beq 1f - lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */ - lwz r5,CFG_TZ_DSTTIME(r3) - stw r4,TZONE_TZ_MINWEST(r10) - stw r5,TZONE_TZ_DSTTIME(r10) -1: mtlr r12 - li r3,0 /* always success */ - blr - .cfi_endproc -V_FUNCTION_END(__kernel_gettimeofday) - - -/* - * This is the core of gettimeofday(), it returns the xsec - * value in r4 and expects the datapage ptr (non clobbered) - * in r3. clobbers r0,r4,r5,r6,r7,r8 -*/ -V_FUNCTION_BEGIN(__do_get_xsec) - .cfi_startproc - /* check for update count & load values */ -1: ld r7,CFG_TB_UPDATE_COUNT(r3) - andi. r0,r4,1 /* pending update ? loop */ - bne- 1b - xor r0,r4,r4 /* create dependency */ - add r3,r3,r0 - - /* Get TB & offset it */ - mftb r8 - ld r9,CFG_TB_ORIG_STAMP(r3) - subf r8,r9,r8 - - /* Scale result */ - ld r5,CFG_TB_TO_XS(r3) - mulhdu r8,r8,r5 - - /* Add stamp since epoch */ - ld r6,CFG_STAMP_XSEC(r3) - add r4,r6,r8 - - xor r0,r4,r4 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld cr0,r0,r7 /* check if updated */ - bne- 1b - blr - .cfi_endproc -V_FUNCTION_END(__do_get_xsec) diff --git a/include/asm-powerpc/auxvec.h b/include/asm-powerpc/auxvec.h index 79d8c4732309..19a099b62cd6 100644 --- a/include/asm-powerpc/auxvec.h +++ b/include/asm-powerpc/auxvec.h @@ -14,8 +14,6 @@ /* The vDSO location. We have to use the same value as x86 for glibc's * sake :-) */ -#ifdef __powerpc64__ #define AT_SYSINFO_EHDR 33 -#endif #endif diff --git a/include/asm-powerpc/elf.h b/include/asm-powerpc/elf.h index feac3458d71f..3dcd65edf978 100644 --- a/include/asm-powerpc/elf.h +++ b/include/asm-powerpc/elf.h @@ -269,14 +269,12 @@ extern int dcache_bsize; extern int icache_bsize; extern int ucache_bsize; -#ifdef __powerpc64__ +/* vDSO has arch_setup_additional_pages */ +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES struct linux_binprm; -#define ARCH_HAS_SETUP_ADDITIONAL_PAGES /* vDSO has arch_setup_additional_pages */ -extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack); +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int executable_stack); #define VDSO_AUX_ENT(a,b) NEW_AUX_ENT(a,b); -#else -#define VDSO_AUX_ENT(a,b) -#endif /* __powerpc64__ */ /* * The requirements here are: diff --git a/include/asm-powerpc/processor.h b/include/asm-powerpc/processor.h index f6f186b56b0f..d12382d292d4 100644 --- a/include/asm-powerpc/processor.h +++ b/include/asm-powerpc/processor.h @@ -177,8 +177,8 @@ struct thread_struct { #ifdef CONFIG_PPC64 unsigned long start_tb; /* Start purr when proc switched in */ unsigned long accum_tb; /* Total accumilated purr for process */ - unsigned long vdso_base; /* base of the vDSO library */ #endif + unsigned long vdso_base; /* base of the vDSO library */ unsigned long dabr; /* Data address breakpoint register */ #ifdef CONFIG_ALTIVEC /* Complete AltiVec register set */ diff --git a/include/asm-powerpc/systemcfg.h b/include/asm-powerpc/systemcfg.h deleted file mode 100644 index 36b5cbe466f1..000000000000 --- a/include/asm-powerpc/systemcfg.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef _SYSTEMCFG_H -#define _SYSTEMCFG_H - -/* - * Copyright (C) 2002 Peter Bergner <bergner@vnet.ibm.com>, IBM - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -/* Change Activity: - * 2002/09/30 : bergner : Created - * End Change Activity - */ - -/* - * If the major version changes we are incompatible. - * Minor version changes are a hint. - */ -#define SYSTEMCFG_MAJOR 1 -#define SYSTEMCFG_MINOR 1 - -#ifndef __ASSEMBLY__ - -#include <linux/unistd.h> - -#define SYSCALL_MAP_SIZE ((__NR_syscalls + 31) / 32) - -struct systemcfg { - __u8 eye_catcher[16]; /* Eyecatcher: SYSTEMCFG:PPC64 0x00 */ - struct { /* Systemcfg version numbers */ - __u32 major; /* Major number 0x10 */ - __u32 minor; /* Minor number 0x14 */ - } version; - - __u32 platform; /* Platform flags 0x18 */ - __u32 processor; /* Processor type 0x1C */ - __u64 processorCount; /* # of physical processors 0x20 */ - __u64 physicalMemorySize; /* Size of real memory(B) 0x28 */ - __u64 tb_orig_stamp; /* Timebase at boot 0x30 */ - __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ - __u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */ - __u64 stamp_xsec; /* 0x48 */ - __u64 tb_update_count; /* Timebase atomicity ctr 0x50 */ - __u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */ - __u32 tz_dsttime; /* Type of dst correction 0x5C */ - /* next four are no longer used except to be exported to /proc */ - __u32 dcache_size; /* L1 d-cache size 0x60 */ - __u32 dcache_line_size; /* L1 d-cache line size 0x64 */ - __u32 icache_size; /* L1 i-cache size 0x68 */ - __u32 icache_line_size; /* L1 i-cache line size 0x6C */ - __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of available syscalls 0x70 */ - __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of available syscalls */ -}; - -#ifdef __KERNEL__ -extern struct systemcfg *_systemcfg; /* to be renamed */ -#endif - -#endif /* __ASSEMBLY__ */ - -#endif /* _SYSTEMCFG_H */ diff --git a/include/asm-ppc64/vdso.h b/include/asm-powerpc/vdso.h index 85d8a7be25c4..85d8a7be25c4 100644 --- a/include/asm-ppc64/vdso.h +++ b/include/asm-powerpc/vdso.h diff --git a/include/asm-powerpc/vdso_datapage.h b/include/asm-powerpc/vdso_datapage.h new file mode 100644 index 000000000000..fc323b51366b --- /dev/null +++ b/include/asm-powerpc/vdso_datapage.h @@ -0,0 +1,108 @@ +#ifndef _VDSO_DATAPAGE_H +#define _VDSO_DATAPAGE_H + +/* + * Copyright (C) 2002 Peter Bergner <bergner@vnet.ibm.com>, IBM + * Copyright (C) 2005 Benjamin Herrenschmidy <benh@kernel.crashing.org>, + * IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +/* + * Note about this structure: + * + * This structure was historically called systemcfg and exposed to + * userland via /proc/ppc64/systemcfg. Unfortunately, this became an + * ABI issue as some proprietary software started relying on being able + * to mmap() it, thus we have to keep the base layout at least for a + * few kernel versions. + * + * However, since ppc32 doesn't suffer from this backward handicap, + * a simpler version of the data structure is used there with only the + * fields actually used by the vDSO. + * + */ + +/* + * If the major version changes we are incompatible. + * Minor version changes are a hint. + */ +#define SYSTEMCFG_MAJOR 1 +#define SYSTEMCFG_MINOR 1 + +#ifndef __ASSEMBLY__ + +#include <linux/unistd.h> + +#define SYSCALL_MAP_SIZE ((__NR_syscalls + 31) / 32) + +/* + * So here is the ppc64 backward compatible version + */ + +#ifdef CONFIG_PPC64 + +struct vdso_data { + __u8 eye_catcher[16]; /* Eyecatcher: SYSTEMCFG:PPC64 0x00 */ + struct { /* Systemcfg version numbers */ + __u32 major; /* Major number 0x10 */ + __u32 minor; /* Minor number 0x14 */ + } version; + + __u32 platform; /* Platform flags 0x18 */ + __u32 processor; /* Processor type 0x1C */ + __u64 processorCount; /* # of physical processors 0x20 */ + __u64 physicalMemorySize; /* Size of real memory(B) 0x28 */ + __u64 tb_orig_stamp; /* Timebase at boot 0x30 */ + __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ + __u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */ + __u64 stamp_xsec; /* 0x48 */ + __u64 tb_update_count; /* Timebase atomicity ctr 0x50 */ + __u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */ + __u32 tz_dsttime; /* Type of dst correction 0x5C */ + __u32 dcache_size; /* L1 d-cache size 0x60 */ + __u32 dcache_line_size; /* L1 d-cache line size 0x64 */ + __u32 icache_size; /* L1 i-cache size 0x68 */ + __u32 icache_line_size; /* L1 i-cache line size 0x6C */ + + /* those additional ones don't have to be located anywhere + * special as they were not part of the original systemcfg + */ + __s64 wtom_clock_sec; /* Wall to monotonic clock */ + __s32 wtom_clock_nsec; + __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */ + __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ +}; + +#else /* CONFIG_PPC64 */ + +/* + * And here is the simpler 32 bits version + */ +struct vdso_data { + __u64 tb_orig_stamp; /* Timebase at boot 0x30 */ + __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ + __u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */ + __u64 stamp_xsec; /* 0x48 */ + __u32 tb_update_count; /* Timebase atomicity ctr 0x50 */ + __u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */ + __u32 tz_dsttime; /* Type of dst correction 0x5C */ + __s32 wtom_clock_sec; /* Wall to monotonic clock */ + __s32 wtom_clock_nsec; + __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ +}; + +#endif /* CONFIG_PPC64 */ + +#ifdef __KERNEL__ +extern struct vdso_data *vdso_data; +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _SYSTEMCFG_H */ diff --git a/include/asm-ppc/page.h b/include/asm-ppc/page.h index fc44f7ca62d7..538e0c8ab243 100644 --- a/include/asm-ppc/page.h +++ b/include/asm-ppc/page.h @@ -1,9 +1,12 @@ #ifndef _PPC_PAGE_H #define _PPC_PAGE_H +#include <linux/config.h> +#include <asm/asm-compat.h> + /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT 12 -#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) /* * Subtle: this is an int (not an unsigned long) and so it @@ -169,5 +172,8 @@ extern __inline__ int get_order(unsigned long size) #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +/* We do define AT_SYSINFO_EHDR but don't use the gate mecanism */ +#define __HAVE_ARCH_GATE_AREA 1 + #endif /* __KERNEL__ */ #endif /* _PPC_PAGE_H */ |