From dd78bc11fb2050b6a3990d0421feca4c68ca4335 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 23 Jul 2013 17:32:04 -0400 Subject: tile: convert uses of "inv" to "finv" The "inv" (invalidate) instruction is generally less safe than "finv" (flush and invalidate), as it will drop dirty data from the cache. It turns out we have almost no need for "inv" (other than for the older 32-bit architecture in some limited cases), so convert to "finv" where possible and delete the extra "inv" infrastructure. Signed-off-by: Chris Metcalf --- arch/tile/kernel/head_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/tile/kernel/head_64.S') diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S index 6093964fa5c7..e23e5f7b91d9 100644 --- a/arch/tile/kernel/head_64.S +++ b/arch/tile/kernel/head_64.S @@ -77,7 +77,7 @@ ENTRY(_start) { /* After initializing swapper_pgprot, HV_PTE_GLOBAL is set. */ bfextu r7, r1, HV_PTE_INDEX_GLOBAL, HV_PTE_INDEX_GLOBAL - inv r4 + finv r4 } bnez r7, .Lno_write { -- cgit v1.2.3 From ba02f0eb826da6dbdc5a2958ac52304ee441234f Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 7 Aug 2013 11:55:35 -0400 Subject: tile: improve big-endian support First, fix a bug in asm/unaligned.h; we need to just use the asm-generic unaligned.h so we properly choose endian-correct flavors. Second, keep the hv/hypervisor.h ABI fully "native" in the sense that we don't have __BIG_ENDIAN__ ifdefs there. Instead, we use macros in the head_NN.S assembly code to properly extract two 32-bit structure members from a 64-bit register holding the structure. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/unaligned.h | 14 +++++++++----- arch/tile/include/hv/hypervisor.h | 16 ---------------- arch/tile/kernel/head_64.S | 24 ++++++++++++++---------- 3 files changed, 23 insertions(+), 31 deletions(-) (limited to 'arch/tile/kernel/head_64.S') diff --git a/arch/tile/include/asm/unaligned.h b/arch/tile/include/asm/unaligned.h index 37dfbe598872..5a58a0d11449 100644 --- a/arch/tile/include/asm/unaligned.h +++ b/arch/tile/include/asm/unaligned.h @@ -15,11 +15,15 @@ #ifndef _ASM_TILE_UNALIGNED_H #define _ASM_TILE_UNALIGNED_H -#include -#include -#include -#define get_unaligned __get_unaligned_le -#define put_unaligned __put_unaligned_le +/* + * We could implement faster get_unaligned_[be/le]64 using the ldna + * instruction on tilegx; however, we need to either copy all of the + * other generic functions to here (which is pretty ugly) or else + * modify both the generic code and other arch code to allow arch + * specific unaligned data access functions. Given these functions + * are not often called, we'll stick with the generic version. + */ +#include /* * Is the kernel doing fixups of unaligned accesses? If <0, no kernel diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h index f882ebcf43a9..0971ebbde1b7 100644 --- a/arch/tile/include/hv/hypervisor.h +++ b/arch/tile/include/hv/hypervisor.h @@ -564,16 +564,11 @@ int hv_confstr(HV_ConfstrQuery query, HV_VirtAddr buf, int len); /** Tile coordinate */ typedef struct { -#ifndef __BIG_ENDIAN__ /** X coordinate, relative to supervisor's top-left coordinate */ int x; /** Y coordinate, relative to supervisor's top-left coordinate */ int y; -#else - int y; - int x; -#endif } HV_Coord; @@ -1119,13 +1114,8 @@ HV_VirtAddrRange hv_inquire_virtual(int idx); /** A range of ASID values. */ typedef struct { -#ifndef __BIG_ENDIAN__ HV_ASID start; /**< First ASID in the range. */ unsigned int size; /**< Number of ASIDs. Zero for an invalid range. */ -#else - unsigned int size; /**< Number of ASIDs. Zero for an invalid range. */ - HV_ASID start; /**< First ASID in the range. */ -#endif } HV_ASIDRange; /** Returns information about a range of ASIDs. @@ -1449,7 +1439,6 @@ typedef enum /** Message recipient. */ typedef struct { -#ifndef __BIG_ENDIAN__ /** X coordinate, relative to supervisor's top-left coordinate */ unsigned int x:11; @@ -1458,11 +1447,6 @@ typedef struct /** Status of this recipient */ HV_Recip_State state:10; -#else //__BIG_ENDIAN__ - HV_Recip_State state:10; - unsigned int y:11; - unsigned int x:11; -#endif } HV_Recipient; /** Send a message to a set of recipients. diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S index e23e5f7b91d9..ed51320847ce 100644 --- a/arch/tile/kernel/head_64.S +++ b/arch/tile/kernel/head_64.S @@ -25,6 +25,15 @@ #include #include +/* Extract two 32-bit bit values that were read into one register. */ +#ifdef __BIG_ENDIAN__ +#define GET_FIRST_INT(rd, rs) shrsi rd, rs, 32 +#define GET_SECOND_INT(rd, rs) addxi rd, rs, 0 +#else +#define GET_FIRST_INT(rd, rs) addxi rd, rs, 0 +#define GET_SECOND_INT(rd, rs) shrsi rd, rs, 32 +#endif + /* * This module contains the entry code for kernel images. It performs the * minimal setup needed to call the generic C routines. @@ -61,7 +70,7 @@ ENTRY(_start) * other CPUs should see a properly-constructed page table. */ { - v4int_l r2, zero, r0 /* ASID for hv_install_context */ + GET_FIRST_INT(r2, r0) /* ASID for hv_install_context */ moveli r4, hw1_last(swapper_pgprot - PAGE_OFFSET) } { @@ -131,19 +140,14 @@ ENTRY(_start) shl16insli r0, r0, hw0(MEM_SV_START) mtspr SPR_INTERRUPT_VECTOR_BASE_K, r0 - /* - * Get our processor number and save it away in SAVE_K_0. - * Extract stuff from the topology structure: r4 = y, r6 = x, - * r5 = width. FIXME: consider whether we want to just make these - * 64-bit values (and if so fix smp_topology write below, too). - */ + /* Get our processor number and save it away in SAVE_K_0. */ jal hv_inquire_topology { - v4int_l r5, zero, r1 /* r5 = width */ - shrui r4, r0, 32 /* r4 = y */ + GET_FIRST_INT(r5, r1) /* r5 = width */ + GET_SECOND_INT(r4, r0) /* r4 = y */ } { - v4int_l r6, zero, r0 /* r6 = x */ + GET_FIRST_INT(r6, r0) /* r6 = x */ mul_lu_lu r4, r4, r5 } { -- cgit v1.2.3 From 9ae09838470a68edf0245cd60c623df2d5993a8f Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 7 Aug 2013 16:03:08 -0400 Subject: tile: provide traceability for hypervisor calls This change adds infrastructure (CONFIG_TILE_HVGLUE_TRACE) that provides C code wrappers for the calls the kernel makes to the Tilera hypervisor. This allows standard kernel infrastructure like FTRACE to be able to instrument hypervisor calls. To allow direct calls to the true API, we export their names with a leading underscore as well. This is important for the few contexts where we need to make hypervisor calls without touching the stack. As part of this change, we also switch from creating the symbols with linker magic to creating them with assembler magic. This lets us provide a symbol type and generally make them appear more as symbols and less as just random values in the Elf namespace. Signed-off-by: Chris Metcalf --- arch/tile/Kconfig.debug | 8 ++ arch/tile/kernel/Makefile | 3 +- arch/tile/kernel/head_32.S | 8 +- arch/tile/kernel/head_64.S | 8 +- arch/tile/kernel/hvglue.S | 74 +++++++++++ arch/tile/kernel/hvglue.lds | 60 --------- arch/tile/kernel/hvglue_trace.c | 266 ++++++++++++++++++++++++++++++++++++++++ arch/tile/kernel/intvec_32.S | 2 +- arch/tile/kernel/intvec_64.S | 2 +- arch/tile/kernel/vmlinux.lds.S | 5 +- arch/tile/mm/migrate_32.S | 4 +- arch/tile/mm/migrate_64.S | 4 +- 12 files changed, 368 insertions(+), 76 deletions(-) create mode 100644 arch/tile/kernel/hvglue.S delete mode 100644 arch/tile/kernel/hvglue.lds create mode 100644 arch/tile/kernel/hvglue_trace.c (limited to 'arch/tile/kernel/head_64.S') diff --git a/arch/tile/Kconfig.debug b/arch/tile/Kconfig.debug index 9165ea979e85..b8d69f2757a1 100644 --- a/arch/tile/Kconfig.debug +++ b/arch/tile/Kconfig.debug @@ -24,4 +24,12 @@ config DEBUG_EXTRA_FLAGS size and build time noticeably. Such flags are often helpful if the main use of debug info is line number info. +config TILE_HVGLUE_TRACE + bool "Provide wrapper functions for hypervisor ABI calls" + default n + help + Provide wrapper functions for the hypervisor ABI calls + defined in arch/tile/kernel/hvglue.S. This allows tracing + mechanisms, etc., to have visibility into those calls. + endmenu diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index 5157d1c4b4ed..c4a957aad26a 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -3,7 +3,7 @@ # extra-y := vmlinux.lds head_$(BITS).o -obj-y := backtrace.o entry.o irq.o messaging.o \ +obj-y := backtrace.o entry.o hvglue.o irq.o messaging.o \ pci-dma.o proc.o process.o ptrace.o reboot.o \ setup.o signal.o single_step.o stack.o sys.o \ sysfs.o time.o traps.o unaligned.o vdso.o \ @@ -21,5 +21,6 @@ else obj-$(CONFIG_PCI) += pci.o endif obj-$(CONFIG_TILE_USB) += usb.o +obj-$(CONFIG_TILE_HVGLUE_TRACE) += hvglue_trace.o obj-y += vdso/ diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index 80cd407925cd..d1527fce2861 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S @@ -39,12 +39,12 @@ ENTRY(_start) } { moveli r0, _HV_VERSION_OLD_HV_INIT - jal hv_init + jal _hv_init } /* Get a reasonable default ASID in r0 */ { move r0, zero - jal hv_inquire_asid + jal _hv_inquire_asid } /* Install the default page table */ { @@ -73,12 +73,12 @@ ENTRY(_start) } { auli lr, lr, ha16(1f) - j hv_install_context + j _hv_install_context } 1: /* Get our processor number and save it away in SAVE_K_0. */ - jal hv_inquire_topology + jal _hv_inquire_topology mulll_uu r4, r1, r2 /* r1 == y, r2 == width */ add r4, r4, r0 /* r0 == x, so r4 == cpu == y*width + x */ diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S index ed51320847ce..969e4f81f3b3 100644 --- a/arch/tile/kernel/head_64.S +++ b/arch/tile/kernel/head_64.S @@ -55,11 +55,11 @@ ENTRY(_start) movei r2, TILE_CHIP_REV movei r3, KERNEL_PL } - jal hv_init + jal _hv_init /* Get a reasonable default ASID in r0 */ { move r0, zero - jal hv_inquire_asid + jal _hv_inquire_asid } /* @@ -130,7 +130,7 @@ ENTRY(_start) } { moveli r3, CTX_PAGE_FLAG - j hv_install_context + j _hv_install_context } 1: @@ -141,7 +141,7 @@ ENTRY(_start) mtspr SPR_INTERRUPT_VECTOR_BASE_K, r0 /* Get our processor number and save it away in SAVE_K_0. */ - jal hv_inquire_topology + jal _hv_inquire_topology { GET_FIRST_INT(r5, r1) /* r5 = width */ GET_SECOND_INT(r4, r0) /* r4 = y */ diff --git a/arch/tile/kernel/hvglue.S b/arch/tile/kernel/hvglue.S new file mode 100644 index 000000000000..2ab456622391 --- /dev/null +++ b/arch/tile/kernel/hvglue.S @@ -0,0 +1,74 @@ +/* Hypervisor call vector addresses; see */ +.macro gensym sym, val, size +.org \val +.global _\sym +.type _\sym,function +_\sym: +.size _\sym,\size +#ifndef CONFIG_TILE_HVGLUE_TRACE +.globl \sym +.set \sym,_\sym +#endif +.endm + +.section .hvglue,"x",@nobits +.align 8 +gensym hv_init, 0x20, 32 +gensym hv_install_context, 0x40, 32 +gensym hv_sysconf, 0x60, 32 +gensym hv_get_rtc, 0x80, 32 +gensym hv_set_rtc, 0xa0, 32 +gensym hv_flush_asid, 0xc0, 32 +gensym hv_flush_page, 0xe0, 32 +gensym hv_flush_pages, 0x100, 32 +gensym hv_restart, 0x120, 32 +gensym hv_halt, 0x140, 32 +gensym hv_power_off, 0x160, 32 +gensym hv_inquire_physical, 0x180, 32 +gensym hv_inquire_memory_controller, 0x1a0, 32 +gensym hv_inquire_virtual, 0x1c0, 32 +gensym hv_inquire_asid, 0x1e0, 32 +gensym hv_nanosleep, 0x200, 32 +gensym hv_console_read_if_ready, 0x220, 32 +gensym hv_console_write, 0x240, 32 +gensym hv_downcall_dispatch, 0x260, 32 +gensym hv_inquire_topology, 0x280, 32 +gensym hv_fs_findfile, 0x2a0, 32 +gensym hv_fs_fstat, 0x2c0, 32 +gensym hv_fs_pread, 0x2e0, 32 +gensym hv_physaddr_read64, 0x300, 32 +gensym hv_physaddr_write64, 0x320, 32 +gensym hv_get_command_line, 0x340, 32 +gensym hv_set_caching, 0x360, 32 +gensym hv_bzero_page, 0x380, 32 +gensym hv_register_message_state, 0x3a0, 32 +gensym hv_send_message, 0x3c0, 32 +gensym hv_receive_message, 0x3e0, 32 +gensym hv_inquire_context, 0x400, 32 +gensym hv_start_all_tiles, 0x420, 32 +gensym hv_dev_open, 0x440, 32 +gensym hv_dev_close, 0x460, 32 +gensym hv_dev_pread, 0x480, 32 +gensym hv_dev_pwrite, 0x4a0, 32 +gensym hv_dev_poll, 0x4c0, 32 +gensym hv_dev_poll_cancel, 0x4e0, 32 +gensym hv_dev_preada, 0x500, 32 +gensym hv_dev_pwritea, 0x520, 32 +gensym hv_flush_remote, 0x540, 32 +gensym hv_console_putc, 0x560, 32 +gensym hv_inquire_tiles, 0x580, 32 +gensym hv_confstr, 0x5a0, 32 +gensym hv_reexec, 0x5c0, 32 +gensym hv_set_command_line, 0x5e0, 32 +gensym hv_clear_intr, 0x600, 32 +gensym hv_enable_intr, 0x620, 32 +gensym hv_disable_intr, 0x640, 32 +gensym hv_raise_intr, 0x660, 32 +gensym hv_trigger_ipi, 0x680, 32 +gensym hv_store_mapping, 0x6a0, 32 +gensym hv_inquire_realpa, 0x6c0, 32 +gensym hv_flush_all, 0x6e0, 32 +gensym hv_get_ipi_pte, 0x700, 32 +gensym hv_set_pte_super_shift, 0x720, 32 +gensym hv_console_set_ipi, 0x7e0, 32 +gensym hv_glue_internals, 0x800, 30720 diff --git a/arch/tile/kernel/hvglue.lds b/arch/tile/kernel/hvglue.lds deleted file mode 100644 index ef522900633a..000000000000 --- a/arch/tile/kernel/hvglue.lds +++ /dev/null @@ -1,60 +0,0 @@ -/* Hypervisor call vector addresses; see */ -hv_init = TEXT_OFFSET + 0x10020; -hv_install_context = TEXT_OFFSET + 0x10040; -hv_sysconf = TEXT_OFFSET + 0x10060; -hv_get_rtc = TEXT_OFFSET + 0x10080; -hv_set_rtc = TEXT_OFFSET + 0x100a0; -hv_flush_asid = TEXT_OFFSET + 0x100c0; -hv_flush_page = TEXT_OFFSET + 0x100e0; -hv_flush_pages = TEXT_OFFSET + 0x10100; -hv_restart = TEXT_OFFSET + 0x10120; -hv_halt = TEXT_OFFSET + 0x10140; -hv_power_off = TEXT_OFFSET + 0x10160; -hv_inquire_physical = TEXT_OFFSET + 0x10180; -hv_inquire_memory_controller = TEXT_OFFSET + 0x101a0; -hv_inquire_virtual = TEXT_OFFSET + 0x101c0; -hv_inquire_asid = TEXT_OFFSET + 0x101e0; -hv_nanosleep = TEXT_OFFSET + 0x10200; -hv_console_read_if_ready = TEXT_OFFSET + 0x10220; -hv_console_write = TEXT_OFFSET + 0x10240; -hv_downcall_dispatch = TEXT_OFFSET + 0x10260; -hv_inquire_topology = TEXT_OFFSET + 0x10280; -hv_fs_findfile = TEXT_OFFSET + 0x102a0; -hv_fs_fstat = TEXT_OFFSET + 0x102c0; -hv_fs_pread = TEXT_OFFSET + 0x102e0; -hv_physaddr_read64 = TEXT_OFFSET + 0x10300; -hv_physaddr_write64 = TEXT_OFFSET + 0x10320; -hv_get_command_line = TEXT_OFFSET + 0x10340; -hv_set_caching = TEXT_OFFSET + 0x10360; -hv_bzero_page = TEXT_OFFSET + 0x10380; -hv_register_message_state = TEXT_OFFSET + 0x103a0; -hv_send_message = TEXT_OFFSET + 0x103c0; -hv_receive_message = TEXT_OFFSET + 0x103e0; -hv_inquire_context = TEXT_OFFSET + 0x10400; -hv_start_all_tiles = TEXT_OFFSET + 0x10420; -hv_dev_open = TEXT_OFFSET + 0x10440; -hv_dev_close = TEXT_OFFSET + 0x10460; -hv_dev_pread = TEXT_OFFSET + 0x10480; -hv_dev_pwrite = TEXT_OFFSET + 0x104a0; -hv_dev_poll = TEXT_OFFSET + 0x104c0; -hv_dev_poll_cancel = TEXT_OFFSET + 0x104e0; -hv_dev_preada = TEXT_OFFSET + 0x10500; -hv_dev_pwritea = TEXT_OFFSET + 0x10520; -hv_flush_remote = TEXT_OFFSET + 0x10540; -hv_console_putc = TEXT_OFFSET + 0x10560; -hv_inquire_tiles = TEXT_OFFSET + 0x10580; -hv_confstr = TEXT_OFFSET + 0x105a0; -hv_reexec = TEXT_OFFSET + 0x105c0; -hv_set_command_line = TEXT_OFFSET + 0x105e0; -hv_clear_intr = TEXT_OFFSET + 0x10600; -hv_enable_intr = TEXT_OFFSET + 0x10620; -hv_disable_intr = TEXT_OFFSET + 0x10640; -hv_raise_intr = TEXT_OFFSET + 0x10660; -hv_trigger_ipi = TEXT_OFFSET + 0x10680; -hv_store_mapping = TEXT_OFFSET + 0x106a0; -hv_inquire_realpa = TEXT_OFFSET + 0x106c0; -hv_flush_all = TEXT_OFFSET + 0x106e0; -hv_get_ipi_pte = TEXT_OFFSET + 0x10700; -hv_set_pte_super_shift = TEXT_OFFSET + 0x10720; -hv_console_set_ipi = TEXT_OFFSET + 0x107e0; -hv_glue_internals = TEXT_OFFSET + 0x10800; diff --git a/arch/tile/kernel/hvglue_trace.c b/arch/tile/kernel/hvglue_trace.c new file mode 100644 index 000000000000..85c74ad29312 --- /dev/null +++ b/arch/tile/kernel/hvglue_trace.c @@ -0,0 +1,266 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* + * Pull in the hypervisor header so we declare all the ABI functions + * with the underscore versions, then undef the names so that we can + * provide our own wrapper versions. + */ +#define hv_init _hv_init +#define hv_install_context _hv_install_context +#define hv_sysconf _hv_sysconf +#define hv_get_rtc _hv_get_rtc +#define hv_set_rtc _hv_set_rtc +#define hv_flush_asid _hv_flush_asid +#define hv_flush_page _hv_flush_page +#define hv_flush_pages _hv_flush_pages +#define hv_restart _hv_restart +#define hv_halt _hv_halt +#define hv_power_off _hv_power_off +#define hv_inquire_physical _hv_inquire_physical +#define hv_inquire_memory_controller _hv_inquire_memory_controller +#define hv_inquire_virtual _hv_inquire_virtual +#define hv_inquire_asid _hv_inquire_asid +#define hv_nanosleep _hv_nanosleep +#define hv_console_read_if_ready _hv_console_read_if_ready +#define hv_console_write _hv_console_write +#define hv_downcall_dispatch _hv_downcall_dispatch +#define hv_inquire_topology _hv_inquire_topology +#define hv_fs_findfile _hv_fs_findfile +#define hv_fs_fstat _hv_fs_fstat +#define hv_fs_pread _hv_fs_pread +#define hv_physaddr_read64 _hv_physaddr_read64 +#define hv_physaddr_write64 _hv_physaddr_write64 +#define hv_get_command_line _hv_get_command_line +#define hv_set_caching _hv_set_caching +#define hv_bzero_page _hv_bzero_page +#define hv_register_message_state _hv_register_message_state +#define hv_send_message _hv_send_message +#define hv_receive_message _hv_receive_message +#define hv_inquire_context _hv_inquire_context +#define hv_start_all_tiles _hv_start_all_tiles +#define hv_dev_open _hv_dev_open +#define hv_dev_close _hv_dev_close +#define hv_dev_pread _hv_dev_pread +#define hv_dev_pwrite _hv_dev_pwrite +#define hv_dev_poll _hv_dev_poll +#define hv_dev_poll_cancel _hv_dev_poll_cancel +#define hv_dev_preada _hv_dev_preada +#define hv_dev_pwritea _hv_dev_pwritea +#define hv_flush_remote _hv_flush_remote +#define hv_console_putc _hv_console_putc +#define hv_inquire_tiles _hv_inquire_tiles +#define hv_confstr _hv_confstr +#define hv_reexec _hv_reexec +#define hv_set_command_line _hv_set_command_line +#define hv_clear_intr _hv_clear_intr +#define hv_enable_intr _hv_enable_intr +#define hv_disable_intr _hv_disable_intr +#define hv_raise_intr _hv_raise_intr +#define hv_trigger_ipi _hv_trigger_ipi +#define hv_store_mapping _hv_store_mapping +#define hv_inquire_realpa _hv_inquire_realpa +#define hv_flush_all _hv_flush_all +#define hv_get_ipi_pte _hv_get_ipi_pte +#define hv_set_pte_super_shift _hv_set_pte_super_shift +#define hv_console_set_ipi _hv_console_set_ipi +#include +#undef hv_init +#undef hv_install_context +#undef hv_sysconf +#undef hv_get_rtc +#undef hv_set_rtc +#undef hv_flush_asid +#undef hv_flush_page +#undef hv_flush_pages +#undef hv_restart +#undef hv_halt +#undef hv_power_off +#undef hv_inquire_physical +#undef hv_inquire_memory_controller +#undef hv_inquire_virtual +#undef hv_inquire_asid +#undef hv_nanosleep +#undef hv_console_read_if_ready +#undef hv_console_write +#undef hv_downcall_dispatch +#undef hv_inquire_topology +#undef hv_fs_findfile +#undef hv_fs_fstat +#undef hv_fs_pread +#undef hv_physaddr_read64 +#undef hv_physaddr_write64 +#undef hv_get_command_line +#undef hv_set_caching +#undef hv_bzero_page +#undef hv_register_message_state +#undef hv_send_message +#undef hv_receive_message +#undef hv_inquire_context +#undef hv_start_all_tiles +#undef hv_dev_open +#undef hv_dev_close +#undef hv_dev_pread +#undef hv_dev_pwrite +#undef hv_dev_poll +#undef hv_dev_poll_cancel +#undef hv_dev_preada +#undef hv_dev_pwritea +#undef hv_flush_remote +#undef hv_console_putc +#undef hv_inquire_tiles +#undef hv_confstr +#undef hv_reexec +#undef hv_set_command_line +#undef hv_clear_intr +#undef hv_enable_intr +#undef hv_disable_intr +#undef hv_raise_intr +#undef hv_trigger_ipi +#undef hv_store_mapping +#undef hv_inquire_realpa +#undef hv_flush_all +#undef hv_get_ipi_pte +#undef hv_set_pte_super_shift +#undef hv_console_set_ipi + +/* + * Provide macros based on to provide a wrapper + * function that invokes the same function with an underscore prefix. + * We can't use the existing __SC_xxx macros because we need to + * support up to nine arguments rather than up to six, and also this + * way the file stands alone from possible changes in the + * implementation of . + */ +#define HV_WRAP0(type, name) \ + type name(void); \ + type name(void) \ + { \ + return _##name(); \ + } +#define __HV_DECL1(t1, a1) t1 a1 +#define __HV_DECL2(t2, a2, ...) t2 a2, __HV_DECL1(__VA_ARGS__) +#define __HV_DECL3(t3, a3, ...) t3 a3, __HV_DECL2(__VA_ARGS__) +#define __HV_DECL4(t4, a4, ...) t4 a4, __HV_DECL3(__VA_ARGS__) +#define __HV_DECL5(t5, a5, ...) t5 a5, __HV_DECL4(__VA_ARGS__) +#define __HV_DECL6(t6, a6, ...) t6 a6, __HV_DECL5(__VA_ARGS__) +#define __HV_DECL7(t7, a7, ...) t7 a7, __HV_DECL6(__VA_ARGS__) +#define __HV_DECL8(t8, a8, ...) t8 a8, __HV_DECL7(__VA_ARGS__) +#define __HV_DECL9(t9, a9, ...) t9 a9, __HV_DECL8(__VA_ARGS__) +#define __HV_PASS1(t1, a1) a1 +#define __HV_PASS2(t2, a2, ...) a2, __HV_PASS1(__VA_ARGS__) +#define __HV_PASS3(t3, a3, ...) a3, __HV_PASS2(__VA_ARGS__) +#define __HV_PASS4(t4, a4, ...) a4, __HV_PASS3(__VA_ARGS__) +#define __HV_PASS5(t5, a5, ...) a5, __HV_PASS4(__VA_ARGS__) +#define __HV_PASS6(t6, a6, ...) a6, __HV_PASS5(__VA_ARGS__) +#define __HV_PASS7(t7, a7, ...) a7, __HV_PASS6(__VA_ARGS__) +#define __HV_PASS8(t8, a8, ...) a8, __HV_PASS7(__VA_ARGS__) +#define __HV_PASS9(t9, a9, ...) a9, __HV_PASS8(__VA_ARGS__) +#define HV_WRAPx(x, type, name, ...) \ + type name(__HV_DECL##x(__VA_ARGS__)); \ + type name(__HV_DECL##x(__VA_ARGS__)) \ + { \ + return _##name(__HV_PASS##x(__VA_ARGS__)); \ + } +#define HV_WRAP1(type, name, ...) HV_WRAPx(1, type, name, __VA_ARGS__) +#define HV_WRAP2(type, name, ...) HV_WRAPx(2, type, name, __VA_ARGS__) +#define HV_WRAP3(type, name, ...) HV_WRAPx(3, type, name, __VA_ARGS__) +#define HV_WRAP4(type, name, ...) HV_WRAPx(4, type, name, __VA_ARGS__) +#define HV_WRAP5(type, name, ...) HV_WRAPx(5, type, name, __VA_ARGS__) +#define HV_WRAP6(type, name, ...) HV_WRAPx(6, type, name, __VA_ARGS__) +#define HV_WRAP7(type, name, ...) HV_WRAPx(7, type, name, __VA_ARGS__) +#define HV_WRAP8(type, name, ...) HV_WRAPx(8, type, name, __VA_ARGS__) +#define HV_WRAP9(type, name, ...) HV_WRAPx(9, type, name, __VA_ARGS__) + +/* List all the hypervisor API functions. */ +HV_WRAP4(void, hv_init, HV_VersionNumber, interface_version_number, + int, chip_num, int, chip_rev_num, int, client_pl) +HV_WRAP1(long, hv_sysconf, HV_SysconfQuery, query) +HV_WRAP3(int, hv_confstr, HV_ConfstrQuery, query, HV_VirtAddr, buf, int, len) +#if CHIP_HAS_IPI() +HV_WRAP3(int, hv_get_ipi_pte, HV_Coord, tile, int, pl, HV_PTE*, pte) +HV_WRAP3(int, hv_console_set_ipi, int, ipi, int, event, HV_Coord, coord); +#else +HV_WRAP1(void, hv_enable_intr, HV_IntrMask, enab_mask) +HV_WRAP1(void, hv_disable_intr, HV_IntrMask, disab_mask) +HV_WRAP1(void, hv_clear_intr, HV_IntrMask, clear_mask) +HV_WRAP1(void, hv_raise_intr, HV_IntrMask, raise_mask) +HV_WRAP2(HV_Errno, hv_trigger_ipi, HV_Coord, tile, int, interrupt) +#endif /* !CHIP_HAS_IPI() */ +HV_WRAP3(int, hv_store_mapping, HV_VirtAddr, va, unsigned int, len, + HV_PhysAddr, pa) +HV_WRAP2(HV_PhysAddr, hv_inquire_realpa, HV_PhysAddr, cpa, unsigned int, len) +HV_WRAP0(HV_RTCTime, hv_get_rtc) +HV_WRAP1(void, hv_set_rtc, HV_RTCTime, time) +HV_WRAP4(int, hv_install_context, HV_PhysAddr, page_table, HV_PTE, access, + HV_ASID, asid, __hv32, flags) +HV_WRAP2(int, hv_set_pte_super_shift, int, level, int, log2_count) +HV_WRAP0(HV_Context, hv_inquire_context) +HV_WRAP1(int, hv_flush_asid, HV_ASID, asid) +HV_WRAP2(int, hv_flush_page, HV_VirtAddr, address, HV_PageSize, page_size) +HV_WRAP3(int, hv_flush_pages, HV_VirtAddr, start, HV_PageSize, page_size, + unsigned long, size) +HV_WRAP1(int, hv_flush_all, int, preserve_global) +HV_WRAP2(void, hv_restart, HV_VirtAddr, cmd, HV_VirtAddr, args) +HV_WRAP0(void, hv_halt) +HV_WRAP0(void, hv_power_off) +HV_WRAP1(int, hv_reexec, HV_PhysAddr, entry) +HV_WRAP0(HV_Topology, hv_inquire_topology) +HV_WRAP3(HV_Errno, hv_inquire_tiles, HV_InqTileSet, set, HV_VirtAddr, cpumask, + int, length) +HV_WRAP1(HV_PhysAddrRange, hv_inquire_physical, int, idx) +HV_WRAP2(HV_MemoryControllerInfo, hv_inquire_memory_controller, HV_Coord, coord, + int, controller) +HV_WRAP1(HV_VirtAddrRange, hv_inquire_virtual, int, idx) +HV_WRAP1(HV_ASIDRange, hv_inquire_asid, int, idx) +HV_WRAP1(void, hv_nanosleep, int, nanosecs) +HV_WRAP0(int, hv_console_read_if_ready) +HV_WRAP1(void, hv_console_putc, int, byte) +HV_WRAP2(int, hv_console_write, HV_VirtAddr, bytes, int, len) +HV_WRAP0(void, hv_downcall_dispatch) +HV_WRAP1(int, hv_fs_findfile, HV_VirtAddr, filename) +HV_WRAP1(HV_FS_StatInfo, hv_fs_fstat, int, inode) +HV_WRAP4(int, hv_fs_pread, int, inode, HV_VirtAddr, buf, + int, length, int, offset) +HV_WRAP2(unsigned long long, hv_physaddr_read64, HV_PhysAddr, addr, + HV_PTE, access) +HV_WRAP3(void, hv_physaddr_write64, HV_PhysAddr, addr, HV_PTE, access, + unsigned long long, val) +HV_WRAP2(int, hv_get_command_line, HV_VirtAddr, buf, int, length) +HV_WRAP2(HV_Errno, hv_set_command_line, HV_VirtAddr, buf, int, length) +HV_WRAP1(void, hv_set_caching, unsigned long, bitmask) +HV_WRAP2(void, hv_bzero_page, HV_VirtAddr, va, unsigned int, size) +HV_WRAP1(HV_Errno, hv_register_message_state, HV_MsgState*, msgstate) +HV_WRAP4(int, hv_send_message, HV_Recipient *, recips, int, nrecip, + HV_VirtAddr, buf, int, buflen) +HV_WRAP3(HV_RcvMsgInfo, hv_receive_message, HV_MsgState, msgstate, + HV_VirtAddr, buf, int, buflen) +HV_WRAP0(void, hv_start_all_tiles) +HV_WRAP2(int, hv_dev_open, HV_VirtAddr, name, __hv32, flags) +HV_WRAP1(int, hv_dev_close, int, devhdl) +HV_WRAP5(int, hv_dev_pread, int, devhdl, __hv32, flags, HV_VirtAddr, va, + __hv32, len, __hv64, offset) +HV_WRAP5(int, hv_dev_pwrite, int, devhdl, __hv32, flags, HV_VirtAddr, va, + __hv32, len, __hv64, offset) +HV_WRAP3(int, hv_dev_poll, int, devhdl, __hv32, events, HV_IntArg, intarg) +HV_WRAP1(int, hv_dev_poll_cancel, int, devhdl) +HV_WRAP6(int, hv_dev_preada, int, devhdl, __hv32, flags, __hv32, sgl_len, + HV_SGL *, sglp, __hv64, offset, HV_IntArg, intarg) +HV_WRAP6(int, hv_dev_pwritea, int, devhdl, __hv32, flags, __hv32, sgl_len, + HV_SGL *, sglp, __hv64, offset, HV_IntArg, intarg) +HV_WRAP9(int, hv_flush_remote, HV_PhysAddr, cache_pa, + unsigned long, cache_control, unsigned long*, cache_cpumask, + HV_VirtAddr, tlb_va, unsigned long, tlb_length, + unsigned long, tlb_pgsize, unsigned long*, tlb_cpumask, + HV_Remote_ASID*, asids, int, asidcount) diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 10767655689e..9c0c3cb6aab0 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -758,7 +758,7 @@ intvec_\vecname: .macro dc_dispatch vecnum, vecname .org (\vecnum << 8) intvec_\vecname: - j hv_downcall_dispatch + j _hv_downcall_dispatch ENDPROC(intvec_\vecname) .endm diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index 562886db780b..df19d4f3946e 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -772,7 +772,7 @@ intvec_\vecname: .macro dc_dispatch vecnum, vecname .org (\vecnum << 8) intvec_\vecname: - j hv_downcall_dispatch + j _hv_downcall_dispatch ENDPROC(intvec_\vecname) .endm diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index a13ed902afbb..0f0edaf68f94 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -31,7 +31,10 @@ SECTIONS } :intrpt1 =0 /* Hypervisor call vectors */ - #include "hvglue.lds" + . = ALIGN(0x10000); + .hvglue : AT (ADDR(.hvglue) - LOAD_OFFSET) { + *(.hvglue) + } :NONE /* Now the real code */ . = ALIGN(0x20000); diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S index 5305814bf187..772085491bf9 100644 --- a/arch/tile/mm/migrate_32.S +++ b/arch/tile/mm/migrate_32.S @@ -136,7 +136,7 @@ STD_ENTRY(flush_and_install_context) move r8, zero /* asids */ move r9, zero /* asidcount */ } - jal hv_flush_remote + jal _hv_flush_remote bnz r0, .Ldone /* Now install the new page table. */ @@ -152,7 +152,7 @@ STD_ENTRY(flush_and_install_context) move r4, r_asid moveli r5, HV_CTX_DIRECTIO | CTX_PAGE_FLAG } - jal hv_install_context + jal _hv_install_context bnz r0, .Ldone /* Finally, flush the TLB. */ diff --git a/arch/tile/mm/migrate_64.S b/arch/tile/mm/migrate_64.S index 1d15b10833d1..a49eee38f872 100644 --- a/arch/tile/mm/migrate_64.S +++ b/arch/tile/mm/migrate_64.S @@ -123,7 +123,7 @@ STD_ENTRY(flush_and_install_context) } { move r8, zero /* asidcount */ - jal hv_flush_remote + jal _hv_flush_remote } bnez r0, 1f @@ -136,7 +136,7 @@ STD_ENTRY(flush_and_install_context) move r2, r_asid moveli r3, HV_CTX_DIRECTIO | CTX_PAGE_FLAG } - jal hv_install_context + jal _hv_install_context bnez r0, 1f /* Finally, flush the TLB. */ -- cgit v1.2.3 From 35f059761c5ac313d13372fe3cdaa41bce3d0dbf Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Sat, 10 Aug 2013 12:35:02 -0400 Subject: tilegx: change how we find the kernel stack Previously, we used a special-purpose register (SPR_SYSTEM_SAVE_K_0) to hold the CPU number and the top of the current kernel stack by using the low bits to hold the CPU number, and using the high bits to hold the address of the page just above where we'd want the kernel stack to be. That way we could initialize a new SP when first entering the kernel by just masking the SPR value and subtracting a couple of words. However, it's actually more useful to be able to place an arbitrary kernel-top value in the SPR. This allows us to create a new stack context (e.g. for virtualization) with an arbitrary top-of-stack VA. To make this work, we now store the CPU number in the high bits, above the highest legal VA bit (42 bits in the current tilegx microarchitecture). The full 42 bits are thus available to store the top of stack value. Getting the current cpu (a relatively common operation) is still fast; it's now a shift rather than a mask. We make this change only for tilegx, since tilepro has too few SPR bits to do this, and we don't need this support on tilepro anyway. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/processor.h | 47 +++++++++++++++++++++++++++------------ arch/tile/kernel/head_32.S | 3 +-- arch/tile/kernel/head_64.S | 6 ++--- arch/tile/kernel/intvec_32.S | 7 ++++-- arch/tile/kernel/intvec_64.S | 21 +++++++++-------- arch/tile/kernel/stack.c | 10 ++++----- 6 files changed, 57 insertions(+), 37 deletions(-) (limited to 'arch/tile/kernel/head_64.S') diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 461322b473b5..230b830e94d4 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -148,9 +148,10 @@ struct thread_struct { /* * Start with "sp" this many bytes below the top of the kernel stack. - * This preserves the invariant that a called function may write to *sp. + * This allows us to be cache-aware when handling the initial save + * of the pt_regs value to the stack. */ -#define STACK_TOP_DELTA 8 +#define STACK_TOP_DELTA 64 /* * When entering the kernel via a fault, start with the top of the @@ -234,15 +235,15 @@ extern int do_work_pending(struct pt_regs *regs, u32 flags); unsigned long get_wchan(struct task_struct *p); /* Return initial ksp value for given task. */ -#define task_ksp0(task) ((unsigned long)(task)->stack + THREAD_SIZE) +#define task_ksp0(task) \ + ((unsigned long)(task)->stack + THREAD_SIZE - STACK_TOP_DELTA) /* Return some info about the user process TASK. */ -#define KSTK_TOP(task) (task_ksp0(task) - STACK_TOP_DELTA) #define task_pt_regs(task) \ - ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1) + ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1) #define current_pt_regs() \ - ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \ - (KSTK_PTREGS_GAP - 1)) - 1) + ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \ + STACK_TOP_DELTA - (KSTK_PTREGS_GAP - 1)) - 1) #define task_sp(task) (task_pt_regs(task)->sp) #define task_pc(task) (task_pt_regs(task)->pc) /* Aliases for pc and sp (used in fs/proc/array.c) */ @@ -355,20 +356,38 @@ extern int kdata_huge; #define KERNEL_PL CONFIG_KERNEL_PL /* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */ -#define CPU_LOG_MASK_VALUE 12 -#define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1) -#if CONFIG_NR_CPUS > CPU_MASK_VALUE -# error Too many cpus! +#ifdef __tilegx__ +#define CPU_SHIFT 48 +#if CHIP_VA_WIDTH() > CPU_SHIFT +# error Too many VA bits! #endif +#define MAX_CPU_ID ((1 << (64 - CPU_SHIFT)) - 1) #define raw_smp_processor_id() \ - ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & CPU_MASK_VALUE) + ((int)(__insn_mfspr(SPR_SYSTEM_SAVE_K_0) >> CPU_SHIFT)) #define get_current_ksp0() \ - (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~CPU_MASK_VALUE) + ((unsigned long)(((long)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) << \ + (64 - CPU_SHIFT)) >> (64 - CPU_SHIFT))) +#define next_current_ksp0(task) ({ \ + unsigned long __ksp0 = task_ksp0(task) & ((1UL << CPU_SHIFT) - 1); \ + unsigned long __cpu = (long)raw_smp_processor_id() << CPU_SHIFT; \ + __ksp0 | __cpu; \ +}) +#else +#define LOG2_NR_CPU_IDS 6 +#define MAX_CPU_ID ((1 << LOG2_NR_CPU_IDS) - 1) +#define raw_smp_processor_id() \ + ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & MAX_CPU_ID) +#define get_current_ksp0() \ + (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~MAX_CPU_ID) #define next_current_ksp0(task) ({ \ unsigned long __ksp0 = task_ksp0(task); \ int __cpu = raw_smp_processor_id(); \ - BUG_ON(__ksp0 & CPU_MASK_VALUE); \ + BUG_ON(__ksp0 & MAX_CPU_ID); \ __ksp0 | __cpu; \ }) +#endif +#if CONFIG_NR_CPUS > (MAX_CPU_ID + 1) +# error Too many cpus! +#endif #endif /* _ASM_TILE_PROCESSOR_H */ diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index d1527fce2861..f3f17b0283ff 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S @@ -86,7 +86,7 @@ ENTRY(_start) /* * Load up our per-cpu offset. When the first (master) tile * boots, this value is still zero, so we will load boot_pc - * with start_kernel, and boot_sp with init_stack + THREAD_SIZE. + * with start_kernel, and boot_sp at the top of init_stack. * The master tile initializes the per-cpu offset array, so that * when subsequent (secondary) tiles boot, they will instead load * from their per-cpu versions of boot_sp and boot_pc. @@ -126,7 +126,6 @@ ENTRY(_start) lw sp, r1 or r4, sp, r4 mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ - addi sp, sp, -STACK_TOP_DELTA { move lr, zero /* stop backtraces in the called function */ jr r0 diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S index 969e4f81f3b3..652b81426158 100644 --- a/arch/tile/kernel/head_64.S +++ b/arch/tile/kernel/head_64.S @@ -158,7 +158,7 @@ ENTRY(_start) /* * Load up our per-cpu offset. When the first (master) tile * boots, this value is still zero, so we will load boot_pc - * with start_kernel, and boot_sp with init_stack + THREAD_SIZE. + * with start_kernel, and boot_sp with at the top of init_stack. * The master tile initializes the per-cpu offset array, so that * when subsequent (secondary) tiles boot, they will instead load * from their per-cpu versions of boot_sp and boot_pc. @@ -202,9 +202,9 @@ ENTRY(_start) } ld r0, r0 ld sp, r1 - or r4, sp, r4 + shli r4, r4, CPU_SHIFT + bfins r4, sp, 0, CPU_SHIFT-1 mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ - addi sp, sp, -STACK_TOP_DELTA { move lr, zero /* stop backtraces in the called function */ jr r0 diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 9c0c3cb6aab0..f3d26f48e659 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -185,7 +185,7 @@ intvec_\vecname: * point sp at the top aligned address on the actual stack page. */ mfspr r0, SPR_SYSTEM_SAVE_K_0 - mm r0, r0, zero, LOG2_THREAD_SIZE, 31 + mm r0, r0, zero, LOG2_NR_CPU_IDS, 31 0: /* @@ -203,6 +203,9 @@ intvec_\vecname: * cache line 1: r14...r29 * cache line 0: 2 x frame, r0..r13 */ +#if STACK_TOP_DELTA != 64 +#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs() +#endif andi r0, r0, -64 /* @@ -464,7 +467,7 @@ intvec_\vecname: } { auli r21, r21, ha16(__per_cpu_offset) - mm r20, r20, zero, 0, LOG2_THREAD_SIZE-1 + mm r20, r20, zero, 0, LOG2_NR_CPU_IDS-1 } s2a r20, r20, r21 lw tp, r20 diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index df19d4f3946e..3b35bb490d3e 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -132,13 +132,9 @@ intvec_\vecname: mfspr r3, SPR_SYSTEM_SAVE_K_0 /* Get &thread_info->unalign_jit_tmp[0] in r3. */ + bfexts r3, r3, 0, CPU_SHIFT-1 mm r3, zero, LOG2_THREAD_SIZE, 63 -#if THREAD_SIZE < 65536 - addli r3, r3, -(PAGE_SIZE - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET) -#else - addli r3, r3, -(PAGE_SIZE/2) - addli r3, r3, -(PAGE_SIZE/2 - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET) -#endif + addli r3, r3, THREAD_INFO_UNALIGN_JIT_TMP_OFFSET /* * Save r0, r1, r2 into thread_info array r3 points to @@ -365,13 +361,13 @@ intvec_\vecname: 2: /* - * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and - * the current stack top in the higher bits. So we recover - * our stack top by just masking off the low bits, then + * SYSTEM_SAVE_K_0 holds the cpu number in the high bits, and + * the current stack top in the lower bits. So we recover + * our starting stack value by sign-extending the low bits, then * point sp at the top aligned address on the actual stack page. */ mfspr r0, SPR_SYSTEM_SAVE_K_0 - mm r0, zero, LOG2_THREAD_SIZE, 63 + bfexts r0, r0, 0, CPU_SHIFT-1 0: /* @@ -393,6 +389,9 @@ intvec_\vecname: * cache line 1: r6...r13 * cache line 0: 2 x frame, r0..r5 */ +#if STACK_TOP_DELTA != 64 +#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs() +#endif andi r0, r0, -64 /* @@ -690,7 +689,7 @@ intvec_\vecname: } { shl16insli r21, r21, hw1(__per_cpu_offset) - bfextu r20, r20, 0, LOG2_THREAD_SIZE-1 + bfextu r20, r20, CPU_SHIFT, 63 } shl16insli r21, r21, hw0(__per_cpu_offset) shl3add r20, r20, r21 diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index a9db923bb9eb..24fd223df65d 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -197,19 +197,19 @@ static void validate_stack(struct pt_regs *regs) { int cpu = raw_smp_processor_id(); unsigned long ksp0 = get_current_ksp0(); - unsigned long ksp0_base = ksp0 - THREAD_SIZE; + unsigned long ksp0_base = ksp0 & -THREAD_SIZE; unsigned long sp = stack_pointer; if (EX1_PL(regs->ex1) == KERNEL_PL && regs->sp >= ksp0) { - pr_err("WARNING: cpu %d: kernel stack page %#lx underrun!\n" + pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx underrun!\n" " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n", - cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr); + cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr); } else if (sp < ksp0_base + sizeof(struct thread_info)) { - pr_err("WARNING: cpu %d: kernel stack page %#lx overrun!\n" + pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx overrun!\n" " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n", - cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr); + cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr); } } -- cgit v1.2.3 From acbde1db294932623aad15dd8cc6e37b28340f26 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 3 Sep 2013 14:41:36 -0400 Subject: tile: parameterize VA and PA space more cleanly The existing code relied on the hardware definition () to specify how much VA and PA space was available. It's convenient to allow customizing this for some configurations, so provide symbols MAX_PA_WIDTH and MAX_VA_WIDTH in that can be modified if desired. Additionally, move away from the MEM_XX_INTRPT nomenclature to define the start of various regions within the VA space. In fact the cleaner symbol is, for example, MEM_SV_START, to indicate the start of the area used for supervisor code; the actual address of the interrupt vectors is not as important, and can be changed if desired. As part of this change, convert from "intrpt1" nomenclature (which built in the old privilege-level 1 model) to a simple "intrpt". Also strip out some tilepro-specific code supporting modifying the PL the kernel could run at, since we don't actually support using different PLs in tilepro, only tilegx. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/page.h | 52 +++++++++++++++----------------------- arch/tile/include/asm/pgtable_32.h | 2 +- arch/tile/include/asm/pgtable_64.h | 3 +-- arch/tile/include/asm/processor.h | 2 +- arch/tile/kernel/head_32.S | 4 +-- arch/tile/kernel/head_64.S | 6 ++--- arch/tile/kernel/intvec_32.S | 6 ++--- arch/tile/kernel/intvec_64.S | 8 +++--- arch/tile/kernel/setup.c | 8 +++--- arch/tile/kernel/traps.c | 2 +- arch/tile/kernel/vmlinux.lds.S | 10 ++++---- arch/tile/mm/init.c | 8 +++--- 12 files changed, 51 insertions(+), 60 deletions(-) (limited to 'arch/tile/kernel/head_64.S') diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h index b4f96c0024df..980843dd983e 100644 --- a/arch/tile/include/asm/page.h +++ b/arch/tile/include/asm/page.h @@ -148,8 +148,12 @@ static inline __attribute_const__ int get_order(unsigned long size) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif +/* Allow overriding how much VA or PA the kernel will use. */ +#define MAX_PA_WIDTH CHIP_PA_WIDTH() +#define MAX_VA_WIDTH CHIP_VA_WIDTH() + /* Each memory controller has PAs distinct in their high bits. */ -#define NR_PA_HIGHBIT_SHIFT (CHIP_PA_WIDTH() - CHIP_LOG_NUM_MSHIMS()) +#define NR_PA_HIGHBIT_SHIFT (MAX_PA_WIDTH - CHIP_LOG_NUM_MSHIMS()) #define NR_PA_HIGHBIT_VALUES (1 << CHIP_LOG_NUM_MSHIMS()) #define __pa_to_highbits(pa) ((phys_addr_t)(pa) >> NR_PA_HIGHBIT_SHIFT) #define __pfn_to_highbits(pfn) ((pfn) >> (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT)) @@ -160,7 +164,7 @@ static inline __attribute_const__ int get_order(unsigned long size) * We reserve the lower half of memory for user-space programs, and the * upper half for system code. We re-map all of physical memory in the * upper half, which takes a quarter of our VA space. Then we have - * the vmalloc regions. The supervisor code lives at 0xfffffff700000000, + * the vmalloc regions. The supervisor code lives at the highest address, * with the hypervisor above that. * * Loadable kernel modules are placed immediately after the static @@ -172,26 +176,19 @@ static inline __attribute_const__ int get_order(unsigned long size) * Similarly, for now we don't play any struct page mapping games. */ -#if CHIP_PA_WIDTH() + 2 > CHIP_VA_WIDTH() +#if MAX_PA_WIDTH + 2 > MAX_VA_WIDTH # error Too much PA to map with the VA available! #endif -#define HALF_VA_SPACE (_AC(1, UL) << (CHIP_VA_WIDTH() - 1)) -#define MEM_LOW_END (HALF_VA_SPACE - 1) /* low half */ -#define MEM_HIGH_START (-HALF_VA_SPACE) /* high half */ -#define PAGE_OFFSET MEM_HIGH_START -#define FIXADDR_BASE _AC(0xfffffff400000000, UL) /* 4 GB */ -#define FIXADDR_TOP _AC(0xfffffff500000000, UL) /* 4 GB */ +#define PAGE_OFFSET (-(_AC(1, UL) << (MAX_VA_WIDTH - 1))) +#define KERNEL_HIGH_VADDR _AC(0xfffffff800000000, UL) /* high 32GB */ +#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x400000000) /* 4 GB */ +#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */ #define _VMALLOC_START FIXADDR_TOP -#define HUGE_VMAP_BASE _AC(0xfffffff600000000, UL) /* 4 GB */ -#define MEM_SV_START _AC(0xfffffff700000000, UL) /* 256 MB */ -#define MEM_SV_INTRPT MEM_SV_START -#define MEM_MODULE_START _AC(0xfffffff710000000, UL) /* 256 MB */ +#define HUGE_VMAP_BASE (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */ +#define MEM_SV_START (KERNEL_HIGH_VADDR - 0x100000000) /* 256 MB */ +#define MEM_MODULE_START (MEM_SV_START + (256*1024*1024)) /* 256 MB */ #define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024)) -#define MEM_HV_START _AC(0xfffffff800000000, UL) /* 32 GB */ - -/* Highest DTLB address we will use */ -#define KERNEL_HIGH_VADDR MEM_SV_START #else /* !__tilegx__ */ @@ -213,25 +210,18 @@ static inline __attribute_const__ int get_order(unsigned long size) * values, and after that, we show "typical" values, since the actual * addresses depend on kernel #defines. * - * MEM_HV_INTRPT 0xfe000000 - * MEM_SV_INTRPT (kernel code) 0xfd000000 + * MEM_HV_START 0xfe000000 + * MEM_SV_START (kernel code) 0xfd000000 * MEM_USER_INTRPT (user vector) 0xfc000000 - * FIX_KMAP_xxx 0xf8000000 (via NR_CPUS * KM_TYPE_NR) - * PKMAP_BASE 0xf7000000 (via LAST_PKMAP) - * HUGE_VMAP 0xf3000000 (via CONFIG_NR_HUGE_VMAPS) - * VMALLOC_START 0xf0000000 (via __VMALLOC_RESERVE) + * FIX_KMAP_xxx 0xfa000000 (via NR_CPUS * KM_TYPE_NR) + * PKMAP_BASE 0xf9000000 (via LAST_PKMAP) + * VMALLOC_START 0xf7000000 (via VMALLOC_RESERVE) * mapped LOWMEM 0xc0000000 */ #define MEM_USER_INTRPT _AC(0xfc000000, UL) -#if CONFIG_KERNEL_PL == 1 -#define MEM_SV_INTRPT _AC(0xfd000000, UL) -#define MEM_HV_INTRPT _AC(0xfe000000, UL) -#else -#define MEM_GUEST_INTRPT _AC(0xfd000000, UL) -#define MEM_SV_INTRPT _AC(0xfe000000, UL) -#define MEM_HV_INTRPT _AC(0xff000000, UL) -#endif +#define MEM_SV_START _AC(0xfd000000, UL) +#define MEM_HV_START _AC(0xfe000000, UL) #define INTRPT_SIZE 0x4000 diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h index e5bdc0ea85c6..63142ab3b3dd 100644 --- a/arch/tile/include/asm/pgtable_32.h +++ b/arch/tile/include/asm/pgtable_32.h @@ -89,7 +89,7 @@ static inline int pud_huge_page(pud_t pud) { return 0; } /* We don't define any pgds for these addresses. */ static inline int pgd_addr_invalid(unsigned long addr) { - return addr >= MEM_HV_INTRPT; + return addr >= MEM_HV_START; } /* diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h index 7cb8d355f91b..3421177f7370 100644 --- a/arch/tile/include/asm/pgtable_64.h +++ b/arch/tile/include/asm/pgtable_64.h @@ -140,8 +140,7 @@ static inline unsigned long pgd_addr_normalize(unsigned long addr) /* We don't define any pgds for these addresses. */ static inline int pgd_addr_invalid(unsigned long addr) { - return addr >= MEM_HV_START || - (addr > MEM_LOW_END && addr < MEM_HIGH_START); + return addr >= KERNEL_HIGH_VADDR || addr != pgd_addr_normalize(addr); } /* diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index c72fcba7016a..5aa54319d2ef 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -168,7 +168,7 @@ struct thread_struct { #ifndef __ASSEMBLY__ #ifdef __tilegx__ -#define TASK_SIZE_MAX (MEM_LOW_END + 1) +#define TASK_SIZE_MAX (_AC(1, UL) << (MAX_VA_WIDTH - 1)) #else #define TASK_SIZE_MAX PAGE_OFFSET #endif diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index f3f17b0283ff..8d5b40ff2922 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S @@ -162,8 +162,8 @@ ENTRY(swapper_pg_dir) .set addr, addr + PGDIR_SIZE .endr - /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */ - PTE MEM_SV_INTRPT, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ + /* The true text VAs are mapped as VA = PA + MEM_SV_START */ + PTE MEM_SV_START, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ (1 << (HV_PTE_INDEX_EXECUTABLE - 32)) .org swapper_pg_dir + PGDIR_SIZE END(swapper_pg_dir) diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S index 652b81426158..bd0e12f283f3 100644 --- a/arch/tile/kernel/head_64.S +++ b/arch/tile/kernel/head_64.S @@ -135,9 +135,9 @@ ENTRY(_start) 1: /* Install the interrupt base. */ - moveli r0, hw2_last(MEM_SV_START) - shl16insli r0, r0, hw1(MEM_SV_START) - shl16insli r0, r0, hw0(MEM_SV_START) + moveli r0, hw2_last(intrpt_start) + shl16insli r0, r0, hw1(intrpt_start) + shl16insli r0, r0, hw0(intrpt_start) mtspr SPR_INTERRUPT_VECTOR_BASE_K, r0 /* Get our processor number and save it away in SAVE_K_0. */ diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index f3d26f48e659..f084f1c7afde 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -353,7 +353,7 @@ intvec_\vecname: #ifdef __COLLECT_LINKER_FEEDBACK__ .pushsection .text.intvec_feedback,"ax" .org (\vecnum << 5) - FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8) + FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt, 1 << 8) jrp lr .popsection #endif @@ -1890,8 +1890,8 @@ int_unalign: push_extra_callee_saves r0 j do_trap -/* Include .intrpt1 array of interrupt vectors */ - .section ".intrpt1", "ax" +/* Include .intrpt array of interrupt vectors */ + .section ".intrpt", "ax" #define op_handle_perf_interrupt bad_intr #define op_handle_aux_perf_interrupt bad_intr diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index f020f01960cf..c3a2335fa6a8 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -535,7 +535,7 @@ intvec_\vecname: #ifdef __COLLECT_LINKER_FEEDBACK__ .pushsection .text.intvec_feedback,"ax" .org (\vecnum << 5) - FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8) + FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt, 1 << 8) jrp lr .popsection #endif @@ -1485,8 +1485,10 @@ STD_ENTRY(fill_ra_stack) __int_hand \vecnum, \vecname, \c_routine, \processing .endm -/* Include .intrpt1 array of interrupt vectors */ - .section ".intrpt1", "ax" +/* Include .intrpt array of interrupt vectors */ + .section ".intrpt", "ax" + .global intrpt_start +intrpt_start: #define op_handle_perf_interrupt bad_intr #define op_handle_aux_perf_interrupt bad_intr diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 774e819f6a5f..10217844052a 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -268,7 +268,7 @@ early_param("vmalloc", parse_vmalloc); /* * Determine for each controller where its lowmem is mapped and how much of * it is mapped there. On controller zero, the first few megabytes are - * already mapped in as code at MEM_SV_INTRPT, so in principle we could + * already mapped in as code at MEM_SV_START, so in principle we could * start our data mappings higher up, but for now we don't bother, to avoid * additional confusion. * @@ -1242,7 +1242,7 @@ static void __init validate_va(void) #ifndef __tilegx__ /* FIXME: GX: probably some validation relevant here */ /* * Similarly, make sure we're only using allowed VAs. - * We assume we can contiguously use MEM_USER_INTRPT .. MEM_HV_INTRPT, + * We assume we can contiguously use MEM_USER_INTRPT .. MEM_HV_START, * and 0 .. KERNEL_HIGH_VADDR. * In addition, make sure we CAN'T use the end of memory, since * we use the last chunk of each pgd for the pgd_list. @@ -1257,7 +1257,7 @@ static void __init validate_va(void) if (range.size == 0) break; if (range.start <= MEM_USER_INTRPT && - range.start + range.size >= MEM_HV_INTRPT) + range.start + range.size >= MEM_HV_START) user_kernel_ok = 1; if (range.start == 0) max_va = range.size; @@ -1693,7 +1693,7 @@ insert_ram_resource(u64 start_pfn, u64 end_pfn, bool reserved) static int __init request_standard_resources(void) { int i; - enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET }; + enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET }; #if defined(CONFIG_PCI) && !defined(__tilegx__) insert_non_bus_resource(); diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index cfff6f958d58..628661f6a929 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -30,7 +30,7 @@ void __init trap_init(void) { - /* Nothing needed here since we link code at .intrpt1 */ + /* Nothing needed here since we link code at .intrpt */ } int unaligned_fixup = 1; diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index c7ae53df429e..8b2016307eb0 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -5,7 +5,7 @@ #include /* Text loads starting from the supervisor interrupt vector address. */ -#define TEXT_OFFSET MEM_SV_INTRPT +#define TEXT_OFFSET MEM_SV_START OUTPUT_ARCH(tile) ENTRY(_start) @@ -13,7 +13,7 @@ jiffies = jiffies_64; PHDRS { - intrpt1 PT_LOAD ; + intrpt PT_LOAD ; text PT_LOAD ; data PT_LOAD ; } @@ -24,11 +24,11 @@ SECTIONS #define LOAD_OFFSET TEXT_OFFSET /* Interrupt vectors */ - .intrpt1 (LOAD_OFFSET) : AT ( 0 ) /* put at the start of physical memory */ + .intrpt (LOAD_OFFSET) : AT ( 0 ) /* put at the start of physical memory */ { _text = .; - *(.intrpt1) - } :intrpt1 =0 + *(.intrpt) + } :intrpt =0 /* Hypervisor call vectors */ . = ALIGN(0x10000); diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 3bfa1275e333..c6d21601ec4d 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -234,7 +234,7 @@ static pgprot_t __init init_pgprot(ulong address) { int cpu; unsigned long page; - enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET }; + enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET }; #if CHIP_HAS_CBOX_HOME_MAP() /* For kdata=huge, everything is just hash-for-home. */ @@ -538,7 +538,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) } } - address = MEM_SV_INTRPT; + address = MEM_SV_START; pmd = get_pmd(pgtables, address); pfn = 0; /* code starts at PA 0 */ if (ktext_small) { @@ -1021,7 +1021,7 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end) void free_initmem(void) { - const unsigned long text_delta = MEM_SV_INTRPT - PAGE_OFFSET; + const unsigned long text_delta = MEM_SV_START - PAGE_OFFSET; /* * Evict the dirty initdata on the boot cpu, evict the w1data @@ -1040,7 +1040,7 @@ void free_initmem(void) /* * Free the pages mapped from 0xc0000000 that correspond to code - * pages from MEM_SV_INTRPT that we won't use again after init. + * pages from MEM_SV_START that we won't use again after init. */ free_init_pages("unused kernel text", (unsigned long)_sinittext - text_delta, -- cgit v1.2.3