diff options
Diffstat (limited to 'arch/tile')
28 files changed, 34 insertions, 1121 deletions
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index b2be42524483..6e1ed55f6cfc 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -207,7 +207,7 @@ config SYSVIPC_COMPAT def_bool y depends on COMPAT && SYSVIPC -# We do not currently support disabling HIGHMEM on tile64 and tilepro. +# We do not currently support disabling HIGHMEM on tilepro. config HIGHMEM bool # "Support for more than 512 MB of RAM" default !TILEGX diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index e7fb5cfb9597..96156f5ba640 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -252,21 +252,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n) * Internal definitions only beyond this point. */ -#define ATOMIC_LOCKS_FOUND_VIA_TABLE() \ - (!CHIP_HAS_CBOX_HOME_MAP() && defined(CONFIG_SMP)) - -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - -/* Number of entries in atomic_lock_ptr[]. */ -#define ATOMIC_HASH_L1_SHIFT 6 -#define ATOMIC_HASH_L1_SIZE (1 << ATOMIC_HASH_L1_SHIFT) - -/* Number of locks in each struct pointed to by atomic_lock_ptr[]. */ -#define ATOMIC_HASH_L2_SHIFT (CHIP_L2_LOG_LINE_SIZE() - 2) -#define ATOMIC_HASH_L2_SIZE (1 << ATOMIC_HASH_L2_SHIFT) - -#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - /* * Number of atomic locks in atomic_locks[]. Must be a power of two. * There is no reason for more than PAGE_SIZE / 8 entries, since that @@ -281,8 +266,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n) extern int atomic_locks[]; #endif -#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - /* * All the code that may fault while holding an atomic lock must * place the pointer to the lock in ATOMIC_LOCK_REG so the fault code diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h index 990a217a0b72..a9a73da5865d 100644 --- a/arch/tile/include/asm/barrier.h +++ b/arch/tile/include/asm/barrier.h @@ -77,7 +77,6 @@ #define __sync() __insn_mf() -#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() #include <hv/syscall_public.h> /* * Issue an uncacheable load to each memory controller, then @@ -96,7 +95,6 @@ static inline void __mb_incoherent(void) "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29"); } -#endif /* Fence to guarantee visibility of stores to incoherent memory. */ static inline void @@ -104,7 +102,6 @@ mb_incoherent(void) { __insn_mf(); -#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() { #if CHIP_HAS_TILE_WRITE_PENDING() const unsigned long WRITE_TIMEOUT_CYCLES = 400; @@ -116,7 +113,6 @@ mb_incoherent(void) #endif /* CHIP_HAS_TILE_WRITE_PENDING() */ (void) __mb_incoherent(); } -#endif /* CHIP_HAS_MF_WAITS_FOR_VICTIMS() */ } #define fast_wmb() __sync() diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h index e1da88e8aa9f..41d9878a9686 100644 --- a/arch/tile/include/asm/elf.h +++ b/arch/tile/include/asm/elf.h @@ -30,7 +30,6 @@ typedef unsigned long elf_greg_t; #define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t)) typedef elf_greg_t elf_gregset_t[ELF_NGREG]; -#define EM_TILE64 187 #define EM_TILEPRO 188 #define EM_TILEGX 191 diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h index 49d19dfc0630..7ddd1b8d6910 100644 --- a/arch/tile/include/asm/homecache.h +++ b/arch/tile/include/asm/homecache.h @@ -33,8 +33,7 @@ struct zone; /* * Is this page immutable (unwritable) and thus able to be cached more - * widely than would otherwise be possible? On tile64 this means we - * mark the PTE to cache locally; on tilepro it means we have "nc" set. + * widely than would otherwise be possible? This means we have "nc" set. */ #define PAGE_HOME_IMMUTABLE -2 diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 5aa54319d2ef..42323636c459 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -113,18 +113,14 @@ struct thread_struct { unsigned long intctrl_0; /* Is this task currently doing a backtrace? */ bool in_backtrace; -#if CHIP_HAS_PROC_STATUS_SPR() /* Any other miscellaneous processor state bits */ unsigned long proc_status; -#endif #if !CHIP_HAS_FIXED_INTVEC_BASE() /* Interrupt base for PL0 interrupts */ unsigned long interrupt_vector_base; #endif -#if CHIP_HAS_TILE_RTF_HWM() /* Tile cache retry fifo high-water mark */ unsigned long tile_rtf_hwm; -#endif #if CHIP_HAS_DSTREAM_PF() /* Data stream prefetch control */ unsigned long dstream_pf; @@ -137,12 +133,6 @@ struct thread_struct { /* Async DMA TLB fault information */ struct async_tlb dma_async_tlb; #endif -#if CHIP_HAS_SN_PROC() - /* Was static network processor when we were switched out? */ - int sn_proc_running; - /* Async SNI TLB fault information */ - struct async_tlb sn_async_tlb; -#endif }; #endif /* !__ASSEMBLY__ */ @@ -286,7 +276,6 @@ extern char chip_model[64]; /* Data on which physical memory controller corresponds to which NUMA node. */ extern int node_controller[]; -#if CHIP_HAS_CBOX_HOME_MAP() /* Does the heap allocator return hash-for-home pages by default? */ extern int hash_default; @@ -296,11 +285,6 @@ extern int kstack_hash; /* Does MAP_ANONYMOUS return hash-for-home pages by default? */ #define uheap_hash hash_default -#else -#define hash_default 0 -#define kstack_hash 0 -#define uheap_hash 0 -#endif /* Are we using huge pages in the TLB for kernel data? */ extern int kdata_huge; diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h index 1aa759aeb5b3..9a326b64f7ae 100644 --- a/arch/tile/include/asm/smp.h +++ b/arch/tile/include/asm/smp.h @@ -101,10 +101,8 @@ void print_disabled_cpus(void); extern struct cpumask cpu_lotar_map; #define cpu_is_valid_lotar(cpu) cpumask_test_cpu((cpu), &cpu_lotar_map) -#if CHIP_HAS_CBOX_HOME_MAP() /* Which processors are used for hash-for-home mapping */ extern struct cpumask hash_for_home_map; -#endif /* Which cpus can have their cache flushed by hv_flush_remote(). */ extern struct cpumask cpu_cacheable_map; diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h index 5f172b2403a6..4b99a1c3aab2 100644 --- a/arch/tile/include/asm/traps.h +++ b/arch/tile/include/asm/traps.h @@ -21,7 +21,7 @@ /* mm/fault.c */ void do_page_fault(struct pt_regs *, int fault_num, unsigned long address, unsigned long write); -#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() +#if CHIP_HAS_TILE_DMA() void do_async_page_fault(struct pt_regs *); #endif diff --git a/arch/tile/include/uapi/arch/Kbuild b/arch/tile/include/uapi/arch/Kbuild index 4ebc34f4768d..97dfbecec6b6 100644 --- a/arch/tile/include/uapi/arch/Kbuild +++ b/arch/tile/include/uapi/arch/Kbuild @@ -1,7 +1,6 @@ # UAPI Header export list header-y += abi.h header-y += chip.h -header-y += chip_tile64.h header-y += chip_tilegx.h header-y += chip_tilepro.h header-y += icache.h diff --git a/arch/tile/include/uapi/arch/chip.h b/arch/tile/include/uapi/arch/chip.h index 926d3db0e91e..4c91f90b9369 100644 --- a/arch/tile/include/uapi/arch/chip.h +++ b/arch/tile/include/uapi/arch/chip.h @@ -12,9 +12,7 @@ * more details. */ -#if __tile_chip__ == 0 -#include <arch/chip_tile64.h> -#elif __tile_chip__ == 1 +#if __tile_chip__ == 1 #include <arch/chip_tilepro.h> #elif defined(__tilegx__) #include <arch/chip_tilegx.h> diff --git a/arch/tile/include/uapi/arch/chip_tile64.h b/arch/tile/include/uapi/arch/chip_tile64.h deleted file mode 100644 index 261aaba092d4..000000000000 --- a/arch/tile/include/uapi/arch/chip_tile64.h +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -/* - * @file - * Global header file. - * This header file specifies defines for TILE64. - */ - -#ifndef __ARCH_CHIP_H__ -#define __ARCH_CHIP_H__ - -/** Specify chip version. - * When possible, prefer the CHIP_xxx symbols below for future-proofing. - * This is intended for cross-compiling; native compilation should - * use the predefined __tile_chip__ symbol. - */ -#define TILE_CHIP 0 - -/** Specify chip revision. - * This provides for the case of a respin of a particular chip type; - * the normal value for this symbol is "0". - * This is intended for cross-compiling; native compilation should - * use the predefined __tile_chip_rev__ symbol. - */ -#define TILE_CHIP_REV 0 - -/** The name of this architecture. */ -#define CHIP_ARCH_NAME "tile64" - -/** The ELF e_machine type for binaries for this chip. */ -#define CHIP_ELF_TYPE() EM_TILE64 - -/** The alternate ELF e_machine type for binaries for this chip. */ -#define CHIP_COMPAT_ELF_TYPE() 0x2506 - -/** What is the native word size of the machine? */ -#define CHIP_WORD_SIZE() 32 - -/** How many bits of a virtual address are used. Extra bits must be - * the sign extension of the low bits. - */ -#define CHIP_VA_WIDTH() 32 - -/** How many bits are in a physical address? */ -#define CHIP_PA_WIDTH() 36 - -/** Size of the L2 cache, in bytes. */ -#define CHIP_L2_CACHE_SIZE() 65536 - -/** Log size of an L2 cache line in bytes. */ -#define CHIP_L2_LOG_LINE_SIZE() 6 - -/** Size of an L2 cache line, in bytes. */ -#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE()) - -/** Associativity of the L2 cache. */ -#define CHIP_L2_ASSOC() 2 - -/** Size of the L1 data cache, in bytes. */ -#define CHIP_L1D_CACHE_SIZE() 8192 - -/** Log size of an L1 data cache line in bytes. */ -#define CHIP_L1D_LOG_LINE_SIZE() 4 - -/** Size of an L1 data cache line, in bytes. */ -#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE()) - -/** Associativity of the L1 data cache. */ -#define CHIP_L1D_ASSOC() 2 - -/** Size of the L1 instruction cache, in bytes. */ -#define CHIP_L1I_CACHE_SIZE() 8192 - -/** Log size of an L1 instruction cache line in bytes. */ -#define CHIP_L1I_LOG_LINE_SIZE() 6 - -/** Size of an L1 instruction cache line, in bytes. */ -#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE()) - -/** Associativity of the L1 instruction cache. */ -#define CHIP_L1I_ASSOC() 1 - -/** Stride with which flush instructions must be issued. */ -#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE() - -/** Stride with which inv instructions must be issued. */ -#define CHIP_INV_STRIDE() CHIP_L1D_LINE_SIZE() - -/** Stride with which finv instructions must be issued. */ -#define CHIP_FINV_STRIDE() CHIP_L1D_LINE_SIZE() - -/** Can the local cache coherently cache data that is homed elsewhere? */ -#define CHIP_HAS_COHERENT_LOCAL_CACHE() 0 - -/** How many simultaneous outstanding victims can the L2 cache have? */ -#define CHIP_MAX_OUTSTANDING_VICTIMS() 2 - -/** Does the TLB support the NC and NOALLOC bits? */ -#define CHIP_HAS_NC_AND_NOALLOC_BITS() 0 - -/** Does the chip support hash-for-home caching? */ -#define CHIP_HAS_CBOX_HOME_MAP() 0 - -/** Number of entries in the chip's home map tables. */ -/* #define CHIP_CBOX_HOME_MAP_SIZE() -- does not apply to chip 0 */ - -/** Do uncacheable requests miss in the cache regardless of whether - * there is matching data? */ -#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 0 - -/** Does the mf instruction wait for victims? */ -#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 1 - -/** Does the chip have an "inv" instruction that doesn't also flush? */ -#define CHIP_HAS_INV() 0 - -/** Does the chip have a "wh64" instruction? */ -#define CHIP_HAS_WH64() 0 - -/** Does this chip have a 'dword_align' instruction? */ -#define CHIP_HAS_DWORD_ALIGN() 0 - -/** Number of performance counters. */ -#define CHIP_PERFORMANCE_COUNTERS() 2 - -/** Does this chip have auxiliary performance counters? */ -#define CHIP_HAS_AUX_PERF_COUNTERS() 0 - -/** Is the CBOX_MSR1 SPR supported? */ -#define CHIP_HAS_CBOX_MSR1() 0 - -/** Is the TILE_RTF_HWM SPR supported? */ -#define CHIP_HAS_TILE_RTF_HWM() 0 - -/** Is the TILE_WRITE_PENDING SPR supported? */ -#define CHIP_HAS_TILE_WRITE_PENDING() 0 - -/** Is the PROC_STATUS SPR supported? */ -#define CHIP_HAS_PROC_STATUS_SPR() 0 - -/** Is the DSTREAM_PF SPR supported? */ -#define CHIP_HAS_DSTREAM_PF() 0 - -/** Log of the number of mshims we have. */ -#define CHIP_LOG_NUM_MSHIMS() 2 - -/** Are the bases of the interrupt vector areas fixed? */ -#define CHIP_HAS_FIXED_INTVEC_BASE() 1 - -/** Are the interrupt masks split up into 2 SPRs? */ -#define CHIP_HAS_SPLIT_INTR_MASK() 1 - -/** Is the cycle count split up into 2 SPRs? */ -#define CHIP_HAS_SPLIT_CYCLE() 1 - -/** Does the chip have a static network? */ -#define CHIP_HAS_SN() 1 - -/** Does the chip have a static network processor? */ -#define CHIP_HAS_SN_PROC() 1 - -/** Size of the L1 static network processor instruction cache, in bytes. */ -#define CHIP_L1SNI_CACHE_SIZE() 2048 - -/** Does the chip have DMA support in each tile? */ -#define CHIP_HAS_TILE_DMA() 1 - -/** Does the chip have the second revision of the directly accessible - * dynamic networks? This encapsulates a number of characteristics, - * including the absence of the catch-all, the absence of inline message - * tags, the absence of support for network context-switching, and so on. - */ -#define CHIP_HAS_REV1_XDN() 0 - -/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */ -#define CHIP_HAS_CMPEXCH() 0 - -/** Does the chip have memory-mapped I/O support? */ -#define CHIP_HAS_MMIO() 0 - -/** Does the chip have post-completion interrupts? */ -#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 0 - -/** Does the chip have native single step support? */ -#define CHIP_HAS_SINGLE_STEP() 0 - -#ifndef __OPEN_SOURCE__ /* features only relevant to hypervisor-level code */ - -/** How many entries are present in the instruction TLB? */ -#define CHIP_ITLB_ENTRIES() 8 - -/** How many entries are present in the data TLB? */ -#define CHIP_DTLB_ENTRIES() 16 - -/** How many MAF entries does the XAUI shim have? */ -#define CHIP_XAUI_MAF_ENTRIES() 16 - -/** Does the memory shim have a source-id table? */ -#define CHIP_HAS_MSHIM_SRCID_TABLE() 1 - -/** Does the L1 instruction cache clear on reset? */ -#define CHIP_HAS_L1I_CLEAR_ON_RESET() 0 - -/** Does the chip come out of reset with valid coordinates on all tiles? - * Note that if defined, this also implies that the upper left is 1,1. - */ -#define CHIP_HAS_VALID_TILE_COORD_RESET() 0 - -/** Does the chip have unified packet formats? */ -#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 0 - -/** Does the chip support write reordering? */ -#define CHIP_HAS_WRITE_REORDERING() 0 - -/** Does the chip support Y-X routing as well as X-Y? */ -#define CHIP_HAS_Y_X_ROUTING() 0 - -/** Is INTCTRL_3 managed with the correct MPL? */ -#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 0 - -/** Is it possible to configure the chip to be big-endian? */ -#define CHIP_HAS_BIG_ENDIAN_CONFIG() 0 - -/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */ -#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0 - -/** Is the DIAG_TRACE_WAY SPR supported? */ -#define CHIP_HAS_DIAG_TRACE_WAY() 0 - -/** Is the MEM_STRIPE_CONFIG SPR supported? */ -#define CHIP_HAS_MEM_STRIPE_CONFIG() 0 - -/** Are the TLB_PERF SPRs supported? */ -#define CHIP_HAS_TLB_PERF() 0 - -/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */ -#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0 - -/** Does the chip support rev1 DMA packets? */ -#define CHIP_HAS_REV1_DMA_PACKETS() 0 - -/** Does the chip have an IPI shim? */ -#define CHIP_HAS_IPI() 0 - -#endif /* !__OPEN_SOURCE__ */ -#endif /* __ARCH_CHIP_H__ */ diff --git a/arch/tile/include/uapi/arch/spr_def_32.h b/arch/tile/include/uapi/arch/spr_def_32.h index c689446e6284..78daa3146d25 100644 --- a/arch/tile/include/uapi/arch/spr_def_32.h +++ b/arch/tile/include/uapi/arch/spr_def_32.h @@ -200,8 +200,6 @@ #define SPR_SIM_CONTROL 0x4e0c #define SPR_SNCTL 0x0805 #define SPR_SNCTL__FRZFABRIC_MASK 0x1 -#define SPR_SNCTL__FRZPROC_MASK 0x2 -#define SPR_SNPC 0x080b #define SPR_SNSTATIC 0x080c #define SPR_SYSTEM_SAVE_0_0 0x4b00 #define SPR_SYSTEM_SAVE_0_1 0x4b01 diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index f084f1c7afde..088d5c141e68 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -32,12 +32,6 @@ #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) -#if !CHIP_HAS_WH64() - /* By making this an empty macro, we can use wh64 in the code. */ - .macro wh64 reg - .endm -#endif - .macro push_reg reg, ptr=sp, delta=-4 { sw \ptr, \reg @@ -325,18 +319,14 @@ intvec_\vecname: movei r3, -1 /* not used, but set for consistency */ } .else -#if CHIP_HAS_AUX_PERF_COUNTERS() .ifc \c_routine, op_handle_aux_perf_interrupt { mfspr r2, AUX_PERF_COUNT_STS movei r3, -1 /* not used, but set for consistency */ } .else -#endif movei r3, 0 -#if CHIP_HAS_AUX_PERF_COUNTERS() .endif -#endif .endif .endif .endif @@ -561,7 +551,6 @@ intvec_\vecname: .endif mtspr INTERRUPT_CRITICAL_SECTION, zero -#if CHIP_HAS_WH64() /* * Prepare the first 256 stack bytes to be rapidly accessible * without having to fetch the background data. We don't really @@ -582,7 +571,6 @@ intvec_\vecname: addi r52, r52, -64 } wh64 r52 -#endif #ifdef CONFIG_TRACE_IRQFLAGS .ifnc \function,handle_nmi @@ -1533,12 +1521,10 @@ STD_ENTRY(_sys_clone) __HEAD .align 64 /* Align much later jump on the start of a cache line. */ -#if !ATOMIC_LOCKS_FOUND_VIA_TABLE() nop #if PAGE_SIZE >= 0x10000 nop #endif -#endif ENTRY(sys_cmpxchg) /* @@ -1572,45 +1558,6 @@ ENTRY(sys_cmpxchg) # error Code here assumes PAGE_OFFSET can be loaded with just hi16() #endif -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - { - /* Check for unaligned input. */ - bnz sp, .Lcmpxchg_badaddr - mm r25, r0, zero, 3, PAGE_SHIFT-1 - } - { - crc32_32 r25, zero, r25 - moveli r21, lo16(atomic_lock_ptr) - } - { - auli r21, r21, ha16(atomic_lock_ptr) - auli r23, zero, hi16(PAGE_OFFSET) /* hugepage-aligned */ - } - { - shri r20, r25, 32 - ATOMIC_HASH_L1_SHIFT - slt_u r23, r0, r23 - lw r26, r0 /* see comment in the "#else" for the "lw r26". */ - } - { - s2a r21, r20, r21 - bbns r23, .Lcmpxchg_badaddr - } - { - lw r21, r21 - seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64 - andi r25, r25, ATOMIC_HASH_L2_SIZE - 1 - } - { - /* Branch away at this point if we're doing a 64-bit cmpxchg. */ - bbs r23, .Lcmpxchg64 - andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */ - } - { - s2a ATOMIC_LOCK_REG_NAME, r25, r21 - j .Lcmpxchg32_tns /* see comment in the #else for the jump. */ - } - -#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ { /* Check for unaligned input. */ bnz sp, .Lcmpxchg_badaddr @@ -1635,12 +1582,9 @@ ENTRY(sys_cmpxchg) /* * Ensure that the TLB is loaded before we take out the lock. - * On tilepro, this will start fetching the value all the way - * into our L1 as well (and if it gets modified before we - * grab the lock, it will be invalidated from our cache - * before we reload it). On tile64, we'll start fetching it - * into our L1 if we're the home, and if we're not, we'll - * still at least start fetching it into the home's L2. + * This will start fetching the value all the way into our L1 + * as well (and if it gets modified before we grab the lock, + * it will be invalidated from our cache before we reload it). */ lw r26, r0 } @@ -1683,8 +1627,6 @@ ENTRY(sys_cmpxchg) j .Lcmpxchg32_tns } -#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - /* Symbol for do_page_fault_ics() to use to compare against the PC. */ .global __sys_cmpxchg_grab_lock __sys_cmpxchg_grab_lock: @@ -1822,9 +1764,6 @@ __sys_cmpxchg_grab_lock: .align 64 .Lcmpxchg64: { -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - s2a ATOMIC_LOCK_REG_NAME, r25, r21 -#endif bzt r23, .Lcmpxchg64_tns } j .Lcmpxchg_badaddr @@ -1959,10 +1898,8 @@ int_unalign: do_page_fault int_hand INT_SN_CPL, SN_CPL, bad_intr int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap -#if CHIP_HAS_AUX_PERF_COUNTERS() int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ op_handle_aux_perf_interrupt, handle_nmi -#endif /* Synthetic interrupt delivered only by the simulator */ int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index c3a2335fa6a8..ec755d3f3734 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -511,12 +511,10 @@ intvec_\vecname: .else .ifc \c_routine, op_handle_perf_interrupt mfspr r2, PERF_COUNT_STS -#if CHIP_HAS_AUX_PERF_COUNTERS() .else .ifc \c_routine, op_handle_aux_perf_interrupt mfspr r2, AUX_PERF_COUNT_STS .endif -#endif .endif .endif .endif diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 0e6c521b8a89..d8ba06058fd0 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c @@ -74,7 +74,7 @@ static DEFINE_SPINLOCK(available_irqs_lock); /* * The interrupt handling path, implemented in terms of HV interrupt - * emulation on TILE64 and TILEPro, and IPI hardware on TILE-Gx. + * emulation on TILEPro, and IPI hardware on TILE-Gx. * Entered with interrupts disabled. */ void tile_dev_intr(struct pt_regs *regs, int intnum) @@ -235,7 +235,7 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type) { /* * We use handle_level_irq() by default because the pending - * interrupt vector (whether modeled by the HV on TILE64 and + * interrupt vector (whether modeled by the HV on * TILEPro or implemented in hardware on TILE-Gx) has * level-style semantics for each bit. An interrupt fires * whenever a bit is high, not just at edges. diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 44cdc4aa59e8..16ed58948757 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -187,16 +187,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, memset(&p->thread.dma_async_tlb, 0, sizeof(struct async_tlb)); #endif -#if CHIP_HAS_SN_PROC() - /* Likewise, the new thread is not running static processor code. */ - p->thread.sn_proc_running = 0; - memset(&p->thread.sn_async_tlb, 0, sizeof(struct async_tlb)); -#endif - -#if CHIP_HAS_PROC_STATUS_SPR() /* New thread has its miscellaneous processor state bits clear. */ p->thread.proc_status = 0; -#endif #ifdef CONFIG_HARDWALL /* New thread does not own any networks. */ @@ -378,15 +370,11 @@ static void save_arch_state(struct thread_struct *t) t->system_save[2] = __insn_mfspr(SPR_SYSTEM_SAVE_0_2); t->system_save[3] = __insn_mfspr(SPR_SYSTEM_SAVE_0_3); t->intctrl_0 = __insn_mfspr(SPR_INTCTRL_0_STATUS); -#if CHIP_HAS_PROC_STATUS_SPR() t->proc_status = __insn_mfspr(SPR_PROC_STATUS); -#endif #if !CHIP_HAS_FIXED_INTVEC_BASE() t->interrupt_vector_base = __insn_mfspr(SPR_INTERRUPT_VECTOR_BASE_0); #endif -#if CHIP_HAS_TILE_RTF_HWM() t->tile_rtf_hwm = __insn_mfspr(SPR_TILE_RTF_HWM); -#endif #if CHIP_HAS_DSTREAM_PF() t->dstream_pf = __insn_mfspr(SPR_DSTREAM_PF); #endif @@ -407,15 +395,11 @@ static void restore_arch_state(const struct thread_struct *t) __insn_mtspr(SPR_SYSTEM_SAVE_0_2, t->system_save[2]); __insn_mtspr(SPR_SYSTEM_SAVE_0_3, t->system_save[3]); __insn_mtspr(SPR_INTCTRL_0_STATUS, t->intctrl_0); -#if CHIP_HAS_PROC_STATUS_SPR() __insn_mtspr(SPR_PROC_STATUS, t->proc_status); -#endif #if !CHIP_HAS_FIXED_INTVEC_BASE() __insn_mtspr(SPR_INTERRUPT_VECTOR_BASE_0, t->interrupt_vector_base); #endif -#if CHIP_HAS_TILE_RTF_HWM() __insn_mtspr(SPR_TILE_RTF_HWM, t->tile_rtf_hwm); -#endif #if CHIP_HAS_DSTREAM_PF() __insn_mtspr(SPR_DSTREAM_PF, t->dstream_pf); #endif @@ -424,26 +408,11 @@ static void restore_arch_state(const struct thread_struct *t) void _prepare_arch_switch(struct task_struct *next) { -#if CHIP_HAS_SN_PROC() - int snctl; -#endif #if CHIP_HAS_TILE_DMA() struct tile_dma_state *dma = ¤t->thread.tile_dma_state; if (dma->enabled) save_tile_dma_state(dma); #endif -#if CHIP_HAS_SN_PROC() - /* - * Suspend the static network processor if it was running. - * We do not suspend the fabric itself, just like we don't - * try to suspend the UDN. - */ - snctl = __insn_mfspr(SPR_SNCTL); - current->thread.sn_proc_running = - (snctl & SPR_SNCTL__FRZPROC_MASK) == 0; - if (current->thread.sn_proc_running) - __insn_mtspr(SPR_SNCTL, snctl | SPR_SNCTL__FRZPROC_MASK); -#endif } @@ -471,17 +440,6 @@ struct task_struct *__sched _switch_to(struct task_struct *prev, /* Restore other arch state. */ restore_arch_state(&next->thread); -#if CHIP_HAS_SN_PROC() - /* - * Restart static network processor in the new process - * if it was running before. - */ - if (next->thread.sn_proc_running) { - int snctl = __insn_mfspr(SPR_SNCTL); - __insn_mtspr(SPR_SNCTL, snctl & ~SPR_SNCTL__FRZPROC_MASK); - } -#endif - #ifdef CONFIG_HARDWALL /* Enable or disable access to the network registers appropriately. */ hardwall_switch_tasks(prev, next); @@ -523,7 +481,7 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags) schedule(); return 1; } -#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() +#if CHIP_HAS_TILE_DMA() if (thread_info_flags & _TIF_ASYNC_TLB) { do_async_page_fault(regs); return 1; diff --git a/arch/tile/kernel/relocate_kernel_32.S b/arch/tile/kernel/relocate_kernel_32.S index f7fd37b64a78..e44fbcf8cbd5 100644 --- a/arch/tile/kernel/relocate_kernel_32.S +++ b/arch/tile/kernel/relocate_kernel_32.S @@ -77,7 +77,6 @@ STD_ENTRY(relocate_new_kernel) move r30, sp addi sp, sp, -8 -#if CHIP_HAS_CBOX_HOME_MAP() /* * On TILEPro, we need to flush all tiles' caches, since we may * have been doing hash-for-home caching there. Note that we @@ -113,7 +112,6 @@ STD_ENTRY(relocate_new_kernel) } jalr r20 -#endif /* r33 is destination pointer, default to zero */ diff --git a/arch/tile/kernel/relocate_kernel_64.S b/arch/tile/kernel/relocate_kernel_64.S index 02bc44621021..d9d8cf6176e8 100644 --- a/arch/tile/kernel/relocate_kernel_64.S +++ b/arch/tile/kernel/relocate_kernel_64.S @@ -78,7 +78,6 @@ STD_ENTRY(relocate_new_kernel) move r30, sp addi sp, sp, -16 -#if CHIP_HAS_CBOX_HOME_MAP() /* * On TILE-GX, we need to flush all tiles' caches, since we may * have been doing hash-for-home caching there. Note that we @@ -116,7 +115,6 @@ STD_ENTRY(relocate_new_kernel) shl16insli r20, r20, hw0(hv_flush_remote) jalr r20 -#endif /* r33 is destination pointer, default to zero */ diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index b79c312ca3cb..128a2d0b8650 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1046,9 +1046,6 @@ void __cpuinit setup_cpu(int boot) arch_local_irq_unmask(INT_DMATLB_MISS); arch_local_irq_unmask(INT_DMATLB_ACCESS); #endif -#if CHIP_HAS_SN_PROC() - arch_local_irq_unmask(INT_SNITLB_MISS); -#endif #ifdef __tilegx__ arch_local_irq_unmask(INT_SINGLE_STEP_K); #endif @@ -1063,10 +1060,6 @@ void __cpuinit setup_cpu(int boot) /* Static network is not restricted. */ __insn_mtspr(SPR_MPL_SN_ACCESS_SET_0, 1); #endif -#if CHIP_HAS_SN_PROC() - __insn_mtspr(SPR_MPL_SN_NOTIFY_SET_0, 1); - __insn_mtspr(SPR_MPL_SN_CPL_SET_0, 1); -#endif /* * Set the MPL for interrupt control 0 & 1 to the corresponding @@ -1291,7 +1284,6 @@ static void __init validate_va(void) struct cpumask __write_once cpu_lotar_map; EXPORT_SYMBOL(cpu_lotar_map); -#if CHIP_HAS_CBOX_HOME_MAP() /* * hash_for_home_map lists all the tiles that hash-for-home data * will be cached on. Note that this may includes tiles that are not @@ -1301,7 +1293,6 @@ EXPORT_SYMBOL(cpu_lotar_map); */ struct cpumask hash_for_home_map; EXPORT_SYMBOL(hash_for_home_map); -#endif /* * cpu_cacheable_map lists all the cpus whose caches the hypervisor can @@ -1394,7 +1385,6 @@ static void __init setup_cpu_maps(void) cpu_lotar_map = *cpu_possible_mask; } -#if CHIP_HAS_CBOX_HOME_MAP() /* Retrieve set of CPUs used for hash-for-home caching */ rc = hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE, (HV_VirtAddr) hash_for_home_map.bits, @@ -1402,9 +1392,6 @@ static void __init setup_cpu_maps(void) if (rc < 0) early_panic("hv_inquire_tiles(HFH_CACHE) failed: rc %d\n", rc); cpumask_or(&cpu_cacheable_map, cpu_possible_mask, &hash_for_home_map); -#else - cpu_cacheable_map = *cpu_possible_mask; -#endif } diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index 5ef2e9eae5c5..de07fa7d1315 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -546,7 +546,6 @@ void single_step_once(struct pt_regs *regs) } break; -#if CHIP_HAS_WH64() /* postincrement operations */ case IMM_0_OPCODE_X1: switch (get_ImmOpcodeExtension_X1(bundle)) { @@ -581,7 +580,6 @@ void single_step_once(struct pt_regs *regs) break; } break; -#endif /* CHIP_HAS_WH64() */ } if (state->update) { diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile index 9adfd76fbdd8..c4211cbb2021 100644 --- a/arch/tile/lib/Makefile +++ b/arch/tile/lib/Makefile @@ -7,7 +7,7 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \ strchr_$(BITS).o strlen_$(BITS).o strnlen_$(BITS).o lib-$(CONFIG_TILEGX) += memcpy_user_64.o -lib-$(CONFIG_TILEPRO) += atomic_32.o atomic_asm_32.o memcpy_tile64.o +lib-$(CONFIG_TILEPRO) += atomic_32.o atomic_asm_32.o lib-$(CONFIG_SMP) += spinlock_$(BITS).o usercopy_$(BITS).o obj-$(CONFIG_MODULES) += exports.o diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 42eacb1f737a..5d91d1860640 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c @@ -20,50 +20,12 @@ #include <linux/atomic.h> #include <arch/chip.h> -/* See <asm/atomic_32.h> */ -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - -/* - * A block of memory containing locks for atomic ops. Each instance of this - * struct will be homed on a different CPU. - */ -struct atomic_locks_on_cpu { - int lock[ATOMIC_HASH_L2_SIZE]; -} __attribute__((aligned(ATOMIC_HASH_L2_SIZE * 4))); - -static DEFINE_PER_CPU(struct atomic_locks_on_cpu, atomic_lock_pool); - -/* The locks we'll use until __init_atomic_per_cpu is called. */ -static struct atomic_locks_on_cpu __initdata initial_atomic_locks; - -/* Hash into this vector to get a pointer to lock for the given atomic. */ -struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE] - __write_once = { - [0 ... ATOMIC_HASH_L1_SIZE-1] (&initial_atomic_locks) -}; - -#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - /* This page is remapped on startup to be hash-for-home. */ int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss; -#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - int *__atomic_hashed_lock(volatile void *v) { /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */ -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - unsigned long i = - (unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long)); - unsigned long n = __insn_crc32_32(0, i); - - /* Grab high bits for L1 index. */ - unsigned long l1_index = n >> ((sizeof(n) * 8) - ATOMIC_HASH_L1_SHIFT); - /* Grab low bits for L2 index. */ - unsigned long l2_index = n & (ATOMIC_HASH_L2_SIZE - 1); - - return &atomic_lock_ptr[l1_index]->lock[l2_index]; -#else /* * Use bits [3, 3 + ATOMIC_HASH_SHIFT) as the lock index. * Using mm works here because atomic_locks is page aligned. @@ -72,26 +34,13 @@ int *__atomic_hashed_lock(volatile void *v) (unsigned long)atomic_locks, 2, (ATOMIC_HASH_SHIFT + 2) - 1); return (int *)ptr; -#endif } #ifdef CONFIG_SMP /* Return whether the passed pointer is a valid atomic lock pointer. */ static int is_atomic_lock(int *p) { -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - int i; - for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) { - - if (p >= &atomic_lock_ptr[i]->lock[0] && - p < &atomic_lock_ptr[i]->lock[ATOMIC_HASH_L2_SIZE]) { - return 1; - } - } - return 0; -#else return p >= &atomic_locks[0] && p < &atomic_locks[ATOMIC_HASH_SIZE]; -#endif } void __atomic_fault_unlock(int *irqlock_word) @@ -210,43 +159,6 @@ struct __get_user __atomic_bad_address(int __user *addr) void __init __init_atomic_per_cpu(void) { -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - - unsigned int i; - int actual_cpu; - - /* - * Before this is called from setup, we just have one lock for - * all atomic objects/operations. Here we replace the - * elements of atomic_lock_ptr so that they point at per_cpu - * integers. This seemingly over-complex approach stems from - * the fact that DEFINE_PER_CPU defines an entry for each cpu - * in the grid, not each cpu from 0..ATOMIC_HASH_SIZE-1. But - * for efficient hashing of atomics to their locks we want a - * compile time constant power of 2 for the size of this - * table, so we use ATOMIC_HASH_SIZE. - * - * Here we populate atomic_lock_ptr from the per cpu - * atomic_lock_pool, interspersing by actual cpu so that - * subsequent elements are homed on consecutive cpus. - */ - - actual_cpu = cpumask_first(cpu_possible_mask); - - for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) { - /* - * Preincrement to slightly bias against using cpu 0, - * which has plenty of stuff homed on it already. - */ - actual_cpu = cpumask_next(actual_cpu, cpu_possible_mask); - if (actual_cpu >= nr_cpu_ids) - actual_cpu = cpumask_first(cpu_possible_mask); - - atomic_lock_ptr[i] = &per_cpu(atomic_lock_pool, actual_cpu); - } - -#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - /* Validate power-of-two and "bigger than cpus" assumption */ BUILD_BUG_ON(ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE-1)); BUG_ON(ATOMIC_HASH_SIZE < nr_cpu_ids); @@ -270,6 +182,4 @@ void __init __init_atomic_per_cpu(void) * That should not produce more indices than ATOMIC_HASH_SIZE. */ BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE); - -#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ } diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S index 8ba7626cfeb1..a2771ae5da53 100644 --- a/arch/tile/lib/memcpy_32.S +++ b/arch/tile/lib/memcpy_32.S @@ -22,14 +22,6 @@ #include <linux/linkage.h> -/* On TILE64, we wrap these functions via arch/tile/lib/memcpy_tile64.c */ -#if !CHIP_HAS_COHERENT_LOCAL_CACHE() -#define memcpy __memcpy_asm -#define __copy_to_user_inatomic __copy_to_user_inatomic_asm -#define __copy_from_user_inatomic __copy_from_user_inatomic_asm -#define __copy_from_user_zeroing __copy_from_user_zeroing_asm -#endif - #define IS_MEMCPY 0 #define IS_COPY_FROM_USER 1 #define IS_COPY_FROM_USER_ZEROING 2 @@ -159,12 +151,9 @@ EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 } { addi r3, r1, 60; andi r9, r9, -64 } -#if CHIP_HAS_WH64() /* No need to prefetch dst, we'll just do the wh64 * right before we copy a line. */ -#endif - EX: { lw r5, r3; addi r3, r3, 64; movei r4, 1 } /* Intentionally stall for a few cycles to leave L2 cache alone. */ { bnzt zero, .; move r27, lr } @@ -172,21 +161,6 @@ EX: { lw r6, r3; addi r3, r3, 64 } /* Intentionally stall for a few cycles to leave L2 cache alone. */ { bnzt zero, . } EX: { lw r7, r3; addi r3, r3, 64 } -#if !CHIP_HAS_WH64() - /* Prefetch the dest */ - /* Intentionally stall for a few cycles to leave L2 cache alone. */ - { bnzt zero, . } - /* Use a real load to cause a TLB miss if necessary. We aren't using - * r28, so this should be fine. - */ -EX: { lw r28, r9; addi r9, r9, 64 } - /* Intentionally stall for a few cycles to leave L2 cache alone. */ - { bnzt zero, . } - { prefetch r9; addi r9, r9, 64 } - /* Intentionally stall for a few cycles to leave L2 cache alone. */ - { bnzt zero, . } - { prefetch r9; addi r9, r9, 64 } -#endif /* Intentionally stall for a few cycles to leave L2 cache alone. */ { bz zero, .Lbig_loop2 } @@ -287,13 +261,8 @@ EX: { lw r7, r3; addi r3, r3, 64 } /* Fill second L1D line. */ EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */ -#if CHIP_HAS_WH64() /* Prepare destination line for writing. */ EX: { wh64 r9; addi r9, r9, 64 } -#else - /* Prefetch dest line */ - { prefetch r9; addi r9, r9, 64 } -#endif /* Load seven words that are L1D hits to cover wh64 L2 usage. */ /* Load the three remaining words from the last L1D line, which @@ -331,16 +300,7 @@ EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */ EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */ EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */ EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */ -#if CHIP_HAS_WH64() EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */ -#else - /* Back up the r9 to a cache line we are already storing to - * if it gets past the end of the dest vector. Strictly speaking, - * we don't need to back up to the start of a cache line, but it's free - * and tidy, so why not? - */ -EX: { sw r0, r15; addi r0, r0, 4; andi r13, r0, -64 } /* store(WORD_3) */ -#endif /* Store second L1D line. */ EX: { sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */ EX: { sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */ @@ -404,7 +364,6 @@ EX: { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 } .Ldest_is_word_aligned: -#if CHIP_HAS_DWORD_ALIGN() EX: { andi r8, r0, 63; lwadd_na r6, r1, 4} { slti_u r9, r2, 64; bz r8, .Ldest_is_L2_line_aligned } @@ -512,26 +471,6 @@ EX: { swadd r0, r13, 4; addi r2, r2, -32 } /* Move r1 back to the point where it corresponds to r0. */ { addi r1, r1, -4 } -#else /* !CHIP_HAS_DWORD_ALIGN() */ - - /* Compute right/left shift counts and load initial source words. */ - { andi r5, r1, -4; andi r3, r1, 3 } -EX: { lw r6, r5; addi r5, r5, 4; shli r3, r3, 3 } -EX: { lw r7, r5; addi r5, r5, 4; sub r4, zero, r3 } - - /* Load and store one word at a time, using shifts and ORs - * to correct for the misaligned src. - */ -.Lcopy_unaligned_src_loop: - { shr r6, r6, r3; shl r8, r7, r4 } -EX: { lw r7, r5; or r8, r8, r6; move r6, r7 } -EX: { sw r0, r8; addi r0, r0, 4; addi r2, r2, -4 } - { addi r5, r5, 4; slti_u r8, r2, 8 } - { bzt r8, .Lcopy_unaligned_src_loop; addi r1, r1, 4 } - - { bz r2, .Lcopy_unaligned_done } -#endif /* !CHIP_HAS_DWORD_ALIGN() */ - /* Fall through */ /* diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c deleted file mode 100644 index 0290c222847b..000000000000 --- a/arch/tile/lib/memcpy_tile64.c +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#include <linux/string.h> -#include <linux/smp.h> -#include <linux/module.h> -#include <linux/uaccess.h> -#include <asm/fixmap.h> -#include <asm/kmap_types.h> -#include <asm/tlbflush.h> -#include <hv/hypervisor.h> -#include <arch/chip.h> - - -#if !CHIP_HAS_COHERENT_LOCAL_CACHE() - -/* Defined in memcpy.S */ -extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n); -extern unsigned long __copy_to_user_inatomic_asm( - void __user *to, const void *from, unsigned long n); -extern unsigned long __copy_from_user_inatomic_asm( - void *to, const void __user *from, unsigned long n); -extern unsigned long __copy_from_user_zeroing_asm( - void *to, const void __user *from, unsigned long n); - -typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long); - -/* Size above which to consider TLB games for performance */ -#define LARGE_COPY_CUTOFF 2048 - -/* Communicate to the simulator what we are trying to do. */ -#define sim_allow_multiple_caching(b) \ - __insn_mtspr(SPR_SIM_CONTROL, \ - SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS)) - -/* - * Copy memory by briefly enabling incoherent cacheline-at-a-time mode. - * - * We set up our own source and destination PTEs that we fully control. - * This is the only way to guarantee that we don't race with another - * thread that is modifying the PTE; we can't afford to try the - * copy_{to,from}_user() technique of catching the interrupt, since - * we must run with interrupts disabled to avoid the risk of some - * other code seeing the incoherent data in our cache. (Recall that - * our cache is indexed by PA, so even if the other code doesn't use - * our kmap_atomic virtual addresses, they'll still hit in cache using - * the normal VAs that aren't supposed to hit in cache.) - */ -static void memcpy_multicache(void *dest, const void *source, - pte_t dst_pte, pte_t src_pte, int len) -{ - int idx; - unsigned long flags, newsrc, newdst; - pmd_t *pmdp; - pte_t *ptep; - int type0, type1; - int cpu = smp_processor_id(); - - /* - * Disable interrupts so that we don't recurse into memcpy() - * in an interrupt handler, nor accidentally reference - * the PA of the source from an interrupt routine. Also - * notify the simulator that we're playing games so we don't - * generate spurious coherency warnings. - */ - local_irq_save(flags); - sim_allow_multiple_caching(1); - - /* Set up the new dest mapping */ - type0 = kmap_atomic_idx_push(); - idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0; - newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1)); - pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst); - ptep = pte_offset_kernel(pmdp, newdst); - if (pte_val(*ptep) != pte_val(dst_pte)) { - set_pte(ptep, dst_pte); - local_flush_tlb_page(NULL, newdst, PAGE_SIZE); - } - - /* Set up the new source mapping */ - type1 = kmap_atomic_idx_push(); - idx += (type0 - type1); - src_pte = hv_pte_set_nc(src_pte); - src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */ - newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); - pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc); - ptep = pte_offset_kernel(pmdp, newsrc); - __set_pte(ptep, src_pte); /* set_pte() would be confused by this */ - local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); - - /* Actually move the data. */ - __memcpy_asm((void *)newdst, (const void *)newsrc, len); - - /* - * Remap the source as locally-cached and not OLOC'ed so that - * we can inval without also invaling the remote cpu's cache. - * This also avoids known errata with inv'ing cacheable oloc data. - */ - src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3); - src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */ - __set_pte(ptep, src_pte); /* set_pte() would be confused by this */ - local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); - - /* - * Do the actual invalidation, covering the full L2 cache line - * at the end since __memcpy_asm() is somewhat aggressive. - */ - __inv_buffer((void *)newsrc, len); - - /* - * We're done: notify the simulator that all is back to normal, - * and re-enable interrupts and pre-emption. - */ - kmap_atomic_idx_pop(); - kmap_atomic_idx_pop(); - sim_allow_multiple_caching(0); - local_irq_restore(flags); -} - -/* - * Identify large copies from remotely-cached memory, and copy them - * via memcpy_multicache() if they look good, otherwise fall back - * to the particular kind of copying passed as the memcpy_t function. - */ -static unsigned long fast_copy(void *dest, const void *source, int len, - memcpy_t func) -{ - int cpu = get_cpu(); - unsigned long retval; - - /* - * Check if it's big enough to bother with. We may end up doing a - * small copy via TLB manipulation if we're near a page boundary, - * but presumably we'll make it up when we hit the second page. - */ - while (len >= LARGE_COPY_CUTOFF) { - int copy_size, bytes_left_on_page; - pte_t *src_ptep, *dst_ptep; - pte_t src_pte, dst_pte; - struct page *src_page, *dst_page; - - /* Is the source page oloc'ed to a remote cpu? */ -retry_source: - src_ptep = virt_to_pte(current->mm, (unsigned long)source); - if (src_ptep == NULL) - break; - src_pte = *src_ptep; - if (!hv_pte_get_present(src_pte) || - !hv_pte_get_readable(src_pte) || - hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3) - break; - if (get_remote_cache_cpu(src_pte) == cpu) - break; - src_page = pfn_to_page(pte_pfn(src_pte)); - get_page(src_page); - if (pte_val(src_pte) != pte_val(*src_ptep)) { - put_page(src_page); - goto retry_source; - } - if (pte_huge(src_pte)) { - /* Adjust the PTE to correspond to a small page */ - int pfn = pte_pfn(src_pte); - pfn += (((unsigned long)source & (HPAGE_SIZE-1)) - >> PAGE_SHIFT); - src_pte = pfn_pte(pfn, src_pte); - src_pte = pte_mksmall(src_pte); - } - - /* Is the destination page writable? */ -retry_dest: - dst_ptep = virt_to_pte(current->mm, (unsigned long)dest); - if (dst_ptep == NULL) { - put_page(src_page); - break; - } - dst_pte = *dst_ptep; - if (!hv_pte_get_present(dst_pte) || - !hv_pte_get_writable(dst_pte)) { - put_page(src_page); - break; - } - dst_page = pfn_to_page(pte_pfn(dst_pte)); - if (dst_page == src_page) { - /* - * Source and dest are on the same page; this - * potentially exposes us to incoherence if any - * part of src and dest overlap on a cache line. - * Just give up rather than trying to be precise. - */ - put_page(src_page); - break; - } - get_page(dst_page); - if (pte_val(dst_pte) != pte_val(*dst_ptep)) { - put_page(dst_page); - goto retry_dest; - } - if (pte_huge(dst_pte)) { - /* Adjust the PTE to correspond to a small page */ - int pfn = pte_pfn(dst_pte); - pfn += (((unsigned long)dest & (HPAGE_SIZE-1)) - >> PAGE_SHIFT); - dst_pte = pfn_pte(pfn, dst_pte); - dst_pte = pte_mksmall(dst_pte); - } - - /* All looks good: create a cachable PTE and copy from it */ - copy_size = len; - bytes_left_on_page = - PAGE_SIZE - (((int)source) & (PAGE_SIZE-1)); - if (copy_size > bytes_left_on_page) - copy_size = bytes_left_on_page; - bytes_left_on_page = - PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1)); - if (copy_size > bytes_left_on_page) - copy_size = bytes_left_on_page; - memcpy_multicache(dest, source, dst_pte, src_pte, copy_size); - - /* Release the pages */ - put_page(dst_page); - put_page(src_page); - - /* Continue on the next page */ - dest += copy_size; - source += copy_size; - len -= copy_size; - } - - retval = func(dest, source, len); - put_cpu(); - return retval; -} - -void *memcpy(void *to, const void *from, __kernel_size_t n) -{ - if (n < LARGE_COPY_CUTOFF) - return (void *)__memcpy_asm(to, from, n); - else - return (void *)fast_copy(to, from, n, __memcpy_asm); -} - -unsigned long __copy_to_user_inatomic(void __user *to, const void *from, - unsigned long n) -{ - if (n < LARGE_COPY_CUTOFF) - return __copy_to_user_inatomic_asm(to, from, n); - else - return fast_copy(to, from, n, __copy_to_user_inatomic_asm); -} - -unsigned long __copy_from_user_inatomic(void *to, const void __user *from, - unsigned long n) -{ - if (n < LARGE_COPY_CUTOFF) - return __copy_from_user_inatomic_asm(to, from, n); - else - return fast_copy(to, from, n, __copy_from_user_inatomic_asm); -} - -unsigned long __copy_from_user_zeroing(void *to, const void __user *from, - unsigned long n) -{ - if (n < LARGE_COPY_CUTOFF) - return __copy_from_user_zeroing_asm(to, from, n); - else - return fast_copy(to, from, n, __copy_from_user_zeroing_asm); -} - -#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */ diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c index 9a7837d11f7d..2042bfe6595f 100644 --- a/arch/tile/lib/memset_32.c +++ b/arch/tile/lib/memset_32.c @@ -23,11 +23,7 @@ void *memset(void *s, int c, size_t n) int n32; uint32_t v16, v32; uint8_t *out8 = s; -#if !CHIP_HAS_WH64() - int ahead32; -#else int to_align32; -#endif /* Experimentation shows that a trivial tight loop is a win up until * around a size of 20, where writing a word at a time starts to win. @@ -58,21 +54,6 @@ void *memset(void *s, int c, size_t n) return s; } -#if !CHIP_HAS_WH64() - /* Use a spare issue slot to start prefetching the first cache - * line early. This instruction is free as the store can be buried - * in otherwise idle issue slots doing ALU ops. - */ - __insn_prefetch(out8); - - /* We prefetch the end so that a short memset that spans two cache - * lines gets some prefetching benefit. Again we believe this is free - * to issue. - */ - __insn_prefetch(&out8[n - 1]); -#endif /* !CHIP_HAS_WH64() */ - - /* Align 'out8'. We know n >= 3 so this won't write past the end. */ while (((uintptr_t) out8 & 3) != 0) { *out8++ = c; @@ -93,90 +74,6 @@ void *memset(void *s, int c, size_t n) /* This must be at least 8 or the following loop doesn't work. */ #define CACHE_LINE_SIZE_IN_WORDS (CHIP_L2_LINE_SIZE() / 4) -#if !CHIP_HAS_WH64() - - ahead32 = CACHE_LINE_SIZE_IN_WORDS; - - /* We already prefetched the first and last cache lines, so - * we only need to do more prefetching if we are storing - * to more than two cache lines. - */ - if (n32 > CACHE_LINE_SIZE_IN_WORDS * 2) { - int i; - - /* Prefetch the next several cache lines. - * This is the setup code for the software-pipelined - * loop below. - */ -#define MAX_PREFETCH 5 - ahead32 = n32 & -CACHE_LINE_SIZE_IN_WORDS; - if (ahead32 > MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS) - ahead32 = MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS; - - for (i = CACHE_LINE_SIZE_IN_WORDS; - i < ahead32; i += CACHE_LINE_SIZE_IN_WORDS) - __insn_prefetch(&out32[i]); - } - - if (n32 > ahead32) { - while (1) { - int j; - - /* Prefetch by reading one word several cache lines - * ahead. Since loads are non-blocking this will - * cause the full cache line to be read while we are - * finishing earlier cache lines. Using a store - * here causes microarchitectural performance - * problems where a victimizing store miss goes to - * the head of the retry FIFO and locks the pipe for - * a few cycles. So a few subsequent stores in this - * loop go into the retry FIFO, and then later - * stores see other stores to the same cache line - * are already in the retry FIFO and themselves go - * into the retry FIFO, filling it up and grinding - * to a halt waiting for the original miss to be - * satisfied. - */ - __insn_prefetch(&out32[ahead32]); - -#if CACHE_LINE_SIZE_IN_WORDS % 4 != 0 -#error "Unhandled CACHE_LINE_SIZE_IN_WORDS" -#endif - - n32 -= CACHE_LINE_SIZE_IN_WORDS; - - /* Save icache space by only partially unrolling - * this loop. - */ - for (j = CACHE_LINE_SIZE_IN_WORDS / 4; j > 0; j--) { - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - } - - /* To save compiled code size, reuse this loop even - * when we run out of prefetching to do by dropping - * ahead32 down. - */ - if (n32 <= ahead32) { - /* Not even a full cache line left, - * so stop now. - */ - if (n32 < CACHE_LINE_SIZE_IN_WORDS) - break; - - /* Choose a small enough value that we don't - * prefetch past the end. There's no sense - * in touching cache lines we don't have to. - */ - ahead32 = CACHE_LINE_SIZE_IN_WORDS - 1; - } - } - } - -#else /* CHIP_HAS_WH64() */ - /* Determine how many words we need to emit before the 'out32' * pointer becomes aligned modulo the cache line size. */ @@ -233,8 +130,6 @@ void *memset(void *s, int c, size_t n) n32 &= CACHE_LINE_SIZE_IN_WORDS - 1; } -#endif /* CHIP_HAS_WH64() */ - /* Now handle any leftover values. */ if (n32 != 0) { do { diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 39c48cbe0a96..111d5a9b76f1 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -466,28 +466,15 @@ good_area: } } -#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() - /* - * If this was an asynchronous fault, - * restart the appropriate engine. - */ - switch (fault_num) { #if CHIP_HAS_TILE_DMA() + /* If this was a DMA TLB fault, restart the DMA engine. */ + switch (fault_num) { case INT_DMATLB_MISS: case INT_DMATLB_MISS_DWNCL: case INT_DMATLB_ACCESS: case INT_DMATLB_ACCESS_DWNCL: __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK); break; -#endif -#if CHIP_HAS_SN_PROC() - case INT_SNITLB_MISS: - case INT_SNITLB_MISS_DWNCL: - __insn_mtspr(SPR_SNCTL, - __insn_mfspr(SPR_SNCTL) & - ~SPR_SNCTL__FRZPROC_MASK); - break; -#endif } #endif @@ -804,10 +791,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num, case INT_DMATLB_MISS: case INT_DMATLB_MISS_DWNCL: #endif -#if CHIP_HAS_SN_PROC() - case INT_SNITLB_MISS: - case INT_SNITLB_MISS_DWNCL: -#endif is_page_fault = 1; break; @@ -823,7 +806,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num, panic("Bad fault number %d in do_page_fault", fault_num); } -#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() +#if CHIP_HAS_TILE_DMA() if (!user_mode(regs)) { struct async_tlb *async; switch (fault_num) { @@ -835,12 +818,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num, async = ¤t->thread.dma_async_tlb; break; #endif -#if CHIP_HAS_SN_PROC() - case INT_SNITLB_MISS: - case INT_SNITLB_MISS_DWNCL: - async = ¤t->thread.sn_async_tlb; - break; -#endif default: async = NULL; } @@ -873,14 +850,22 @@ void do_page_fault(struct pt_regs *regs, int fault_num, } -#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() +#if CHIP_HAS_TILE_DMA() /* - * Check an async_tlb structure to see if a deferred fault is waiting, - * and if so pass it to the page-fault code. + * This routine effectively re-issues asynchronous page faults + * when we are returning to user space. */ -static void handle_async_page_fault(struct pt_regs *regs, - struct async_tlb *async) +void do_async_page_fault(struct pt_regs *regs) { + struct async_tlb *async = ¤t->thread.dma_async_tlb; + + /* + * Clear thread flag early. If we re-interrupt while processing + * code here, we will reset it and recall this routine before + * returning to user space. + */ + clear_thread_flag(TIF_ASYNC_TLB); + if (async->fault_num) { /* * Clear async->fault_num before calling the page-fault @@ -894,28 +879,7 @@ static void handle_async_page_fault(struct pt_regs *regs, async->address, async->is_write); } } - -/* - * This routine effectively re-issues asynchronous page faults - * when we are returning to user space. - */ -void do_async_page_fault(struct pt_regs *regs) -{ - /* - * Clear thread flag early. If we re-interrupt while processing - * code here, we will reset it and recall this routine before - * returning to user space. - */ - clear_thread_flag(TIF_ASYNC_TLB); - -#if CHIP_HAS_TILE_DMA() - handle_async_page_fault(regs, ¤t->thread.dma_async_tlb); -#endif -#if CHIP_HAS_SN_PROC() - handle_async_page_fault(regs, ¤t->thread.sn_async_tlb); -#endif -} -#endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */ +#endif /* CHIP_HAS_TILE_DMA() */ void vmalloc_sync_all(void) diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index e3ee55b0327a..004ba568d93f 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -43,12 +43,9 @@ #include "migrate.h" -#if CHIP_HAS_COHERENT_LOCAL_CACHE() - /* * The noallocl2 option suppresses all use of the L2 cache to cache - * locally from a remote home. There's no point in using it if we - * don't have coherent local caching, though. + * locally from a remote home. */ static int __write_once noallocl2; static int __init set_noallocl2(char *str) @@ -58,12 +55,6 @@ static int __init set_noallocl2(char *str) } early_param("noallocl2", set_noallocl2); -#else - -#define noallocl2 0 - -#endif - /* * Update the irq_stat for cpus that we are going to interrupt @@ -265,10 +256,8 @@ static int pte_to_home(pte_t pte) return PAGE_HOME_INCOHERENT; case HV_PTE_MODE_UNCACHED: return PAGE_HOME_UNCACHED; -#if CHIP_HAS_CBOX_HOME_MAP() case HV_PTE_MODE_CACHE_HASH_L3: return PAGE_HOME_HASH; -#endif } panic("Bad PTE %#llx\n", pte.val); } @@ -325,20 +314,16 @@ pte_t pte_set_home(pte_t pte, int home) HV_PTE_MODE_CACHE_NO_L3); } } else -#if CHIP_HAS_CBOX_HOME_MAP() if (hash_default) pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); else -#endif pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); pte = hv_pte_set_nc(pte); break; -#if CHIP_HAS_CBOX_HOME_MAP() case PAGE_HOME_HASH: pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); break; -#endif default: BUG_ON(home < 0 || home >= NR_CPUS || @@ -348,7 +333,6 @@ pte_t pte_set_home(pte_t pte, int home) break; } -#if CHIP_HAS_NC_AND_NOALLOC_BITS() if (noallocl2) pte = hv_pte_set_no_alloc_l2(pte); @@ -357,7 +341,6 @@ pte_t pte_set_home(pte_t pte, int home) hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) { pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED); } -#endif /* Checking this case here gives a better panic than from the hv. */ BUG_ON(hv_pte_get_mode(pte) == 0); @@ -373,16 +356,10 @@ EXPORT_SYMBOL(pte_set_home); * so they're not suitable for anything but infrequent use. */ -#if CHIP_HAS_CBOX_HOME_MAP() -static inline int initial_page_home(void) { return PAGE_HOME_HASH; } -#else -static inline int initial_page_home(void) { return 0; } -#endif - int page_home(struct page *page) { if (PageHighMem(page)) { - return initial_page_home(); + return PAGE_HOME_HASH; } else { unsigned long kva = (unsigned long)page_address(page); return pte_to_home(*virt_to_kpte(kva)); @@ -438,7 +415,7 @@ struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask, void __homecache_free_pages(struct page *page, unsigned int order) { if (put_page_testzero(page)) { - homecache_change_page_home(page, order, initial_page_home()); + homecache_change_page_home(page, order, PAGE_HOME_HASH); if (order == 0) { free_hot_cold_page(page, 0); } else { diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index c8f58c12866d..22e41cf5a2a9 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -106,10 +106,8 @@ pte_t *get_prealloc_pte(unsigned long pfn) */ static int initial_heap_home(void) { -#if CHIP_HAS_CBOX_HOME_MAP() if (hash_default) return PAGE_HOME_HASH; -#endif return smp_processor_id(); } @@ -190,14 +188,11 @@ static void __init page_table_range_init(unsigned long start, } -#if CHIP_HAS_CBOX_HOME_MAP() - static int __initdata ktext_hash = 1; /* .text pages */ static int __initdata kdata_hash = 1; /* .data and .bss pages */ int __write_once hash_default = 1; /* kernel allocator pages */ EXPORT_SYMBOL(hash_default); int __write_once kstack_hash = 1; /* if no homecaching, use h4h */ -#endif /* CHIP_HAS_CBOX_HOME_MAP */ /* * CPUs to use to for striping the pages of kernel data. If hash-for-home @@ -215,14 +210,12 @@ int __write_once kdata_huge; /* if no homecaching, small pages */ static pgprot_t __init construct_pgprot(pgprot_t prot, int home) { prot = pte_set_home(prot, home); -#if CHIP_HAS_CBOX_HOME_MAP() if (home == PAGE_HOME_IMMUTABLE) { if (ktext_hash) prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3); else prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3); } -#endif return prot; } @@ -236,20 +229,15 @@ static pgprot_t __init init_pgprot(ulong address) unsigned long page; enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET }; -#if CHIP_HAS_CBOX_HOME_MAP() /* For kdata=huge, everything is just hash-for-home. */ if (kdata_huge) return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); -#endif /* We map the aliased pages of permanent text inaccessible. */ if (address < (ulong) _sinittext - CODE_DELTA) return PAGE_NONE; - /* - * We map read-only data non-coherent for performance. We could - * use neighborhood caching on TILE64, but it's not clear it's a win. - */ + /* We map read-only data non-coherent for performance. */ if ((address >= (ulong) __start_rodata && address < (ulong) __end_rodata) || address == (ulong) empty_zero_page) { @@ -257,12 +245,10 @@ static pgprot_t __init init_pgprot(ulong address) } #ifndef __tilegx__ -#if !ATOMIC_LOCKS_FOUND_VIA_TABLE() /* Force the atomic_locks[] array page to be hash-for-home. */ if (address == (ulong) atomic_locks) return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); #endif -#endif /* * Everything else that isn't data or bss is heap, so mark it @@ -280,11 +266,9 @@ static pgprot_t __init init_pgprot(ulong address) if (address >= (ulong) _end || address < (ulong) _einitdata) return construct_pgprot(PAGE_KERNEL, initial_heap_home()); -#if CHIP_HAS_CBOX_HOME_MAP() /* Use hash-for-home if requested for data/bss. */ if (kdata_hash) return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); -#endif /* * Make the w1data homed like heap to start with, to avoid @@ -311,11 +295,9 @@ static pgprot_t __init init_pgprot(ulong address) if (page == (ulong)empty_zero_page) continue; #ifndef __tilegx__ -#if !ATOMIC_LOCKS_FOUND_VIA_TABLE() if (page == (ulong)atomic_locks) continue; #endif -#endif cpu = cpumask_next(cpu, &kdata_mask); if (cpu == NR_CPUS) cpu = cpumask_first(&kdata_mask); @@ -358,7 +340,7 @@ static int __init setup_ktext(char *str) ktext_arg_seen = 1; - /* Default setting on Tile64: use a huge page */ + /* Default setting: use a huge page */ if (strcmp(str, "huge") == 0) pr_info("ktext: using one huge locally cached page\n"); @@ -404,10 +386,8 @@ static inline pgprot_t ktext_set_nocache(pgprot_t prot) { if (!ktext_nocache) prot = hv_pte_set_nc(prot); -#if CHIP_HAS_NC_AND_NOALLOC_BITS() else prot = hv_pte_set_no_alloc_l2(prot); -#endif return prot; } @@ -440,7 +420,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) struct cpumask kstripe_mask; int rc, i; -#if CHIP_HAS_CBOX_HOME_MAP() if (ktext_arg_seen && ktext_hash) { pr_warning("warning: \"ktext\" boot argument ignored" " if \"kcache_hash\" sets up text hash-for-home\n"); @@ -457,7 +436,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) " kcache_hash=all or =allbutstack\n"); kdata_huge = 0; } -#endif /* * Set up a mask for cpus to use for kernel striping. @@ -585,13 +563,11 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) } else { pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC); pteval = pte_mkhuge(pteval); -#if CHIP_HAS_CBOX_HOME_MAP() if (ktext_hash) { pteval = hv_pte_set_mode(pteval, HV_PTE_MODE_CACHE_HASH_L3); pteval = ktext_set_nocache(pteval); } else -#endif /* CHIP_HAS_CBOX_HOME_MAP() */ if (cpumask_weight(&ktext_mask) == 1) { pteval = set_remote_cache_cpu(pteval, cpumask_first(&ktext_mask)); @@ -938,26 +914,6 @@ void __init pgtable_cache_init(void) panic("pgtable_cache_init(): Cannot create pgd cache"); } -#if !CHIP_HAS_COHERENT_LOCAL_CACHE() -/* - * The __w1data area holds data that is only written during initialization, - * and is read-only and thus freely cacheable thereafter. Fix the page - * table entries that cover that region accordingly. - */ -static void mark_w1data_ro(void) -{ - /* Loop over page table entries */ - unsigned long addr = (unsigned long)__w1data_begin; - BUG_ON((addr & (PAGE_SIZE-1)) != 0); - for (; addr <= (unsigned long)__w1data_end - 1; addr += PAGE_SIZE) { - unsigned long pfn = kaddr_to_pfn((void *)addr); - pte_t *ptep = virt_to_kpte(addr); - BUG_ON(pte_huge(*ptep)); /* not relevant for kdata_huge */ - set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL_RO)); - } -} -#endif - #ifdef CONFIG_DEBUG_PAGEALLOC static long __write_once initfree; #else @@ -1026,10 +982,7 @@ void free_initmem(void) /* * Evict the dirty initdata on the boot cpu, evict the w1data * wherever it's homed, and evict all the init code everywhere. - * We are guaranteed that no one will touch the init pages any - * more, and although other cpus may be touching the w1data, - * we only actually change the caching on tile64, which won't - * be keeping local copies in the other tiles' caches anyway. + * We are guaranteed that no one will touch the init pages any more. */ homecache_evict(&cpu_cacheable_map); @@ -1045,21 +998,6 @@ void free_initmem(void) free_init_pages("unused kernel text", (unsigned long)_sinittext - text_delta, (unsigned long)_einittext - text_delta); - -#if !CHIP_HAS_COHERENT_LOCAL_CACHE() - /* - * Upgrade the .w1data section to globally cached. - * We don't do this on tilepro, since the cache architecture - * pretty much makes it irrelevant, and in any case we end - * up having racing issues with other tiles that may touch - * the data after we flush the cache but before we update - * the PTEs and flush the TLBs, causing sharer shootdowns - * later. Even though this is to clean data, it seems like - * an unnecessary complication. - */ - mark_w1data_ro(); -#endif - /* Do a global TLB flush so everyone sees the changes. */ flush_tlb_all(); } |