summaryrefslogtreecommitdiff
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-07-30 21:01:36 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-30 21:01:36 -0700
commitbad60e6f259a01cf9f29a1ef8d435ab6c60b2de9 (patch)
treec9aaa8166735659761239c117af2b11b022bc6cb /arch/powerpc/mm
parentdd0f0cf58af70dc9267409f113bea772d57f675c (diff)
parent719dbb2df78fc9a40e28392b07cd715bfc5a665c (diff)
downloadlwn-bad60e6f259a01cf9f29a1ef8d435ab6c60b2de9.tar.gz
lwn-bad60e6f259a01cf9f29a1ef8d435ab6c60b2de9.zip
Merge tag 'powerpc-4.8-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman: "Highlights: - PowerNV PCI hotplug support. - Lots more Power9 support. - eBPF JIT support on ppc64le. - Lots of cxl updates. - Boot code consolidation. Bug fixes: - Fix spin_unlock_wait() from Boqun Feng - Fix stack pointer corruption in __tm_recheckpoint() from Michael Neuling - Fix multiple bugs in memory_hotplug_max() from Bharata B Rao - mm: Ensure "special" zones are empty from Oliver O'Halloran - ftrace: Separate the heuristics for checking call sites from Michael Ellerman - modules: Never restore r2 for a mprofile-kernel style mcount() call from Michael Ellerman - Fix endianness when reading TCEs from Alexey Kardashevskiy - start rtasd before PCI probing from Greg Kurz - PCI: rpaphp: Fix slot registration for multiple slots under a PHB from Tyrel Datwyler - powerpc/mm: Add memory barrier in __hugepte_alloc() from Sukadev Bhattiprolu Cleanups & fixes: - Drop support for MPIC in pseries from Rashmica Gupta - Define and use PPC64_ELF_ABI_v2/v1 from Michael Ellerman - Remove unused symbols in asm-offsets.c from Rashmica Gupta - Fix SRIOV not building without EEH enabled from Russell Currey - Remove kretprobe_trampoline_holder from Thiago Jung Bauermann - Reduce log level of PCI I/O space warning from Benjamin Herrenschmidt - Add array bounds checking to crash_shutdown_handlers from Suraj Jitindar Singh - Avoid -maltivec when using clang integrated assembler from Anton Blanchard - Fix array overrun in ppc_rtas() syscall from Andrew Donnellan - Fix error return value in cmm_mem_going_offline() from Rasmus Villemoes - export cpu_to_core_id() from Mauricio Faria de Oliveira - Remove old symbols from defconfigs from Andrew Donnellan - Update obsolete comments in setup_32.c about entry conditions from Benjamin Herrenschmidt - Add comment explaining the purpose of setup_kdump_trampoline() from Benjamin Herrenschmidt - Merge the RELOCATABLE config entries for ppc32 and ppc64 from Kevin Hao - Remove RELOCATABLE_PPC32 from Kevin Hao - Fix .long's in tlb-radix.c to more meaningful from Balbir Singh Minor cleanups & fixes: - Andrew Donnellan, Anna-Maria Gleixner, Anton Blanchard, Benjamin Herrenschmidt, Bharata B Rao, Christophe Leroy, Colin Ian King, Geliang Tang, Greg Kurz, Madhavan Srinivasan, Michael Ellerman, Michael Ellerman, Stephen Rothwell, Stewart Smith. Freescale updates from Scott: - "Highlights include more 8xx optimizations, device tree updates, and MVME7100 support." PowerNV PCI hotplug from Gavin Shan: - PCI: Add pcibios_setup_bridge() - Override pcibios_setup_bridge() - Remove PCI_RESET_DELAY_US - Move pnv_pci_ioda_setup_opal_tce_kill() around - Increase PE# capacity - Allocate PE# in reverse order - Create PEs in pcibios_setup_bridge() - Setup PE for root bus - Extend PCI bridge resources - Make pnv_ioda_deconfigure_pe() visible - Dynamically release PE - Update bridge windows on PCI plug - Delay populating pdn - Support PCI slot ID - Use PCI slot reset infrastructure - Introduce pnv_pci_get_slot_id() - Functions to get/set PCI slot state - PCI/hotplug: PowerPC PowerNV PCI hotplug driver - Print correct PHB type names Power9 idle support from Shreyas B. Prabhu: - set power_save func after the idle states are initialized - Use PNV_THREAD_WINKLE macro while requesting for winkle - make hypervisor state restore a function - Rename idle_power7.S to idle_book3s.S - Rename reusable idle functions to hardware agnostic names - Make pnv_powersave_common more generic - abstraction for saving SPRs before entering deep idle states - Add platform support for stop instruction - cpuidle/powernv: Use CPUIDLE_STATE_MAX instead of MAX_POWERNV_IDLE_STATES - cpuidle/powernv: cleanup cpuidle-powernv.c - cpuidle/powernv: Add support for POWER ISA v3 idle states - Use deepest stop state when cpu is offlined Power9 PMU from Madhavan Srinivasan: - factor out power8 pmu macros and defines - factor out power8 pmu functions - factor out power8 __init_pmu code - Add power9 event list macros for generic and cache events - Power9 PMU support - Export Power9 generic and cache events to sysfs Power9 preliminary interrupt & PCI support from Benjamin Herrenschmidt: - Add XICS emulation APIs - Move a few exception common handlers to make room - Add support for HV virtualization interrupts - Add mechanism to force a replay of interrupts - Add ICP OPAL backend - Discover IODA3 PHBs - pci: Remove obsolete SW invalidate - opal: Add real mode call wrappers - Rename TCE invalidation calls - Remove SWINV constants and obsolete TCE code - Rework accessing the TCE invalidate register - Fallback to OPAL for TCE invalidations - Use the device-tree to get available range of M64's - Check status of a PHB before using it - pci: Don't try to allocate resources that will be reassigned Other Power9: - Send SIGBUS on unaligned copy and paste from Chris Smart - Large Decrementer support from Oliver O'Halloran - Load Monitor Register Support from Jack Miller Performance improvements from Anton Blanchard: - Avoid load hit store in __giveup_fpu() and __giveup_altivec() - Avoid load hit store in setup_sigcontext() - Remove assembly versions of strcpy, strcat, strlen and strcmp - Align hot loops of some string functions eBPF JIT from Naveen N. Rao: - Fix/enhance 32-bit Load Immediate implementation - Optimize 64-bit Immediate loads - Introduce rotate immediate instructions - A few cleanups - Isolate classic BPF JIT specifics into a separate header - Implement JIT compiler for extended BPF Operator Panel driver from Suraj Jitindar Singh: - devicetree/bindings: Add binding for operator panel on FSP machines - Add inline function to get rc from an ASYNC_COMP opal_msg - Add driver for operator panel on FSP machines Sparse fixes from Daniel Axtens: - make some things static - Introduce asm-prototypes.h - Include headers containing prototypes - Use #ifdef __BIG_ENDIAN__ #else for REG_BYTE - kvm: Clarify __user annotations - Pass endianness to sparse - Make ppc_md.{halt, restart} __noreturn MM fixes & cleanups from Aneesh Kumar K.V: - radix: Update LPCR HR bit as per ISA - use _raw variant of page table accessors - Compile out radix related functions if RADIX_MMU is disabled - Clear top 16 bits of va only on older cpus - Print formation regarding the the MMU mode - hash: Update SDR1 size encoding as documented in ISA 3.0 - radix: Update PID switch sequence - radix: Update machine call back to support new HCALL. - radix: Add LPID based tlb flush helpers - radix: Add a kernel command line to disable radix - Cleanup LPCR defines Boot code consolidation from Benjamin Herrenschmidt: - Move epapr_paravirt_early_init() to early_init_devtree() - cell: Don't use flat device-tree after boot - ge_imp3a: Don't use the flat device-tree after boot - mpc85xx_ds: Don't use the flat device-tree after boot - mpc85xx_rdb: Don't use the flat device-tree after boot - Don't test for machine type in rtas_initialize() - Don't test for machine type in smp_setup_cpu_maps() - dt: Add of_device_compatible_match() - Factor do_feature_fixup calls - Move 64-bit feature fixup earlier - Move 64-bit memory reserves to setup_arch() - Use a cachable DART - Move FW feature probing out of pseries probe() - Put exception configuration in a common place - Remove early allocation of the SMU command buffer - Move MMU backend selection out of platform code - pasemi: Remove IOBMAP allocation from platform probe() - mm/hash: Don't use machine_is() early during boot - Don't test for machine type to detect HEA special case - pmac: Remove spurrious machine type test - Move hash table ops to a separate structure - Ensure that ppc_md is empty before probing for machine type - Move 64-bit probe_machine() to later in the boot process - Move 32-bit probe() machine to later in the boot process - Get rid of ppc_md.init_early() - Move the boot time info banner to a separate function - Move setting of {i,d}cache_bsize to initialize_cache_info() - Move the content of setup_system() to setup_arch() - Move cache info inits to a separate function - Re-order the call to smp_setup_cpu_maps() - Re-order setup_panic() - Make a few boot functions __init - Merge 32-bit and 64-bit setup_arch() Other new features: - tty/hvc: Use IRQF_SHARED for OPAL hvc consoles from Sam Mendoza-Jonas - tty/hvc: Use opal irqchip interface if available from Sam Mendoza-Jonas - powerpc: Add module autoloading based on CPU features from Alastair D'Silva - crypto: vmx - Convert to CPU feature based module autoloading from Alastair D'Silva - Wake up kopald polling thread before waiting for events from Benjamin Herrenschmidt - xmon: Dump ISA 2.06 SPRs from Michael Ellerman - xmon: Dump ISA 2.07 SPRs from Michael Ellerman - Add a parameter to disable 1TB segs from Oliver O'Halloran - powerpc/boot: Add OPAL console to epapr wrappers from Oliver O'Halloran - Assign fixed PHB number based on device-tree properties from Guilherme G. Piccoli - pseries: Add pseries hotplug workqueue from John Allen - pseries: Add support for hotplug interrupt source from John Allen - pseries: Use kernel hotplug queue for PowerVM hotplug events from John Allen - pseries: Move property cloning into its own routine from Nathan Fontenot - pseries: Dynamic add entires to associativity lookup array from Nathan Fontenot - pseries: Auto-online hotplugged memory from Nathan Fontenot - pseries: Remove call to memblock_add() from Nathan Fontenot cxl: - Add set and get private data to context struct from Michael Neuling - make base more explicitly non-modular from Paul Gortmaker - Use for_each_compatible_node() macro from Wei Yongjun - Frederic Barrat - Abstract the differences between the PSL and XSL - Make vPHB device node match adapter's - Philippe Bergheaud - Add mechanism for delivering AFU driver specific events - Ignore CAPI adapters misplaced in switched slots - Refine slice error debug messages - Andrew Donnellan - static-ify variables to fix sparse warnings - PCI/hotplug: pnv_php: export symbols and move struct types needed by cxl - PCI/hotplug: pnv_php: handle OPAL_PCI_SLOT_OFFLINE power state - Add cxl_check_and_switch_mode() API to switch bi-modal cards - remove dead Kconfig options - fix potential NULL dereference in free_adapter() - Ian Munsie - Update process element after allocating interrupts - Add support for CAPP DMA mode - Fix allowing bogus AFU descriptors with 0 maximum processes - Fix allocating a minimum of 2 pages for the SPA - Fix bug where AFU disable operation had no effect - Workaround XSL bug that does not clear the RA bit after a reset - Fix NULL pointer dereference on kernel contexts with no AFU interrupts - powerpc/powernv: Split cxl code out into a separate file - Add cxl_slot_is_supported API - Enable bus mastering for devices using CAPP DMA mode - Move cxl_afu_get / cxl_afu_put to base - Allow a default context to be associated with an external pci_dev - Do not create vPHB if there are no AFU configuration records - powerpc/powernv: Add support for the cxl kernel api on the real phb - Add support for using the kernel API with a real PHB - Add kernel APIs to get & set the max irqs per context - Add preliminary workaround for CX4 interrupt limitation - Add support for interrupts on the Mellanox CX4 - Workaround PE=0 hardware limitation in Mellanox CX4 - powerpc/powernv: Fix pci-cxl.c build when CONFIG_MODULES=n selftests: - Test unaligned copy and paste from Chris Smart - Load Monitor Register Tests from Jack Miller - Cyril Bur - exec() with suspended transaction - Use signed long to read perf_event_paranoid - Fix usage message in context_switch - Fix generation of vector instructions/types in context_switch - Michael Ellerman - Use "Delta" rather than "Error" in normal output - Import Anton's mmap & futex micro benchmarks - Add a test for PROT_SAO" * tag 'powerpc-4.8-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (263 commits) powerpc/mm: Parenthesise IS_ENABLED() in if condition tty/hvc: Use opal irqchip interface if available tty/hvc: Use IRQF_SHARED for OPAL hvc consoles selftests/powerpc: exec() with suspended transaction powerpc: Improve comment explaining why we modify VRSAVE powerpc/mm: Drop unused externs for hpte_init_beat[_v3]() powerpc/mm: Rename hpte_init_lpar() and move the fallback to a header powerpc/mm: Fix build break when PPC_NATIVE=n crypto: vmx - Convert to CPU feature based module autoloading powerpc: Add module autoloading based on CPU features powerpc/powernv/ioda: Fix endianness when reading TCEs powerpc/mm: Add memory barrier in __hugepte_alloc() powerpc/modules: Never restore r2 for a mprofile-kernel style mcount() call powerpc/ftrace: Separate the heuristics for checking call sites powerpc: Merge 32-bit and 64-bit setup_arch() powerpc/64: Make a few boot functions __init powerpc: Re-order setup_panic() powerpc: Re-order the call to smp_setup_cpu_maps() powerpc/32: Move cache info inits to a separate function powerpc/64: Move the content of setup_system() to setup_arch() ...
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/8xx_mmu.c131
-rw-r--r--arch/powerpc/mm/hash64_4k.c18
-rw-r--r--arch/powerpc/mm/hash64_64k.c39
-rw-r--r--arch/powerpc/mm/hash_native_64.c36
-rw-r--r--arch/powerpc/mm/hash_utils_64.c153
-rw-r--r--arch/powerpc/mm/hugepage-hash64.c17
-rw-r--r--arch/powerpc/mm/hugetlbpage-hash64.c4
-rw-r--r--arch/powerpc/mm/hugetlbpage.c7
-rw-r--r--arch/powerpc/mm/init_32.c5
-rw-r--r--arch/powerpc/mm/mem.c18
-rw-r--r--arch/powerpc/mm/mmu_context_book3s64.c5
-rw-r--r--arch/powerpc/mm/mmu_decl.h3
-rw-r--r--arch/powerpc/mm/numa.c36
-rw-r--r--arch/powerpc/mm/pgtable-radix.c17
-rw-r--r--arch/powerpc/mm/tlb-radix.c65
15 files changed, 361 insertions, 193 deletions
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index 949100577db5..6c5025e81236 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -13,62 +13,115 @@
*/
#include <linux/memblock.h>
+#include <asm/fixmap.h>
+#include <asm/code-patching.h>
#include "mmu_decl.h"
+#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT)
+
extern int __map_without_ltlbs;
+
/*
- * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ * Return PA for this VA if it is in IMMR area, or 0
*/
-void __init MMU_init_hw(void)
+phys_addr_t v_block_mapped(unsigned long va)
{
- /* Nothing to do for the time being but keep it similar to other PPC */
+ unsigned long p = PHYS_IMMR_BASE;
+
+ if (__map_without_ltlbs)
+ return 0;
+ if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE)
+ return p + va - VIRT_IMMR_BASE;
+ return 0;
+}
+
+/*
+ * Return VA for a given PA or 0 if not mapped
+ */
+unsigned long p_block_mapped(phys_addr_t pa)
+{
+ unsigned long p = PHYS_IMMR_BASE;
+
+ if (__map_without_ltlbs)
+ return 0;
+ if (pa >= p && pa < p + IMMR_SIZE)
+ return VIRT_IMMR_BASE + pa - p;
+ return 0;
}
-#define LARGE_PAGE_SIZE_4M (1<<22)
#define LARGE_PAGE_SIZE_8M (1<<23)
-#define LARGE_PAGE_SIZE_64M (1<<26)
-unsigned long __init mmu_mapin_ram(unsigned long top)
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+void __init MMU_init_hw(void)
{
- unsigned long v, s, mapped;
- phys_addr_t p;
+ /* PIN up to the 3 first 8Mb after IMMR in DTLB table */
+#ifdef CONFIG_PIN_TLB
+ unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
+ unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY;
+#ifdef CONFIG_PIN_TLB_IMMR
+ int i = 29;
+#else
+ int i = 28;
+#endif
+ unsigned long addr = 0;
+ unsigned long mem = total_lowmem;
+
+ for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
+ mtspr(SPRN_MD_CTR, ctr | (i << 8));
+ mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
+ mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
+ mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
+ addr += LARGE_PAGE_SIZE_8M;
+ mem -= LARGE_PAGE_SIZE_8M;
+ }
+#endif
+}
- v = KERNELBASE;
- p = 0;
- s = top;
+static void mmu_mapin_immr(void)
+{
+ unsigned long p = PHYS_IMMR_BASE;
+ unsigned long v = VIRT_IMMR_BASE;
+ unsigned long f = pgprot_val(PAGE_KERNEL_NCG);
+ int offset;
- if (__map_without_ltlbs)
- return 0;
+ for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE)
+ map_page(v + offset, p + offset, f);
+}
-#ifdef CONFIG_PPC_4K_PAGES
- while (s >= LARGE_PAGE_SIZE_8M) {
- pmd_t *pmdp;
- unsigned long val = p | MD_PS8MEG;
+/* Address of instructions to patch */
+#ifndef CONFIG_PIN_TLB_IMMR
+extern unsigned int DTLBMiss_jmp;
+#endif
+extern unsigned int DTLBMiss_cmp, FixupDAR_cmp;
- pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
- *pmdp++ = __pmd(val);
- *pmdp++ = __pmd(val + LARGE_PAGE_SIZE_4M);
+void mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped)
+{
+ unsigned int instr = *addr;
- v += LARGE_PAGE_SIZE_8M;
- p += LARGE_PAGE_SIZE_8M;
- s -= LARGE_PAGE_SIZE_8M;
- }
-#else /* CONFIG_PPC_16K_PAGES */
- while (s >= LARGE_PAGE_SIZE_64M) {
- pmd_t *pmdp;
- unsigned long val = p | MD_PS8MEG;
+ instr &= 0xffff0000;
+ instr |= (unsigned long)__va(mapped) >> 16;
+ patch_instruction(addr, instr);
+}
- pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
- *pmdp++ = __pmd(val);
+unsigned long __init mmu_mapin_ram(unsigned long top)
+{
+ unsigned long mapped;
- v += LARGE_PAGE_SIZE_64M;
- p += LARGE_PAGE_SIZE_64M;
- s -= LARGE_PAGE_SIZE_64M;
- }
+ if (__map_without_ltlbs) {
+ mapped = 0;
+ mmu_mapin_immr();
+#ifndef CONFIG_PIN_TLB_IMMR
+ patch_instruction(&DTLBMiss_jmp, PPC_INST_NOP);
#endif
+ } else {
+ mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
+ }
- mapped = top - s;
+ mmu_patch_cmp_limit(&DTLBMiss_cmp, mapped);
+ mmu_patch_cmp_limit(&FixupDAR_cmp, mapped);
/* If the size of RAM is not an exact power of two, we may not
* have covered RAM in its entirety with 8 MiB
@@ -77,7 +130,8 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
* coverage with normal-sized pages (or other reasons) do not
* attempt to allocate outside the allowed range.
*/
- memblock_set_current_limit(mapped);
+ if (mapped)
+ memblock_set_current_limit(mapped);
return mapped;
}
@@ -90,13 +144,8 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
*/
BUG_ON(first_memblock_base != 0);
-#ifdef CONFIG_PIN_TLB
/* 8xx can only access 24MB at the moment */
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01800000));
-#else
- /* 8xx can only access 8MB at the moment */
- memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000));
-#endif
}
/*
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 6333b273d2d5..42c702b3be1f 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -70,8 +70,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
- if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K,
- MMU_PAGE_4K, ssize, flags) == -1)
+ if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K,
+ MMU_PAGE_4K, ssize, flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
@@ -84,21 +84,23 @@ repeat:
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
- MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ MMU_PAGE_4K, MMU_PAGE_4K, ssize);
/*
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
- rflags, HPTE_V_SECONDARY,
- MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+ rflags,
+ HPTE_V_SECONDARY,
+ MMU_PAGE_4K,
+ MMU_PAGE_4K, ssize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- ppc_md.hpte_remove(hpte_group);
+ mmu_hash_ops.hpte_remove(hpte_group);
/*
* FIXME!! Should be try the group from which we removed ?
*/
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 16644e1f4e6b..3bbbea07378c 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -133,9 +133,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
- ret = ppc_md.hpte_updatepp(slot, rflags, vpn,
- MMU_PAGE_4K, MMU_PAGE_4K,
- ssize, flags);
+ ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn,
+ MMU_PAGE_4K, MMU_PAGE_4K,
+ ssize, flags);
/*
*if we failed because typically the HPTE wasn't really here
* we try an insertion.
@@ -166,21 +166,22 @@ repeat:
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
- MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ MMU_PAGE_4K, MMU_PAGE_4K, ssize);
/*
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
- rflags, HPTE_V_SECONDARY,
- MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+ rflags, HPTE_V_SECONDARY,
+ MMU_PAGE_4K, MMU_PAGE_4K,
+ ssize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- ppc_md.hpte_remove(hpte_group);
+ mmu_hash_ops.hpte_remove(hpte_group);
/*
* FIXME!! Should be try the group from which we removed ?
*/
@@ -272,8 +273,9 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
- if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K,
- MMU_PAGE_64K, ssize, flags) == -1)
+ if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K,
+ MMU_PAGE_64K, ssize,
+ flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
@@ -286,21 +288,24 @@ repeat:
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
- MMU_PAGE_64K, MMU_PAGE_64K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ MMU_PAGE_64K, MMU_PAGE_64K,
+ ssize);
/*
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
- rflags, HPTE_V_SECONDARY,
- MMU_PAGE_64K, MMU_PAGE_64K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+ rflags,
+ HPTE_V_SECONDARY,
+ MMU_PAGE_64K,
+ MMU_PAGE_64K, ssize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- ppc_md.hpte_remove(hpte_group);
+ mmu_hash_ops.hpte_remove(hpte_group);
/*
* FIXME!! Should be try the group from which we removed ?
*/
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index f8a871a72985..88ce7d212320 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -55,7 +55,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
* We need 14 to 65 bits of va for a tlibe of 4K page
* With vpn we ignore the lower VPN_SHIFT bits already.
* And top two bits are already ignored because we can
- * only accomadate 76 bits in a 64 bit vpn with a VPN_SHIFT
+ * only accomodate 76 bits in a 64 bit vpn with a VPN_SHIFT
* of 12.
*/
va = vpn << VPN_SHIFT;
@@ -64,7 +64,8 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
* Older versions of the architecture (2.02 and earler) require the
* masking of the top 16 bits.
*/
- va &= ~(0xffffULL << 48);
+ if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
+ va &= ~(0xffffULL << 48);
switch (psize) {
case MMU_PAGE_4K:
@@ -113,7 +114,8 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
* Older versions of the architecture (2.02 and earler) require the
* masking of the top 16 bits.
*/
- va &= ~(0xffffULL << 48);
+ if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
+ va &= ~(0xffffULL << 48);
switch (psize) {
case MMU_PAGE_4K:
@@ -605,7 +607,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
* crashdump and all bets are off anyway.
*
* TODO: add batching support when enabled. remember, no dynamic memory here,
- * athough there is the control page available...
+ * although there is the control page available...
*/
static void native_hpte_clear(void)
{
@@ -723,23 +725,29 @@ static void native_flush_hash_range(unsigned long number, int local)
local_irq_restore(flags);
}
-static int native_update_partition_table(u64 patb1)
+static int native_register_proc_table(unsigned long base, unsigned long page_size,
+ unsigned long table_size)
{
+ unsigned long patb1 = base << 25; /* VSID */
+
+ patb1 |= (page_size << 5); /* sllp */
+ patb1 |= table_size;
+
partition_tb->patb1 = cpu_to_be64(patb1);
return 0;
}
void __init hpte_init_native(void)
{
- ppc_md.hpte_invalidate = native_hpte_invalidate;
- ppc_md.hpte_updatepp = native_hpte_updatepp;
- ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
- ppc_md.hpte_insert = native_hpte_insert;
- ppc_md.hpte_remove = native_hpte_remove;
- ppc_md.hpte_clear_all = native_hpte_clear;
- ppc_md.flush_hash_range = native_flush_hash_range;
- ppc_md.hugepage_invalidate = native_hugepage_invalidate;
+ mmu_hash_ops.hpte_invalidate = native_hpte_invalidate;
+ mmu_hash_ops.hpte_updatepp = native_hpte_updatepp;
+ mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp;
+ mmu_hash_ops.hpte_insert = native_hpte_insert;
+ mmu_hash_ops.hpte_remove = native_hpte_remove;
+ mmu_hash_ops.hpte_clear_all = native_hpte_clear;
+ mmu_hash_ops.flush_hash_range = native_flush_hash_range;
+ mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate;
if (cpu_has_feature(CPU_FTR_ARCH_300))
- ppc_md.update_partition_table = native_update_partition_table;
+ ppc_md.register_process_table = native_register_proc_table;
}
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 2971ea18c768..b78b5d211278 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -34,6 +34,7 @@
#include <linux/signal.h>
#include <linux/memblock.h>
#include <linux/context_tracking.h>
+#include <linux/libfdt.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
@@ -58,6 +59,7 @@
#include <asm/firmware.h>
#include <asm/tm.h>
#include <asm/trace.h>
+#include <asm/ps3.h>
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -87,10 +89,6 @@
*
*/
-#ifdef CONFIG_U3_DART
-extern unsigned long dart_tablebase;
-#endif /* CONFIG_U3_DART */
-
static unsigned long _SDR1;
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
EXPORT_SYMBOL_GPL(mmu_psize_defs);
@@ -120,6 +118,8 @@ static u8 *linear_map_hash_slots;
static unsigned long linear_map_hash_count;
static DEFINE_SPINLOCK(linear_map_hash_lock);
#endif /* CONFIG_DEBUG_PAGEALLOC */
+struct mmu_hash_ops mmu_hash_ops;
+EXPORT_SYMBOL(mmu_hash_ops);
/* There are definitions of page sizes arrays to be used when none
* is provided by the firmware.
@@ -278,9 +278,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
hash = hpt_hash(vpn, shift, ssize);
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
- BUG_ON(!ppc_md.hpte_insert);
- ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot,
- HPTE_V_BOLTED, psize, psize, ssize);
+ BUG_ON(!mmu_hash_ops.hpte_insert);
+ ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
+ HPTE_V_BOLTED, psize, psize,
+ ssize);
if (ret < 0)
break;
@@ -305,11 +306,11 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
shift = mmu_psize_defs[psize].shift;
step = 1 << shift;
- if (!ppc_md.hpte_removebolted)
+ if (!mmu_hash_ops.hpte_removebolted)
return -ENODEV;
for (vaddr = vstart; vaddr < vend; vaddr += step) {
- rc = ppc_md.hpte_removebolted(vaddr, psize, ssize);
+ rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize);
if (rc == -ENOENT) {
ret = -ENOENT;
continue;
@@ -321,6 +322,15 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
return ret;
}
+static bool disable_1tb_segments = false;
+
+static int __init parse_disable_1tb_segments(char *p)
+{
+ disable_1tb_segments = true;
+ return 0;
+}
+early_param("disable_1tb_segments", parse_disable_1tb_segments);
+
static int __init htab_dt_scan_seg_sizes(unsigned long node,
const char *uname, int depth,
void *data)
@@ -339,6 +349,12 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,
for (; size >= 4; size -= 4, ++prop) {
if (be32_to_cpu(prop[0]) == 40) {
DBG("1T segment support detected\n");
+
+ if (disable_1tb_segments) {
+ DBG("1T segments disabled by command line\n");
+ break;
+ }
+
cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
return 1;
}
@@ -514,7 +530,8 @@ static bool might_have_hea(void)
* we will never see an HEA ethernet device.
*/
#ifdef CONFIG_IBMEBUS
- return !cpu_has_feature(CPU_FTR_ARCH_207S);
+ return !cpu_has_feature(CPU_FTR_ARCH_207S) &&
+ !firmware_has_feature(FW_FEATURE_SPLPAR);
#else
return false;
#endif
@@ -580,7 +597,7 @@ found:
* would stop us accessing the HEA ethernet. So if we
* have the chance of ever seeing one, stay at 4k.
*/
- if (!might_have_hea() || !machine_is(pseries))
+ if (!might_have_hea())
mmu_io_psize = MMU_PAGE_64K;
} else
mmu_ci_restrictions = 1;
@@ -699,10 +716,9 @@ int remove_section_mapping(unsigned long start, unsigned long end)
#endif /* CONFIG_MEMORY_HOTPLUG */
static void __init hash_init_partition_table(phys_addr_t hash_table,
- unsigned long pteg_count)
+ unsigned long htab_size)
{
unsigned long ps_field;
- unsigned long htab_size;
unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
/*
@@ -710,7 +726,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
* We can ignore that for lpid 0
*/
ps_field = 0;
- htab_size = __ilog2(pteg_count) - 11;
+ htab_size = __ilog2(htab_size) - 18;
BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
@@ -724,7 +740,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
* For now UPRT is 0 for us.
*/
partition_tb->patb1 = 0;
- DBG("Partition table %p\n", partition_tb);
+ pr_info("Partition table %p\n", partition_tb);
/*
* update partition table control register,
* 64 K size.
@@ -738,7 +754,7 @@ static void __init htab_initialize(void)
unsigned long table;
unsigned long pteg_count;
unsigned long prot;
- unsigned long base = 0, size = 0, limit;
+ unsigned long base = 0, size = 0;
struct memblock_region *reg;
DBG(" -> htab_initialize()\n");
@@ -764,7 +780,8 @@ static void __init htab_initialize(void)
htab_hash_mask = pteg_count - 1;
- if (firmware_has_feature(FW_FEATURE_LPAR)) {
+ if (firmware_has_feature(FW_FEATURE_LPAR) ||
+ firmware_has_feature(FW_FEATURE_PS3_LV1)) {
/* Using a hypervisor which owns the htab */
htab_address = NULL;
_SDR1 = 0;
@@ -775,20 +792,26 @@ static void __init htab_initialize(void)
* Clear the htab if firmware assisted dump is active so
* that we dont end up using old mappings.
*/
- if (is_fadump_active() && ppc_md.hpte_clear_all)
- ppc_md.hpte_clear_all();
+ if (is_fadump_active() && mmu_hash_ops.hpte_clear_all)
+ mmu_hash_ops.hpte_clear_all();
#endif
} else {
- /* Find storage for the HPT. Must be contiguous in
- * the absolute address space. On cell we want it to be
- * in the first 2 Gig so we can use it for IOMMU hacks.
+ unsigned long limit = MEMBLOCK_ALLOC_ANYWHERE;
+
+#ifdef CONFIG_PPC_CELL
+ /*
+ * Cell may require the hash table down low when using the
+ * Axon IOMMU in order to fit the dynamic region over it, see
+ * comments in cell/iommu.c
*/
- if (machine_is(cell))
+ if (fdt_subnode_offset(initial_boot_params, 0, "axon") > 0) {
limit = 0x80000000;
- else
- limit = MEMBLOCK_ALLOC_ANYWHERE;
+ pr_info("Hash table forced below 2G for Axon IOMMU\n");
+ }
+#endif /* CONFIG_PPC_CELL */
- table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit);
+ table = memblock_alloc_base(htab_size_bytes, htab_size_bytes,
+ limit);
DBG("Hash table allocated at %lx, size: %lx\n", table,
htab_size_bytes);
@@ -796,7 +819,7 @@ static void __init htab_initialize(void)
htab_address = __va(table);
/* htab absolute addr + encoded htabsize */
- _SDR1 = table + __ilog2(pteg_count) - 11;
+ _SDR1 = table + __ilog2(htab_size_bytes) - 18;
/* Initialize the HPT with no entries */
memset((void *)table, 0, htab_size_bytes);
@@ -805,7 +828,7 @@ static void __init htab_initialize(void)
/* Set SDR1 */
mtspr(SPRN_SDR1, _SDR1);
else
- hash_init_partition_table(table, pteg_count);
+ hash_init_partition_table(table, htab_size_bytes);
}
prot = pgprot_val(PAGE_KERNEL);
@@ -832,34 +855,6 @@ static void __init htab_initialize(void)
DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
base, size, prot);
-#ifdef CONFIG_U3_DART
- /* Do not map the DART space. Fortunately, it will be aligned
- * in such a way that it will not cross two memblock regions and
- * will fit within a single 16Mb page.
- * The DART space is assumed to be a full 16Mb region even if
- * we only use 2Mb of that space. We will use more of it later
- * for AGP GART. We have to use a full 16Mb large page.
- */
- DBG("DART base: %lx\n", dart_tablebase);
-
- if (dart_tablebase != 0 && dart_tablebase >= base
- && dart_tablebase < (base + size)) {
- unsigned long dart_table_end = dart_tablebase + 16 * MB;
- if (base != dart_tablebase)
- BUG_ON(htab_bolt_mapping(base, dart_tablebase,
- __pa(base), prot,
- mmu_linear_psize,
- mmu_kernel_ssize));
- if ((base + size) > dart_table_end)
- BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
- base + size,
- __pa(dart_table_end),
- prot,
- mmu_linear_psize,
- mmu_kernel_ssize));
- continue;
- }
-#endif /* CONFIG_U3_DART */
BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
prot, mmu_linear_psize, mmu_kernel_ssize));
}
@@ -926,12 +921,24 @@ void __init hash__early_init_mmu(void)
pci_io_base = ISA_IO_BASE;
#endif
+ /* Select appropriate backend */
+ if (firmware_has_feature(FW_FEATURE_PS3_LV1))
+ ps3_early_mm_init();
+ else if (firmware_has_feature(FW_FEATURE_LPAR))
+ hpte_init_pseries();
+ else if (IS_ENABLED(CONFIG_PPC_NATIVE))
+ hpte_init_native();
+
+ if (!mmu_hash_ops.hpte_insert)
+ panic("hash__early_init_mmu: No MMU hash ops defined!\n");
+
/* Initialize the MMU Hash table and create the linear mapping
* of memory. Has to be done before SLB initialization as this is
* currently where the page size encoding is obtained.
*/
htab_initialize();
+ pr_info("Initializing hash mmu with SLB\n");
/* Initialize SLB management */
slb_initialize();
}
@@ -1474,7 +1481,8 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
* We use same base page size and actual psize, because we don't
* use these functions for hugepage
*/
- ppc_md.hpte_invalidate(slot, vpn, psize, psize, ssize, local);
+ mmu_hash_ops.hpte_invalidate(slot, vpn, psize, psize,
+ ssize, local);
} pte_iterate_hashed_end();
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -1515,9 +1523,9 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
if (!hpte_slot_array)
return;
- if (ppc_md.hugepage_invalidate) {
- ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
- psize, ssize, local);
+ if (mmu_hash_ops.hugepage_invalidate) {
+ mmu_hash_ops.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
+ psize, ssize, local);
goto tm_abort;
}
/*
@@ -1544,8 +1552,8 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
- ppc_md.hpte_invalidate(slot, vpn, psize,
- MMU_PAGE_16M, ssize, local);
+ mmu_hash_ops.hpte_invalidate(slot, vpn, psize,
+ MMU_PAGE_16M, ssize, local);
}
tm_abort:
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -1569,8 +1577,8 @@ tm_abort:
void flush_hash_range(unsigned long number, int local)
{
- if (ppc_md.flush_hash_range)
- ppc_md.flush_hash_range(number, local);
+ if (mmu_hash_ops.flush_hash_range)
+ mmu_hash_ops.flush_hash_range(number, local);
else {
int i;
struct ppc64_tlb_batch *batch =
@@ -1615,22 +1623,22 @@ repeat:
HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, vflags,
- psize, psize, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, vflags,
+ psize, psize, ssize);
/* Primary is full, try the secondary */
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags,
- vflags | HPTE_V_SECONDARY,
- psize, psize, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags,
+ vflags | HPTE_V_SECONDARY,
+ psize, psize, ssize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP)&~0x7UL;
- ppc_md.hpte_remove(hpte_group);
+ mmu_hash_ops.hpte_remove(hpte_group);
goto repeat;
}
}
@@ -1680,8 +1688,9 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
- ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_linear_psize,
- mmu_kernel_ssize, 0);
+ mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize,
+ mmu_linear_psize,
+ mmu_kernel_ssize, 0);
}
void __kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index ba3fc229468a..f20d16f849c5 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -103,8 +103,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
- ret = ppc_md.hpte_updatepp(slot, rflags, vpn,
- psize, lpsize, ssize, flags);
+ ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn,
+ psize, lpsize, ssize, flags);
/*
* We failed to update, try to insert a new entry.
*/
@@ -131,23 +131,24 @@ repeat:
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
- psize, lpsize, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ psize, lpsize, ssize);
/*
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
- rflags, HPTE_V_SECONDARY,
- psize, lpsize, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+ rflags,
+ HPTE_V_SECONDARY,
+ psize, lpsize, ssize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- ppc_md.hpte_remove(hpte_group);
+ mmu_hash_ops.hpte_remove(hpte_group);
goto repeat;
}
}
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 3058560b6121..d5026f3800b6 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -79,8 +79,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
- if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
- mmu_psize, ssize, flags) == -1)
+ if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, mmu_psize,
+ mmu_psize, ssize, flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 119d18611500..7372ee13eb1e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -81,6 +81,13 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
if (! new)
return -ENOMEM;
+ /*
+ * Make sure other cpus find the hugepd set only after a
+ * properly initialized page table is visible to them.
+ * For more details look for comment in __pte_alloc().
+ */
+ smp_wmb();
+
spin_lock(&mm->page_table_lock);
#ifdef CONFIG_PPC_FSL_BOOK3E
/*
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index c899fe340bbd..448685fbf27c 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(memstart_addr);
phys_addr_t kernstart_addr;
EXPORT_SYMBOL(kernstart_addr);
-#ifdef CONFIG_RELOCATABLE_PPC32
+#ifdef CONFIG_RELOCATABLE
/* Used in __va()/__pa() */
long long virt_phys_offset;
EXPORT_SYMBOL(virt_phys_offset);
@@ -80,9 +80,6 @@ EXPORT_SYMBOL(agp_special_page);
void MMU_init(void);
-/* XXX should be in current.h -- paulus */
-extern struct task_struct *current_set[NR_CPUS];
-
/*
* this tells the system to map all of ram with the segregs
* (i.e. page tables) instead of the bats.
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 2fd57fa48429..5f844337de21 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -116,6 +116,16 @@ int memory_add_physaddr_to_nid(u64 start)
}
#endif
+int __weak create_section_mapping(unsigned long start, unsigned long end)
+{
+ return -ENODEV;
+}
+
+int __weak remove_section_mapping(unsigned long start, unsigned long end)
+{
+ return -ENODEV;
+}
+
int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
{
struct pglist_data *pgdata;
@@ -239,8 +249,14 @@ static int __init mark_nonram_nosave(void)
static bool zone_limits_final;
+/*
+ * The memory zones past TOP_ZONE are managed by generic mm code.
+ * These should be set to zero since that's what every other
+ * architecture does.
+ */
static unsigned long max_zone_pfns[MAX_NR_ZONES] = {
- [0 ... MAX_NR_ZONES - 1] = ~0UL
+ [0 ... TOP_ZONE ] = ~0UL,
+ [TOP_ZONE + 1 ... MAX_NR_ZONES - 1] = 0
};
/*
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 196222227e82..b114f8b93ec9 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -181,7 +181,10 @@ void destroy_context(struct mm_struct *mm)
#ifdef CONFIG_PPC_RADIX_MMU
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
- mtspr(SPRN_PID, next->context.id);
asm volatile("isync": : :"memory");
+ mtspr(SPRN_PID, next->context.id);
+ asm volatile("isync \n"
+ PPC_SLBIA(0x7)
+ : : :"memory");
}
#endif
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 6af65327c993..f988db655e5b 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -154,9 +154,10 @@ struct tlbcam {
};
#endif
-#if defined(CONFIG_6xx) || defined(CONFIG_FSL_BOOKE)
+#if defined(CONFIG_6xx) || defined(CONFIG_FSL_BOOKE) || defined(CONFIG_PPC_8xx)
/* 6xx have BATS */
/* FSL_BOOKE have TLBCAM */
+/* 8xx have LTLB */
phys_addr_t v_block_mapped(unsigned long va);
unsigned long p_block_mapped(phys_addr_t pa);
#else
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 6dc07ddbfd04..75b9cd6150cc 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1153,18 +1153,34 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
static u64 hot_add_drconf_memory_max(void)
{
- struct device_node *memory = NULL;
- unsigned int drconf_cell_cnt = 0;
- u64 lmb_size = 0;
+ struct device_node *memory = NULL;
+ struct device_node *dn = NULL;
+ unsigned int drconf_cell_cnt = 0;
+ u64 lmb_size = 0;
const __be32 *dm = NULL;
+ const __be64 *lrdr = NULL;
+ struct of_drconf_cell drmem;
+
+ dn = of_find_node_by_path("/rtas");
+ if (dn) {
+ lrdr = of_get_property(dn, "ibm,lrdr-capacity", NULL);
+ of_node_put(dn);
+ if (lrdr)
+ return be64_to_cpup(lrdr);
+ }
+
+ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (memory) {
+ drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
+ lmb_size = of_get_lmb_size(memory);
- memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
- if (memory) {
- drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
- lmb_size = of_get_lmb_size(memory);
- of_node_put(memory);
- }
- return lmb_size * drconf_cell_cnt;
+ /* Advance to the last cell, each cell has 6 32 bit integers */
+ dm += (drconf_cell_cnt - 1) * 6;
+ read_drconf_cell(&drmem, &dm);
+ of_node_put(memory);
+ return drmem.base_addr + lmb_size;
+ }
+ return 0;
}
/*
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 7931e1496f0d..003ff48a11b6 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -21,8 +21,11 @@
#include <trace/events/thp.h>
-static int native_update_partition_table(u64 patb1)
+static int native_register_process_table(unsigned long base, unsigned long pg_sz,
+ unsigned long table_size)
{
+ unsigned long patb1 = base | table_size | PATB_GR;
+
partition_tb->patb1 = cpu_to_be64(patb1);
return 0;
}
@@ -168,7 +171,7 @@ redo:
* of process table here. But our linear mapping also enable us to use
* physical address here.
*/
- ppc_md.update_partition_table(__pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR);
+ ppc_md.register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
}
@@ -182,7 +185,8 @@ static void __init radix_init_partition_table(void)
partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT);
partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) |
RADIX_PGD_INDEX_SIZE | PATB_HR);
- printk("Partition table %p\n", partition_tb);
+ pr_info("Initializing Radix MMU\n");
+ pr_info("Partition table %p\n", partition_tb);
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
/*
@@ -194,7 +198,7 @@ static void __init radix_init_partition_table(void)
void __init radix_init_native(void)
{
- ppc_md.update_partition_table = native_update_partition_table;
+ ppc_md.register_process_table = native_register_process_table;
}
static int __init get_idx_from_shift(unsigned int shift)
@@ -341,8 +345,9 @@ void __init radix__early_init_mmu(void)
radix_init_page_sizes();
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ radix_init_native();
lpcr = mfspr(SPRN_LPCR);
- mtspr(SPRN_LPCR, lpcr | LPCR_UPRT);
+ mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table();
}
@@ -357,7 +362,7 @@ void radix__early_init_mmu_secondary(void)
*/
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
lpcr = mfspr(SPRN_LPCR);
- mtspr(SPRN_LPCR, lpcr | LPCR_UPRT);
+ mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
mtspr(SPRN_PTCR,
__pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index ab2f60e812e2..e1f22700fb16 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -12,6 +12,7 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
+#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
@@ -34,8 +35,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
r = 1; /* raidx format */
asm volatile("ptesync": : :"memory");
- asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |"
- "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("ptesync": : :"memory");
}
@@ -63,8 +63,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
r = 1; /* raidx format */
asm volatile("ptesync": : :"memory");
- asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |"
- "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@@ -81,8 +80,7 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
r = 1; /* raidx format */
asm volatile("ptesync": : :"memory");
- asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |"
- "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("ptesync": : :"memory");
}
@@ -99,8 +97,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
r = 1; /* raidx format */
asm volatile("ptesync": : :"memory");
- asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |"
- "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@@ -285,9 +282,61 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
}
EXPORT_SYMBOL(radix__flush_tlb_range);
+static int radix_get_mmu_psize(int page_size)
+{
+ int psize;
+
+ if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
+ psize = mmu_virtual_psize;
+ else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
+ psize = MMU_PAGE_2M;
+ else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
+ psize = MMU_PAGE_1G;
+ else
+ return -1;
+ return psize;
+}
void radix__tlb_flush(struct mmu_gather *tlb)
{
struct mm_struct *mm = tlb->mm;
radix__flush_tlb_mm(mm);
}
+
+void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
+ unsigned long page_size)
+{
+ unsigned long rb,rs,prs,r;
+ unsigned long ap;
+ unsigned long ric = RIC_FLUSH_TLB;
+
+ ap = mmu_get_ap(radix_get_mmu_psize(page_size));
+ rb = gpa & ~(PPC_BITMASK(52, 63));
+ rb |= ap << PPC_BITLSHIFT(58);
+ rs = lpid & ((1UL << 32) - 1);
+ prs = 0; /* process scoped */
+ r = 1; /* raidx format */
+
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+EXPORT_SYMBOL(radix__flush_tlb_lpid_va);
+
+void radix__flush_tlb_lpid(unsigned long lpid)
+{
+ unsigned long rb,rs,prs,r;
+ unsigned long ric = RIC_FLUSH_ALL;
+
+ rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */
+ rs = lpid & ((1UL << 32) - 1);
+ prs = 0; /* partition scoped */
+ r = 1; /* raidx format */
+
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+EXPORT_SYMBOL(radix__flush_tlb_lpid);