summaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig8
-rw-r--r--arch/powerpc/configs/ppc64_defconfig1
-rw-r--r--arch/powerpc/configs/pseries_defconfig1
-rw-r--r--arch/powerpc/include/asm/eeh.h13
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h2
-rw-r--r--arch/powerpc/include/asm/machdep.h9
-rw-r--r--arch/powerpc/include/asm/mmu-hash64.h169
-rw-r--r--arch/powerpc/include/asm/mmu.h9
-rw-r--r--arch/powerpc/include/asm/paca.h2
-rw-r--r--arch/powerpc/include/asm/page_64.h10
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h9
-rw-r--r--arch/powerpc/include/asm/pgtable-ppc64-4k.h4
-rw-r--r--arch/powerpc/include/asm/pgtable-ppc64-64k.h2
-rw-r--r--arch/powerpc/include/asm/pgtable-ppc64.h19
-rw-r--r--arch/powerpc/include/asm/pgtable.h10
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h3
-rw-r--r--arch/powerpc/include/asm/processor.h4
-rw-r--r--arch/powerpc/include/asm/pte-hash64-64k.h18
-rw-r--r--arch/powerpc/include/asm/reg.h1
-rw-r--r--arch/powerpc/include/asm/sparsemem.h4
-rw-r--r--arch/powerpc/include/asm/thread_info.h3
-rw-r--r--arch/powerpc/include/asm/tlbflush.h7
-rw-r--r--arch/powerpc/include/asm/uaccess.h11
-rw-r--r--arch/powerpc/kernel/entry_32.S47
-rw-r--r--arch/powerpc/kernel/entry_64.S35
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S127
-rw-r--r--arch/powerpc/kernel/irq.c8
-rw-r--r--arch/powerpc/kernel/pci-common.c20
-rw-r--r--arch/powerpc/kernel/time.c8
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu_host.c8
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_host.c17
-rw-r--r--arch/powerpc/kvm/trace.h14
-rw-r--r--arch/powerpc/lib/sstep.c36
-rw-r--r--arch/powerpc/mm/hash_low_64.S97
-rw-r--r--arch/powerpc/mm/hash_native_64.c164
-rw-r--r--arch/powerpc/mm/hash_utils_64.c45
-rw-r--r--arch/powerpc/mm/hugetlbpage-hash64.c15
-rw-r--r--arch/powerpc/mm/mmu_context_hash64.c10
-rw-r--r--arch/powerpc/mm/pgtable_64.c12
-rw-r--r--arch/powerpc/mm/slb_low.S62
-rw-r--r--arch/powerpc/mm/slice.c112
-rw-r--r--arch/powerpc/mm/tlb_hash64.c11
-rw-r--r--arch/powerpc/platforms/cell/beat_htab.c45
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c733
-rw-r--r--arch/powerpc/platforms/powernv/pci.h21
-rw-r--r--arch/powerpc/platforms/ps3/htab.c22
-rw-r--r--arch/powerpc/platforms/pseries/eeh.c14
-rw-r--r--arch/powerpc/platforms/pseries/eeh_driver.c99
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pe.c102
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c76
-rw-r--r--arch/powerpc/platforms/pseries/pci_dlpar.c32
-rw-r--r--arch/powerpc/xmon/xmon.c107
52 files changed, 1418 insertions, 1000 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b8bab10bd0f1..4ce0be32d153 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -562,6 +562,14 @@ config SCHED_SMT
when dealing with POWER5 cpus at a cost of slightly increased
overhead in some places. If unsure say N here.
+config PPC_DENORMALISATION
+ bool "PowerPC denormalisation exception handling"
+ depends on PPC_BOOK3S_64
+ default "n"
+ ---help---
+ Add support for handling denormalisation of single precision
+ values. Useful for bare metal only. If unsure say Y here.
+
config CMDLINE_BOOL
bool "Default bootloader kernel arguments"
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index db27c82e0542..e263e6a5aca1 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -51,6 +51,7 @@ CONFIG_KEXEC=y
CONFIG_IRQ_ALL_CPUS=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_SCHED_SMT=y
+CONFIG_PPC_DENORMALISATION=y
CONFIG_PCCARD=y
CONFIG_ELECTRA_CF=y
CONFIG_HOTPLUG_PCI=m
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 1f65b3c9b59a..c169dfb3e42d 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -48,6 +48,7 @@ CONFIG_MEMORY_HOTREMOVE=y
CONFIG_PPC_64K_PAGES=y
CONFIG_PPC_SUBPAGE_PROT=y
CONFIG_SCHED_SMT=y
+CONFIG_PPC_DENORMALISATION=y
CONFIG_HOTPLUG_PCI=m
CONFIG_HOTPLUG_PCI_RPA=m
CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 58c5ee61e700..b0ef73882b38 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -45,9 +45,10 @@ struct device_node;
* in the corresponding PHB. Therefore, the root PEs should be created
* against existing PHBs in on-to-one fashion.
*/
-#define EEH_PE_PHB 1 /* PHB PE */
-#define EEH_PE_DEVICE 2 /* Device PE */
-#define EEH_PE_BUS 3 /* Bus PE */
+#define EEH_PE_INVALID (1 << 0) /* Invalid */
+#define EEH_PE_PHB (1 << 1) /* PHB PE */
+#define EEH_PE_DEVICE (1 << 2) /* Device PE */
+#define EEH_PE_BUS (1 << 3) /* Bus PE */
#define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */
#define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */
@@ -184,7 +185,7 @@ static inline void eeh_unlock(void)
typedef void *(*eeh_traverse_func)(void *data, void *flag);
int __devinit eeh_phb_pe_create(struct pci_controller *phb);
int eeh_add_to_parent_pe(struct eeh_dev *edev);
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
+int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe);
void *eeh_pe_dev_traverse(struct eeh_pe *root,
eeh_traverse_func fn, void *flag);
void eeh_pe_restore_bars(struct eeh_pe *pe);
@@ -200,7 +201,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev);
void __init eeh_addr_cache_build(void);
void eeh_add_device_tree_early(struct device_node *);
void eeh_add_device_tree_late(struct pci_bus *);
-void eeh_remove_bus_device(struct pci_dev *);
+void eeh_remove_bus_device(struct pci_dev *, int);
/**
* EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
@@ -239,7 +240,7 @@ static inline void eeh_add_device_tree_early(struct device_node *dn) { }
static inline void eeh_add_device_tree_late(struct pci_bus *bus) { }
-static inline void eeh_remove_bus_device(struct pci_dev *dev) { }
+static inline void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { }
static inline void eeh_lock(void) { }
static inline void eeh_unlock(void) { }
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index f0e0c6a66d97..7aefdb3e1ce4 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -59,7 +59,7 @@ struct hpte_cache {
struct hlist_node list_vpte;
struct hlist_node list_vpte_long;
struct rcu_head rcu_head;
- u64 host_va;
+ u64 host_vpn;
u64 pfn;
ulong slot;
struct kvmppc_pte pte;
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 236b4779ec4f..c4231973edd3 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -34,19 +34,19 @@ struct machdep_calls {
char *name;
#ifdef CONFIG_PPC64
void (*hpte_invalidate)(unsigned long slot,
- unsigned long va,
+ unsigned long vpn,
int psize, int ssize,
int local);
long (*hpte_updatepp)(unsigned long slot,
unsigned long newpp,
- unsigned long va,
+ unsigned long vpn,
int psize, int ssize,
int local);
void (*hpte_updateboltedpp)(unsigned long newpp,
unsigned long ea,
int psize, int ssize);
long (*hpte_insert)(unsigned long hpte_group,
- unsigned long va,
+ unsigned long vpn,
unsigned long prpn,
unsigned long rflags,
unsigned long vflags,
@@ -215,6 +215,9 @@ struct machdep_calls {
/* Called after scan and before resource survey */
void (*pcibios_fixup_phb)(struct pci_controller *hose);
+ /* Called during PCI resource reassignment */
+ resource_size_t (*pcibios_window_alignment)(struct pci_bus *, unsigned long type);
+
/* Called to shutdown machine specific hardware not already controlled
* by other drivers.
*/
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 1c65a59881ea..9673f73eb8db 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -16,6 +16,13 @@
#include <asm/page.h>
/*
+ * This is necessary to get the definition of PGTABLE_RANGE which we
+ * need for various slices related matters. Note that this isn't the
+ * complete pgtable.h but only a portion of it.
+ */
+#include <asm/pgtable-ppc64.h>
+
+/*
* Segment table
*/
@@ -154,9 +161,25 @@ struct mmu_psize_def
#define MMU_SEGSIZE_256M 0
#define MMU_SEGSIZE_1T 1
+/*
+ * encode page number shift.
+ * in order to fit the 78 bit va in a 64 bit variable we shift the va by
+ * 12 bits. This enable us to address upto 76 bit va.
+ * For hpt hash from a va we can ignore the page size bits of va and for
+ * hpte encoding we ignore up to 23 bits of va. So ignoring lower 12 bits ensure
+ * we work in all cases including 4k page size.
+ */
+#define VPN_SHIFT 12
#ifndef __ASSEMBLY__
+static inline int segment_shift(int ssize)
+{
+ if (ssize == MMU_SEGSIZE_256M)
+ return SID_SHIFT;
+ return SID_SHIFT_1T;
+}
+
/*
* The current system page and segment sizes
*/
@@ -180,18 +203,39 @@ extern unsigned long tce_alloc_start, tce_alloc_end;
extern int mmu_ci_restrictions;
/*
+ * This computes the AVPN and B fields of the first dword of a HPTE,
+ * for use when we want to match an existing PTE. The bottom 7 bits
+ * of the returned value are zero.
+ */
+static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
+ int ssize)
+{
+ unsigned long v;
+ /*
+ * The AVA field omits the low-order 23 bits of the 78 bits VA.
+ * These bits are not needed in the PTE, because the
+ * low-order b of these bits are part of the byte offset
+ * into the virtual page and, if b < 23, the high-order
+ * 23-b of these bits are always used in selecting the
+ * PTEGs to be searched
+ */
+ v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
+ v <<= HPTE_V_AVPN_SHIFT;
+ v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
+ return v;
+}
+
+/*
* This function sets the AVPN and L fields of the HPTE appropriately
* for the page size
*/
-static inline unsigned long hpte_encode_v(unsigned long va, int psize,
- int ssize)
+static inline unsigned long hpte_encode_v(unsigned long vpn,
+ int psize, int ssize)
{
unsigned long v;
- v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
- v <<= HPTE_V_AVPN_SHIFT;
+ v = hpte_encode_avpn(vpn, psize, ssize);
if (psize != MMU_PAGE_4K)
v |= HPTE_V_LARGE;
- v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
return v;
}
@@ -216,30 +260,37 @@ static inline unsigned long hpte_encode_r(unsigned long pa, int psize)
}
/*
- * Build a VA given VSID, EA and segment size
+ * Build a VPN_SHIFT bit shifted va given VSID, EA and segment size.
*/
-static inline unsigned long hpt_va(unsigned long ea, unsigned long vsid,
- int ssize)
+static inline unsigned long hpt_vpn(unsigned long ea,
+ unsigned long vsid, int ssize)
{
- if (ssize == MMU_SEGSIZE_256M)
- return (vsid << 28) | (ea & 0xfffffffUL);
- return (vsid << 40) | (ea & 0xffffffffffUL);
+ unsigned long mask;
+ int s_shift = segment_shift(ssize);
+
+ mask = (1ul << (s_shift - VPN_SHIFT)) - 1;
+ return (vsid << (s_shift - VPN_SHIFT)) | ((ea >> VPN_SHIFT) & mask);
}
/*
* This hashes a virtual address
*/
-
-static inline unsigned long hpt_hash(unsigned long va, unsigned int shift,
- int ssize)
+static inline unsigned long hpt_hash(unsigned long vpn,
+ unsigned int shift, int ssize)
{
+ int mask;
unsigned long hash, vsid;
+ /* VPN_SHIFT can be atmost 12 */
if (ssize == MMU_SEGSIZE_256M) {
- hash = (va >> 28) ^ ((va & 0x0fffffffUL) >> shift);
+ mask = (1ul << (SID_SHIFT - VPN_SHIFT)) - 1;
+ hash = (vpn >> (SID_SHIFT - VPN_SHIFT)) ^
+ ((vpn & mask) >> (shift - VPN_SHIFT));
} else {
- vsid = va >> 40;
- hash = vsid ^ (vsid << 25) ^ ((va & 0xffffffffffUL) >> shift);
+ mask = (1ul << (SID_SHIFT_1T - VPN_SHIFT)) - 1;
+ vsid = vpn >> (SID_SHIFT_1T - VPN_SHIFT);
+ hash = vsid ^ (vsid << 25) ^
+ ((vpn & mask) >> (shift - VPN_SHIFT)) ;
}
return hash & 0x7fffffffffUL;
}
@@ -280,63 +331,61 @@ extern void slb_set_size(u16 size);
#endif /* __ASSEMBLY__ */
/*
- * VSID allocation
+ * VSID allocation (256MB segment)
+ *
+ * We first generate a 38-bit "proto-VSID". For kernel addresses this
+ * is equal to the ESID | 1 << 37, for user addresses it is:
+ * (context << USER_ESID_BITS) | (esid & ((1U << USER_ESID_BITS) - 1)
*
- * We first generate a 36-bit "proto-VSID". For kernel addresses this
- * is equal to the ESID, for user addresses it is:
- * (context << 15) | (esid & 0x7fff)
+ * This splits the proto-VSID into the below range
+ * 0 - (2^(CONTEXT_BITS + USER_ESID_BITS) - 1) : User proto-VSID range
+ * 2^(CONTEXT_BITS + USER_ESID_BITS) - 2^(VSID_BITS) : Kernel proto-VSID range
*
- * The two forms are distinguishable because the top bit is 0 for user
- * addresses, whereas the top two bits are 1 for kernel addresses.
- * Proto-VSIDs with the top two bits equal to 0b10 are reserved for
- * now.
+ * We also have CONTEXT_BITS + USER_ESID_BITS = VSID_BITS - 1
+ * That is, we assign half of the space to user processes and half
+ * to the kernel.
*
* The proto-VSIDs are then scrambled into real VSIDs with the
* multiplicative hash:
*
* VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS
- * where VSID_MULTIPLIER = 268435399 = 0xFFFFFC7
- * VSID_MODULUS = 2^36-1 = 0xFFFFFFFFF
*
- * This scramble is only well defined for proto-VSIDs below
- * 0xFFFFFFFFF, so both proto-VSID and actual VSID 0xFFFFFFFFF are
- * reserved. VSID_MULTIPLIER is prime, so in particular it is
+ * VSID_MULTIPLIER is prime, so in particular it is
* co-prime to VSID_MODULUS, making this a 1:1 scrambling function.
* Because the modulus is 2^n-1 we can compute it efficiently without
* a divide or extra multiply (see below).
*
* This scheme has several advantages over older methods:
*
- * - We have VSIDs allocated for every kernel address
+ * - We have VSIDs allocated for every kernel address
* (i.e. everything above 0xC000000000000000), except the very top
* segment, which simplifies several things.
*
- * - We allow for 16 significant bits of ESID and 19 bits of
- * context for user addresses. i.e. 16T (44 bits) of address space for
- * up to half a million contexts.
+ * - We allow for USER_ESID_BITS significant bits of ESID and
+ * CONTEXT_BITS bits of context for user addresses.
+ * i.e. 64T (46 bits) of address space for up to half a million contexts.
*
- * - The scramble function gives robust scattering in the hash
+ * - The scramble function gives robust scattering in the hash
* table (at least based on some initial results). The previous
* method was more susceptible to pathological cases giving excessive
* hash collisions.
*/
+
/*
- * WARNING - If you change these you must make sure the asm
- * implementations in slb_allocate (slb_low.S), do_stab_bolted
- * (head.S) and ASM_VSID_SCRAMBLE (below) are changed accordingly.
+ * This should be computed such that protovosid * vsid_mulitplier
+ * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus
*/
-
-#define VSID_MULTIPLIER_256M ASM_CONST(200730139) /* 28-bit prime */
-#define VSID_BITS_256M 36
+#define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */
+#define VSID_BITS_256M 38
#define VSID_MODULUS_256M ((1UL<<VSID_BITS_256M)-1)
#define VSID_MULTIPLIER_1T ASM_CONST(12538073) /* 24-bit prime */
-#define VSID_BITS_1T 24
+#define VSID_BITS_1T 26
#define VSID_MODULUS_1T ((1UL<<VSID_BITS_1T)-1)
#define CONTEXT_BITS 19
-#define USER_ESID_BITS 16
-#define USER_ESID_BITS_1T 4
+#define USER_ESID_BITS 18
+#define USER_ESID_BITS_1T 6
#define USER_VSID_RANGE (1UL << (USER_ESID_BITS + SID_SHIFT))
@@ -372,6 +421,8 @@ extern void slb_set_size(u16 size);
srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \
add rt,rt,rx
+/* 4 bits per slice and we have one slice per 1TB */
+#define SLICE_ARRAY_SIZE (PGTABLE_RANGE >> 41)
#ifndef __ASSEMBLY__
@@ -416,7 +467,7 @@ typedef struct {
#ifdef CONFIG_PPC_MM_SLICES
u64 low_slices_psize; /* SLB page size encodings */
- u64 high_slices_psize; /* 4 bits per slice for now */
+ unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
#else
u16 sllp; /* SLB page size encoding */
#endif
@@ -452,12 +503,32 @@ typedef struct {
})
#endif /* 1 */
-/* This is only valid for addresses >= PAGE_OFFSET */
+/*
+ * This is only valid for addresses >= PAGE_OFFSET
+ * The proto-VSID space is divided into two class
+ * User: 0 to 2^(CONTEXT_BITS + USER_ESID_BITS) -1
+ * kernel: 2^(CONTEXT_BITS + USER_ESID_BITS) to 2^(VSID_BITS) - 1
+ *
+ * With KERNEL_START at 0xc000000000000000, the proto vsid for
+ * the kernel ends up with 0xc00000000 (36 bits). With 64TB
+ * support we need to have kernel proto-VSID in the
+ * [2^37 to 2^38 - 1] range due to the increased USER_ESID_BITS.
+ */
static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
{
- if (ssize == MMU_SEGSIZE_256M)
- return vsid_scramble(ea >> SID_SHIFT, 256M);
- return vsid_scramble(ea >> SID_SHIFT_1T, 1T);
+ unsigned long proto_vsid;
+ /*
+ * We need to make sure proto_vsid for the kernel is
+ * >= 2^(CONTEXT_BITS + USER_ESID_BITS[_1T])
+ */
+ if (ssize == MMU_SEGSIZE_256M) {
+ proto_vsid = ea >> SID_SHIFT;
+ proto_vsid |= (1UL << (CONTEXT_BITS + USER_ESID_BITS));
+ return vsid_scramble(proto_vsid, 256M);
+ }
+ proto_vsid = ea >> SID_SHIFT_1T;
+ proto_vsid |= (1UL << (CONTEXT_BITS + USER_ESID_BITS_1T));
+ return vsid_scramble(proto_vsid, 1T);
}
/* Returns the segment size indicator for a user address */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index e8a26db2e8f3..5e38eedea218 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -146,6 +146,15 @@ extern void setup_initial_memory_limit(phys_addr_t first_memblock_base,
extern u64 ppc64_rma_size;
#endif /* CONFIG_PPC64 */
+struct mm_struct;
+#ifdef CONFIG_DEBUG_VM
+extern void assert_pte_locked(struct mm_struct *mm, unsigned long addr);
+#else /* CONFIG_DEBUG_VM */
+static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
+{
+}
+#endif /* !CONFIG_DEBUG_VM */
+
#endif /* !__ASSEMBLY__ */
/* The kernel use the constants below to index in the page sizes array.
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 7796519fd238..e9e7a6999bb8 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -100,7 +100,7 @@ struct paca_struct {
/* SLB related definitions */
u16 vmalloc_sllp;
u16 slb_cache_ptr;
- u16 slb_cache[SLB_CACHE_ENTRIES];
+ u32 slb_cache[SLB_CACHE_ENTRIES];
#endif /* CONFIG_PPC_STD_MMU_64 */
#ifdef CONFIG_PPC_BOOK3E
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index fed85e6290e1..cd915d6b093d 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -78,11 +78,19 @@ extern u64 ppc64_pft_size;
#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
+/*
+ * 1 bit per slice and we have one slice per 1TB
+ * Right now we support only 64TB.
+ * IF we change this we will have to change the type
+ * of high_slices
+ */
+#define SLICE_MASK_SIZE 8
+
#ifndef __ASSEMBLY__
struct slice_mask {
u16 low_slices;
- u16 high_slices;
+ u64 high_slices;
};
struct mm_struct;
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 973df4d9d366..025a130729bc 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -182,6 +182,14 @@ static inline int pci_device_from_OF_node(struct device_node *np,
#if defined(CONFIG_EEH)
static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn)
{
+ /*
+ * For those OF nodes whose parent isn't PCI bridge, they
+ * don't have PCI_DN actually. So we have to skip them for
+ * any EEH operations.
+ */
+ if (!dn || !PCI_DN(dn))
+ return NULL;
+
return PCI_DN(dn)->edev;
}
#else
@@ -192,6 +200,7 @@ static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn)
extern struct pci_bus *pcibios_find_pci_bus(struct device_node *dn);
/** Remove all of the PCI devices under this bus */
+extern void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe);
extern void pcibios_remove_pci_devices(struct pci_bus *bus);
/** Discover new pci devices under this bus, and add them */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h
index 6eefdcffa359..12798c9d4b4b 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64-4k.h
@@ -7,7 +7,7 @@
*/
#define PTE_INDEX_SIZE 9
#define PMD_INDEX_SIZE 7
-#define PUD_INDEX_SIZE 7
+#define PUD_INDEX_SIZE 9
#define PGD_INDEX_SIZE 9
#ifndef __ASSEMBLY__
@@ -19,7 +19,7 @@
#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
-#define PTRS_PER_PUD (1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE)
#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
/* PMD_SHIFT determines what a second-level page table entry can map */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
index 90533ddcd703..be4e2878fbc0 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
@@ -7,7 +7,7 @@
#define PTE_INDEX_SIZE 12
#define PMD_INDEX_SIZE 12
#define PUD_INDEX_SIZE 0
-#define PGD_INDEX_SIZE 4
+#define PGD_INDEX_SIZE 6
#ifndef __ASSEMBLY__
#define PTE_TABLE_SIZE (sizeof(real_pte_t) << PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index c4205616dfb5..0182c203e411 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -21,17 +21,6 @@
#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
-/* Some sanity checking */
-#if TASK_SIZE_USER64 > PGTABLE_RANGE
-#error TASK_SIZE_USER64 exceeds pagetable range
-#endif
-
-#ifdef CONFIG_PPC_STD_MMU_64
-#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT))
-#error TASK_SIZE_USER64 exceeds user VSID range
-#endif
-#endif
-
/*
* Define the address range of the kernel non-linear virtual area
*/
@@ -41,7 +30,7 @@
#else
#define KERN_VIRT_START ASM_CONST(0xD000000000000000)
#endif
-#define KERN_VIRT_SIZE PGTABLE_RANGE
+#define KERN_VIRT_SIZE ASM_CONST(0x0000100000000000)
/*
* The vmalloc space starts at the beginning of that region, and
@@ -117,9 +106,6 @@
#ifndef __ASSEMBLY__
-#include <linux/stddef.h>
-#include <asm/tlbflush.h>
-
/*
* This is the default implementation of various PTE accessors, it's
* used in all cases except Book3S with 64K pages where we have a
@@ -198,7 +184,8 @@
/* to find an entry in a kernel page-table-directory */
/* This now only contains the vmalloc pages */
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-
+extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long pte, int huge);
/* Atomic PTE updates */
static inline unsigned long pte_update(struct mm_struct *mm,
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 2e0e4110f7ae..a9cbd3ba5c33 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -9,14 +9,6 @@
struct mm_struct;
-#ifdef CONFIG_DEBUG_VM
-extern void assert_pte_locked(struct mm_struct *mm, unsigned long addr);
-#else /* CONFIG_DEBUG_VM */
-static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
-{
-}
-#endif /* !CONFIG_DEBUG_VM */
-
#endif /* !__ASSEMBLY__ */
#if defined(CONFIG_PPC64)
@@ -27,6 +19,8 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
#ifndef __ASSEMBLY__
+#include <asm/tlbflush.h>
+
/* Generic accessors to PTE bits */
static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 4c25319f2fbc..5f73ce63fcae 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -126,6 +126,7 @@
#define PPC_INST_TLBIVAX 0x7c000624
#define PPC_INST_TLBSRX_DOT 0x7c0006a5
#define PPC_INST_XXLOR 0xf0000510
+#define PPC_INST_XVCPSGNDP 0xf0000780
#define PPC_INST_NAP 0x4c000364
#define PPC_INST_SLEEP 0x4c0003a4
@@ -277,6 +278,8 @@
VSX_XX1((s), a, b))
#define XXLOR(t, a, b) stringify_in_c(.long PPC_INST_XXLOR | \
VSX_XX3((t), a, b))
+#define XVCPSGNDP(t, a, b) stringify_in_c(.long (PPC_INST_XVCPSGNDP | \
+ VSX_XX3((t), (a), (b))))
#define PPC_NAP stringify_in_c(.long PPC_INST_NAP)
#define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 83efc6e81543..9dc5cd1fde1a 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -97,8 +97,8 @@ extern struct task_struct *last_task_used_spe;
#endif
#ifdef CONFIG_PPC64
-/* 64-bit user address space is 44-bits (16TB user VM) */
-#define TASK_SIZE_USER64 (0x0000100000000000UL)
+/* 64-bit user address space is 46-bits (64TB user VM) */
+#define TASK_SIZE_USER64 (0x0000400000000000UL)
/*
* 32-bit user address space is 4GB - 1 page
diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
index 59247e816ac5..eedf427c9124 100644
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ b/arch/powerpc/include/asm/pte-hash64-64k.h
@@ -58,14 +58,16 @@
/* Trick: we set __end to va + 64k, which happens works for
* a 16M page as well as we want only one iteration
*/
-#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \
- do { \
- unsigned long __end = va + PAGE_SIZE; \
- unsigned __split = (psize == MMU_PAGE_4K || \
- psize == MMU_PAGE_64K_AP); \
- shift = mmu_psize_defs[psize].shift; \
- for (index = 0; va < __end; index++, va += (1L << shift)) { \
- if (!__split || __rpte_sub_valid(rpte, index)) do { \
+#define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift) \
+ do { \
+ unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT)); \
+ unsigned __split = (psize == MMU_PAGE_4K || \
+ psize == MMU_PAGE_64K_AP); \
+ shift = mmu_psize_defs[psize].shift; \
+ for (index = 0; vpn < __end; index++, \
+ vpn += (1L << (shift - VPN_SHIFT))) { \
+ if (!__split || __rpte_sub_valid(rpte, index)) \
+ do {
#define pte_iterate_hashed_end() } while(0); } } while(0)
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 121a90bbf778..a1096fb62816 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -524,6 +524,7 @@
#define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */
#define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */
+#define HSRR1_DENORM 0x00100000 /* Denorm exception */
#define SPRN_TBCTL 0x35f /* PA6T Timebase control register */
#define TBCTL_FREEZE 0x0000000000000000ull /* Freeze all tbs */
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index 0c5fa3145615..f6fc0ee813d7 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -10,8 +10,8 @@
*/
#define SECTION_SIZE_BITS 24
-#define MAX_PHYSADDR_BITS 44
-#define MAX_PHYSMEM_BITS 44
+#define MAX_PHYSADDR_BITS 46
+#define MAX_PHYSMEM_BITS 46
#endif /* CONFIG_SPARSEMEM */
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index e942203cd4a8..8ceea14d6fe4 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -104,6 +104,8 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_NOTIFY_RESUME 13 /* callback before returning to user */
#define TIF_UPROBE 14 /* breakpointed or single-stepping */
#define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */
+#define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation
+ for stack store? */
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -121,6 +123,7 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_UPROBE (1<<TIF_UPROBE)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
+#define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
#define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index 81143fcbd113..61a59271665b 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -95,7 +95,7 @@ struct ppc64_tlb_batch {
unsigned long index;
struct mm_struct *mm;
real_pte_t pte[PPC64_TLB_BATCH_NR];
- unsigned long vaddr[PPC64_TLB_BATCH_NR];
+ unsigned long vpn[PPC64_TLB_BATCH_NR];
unsigned int psize;
int ssize;
};
@@ -103,9 +103,6 @@ DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);
-extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, unsigned long pte, int huge);
-
#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void)
@@ -127,7 +124,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
#define arch_flush_lazy_mmu_mode() do {} while (0)
-extern void flush_hash_page(unsigned long va, real_pte_t pte, int psize,
+extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize,
int ssize, int local);
extern void flush_hash_range(unsigned long number, int local);
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 17bb40cad5bf..4db49590acf5 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -98,11 +98,6 @@ struct exception_table_entry {
* PowerPC, we can just do these as direct assignments. (Of course, the
* exception handling means that it's no longer "just"...)
*
- * The "user64" versions of the user access functions are versions that
- * allow access of 64-bit data. The "get_user" functions do not
- * properly handle 64-bit data because the value gets down cast to a long.
- * The "put_user" functions already handle 64-bit data properly but we add
- * "user64" versions for completeness
*/
#define get_user(x, ptr) \
__get_user_check((x), (ptr), sizeof(*(ptr)))
@@ -114,12 +109,6 @@ struct exception_table_entry {
#define __put_user(x, ptr) \
__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-#ifndef __powerpc64__
-#define __get_user64(x, ptr) \
- __get_user64_nocheck((x), (ptr), sizeof(*(ptr)))
-#define __put_user64(x, ptr) __put_user(x, ptr)
-#endif
-
#define __get_user_inatomic(x, ptr) \
__get_user_nosleep((x), (ptr), sizeof(*(ptr)))
#define __put_user_inatomic(x, ptr) \
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index ead5016b02d0..af37528da49f 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -831,19 +831,56 @@ restore_user:
bnel- load_dbcr0
#endif
-#ifdef CONFIG_PREEMPT
b restore
/* N.B. the only way to get here is from the beq following ret_from_except. */
resume_kernel:
- /* check current_thread_info->preempt_count */
+ /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
CURRENT_THREAD_INFO(r9, r1)
+ lwz r8,TI_FLAGS(r9)
+ andis. r8,r8,_TIF_EMULATE_STACK_STORE@h
+ beq+ 1f
+
+ addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
+
+ lwz r3,GPR1(r1)
+ subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
+ mr r4,r1 /* src: current exception frame */
+ mr r1,r3 /* Reroute the trampoline frame to r1 */
+
+ /* Copy from the original to the trampoline. */
+ li r5,INT_FRAME_SIZE/4 /* size: INT_FRAME_SIZE */
+ li r6,0 /* start offset: 0 */
+ mtctr r5
+2: lwzx r0,r6,r4
+ stwx r0,r6,r3
+ addi r6,r6,4
+ bdnz 2b
+
+ /* Do real store operation to complete stwu */
+ lwz r5,GPR1(r1)
+ stw r8,0(r5)
+
+ /* Clear _TIF_EMULATE_STACK_STORE flag */
+ lis r11,_TIF_EMULATE_STACK_STORE@h
+ addi r5,r9,TI_FLAGS
+0: lwarx r8,0,r5
+ andc r8,r8,r11
+#ifdef CONFIG_IBM405_ERR77
+ dcbt 0,r5
+#endif
+ stwcx. r8,0,r5
+ bne- 0b
+1:
+
+#ifdef CONFIG_PREEMPT
+ /* check current_thread_info->preempt_count */
lwz r0,TI_PREEMPT(r9)
cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
bne restore
- lwz r0,TI_FLAGS(r9)
- andi. r0,r0,_TIF_NEED_RESCHED
+ andi. r8,r8,_TIF_NEED_RESCHED
beq+ restore
+ lwz r3,_MSR(r1)
andi. r0,r3,MSR_EE /* interrupts off? */
beq restore /* don't schedule if so */
#ifdef CONFIG_TRACE_IRQFLAGS
@@ -864,8 +901,6 @@ resume_kernel:
*/
bl trace_hardirqs_on
#endif
-#else
-resume_kernel:
#endif /* CONFIG_PREEMPT */
/* interrupts are hard-disabled at this point */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index b40e0b4815b3..0e931aaffca2 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -593,6 +593,41 @@ _GLOBAL(ret_from_except_lite)
b .ret_from_except
resume_kernel:
+ /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
+ CURRENT_THREAD_INFO(r9, r1)
+ ld r8,TI_FLAGS(r9)
+ andis. r8,r8,_TIF_EMULATE_STACK_STORE@h
+ beq+ 1f
+
+ addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
+
+ lwz r3,GPR1(r1)
+ subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
+ mr r4,r1 /* src: current exception frame */
+ mr r1,r3 /* Reroute the trampoline frame to r1 */
+
+ /* Copy from the original to the trampoline. */
+ li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */
+ li r6,0 /* start offset: 0 */
+ mtctr r5
+2: ldx r0,r6,r4
+ stdx r0,r6,r3
+ addi r6,r6,8
+ bdnz 2b
+
+ /* Do real store operation to complete stwu */
+ lwz r5,GPR1(r1)
+ std r8,0(r5)
+
+ /* Clear _TIF_EMULATE_STACK_STORE flag */
+ lis r11,_TIF_EMULATE_STACK_STORE@h
+ addi r5,r9,TI_FLAGS
+ ldarx r4,0,r5
+ andc r4,r4,r11
+ stdcx. r4,0,r5
+ bne- 0b
+1:
+
#ifdef CONFIG_PREEMPT
/* Check if we need to preempt */
andi. r0,r4,_TIF_NEED_RESCHED
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 39aa97d3ff88..10b658ad65e1 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -275,6 +275,31 @@ vsx_unavailable_pSeries_1:
STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint)
KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x1300)
+ . = 0x1500
+ .global denorm_Hypervisor
+denorm_exception_hv:
+ HMT_MEDIUM
+ mtspr SPRN_SPRG_HSCRATCH0,r13
+ mfspr r13,SPRN_SPRG_HPACA
+ std r9,PACA_EXGEN+EX_R9(r13)
+ std r10,PACA_EXGEN+EX_R10(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfspr r9,SPRN_SPRG_HSCRATCH0
+ std r9,PACA_EXGEN+EX_R13(r13)
+ mfcr r9
+
+#ifdef CONFIG_PPC_DENORMALISATION
+ mfspr r10,SPRN_HSRR1
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+ addi r11,r11,-4 /* HSRR0 is next instruction */
+ bne+ denorm_assist
+#endif
+
+ EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV)
+ KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1500)
+
#ifdef CONFIG_CBE_RAS
STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance)
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602)
@@ -336,6 +361,103 @@ do_stab_bolted_pSeries:
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900)
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982)
+#ifdef CONFIG_PPC_DENORMALISATION
+denorm_assist:
+BEGIN_FTR_SECTION
+/*
+ * To denormalise we need to move a copy of the register to itself.
+ * For POWER6 do that here for all FP regs.
+ */
+ mfmsr r10
+ ori r10,r10,(MSR_FP|MSR_FE0|MSR_FE1)
+ xori r10,r10,(MSR_FE0|MSR_FE1)
+ mtmsrd r10
+ sync
+ fmr 0,0
+ fmr 1,1
+ fmr 2,2
+ fmr 3,3
+ fmr 4,4
+ fmr 5,5
+ fmr 6,6
+ fmr 7,7
+ fmr 8,8
+ fmr 9,9
+ fmr 10,10
+ fmr 11,11
+ fmr 12,12
+ fmr 13,13
+ fmr 14,14
+ fmr 15,15
+ fmr 16,16
+ fmr 17,17
+ fmr 18,18
+ fmr 19,19
+ fmr 20,20
+ fmr 21,21
+ fmr 22,22
+ fmr 23,23
+ fmr 24,24
+ fmr 25,25
+ fmr 26,26
+ fmr 27,27
+ fmr 28,28
+ fmr 29,29
+ fmr 30,30
+ fmr 31,31
+FTR_SECTION_ELSE
+/*
+ * To denormalise we need to move a copy of the register to itself.
+ * For POWER7 do that here for the first 32 VSX registers only.
+ */
+ mfmsr r10
+ oris r10,r10,MSR_VSX@h
+ mtmsrd r10
+ sync
+ XVCPSGNDP(0,0,0)
+ XVCPSGNDP(1,1,1)
+ XVCPSGNDP(2,2,2)
+ XVCPSGNDP(3,3,3)
+ XVCPSGNDP(4,4,4)
+ XVCPSGNDP(5,5,5)
+ XVCPSGNDP(6,6,6)
+ XVCPSGNDP(7,7,7)
+ XVCPSGNDP(8,8,8)
+ XVCPSGNDP(9,9,9)
+ XVCPSGNDP(10,10,10)
+ XVCPSGNDP(11,11,11)
+ XVCPSGNDP(12,12,12)
+ XVCPSGNDP(13,13,13)
+ XVCPSGNDP(14,14,14)
+ XVCPSGNDP(15,15,15)
+ XVCPSGNDP(16,16,16)
+ XVCPSGNDP(17,17,17)
+ XVCPSGNDP(18,18,18)
+ XVCPSGNDP(19,19,19)
+ XVCPSGNDP(20,20,20)
+ XVCPSGNDP(21,21,21)
+ XVCPSGNDP(22,22,22)
+ XVCPSGNDP(23,23,23)
+ XVCPSGNDP(24,24,24)
+ XVCPSGNDP(25,25,25)
+ XVCPSGNDP(26,26,26)
+ XVCPSGNDP(27,27,27)
+ XVCPSGNDP(28,28,28)
+ XVCPSGNDP(29,29,29)
+ XVCPSGNDP(30,30,30)
+ XVCPSGNDP(31,31,31)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206)
+ mtspr SPRN_HSRR0,r11
+ mtcrf 0x80,r9
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ HRFID
+ b .
+#endif
+
.align 7
/* moved from 0xe00 */
STD_EXCEPTION_HV(., 0xe02, h_data_storage)
@@ -495,6 +617,7 @@ machine_check_common:
STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception)
STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
+ STD_EXCEPTION_COMMON(0x1502, denorm, .unknown_exception)
#ifdef CONFIG_ALTIVEC
STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
#else
@@ -960,7 +1083,9 @@ _GLOBAL(do_stab_bolted)
rldimi r10,r11,7,52 /* r10 = first ste of the group */
/* Calculate VSID */
- /* This is a kernel address, so protovsid = ESID */
+ /* This is a kernel address, so protovsid = ESID | 1 << 37 */
+ li r9,0x1
+ rldimi r11,r9,(CONTEXT_BITS + USER_ESID_BITS),0
ASM_VSID_SCRAMBLE(r11, r9, 256M)
rldic r9,r11,12,16 /* r9 = vsid << 12 */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 1f017bb7a7ce..71413f41278f 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -489,10 +489,10 @@ void do_IRQ(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
unsigned int irq;
- trace_irq_entry(regs);
-
irq_enter();
+ trace_irq_entry(regs);
+
check_stack_overflow();
/*
@@ -511,10 +511,10 @@ void do_IRQ(struct pt_regs *regs)
else
__get_cpu_var(irq_stat).spurious_irqs++;
+ trace_irq_exit(regs);
+
irq_exit();
set_irq_regs(old_regs);
-
- trace_irq_exit(regs);
}
void __init init_IRQ(void)
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 4cb714792bea..7f94f760dd0c 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -99,6 +99,26 @@ void pcibios_free_controller(struct pci_controller *phb)
kfree(phb);
}
+/*
+ * The function is used to return the minimal alignment
+ * for memory or I/O windows of the associated P2P bridge.
+ * By default, 4KiB alignment for I/O windows and 1MiB for
+ * memory windows.
+ */
+resource_size_t pcibios_window_alignment(struct pci_bus *bus,
+ unsigned long type)
+{
+ if (ppc_md.pcibios_window_alignment)
+ return ppc_md.pcibios_window_alignment(bus, type);
+
+ /*
+ * PCI core will figure out the default
+ * alignment: 4KiB for I/O and 1MiB for
+ * memory window.
+ */
+ return 1;
+}
+
static resource_size_t pcibios_io_size(const struct pci_controller *hose)
{
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e49e93191b69..bd693a11d86e 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -493,8 +493,6 @@ void timer_interrupt(struct pt_regs * regs)
*/
may_hard_irq_enable();
- trace_timer_interrupt_entry(regs);
-
__get_cpu_var(irq_stat).timer_irqs++;
#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC)
@@ -505,6 +503,8 @@ void timer_interrupt(struct pt_regs * regs)
old_regs = set_irq_regs(regs);
irq_enter();
+ trace_timer_interrupt_entry(regs);
+
if (test_irq_work_pending()) {
clear_irq_work_pending();
irq_work_run();
@@ -529,10 +529,10 @@ void timer_interrupt(struct pt_regs * regs)
}
#endif
+ trace_timer_interrupt_exit(regs);
+
irq_exit();
set_irq_regs(old_regs);
-
- trace_timer_interrupt_exit(regs);
}
/*
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 837f13e7b6bf..00aa61268e0d 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -141,7 +141,7 @@ extern char etext[];
int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
{
pfn_t hpaddr;
- u64 va;
+ u64 vpn;
u64 vsid;
struct kvmppc_sid_map *map;
volatile u32 *pteg;
@@ -173,7 +173,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
BUG_ON(!map);
vsid = map->host_vsid;
- va = (vsid << SID_SHIFT) | (eaddr & ~ESID_MASK);
+ vpn = (vsid << (SID_SHIFT - VPN_SHIFT)) | ((eaddr & ~ESID_MASK) >> VPN_SHIFT)
next_pteg:
if (rr == 16) {
@@ -244,11 +244,11 @@ next_pteg:
dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n",
orig_pte->may_write ? 'w' : '-',
orig_pte->may_execute ? 'x' : '-',
- orig_pte->eaddr, (ulong)pteg, va,
+ orig_pte->eaddr, (ulong)pteg, vpn,
orig_pte->vpage, hpaddr);
pte->slot = (ulong)&pteg[rr];
- pte->host_va = va;
+ pte->host_vpn = vpn;
pte->pte = *orig_pte;
pte->pfn = hpaddr >> PAGE_SHIFT;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 0688b6b39585..4d72f9ebc554 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -33,7 +33,7 @@
void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
{
- ppc_md.hpte_invalidate(pte->slot, pte->host_va,
+ ppc_md.hpte_invalidate(pte->slot, pte->host_vpn,
MMU_PAGE_4K, MMU_SEGSIZE_256M,
false);
}
@@ -80,8 +80,9 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
{
+ unsigned long vpn;
pfn_t hpaddr;
- ulong hash, hpteg, va;
+ ulong hash, hpteg;
u64 vsid;
int ret;
int rflags = 0x192;
@@ -117,7 +118,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
}
vsid = map->host_vsid;
- va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
+ vpn = hpt_vpn(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
if (!orig_pte->may_write)
rflags |= HPTE_R_PP;
@@ -129,7 +130,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
else
kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT);
- hash = hpt_hash(va, PTE_SIZE, MMU_SEGSIZE_256M);
+ hash = hpt_hash(vpn, PTE_SIZE, MMU_SEGSIZE_256M);
map_again:
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
@@ -141,7 +142,8 @@ map_again:
goto out;
}
- ret = ppc_md.hpte_insert(hpteg, va, hpaddr, rflags, vflags, MMU_PAGE_4K, MMU_SEGSIZE_256M);
+ ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
+ MMU_PAGE_4K, MMU_SEGSIZE_256M);
if (ret < 0) {
/* If we couldn't map a primary PTE, try a secondary */
@@ -152,7 +154,8 @@ map_again:
} else {
struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
- trace_kvm_book3s_64_mmu_map(rflags, hpteg, va, hpaddr, orig_pte);
+ trace_kvm_book3s_64_mmu_map(rflags, hpteg,
+ vpn, hpaddr, orig_pte);
/* The ppc_md code may give us a secondary entry even though we
asked for a primary. Fix up. */
@@ -162,7 +165,7 @@ map_again:
}
pte->slot = hpteg + (ret & 7);
- pte->host_va = va;
+ pte->host_vpn = vpn;
pte->pte = *orig_pte;
pte->pfn = hpaddr >> PAGE_SHIFT;
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index 877186b7b1c3..ddb6a2149d44 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -189,7 +189,7 @@ TRACE_EVENT(kvm_book3s_mmu_map,
TP_ARGS(pte),
TP_STRUCT__entry(
- __field( u64, host_va )
+ __field( u64, host_vpn )
__field( u64, pfn )
__field( ulong, eaddr )
__field( u64, vpage )
@@ -198,7 +198,7 @@ TRACE_EVENT(kvm_book3s_mmu_map,
),
TP_fast_assign(
- __entry->host_va = pte->host_va;
+ __entry->host_vpn = pte->host_vpn;
__entry->pfn = pte->pfn;
__entry->eaddr = pte->pte.eaddr;
__entry->vpage = pte->pte.vpage;
@@ -208,8 +208,8 @@ TRACE_EVENT(kvm_book3s_mmu_map,
(pte->pte.may_execute ? 0x1 : 0);
),
- TP_printk("Map: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
- __entry->host_va, __entry->pfn, __entry->eaddr,
+ TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+ __entry->host_vpn, __entry->pfn, __entry->eaddr,
__entry->vpage, __entry->raddr, __entry->flags)
);
@@ -218,7 +218,7 @@ TRACE_EVENT(kvm_book3s_mmu_invalidate,
TP_ARGS(pte),
TP_STRUCT__entry(
- __field( u64, host_va )
+ __field( u64, host_vpn )
__field( u64, pfn )
__field( ulong, eaddr )
__field( u64, vpage )
@@ -227,7 +227,7 @@ TRACE_EVENT(kvm_book3s_mmu_invalidate,
),
TP_fast_assign(
- __entry->host_va = pte->host_va;
+ __entry->host_vpn = pte->host_vpn;
__entry->pfn = pte->pfn;
__entry->eaddr = pte->pte.eaddr;
__entry->vpage = pte->pte.vpage;
@@ -238,7 +238,7 @@ TRACE_EVENT(kvm_book3s_mmu_invalidate,
),
TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
- __entry->host_va, __entry->pfn, __entry->eaddr,
+ __entry->host_vpn, __entry->pfn, __entry->eaddr,
__entry->vpage, __entry->raddr, __entry->flags)
);
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 9a52349874ee..e15c521846ca 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -566,7 +566,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
unsigned long int ea;
unsigned int cr, mb, me, sh;
int err;
- unsigned long old_ra;
+ unsigned long old_ra, val3;
long ival;
opcode = instr >> 26;
@@ -1486,11 +1486,43 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
goto ldst_done;
case 36: /* stw */
- case 37: /* stwu */
val = regs->gpr[rd];
err = write_mem(val, dform_ea(instr, regs), 4, regs);
goto ldst_done;
+ case 37: /* stwu */
+ val = regs->gpr[rd];
+ val3 = dform_ea(instr, regs);
+ /*
+ * For PPC32 we always use stwu to change stack point with r1. So
+ * this emulated store may corrupt the exception frame, now we
+ * have to provide the exception frame trampoline, which is pushed
+ * below the kprobed function stack. So we only update gpr[1] but
+ * don't emulate the real store operation. We will do real store
+ * operation safely in exception return code by checking this flag.
+ */
+ if ((ra == 1) && !(regs->msr & MSR_PR) \
+ && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) {
+ /*
+ * Check if we will touch kernel sack overflow
+ */
+ if (val3 - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) {
+ printk(KERN_CRIT "Can't kprobe this since Kernel stack overflow.\n");
+ err = -EINVAL;
+ break;
+ }
+
+ /*
+ * Check if we already set since that means we'll
+ * lose the previous value.
+ */
+ WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE));
+ set_thread_flag(TIF_EMULATE_STACK_STORE);
+ err = 0;
+ } else
+ err = write_mem(val, val3, 4, regs);
+ goto ldst_done;
+
case 38: /* stb */
case 39: /* stbu */
val = regs->gpr[rd];
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 602aeb06d298..56585086413a 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -63,7 +63,7 @@ _GLOBAL(__hash_page_4K)
/* Save non-volatile registers.
* r31 will hold "old PTE"
* r30 is "new PTE"
- * r29 is "va"
+ * r29 is vpn
* r28 is a hash value
* r27 is hashtab mask (maybe dynamic patched instead ?)
*/
@@ -111,10 +111,10 @@ BEGIN_FTR_SECTION
cmpdi r9,0 /* check segment size */
bne 3f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
- /* Calc va and put it in r29 */
- rldicr r29,r5,28,63-28
- rldicl r3,r3,0,36
- or r29,r3,r29
+ /* Calc vpn and put it in r29 */
+ sldi r29,r5,SID_SHIFT - VPN_SHIFT
+ rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
+ or r29,r28,r29
/* Calculate hash value for primary slot and store it in r28 */
rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
@@ -122,14 +122,19 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
xor r28,r5,r0
b 4f
-3: /* Calc VA and hash in r29 and r28 for 1T segment */
- sldi r29,r5,40 /* vsid << 40 */
- clrldi r3,r3,24 /* ea & 0xffffffffff */
+3: /* Calc vpn and put it in r29 */
+ sldi r29,r5,SID_SHIFT_1T - VPN_SHIFT
+ rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
+ or r29,r28,r29
+
+ /*
+ * calculate hash value for primary slot and
+ * store it in r28 for 1T segment
+ */
rldic r28,r5,25,25 /* (vsid << 25) & 0x7fffffffff */
clrldi r5,r5,40 /* vsid & 0xffffff */
rldicl r0,r3,64-12,36 /* (ea >> 12) & 0xfffffff */
xor r28,r28,r5
- or r29,r3,r29 /* VA */
xor r28,r28,r0 /* hash */
/* Convert linux PTE bits into HW equivalents */
@@ -185,7 +190,7 @@ htab_insert_pte:
/* Call ppc_md.hpte_insert */
ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */
- mr r4,r29 /* Retrieve va */
+ mr r4,r29 /* Retrieve vpn */
li r7,0 /* !bolted, !secondary */
li r8,MMU_PAGE_4K /* page size */
ld r9,STK_PARAM(R9)(r1) /* segment size */
@@ -208,7 +213,7 @@ _GLOBAL(htab_call_hpte_insert1)
/* Call ppc_md.hpte_insert */
ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */
- mr r4,r29 /* Retrieve va */
+ mr r4,r29 /* Retrieve vpn */
li r7,HPTE_V_SECONDARY /* !bolted, secondary */
li r8,MMU_PAGE_4K /* page size */
ld r9,STK_PARAM(R9)(r1) /* segment size */
@@ -278,7 +283,7 @@ htab_modify_pte:
add r3,r0,r3 /* add slot idx */
/* Call ppc_md.hpte_updatepp */
- mr r5,r29 /* va */
+ mr r5,r29 /* vpn */
li r6,MMU_PAGE_4K /* page size */
ld r7,STK_PARAM(R9)(r1) /* segment size */
ld r8,STK_PARAM(R8)(r1) /* get "local" param */
@@ -339,7 +344,7 @@ _GLOBAL(__hash_page_4K)
/* Save non-volatile registers.
* r31 will hold "old PTE"
* r30 is "new PTE"
- * r29 is "va"
+ * r29 is vpn
* r28 is a hash value
* r27 is hashtab mask (maybe dynamic patched instead ?)
* r26 is the hidx mask
@@ -394,10 +399,14 @@ BEGIN_FTR_SECTION
cmpdi r9,0 /* check segment size */
bne 3f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
- /* Calc va and put it in r29 */
- rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */
- rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */
- or r29,r3,r29 /* r29 = va */
+ /* Calc vpn and put it in r29 */
+ sldi r29,r5,SID_SHIFT - VPN_SHIFT
+ /*
+ * clrldi r3,r3,64 - SID_SHIFT --> ea & 0xfffffff
+ * srdi r28,r3,VPN_SHIFT
+ */
+ rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
+ or r29,r28,r29
/* Calculate hash value for primary slot and store it in r28 */
rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
@@ -405,14 +414,23 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
xor r28,r5,r0
b 4f
-3: /* Calc VA and hash in r29 and r28 for 1T segment */
- sldi r29,r5,40 /* vsid << 40 */
- clrldi r3,r3,24 /* ea & 0xffffffffff */
+3: /* Calc vpn and put it in r29 */
+ sldi r29,r5,SID_SHIFT_1T - VPN_SHIFT
+ /*
+ * clrldi r3,r3,64 - SID_SHIFT_1T --> ea & 0xffffffffff
+ * srdi r28,r3,VPN_SHIFT
+ */
+ rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
+ or r29,r28,r29
+
+ /*
+ * Calculate hash value for primary slot and
+ * store it in r28 for 1T segment
+ */
rldic r28,r5,25,25 /* (vsid << 25) & 0x7fffffffff */
clrldi r5,r5,40 /* vsid & 0xffffff */
rldicl r0,r3,64-12,36 /* (ea >> 12) & 0xfffffff */
xor r28,r28,r5
- or r29,r3,r29 /* VA */
xor r28,r28,r0 /* hash */
/* Convert linux PTE bits into HW equivalents */
@@ -488,7 +506,7 @@ htab_special_pfn:
/* Call ppc_md.hpte_insert */
ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */
- mr r4,r29 /* Retrieve va */
+ mr r4,r29 /* Retrieve vpn */
li r7,0 /* !bolted, !secondary */
li r8,MMU_PAGE_4K /* page size */
ld r9,STK_PARAM(R9)(r1) /* segment size */
@@ -515,7 +533,7 @@ _GLOBAL(htab_call_hpte_insert1)
/* Call ppc_md.hpte_insert */
ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */
- mr r4,r29 /* Retrieve va */
+ mr r4,r29 /* Retrieve vpn */
li r7,HPTE_V_SECONDARY /* !bolted, secondary */
li r8,MMU_PAGE_4K /* page size */
ld r9,STK_PARAM(R9)(r1) /* segment size */
@@ -547,7 +565,7 @@ _GLOBAL(htab_call_hpte_remove)
* useless now that the segment has been switched to 4k pages.
*/
htab_inval_old_hpte:
- mr r3,r29 /* virtual addr */
+ mr r3,r29 /* vpn */
mr r4,r31 /* PTE.pte */
li r5,0 /* PTE.hidx */
li r6,MMU_PAGE_64K /* psize */
@@ -620,7 +638,7 @@ htab_modify_pte:
add r3,r0,r3 /* add slot idx */
/* Call ppc_md.hpte_updatepp */
- mr r5,r29 /* va */
+ mr r5,r29 /* vpn */
li r6,MMU_PAGE_4K /* page size */
ld r7,STK_PARAM(R9)(r1) /* segment size */
ld r8,STK_PARAM(R8)(r1) /* get "local" param */
@@ -676,7 +694,7 @@ _GLOBAL(__hash_page_64K)
/* Save non-volatile registers.
* r31 will hold "old PTE"
* r30 is "new PTE"
- * r29 is "va"
+ * r29 is vpn
* r28 is a hash value
* r27 is hashtab mask (maybe dynamic patched instead ?)
*/
@@ -729,10 +747,10 @@ BEGIN_FTR_SECTION
cmpdi r9,0 /* check segment size */
bne 3f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
- /* Calc va and put it in r29 */
- rldicr r29,r5,28,63-28
- rldicl r3,r3,0,36
- or r29,r3,r29
+ /* Calc vpn and put it in r29 */
+ sldi r29,r5,SID_SHIFT - VPN_SHIFT
+ rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
+ or r29,r28,r29
/* Calculate hash value for primary slot and store it in r28 */
rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
@@ -740,14 +758,19 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
xor r28,r5,r0
b 4f
-3: /* Calc VA and hash in r29 and r28 for 1T segment */
- sldi r29,r5,40 /* vsid << 40 */
- clrldi r3,r3,24 /* ea & 0xffffffffff */
+3: /* Calc vpn and put it in r29 */
+ sldi r29,r5,SID_SHIFT_1T - VPN_SHIFT
+ rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
+ or r29,r28,r29
+
+ /*
+ * calculate hash value for primary slot and
+ * store it in r28 for 1T segment
+ */
rldic r28,r5,25,25 /* (vsid << 25) & 0x7fffffffff */
clrldi r5,r5,40 /* vsid & 0xffffff */
rldicl r0,r3,64-16,40 /* (ea >> 16) & 0xffffff */
xor r28,r28,r5
- or r29,r3,r29 /* VA */
xor r28,r28,r0 /* hash */
/* Convert linux PTE bits into HW equivalents */
@@ -806,7 +829,7 @@ ht64_insert_pte:
/* Call ppc_md.hpte_insert */
ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */
- mr r4,r29 /* Retrieve va */
+ mr r4,r29 /* Retrieve vpn */
li r7,0 /* !bolted, !secondary */
li r8,MMU_PAGE_64K
ld r9,STK_PARAM(R9)(r1) /* segment size */
@@ -829,7 +852,7 @@ _GLOBAL(ht64_call_hpte_insert1)
/* Call ppc_md.hpte_insert */
ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */
- mr r4,r29 /* Retrieve va */
+ mr r4,r29 /* Retrieve vpn */
li r7,HPTE_V_SECONDARY /* !bolted, secondary */
li r8,MMU_PAGE_64K
ld r9,STK_PARAM(R9)(r1) /* segment size */
@@ -899,7 +922,7 @@ ht64_modify_pte:
add r3,r0,r3 /* add slot idx */
/* Call ppc_md.hpte_updatepp */
- mr r5,r29 /* va */
+ mr r5,r29 /* vpn */
li r6,MMU_PAGE_64K
ld r7,STK_PARAM(R9)(r1) /* segment size */
ld r8,STK_PARAM(R8)(r1) /* get "local" param */
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index f21e8ce8db33..a4a1c728f269 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -39,22 +39,35 @@
DEFINE_RAW_SPINLOCK(native_tlbie_lock);
-static inline void __tlbie(unsigned long va, int psize, int ssize)
+static inline void __tlbie(unsigned long vpn, int psize, int ssize)
{
+ unsigned long va;
unsigned int penc;
- /* clear top 16 bits, non SLS segment */
+ /*
+ * We need 14 to 65 bits of va for a tlibe of 4K page
+ * With vpn we ignore the lower VPN_SHIFT bits already.
+ * And top two bits are already ignored because we can
+ * only accomadate 76 bits in a 64 bit vpn with a VPN_SHIFT
+ * of 12.
+ */
+ va = vpn << VPN_SHIFT;
+ /*
+ * clear top 16 bits of 64bit va, non SLS segment
+ * Older versions of the architecture (2.02 and earler) require the
+ * masking of the top 16 bits.
+ */
va &= ~(0xffffULL << 48);
switch (psize) {
case MMU_PAGE_4K:
- va &= ~0xffful;
va |= ssize << 8;
asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
: : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
: "memory");
break;
default:
+ /* We need 14 to 14 + i bits of va */
penc = mmu_psize_defs[psize].penc;
va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
va |= penc << 12;
@@ -67,21 +80,28 @@ static inline void __tlbie(unsigned long va, int psize, int ssize)
}
}
-static inline void __tlbiel(unsigned long va, int psize, int ssize)
+static inline void __tlbiel(unsigned long vpn, int psize, int ssize)
{
+ unsigned long va;
unsigned int penc;
- /* clear top 16 bits, non SLS segment */
+ /* VPN_SHIFT can be atmost 12 */
+ va = vpn << VPN_SHIFT;
+ /*
+ * clear top 16 bits of 64 bit va, non SLS segment
+ * Older versions of the architecture (2.02 and earler) require the
+ * masking of the top 16 bits.
+ */
va &= ~(0xffffULL << 48);
switch (psize) {
case MMU_PAGE_4K:
- va &= ~0xffful;
va |= ssize << 8;
asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
: : "r"(va) : "memory");
break;
default:
+ /* We need 14 to 14 + i bits of va */
penc = mmu_psize_defs[psize].penc;
va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
va |= penc << 12;
@@ -94,7 +114,7 @@ static inline void __tlbiel(unsigned long va, int psize, int ssize)
}
-static inline void tlbie(unsigned long va, int psize, int ssize, int local)
+static inline void tlbie(unsigned long vpn, int psize, int ssize, int local)
{
unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
@@ -105,10 +125,10 @@ static inline void tlbie(unsigned long va, int psize, int ssize, int local)
raw_spin_lock(&native_tlbie_lock);
asm volatile("ptesync": : :"memory");
if (use_local) {
- __tlbiel(va, psize, ssize);
+ __tlbiel(vpn, psize, ssize);
asm volatile("ptesync": : :"memory");
} else {
- __tlbie(va, psize, ssize);
+ __tlbie(vpn, psize, ssize);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
if (lock_tlbie && !use_local)
@@ -134,7 +154,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep)
clear_bit_unlock(HPTE_LOCK_BIT, word);
}
-static long native_hpte_insert(unsigned long hpte_group, unsigned long va,
+static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
unsigned long pa, unsigned long rflags,
unsigned long vflags, int psize, int ssize)
{
@@ -143,9 +163,9 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long va,
int i;
if (!(vflags & HPTE_V_BOLTED)) {
- DBG_LOW(" insert(group=%lx, va=%016lx, pa=%016lx,"
+ DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx,"
" rflags=%lx, vflags=%lx, psize=%d)\n",
- hpte_group, va, pa, rflags, vflags, psize);
+ hpte_group, vpn, pa, rflags, vflags, psize);
}
for (i = 0; i < HPTES_PER_GROUP; i++) {
@@ -163,7 +183,7 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long va,
if (i == HPTES_PER_GROUP)
return -1;
- hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
+ hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(pa, psize) | rflags;
if (!(vflags & HPTE_V_BOLTED)) {
@@ -225,17 +245,17 @@ static long native_hpte_remove(unsigned long hpte_group)
}
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
- unsigned long va, int psize, int ssize,
+ unsigned long vpn, int psize, int ssize,
int local)
{
struct hash_pte *hptep = htab_address + slot;
unsigned long hpte_v, want_v;
int ret = 0;
- want_v = hpte_encode_v(va, psize, ssize);
+ want_v = hpte_encode_v(vpn, psize, ssize);
- DBG_LOW(" update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
- va, want_v & HPTE_V_AVPN, slot, newpp);
+ DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
+ vpn, want_v & HPTE_V_AVPN, slot, newpp);
native_lock_hpte(hptep);
@@ -254,12 +274,12 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
native_unlock_hpte(hptep);
/* Ensure it is out of the tlb too. */
- tlbie(va, psize, ssize, local);
+ tlbie(vpn, psize, ssize, local);
return ret;
}
-static long native_hpte_find(unsigned long va, int psize, int ssize)
+static long native_hpte_find(unsigned long vpn, int psize, int ssize)
{
struct hash_pte *hptep;
unsigned long hash;
@@ -267,8 +287,8 @@ static long native_hpte_find(unsigned long va, int psize, int ssize)
long slot;
unsigned long want_v, hpte_v;
- hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize);
- want_v = hpte_encode_v(va, psize, ssize);
+ hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
+ want_v = hpte_encode_v(vpn, psize, ssize);
/* Bolted mappings are only ever in the primary group */
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -295,14 +315,15 @@ static long native_hpte_find(unsigned long va, int psize, int ssize)
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
int psize, int ssize)
{
- unsigned long vsid, va;
+ unsigned long vpn;
+ unsigned long vsid;
long slot;
struct hash_pte *hptep;
vsid = get_kernel_vsid(ea, ssize);
- va = hpt_va(ea, vsid, ssize);
+ vpn = hpt_vpn(ea, vsid, ssize);
- slot = native_hpte_find(va, psize, ssize);
+ slot = native_hpte_find(vpn, psize, ssize);
if (slot == -1)
panic("could not find page to bolt\n");
hptep = htab_address + slot;
@@ -312,10 +333,10 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
(newpp & (HPTE_R_PP | HPTE_R_N));
/* Ensure it is out of the tlb too. */
- tlbie(va, psize, ssize, 0);
+ tlbie(vpn, psize, ssize, 0);
}
-static void native_hpte_invalidate(unsigned long slot, unsigned long va,
+static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
int psize, int ssize, int local)
{
struct hash_pte *hptep = htab_address + slot;
@@ -325,9 +346,9 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
local_irq_save(flags);
- DBG_LOW(" invalidate(va=%016lx, hash: %x)\n", va, slot);
+ DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);
- want_v = hpte_encode_v(va, psize, ssize);
+ want_v = hpte_encode_v(vpn, psize, ssize);
native_lock_hpte(hptep);
hpte_v = hptep->v;
@@ -339,7 +360,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
hptep->v = 0;
/* Invalidate the TLB */
- tlbie(va, psize, ssize, local);
+ tlbie(vpn, psize, ssize, local);
local_irq_restore(flags);
}
@@ -349,11 +370,12 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
#define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT)
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
- int *psize, int *ssize, unsigned long *va)
+ int *psize, int *ssize, unsigned long *vpn)
{
+ unsigned long avpn, pteg, vpi;
unsigned long hpte_r = hpte->r;
unsigned long hpte_v = hpte->v;
- unsigned long avpn;
+ unsigned long vsid, seg_off;
int i, size, shift, penc;
if (!(hpte_v & HPTE_V_LARGE))
@@ -380,32 +402,38 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
}
/* This works for all page sizes, and for 256M and 1T segments */
+ *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
shift = mmu_psize_defs[size].shift;
- avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm) << 23;
-
- if (shift < 23) {
- unsigned long vpi, vsid, pteg;
- pteg = slot / HPTES_PER_GROUP;
- if (hpte_v & HPTE_V_SECONDARY)
- pteg = ~pteg;
- switch (hpte_v >> HPTE_V_SSIZE_SHIFT) {
- case MMU_SEGSIZE_256M:
- vpi = ((avpn >> 28) ^ pteg) & htab_hash_mask;
- break;
- case MMU_SEGSIZE_1T:
- vsid = avpn >> 40;
+ avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
+ pteg = slot / HPTES_PER_GROUP;
+ if (hpte_v & HPTE_V_SECONDARY)
+ pteg = ~pteg;
+
+ switch (*ssize) {
+ case MMU_SEGSIZE_256M:
+ /* We only have 28 - 23 bits of seg_off in avpn */
+ seg_off = (avpn & 0x1f) << 23;
+ vsid = avpn >> 5;
+ /* We can find more bits from the pteg value */
+ if (shift < 23) {
+ vpi = (vsid ^ pteg) & htab_hash_mask;
+ seg_off |= vpi << shift;
+ }
+ *vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
+ case MMU_SEGSIZE_1T:
+ /* We only have 40 - 23 bits of seg_off in avpn */
+ seg_off = (avpn & 0x1ffff) << 23;
+ vsid = avpn >> 17;
+ if (shift < 23) {
vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
- break;
- default:
- avpn = vpi = size = 0;
+ seg_off |= vpi << shift;
}
- avpn |= (vpi << mmu_psize_defs[size].shift);
+ *vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
+ default:
+ *vpn = size = 0;
}
-
- *va = avpn;
*psize = size;
- *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
}
/*
@@ -418,9 +446,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
*/
static void native_hpte_clear(void)
{
+ unsigned long vpn = 0;
unsigned long slot, slots, flags;
struct hash_pte *hptep = htab_address;
- unsigned long hpte_v, va;
+ unsigned long hpte_v;
unsigned long pteg_count;
int psize, ssize;
@@ -448,9 +477,9 @@ static void native_hpte_clear(void)
* already hold the native_tlbie_lock.
*/
if (hpte_v & HPTE_V_VALID) {
- hpte_decode(hptep, slot, &psize, &ssize, &va);
+ hpte_decode(hptep, slot, &psize, &ssize, &vpn);
hptep->v = 0;
- __tlbie(va, psize, ssize);
+ __tlbie(vpn, psize, ssize);
}
}
@@ -465,7 +494,8 @@ static void native_hpte_clear(void)
*/
static void native_flush_hash_range(unsigned long number, int local)
{
- unsigned long va, hash, index, hidx, shift, slot;
+ unsigned long vpn;
+ unsigned long hash, index, hidx, shift, slot;
struct hash_pte *hptep;
unsigned long hpte_v;
unsigned long want_v;
@@ -479,18 +509,18 @@ static void native_flush_hash_range(unsigned long number, int local)
local_irq_save(flags);
for (i = 0; i < number; i++) {
- va = batch->vaddr[i];
+ vpn = batch->vpn[i];
pte = batch->pte[i];
- pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
- hash = hpt_hash(va, shift, ssize);
+ pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+ hash = hpt_hash(vpn, shift, ssize);
hidx = __rpte_to_hidx(pte, index);
if (hidx & _PTEIDX_SECONDARY)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
hptep = htab_address + slot;
- want_v = hpte_encode_v(va, psize, ssize);
+ want_v = hpte_encode_v(vpn, psize, ssize);
native_lock_hpte(hptep);
hpte_v = hptep->v;
if (!HPTE_V_COMPARE(hpte_v, want_v) ||
@@ -505,12 +535,12 @@ static void native_flush_hash_range(unsigned long number, int local)
mmu_psize_defs[psize].tlbiel && local) {
asm volatile("ptesync":::"memory");
for (i = 0; i < number; i++) {
- va = batch->vaddr[i];
+ vpn = batch->vpn[i];
pte = batch->pte[i];
- pte_iterate_hashed_subpages(pte, psize, va, index,
- shift) {
- __tlbiel(va, psize, ssize);
+ pte_iterate_hashed_subpages(pte, psize,
+ vpn, index, shift) {
+ __tlbiel(vpn, psize, ssize);
} pte_iterate_hashed_end();
}
asm volatile("ptesync":::"memory");
@@ -522,12 +552,12 @@ static void native_flush_hash_range(unsigned long number, int local)
asm volatile("ptesync":::"memory");
for (i = 0; i < number; i++) {
- va = batch->vaddr[i];
+ vpn = batch->vpn[i];
pte = batch->pte[i];
- pte_iterate_hashed_subpages(pte, psize, va, index,
- shift) {
- __tlbie(va, psize, ssize);
+ pte_iterate_hashed_subpages(pte, psize,
+ vpn, index, shift) {
+ __tlbie(vpn, psize, ssize);
} pte_iterate_hashed_end();
}
asm volatile("eieio; tlbsync; ptesync":::"memory");
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index ba45739bdfe8..3a292be2e079 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -191,18 +191,18 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
vaddr += step, paddr += step) {
unsigned long hash, hpteg;
unsigned long vsid = get_kernel_vsid(vaddr, ssize);
- unsigned long va = hpt_va(vaddr, vsid, ssize);
+ unsigned long vpn = hpt_vpn(vaddr, vsid, ssize);
unsigned long tprot = prot;
/* Make kernel text executable */
if (overlaps_kernel_text(vaddr, vaddr + step))
tprot &= ~HPTE_R_N;
- hash = hpt_hash(va, shift, ssize);
+ hash = hpt_hash(vpn, shift, ssize);
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
BUG_ON(!ppc_md.hpte_insert);
- ret = ppc_md.hpte_insert(hpteg, va, paddr, tprot,
+ ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot,
HPTE_V_BOLTED, psize, ssize);
if (ret < 0)
@@ -803,16 +803,19 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
#ifdef CONFIG_PPC_MM_SLICES
unsigned int get_paca_psize(unsigned long addr)
{
- unsigned long index, slices;
+ u64 lpsizes;
+ unsigned char *hpsizes;
+ unsigned long index, mask_index;
if (addr < SLICE_LOW_TOP) {
- slices = get_paca()->context.low_slices_psize;
+ lpsizes = get_paca()->context.low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
- } else {
- slices = get_paca()->context.high_slices_psize;
- index = GET_HIGH_SLICE_INDEX(addr);
+ return (lpsizes >> (index * 4)) & 0xF;
}
- return (slices >> (index * 4)) & 0xF;
+ hpsizes = get_paca()->context.high_slices_psize;
+ index = GET_HIGH_SLICE_INDEX(addr);
+ mask_index = index & 0x1;
+ return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
}
#else
@@ -1152,21 +1155,21 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
/* WARNING: This is called from hash_low_64.S, if you change this prototype,
* do not forget to update the assembly call site !
*/
-void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int ssize,
+void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
int local)
{
unsigned long hash, index, shift, hidx, slot;
- DBG_LOW("flush_hash_page(va=%016lx)\n", va);
- pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
- hash = hpt_hash(va, shift, ssize);
+ DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
+ pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+ hash = hpt_hash(vpn, shift, ssize);
hidx = __rpte_to_hidx(pte, index);
if (hidx & _PTEIDX_SECONDARY)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
- ppc_md.hpte_invalidate(slot, va, psize, ssize, local);
+ ppc_md.hpte_invalidate(slot, vpn, psize, ssize, local);
} pte_iterate_hashed_end();
}
@@ -1180,7 +1183,7 @@ void flush_hash_range(unsigned long number, int local)
&__get_cpu_var(ppc64_tlb_batch);
for (i = 0; i < number; i++)
- flush_hash_page(batch->vaddr[i], batch->pte[i],
+ flush_hash_page(batch->vpn[i], batch->pte[i],
batch->psize, batch->ssize, local);
}
}
@@ -1207,14 +1210,14 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
{
unsigned long hash, hpteg;
unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
- unsigned long va = hpt_va(vaddr, vsid, mmu_kernel_ssize);
+ unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL);
int ret;
- hash = hpt_hash(va, PAGE_SHIFT, mmu_kernel_ssize);
+ hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
- ret = ppc_md.hpte_insert(hpteg, va, __pa(vaddr),
+ ret = ppc_md.hpte_insert(hpteg, vpn, __pa(vaddr),
mode, HPTE_V_BOLTED,
mmu_linear_psize, mmu_kernel_ssize);
BUG_ON (ret < 0);
@@ -1228,9 +1231,9 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
{
unsigned long hash, hidx, slot;
unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
- unsigned long va = hpt_va(vaddr, vsid, mmu_kernel_ssize);
+ unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
- hash = hpt_hash(va, PAGE_SHIFT, mmu_kernel_ssize);
+ hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
spin_lock(&linear_map_hash_lock);
BUG_ON(!(linear_map_hash_slots[lmi] & 0x80));
hidx = linear_map_hash_slots[lmi] & 0x7f;
@@ -1240,7 +1243,7 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
- ppc_md.hpte_invalidate(slot, va, mmu_linear_psize, mmu_kernel_ssize, 0);
+ ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_kernel_ssize, 0);
}
void kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index cc5c273086cf..cecad348f604 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -18,14 +18,15 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
pte_t *ptep, unsigned long trap, int local, int ssize,
unsigned int shift, unsigned int mmu_psize)
{
+ unsigned long vpn;
unsigned long old_pte, new_pte;
- unsigned long va, rflags, pa, sz;
+ unsigned long rflags, pa, sz;
long slot;
BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
/* Search the Linux page table for a match with va */
- va = hpt_va(ea, vsid, ssize);
+ vpn = hpt_vpn(ea, vsid, ssize);
/* At this point, we have a pte (old_pte) which can be used to build
* or update an HPTE. There are 2 cases:
@@ -69,19 +70,19 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
/* There MIGHT be an HPTE for this pte */
unsigned long hash, slot;
- hash = hpt_hash(va, shift, ssize);
+ hash = hpt_hash(vpn, shift, ssize);
if (old_pte & _PAGE_F_SECOND)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += (old_pte & _PAGE_F_GIX) >> 12;
- if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize,
+ if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
ssize, local) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
if (likely(!(old_pte & _PAGE_HASHPTE))) {
- unsigned long hash = hpt_hash(va, shift, ssize);
+ unsigned long hash = hpt_hash(vpn, shift, ssize);
unsigned long hpte_group;
pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
@@ -101,14 +102,14 @@ repeat:
_PAGE_COHERENT | _PAGE_GUARDED));
/* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
+ slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
mmu_psize, ssize);
/* Primary is full, try the secondary */
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
+ slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags,
HPTE_V_SECONDARY,
mmu_psize, ssize);
if (slot == -1) {
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 40677aa0190e..40bc5b0ace54 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -30,11 +30,13 @@ static DEFINE_SPINLOCK(mmu_context_lock);
static DEFINE_IDA(mmu_context_ida);
/*
- * The proto-VSID space has 2^35 - 1 segments available for user mappings.
- * Each segment contains 2^28 bytes. Each context maps 2^44 bytes,
- * so we can support 2^19-1 contexts (19 == 35 + 28 - 44).
+ * 256MB segment
+ * The proto-VSID space has 2^(CONTEX_BITS + USER_ESID_BITS) - 1 segments
+ * available for user mappings. Each segment contains 2^28 bytes. Each
+ * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts
+ * (19 == 37 + 28 - 46).
*/
-#define MAX_CONTEXT ((1UL << 19) - 1)
+#define MAX_CONTEXT ((1UL << CONTEXT_BITS) - 1)
int __init_new_context(void)
{
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 297d49547ea8..e212a271c7a4 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -55,8 +55,18 @@
#include "mmu_decl.h"
-unsigned long ioremap_bot = IOREMAP_BASE;
+/* Some sanity checking */
+#if TASK_SIZE_USER64 > PGTABLE_RANGE
+#error TASK_SIZE_USER64 exceeds pagetable range
+#endif
+
+#ifdef CONFIG_PPC_STD_MMU_64
+#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT))
+#error TASK_SIZE_USER64 exceeds user VSID range
+#endif
+#endif
+unsigned long ioremap_bot = IOREMAP_BASE;
#ifdef CONFIG_PPC_MMU_NOHASH
static void *early_alloc_pgtable(unsigned long size)
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index b9ee79ce2200..1a16ca227757 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -56,6 +56,12 @@ _GLOBAL(slb_allocate_realmode)
*/
_GLOBAL(slb_miss_kernel_load_linear)
li r11,0
+ li r9,0x1
+ /*
+ * for 1T we shift 12 bits more. slb_finish_load_1T will do
+ * the necessary adjustment
+ */
+ rldimi r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0
BEGIN_FTR_SECTION
b slb_finish_load
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
@@ -85,6 +91,12 @@ _GLOBAL(slb_miss_kernel_load_vmemmap)
_GLOBAL(slb_miss_kernel_load_io)
li r11,0
6:
+ li r9,0x1
+ /*
+ * for 1T we shift 12 bits more. slb_finish_load_1T will do
+ * the necessary adjustment
+ */
+ rldimi r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0
BEGIN_FTR_SECTION
b slb_finish_load
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
@@ -108,17 +120,31 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
* between 4k and 64k standard page size
*/
#ifdef CONFIG_PPC_MM_SLICES
+ /* r10 have esid */
cmpldi r10,16
-
- /* Get the slice index * 4 in r11 and matching slice size mask in r9 */
- ld r9,PACALOWSLICESPSIZE(r13)
- sldi r11,r10,2
+ /* below SLICE_LOW_TOP */
blt 5f
- ld r9,PACAHIGHSLICEPSIZE(r13)
- srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2)
- andi. r11,r11,0x3c
+ /*
+ * Handle hpsizes,
+ * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index
+ */
+ srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */
+ addi r9,r11,PACAHIGHSLICEPSIZE
+ lbzx r9,r13,r9 /* r9 is hpsizes[r11] */
+ /* r11 = (r10 >> (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)) & 0x1 */
+ rldicl r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63
+ b 6f
-5: /* Extract the psize and multiply to get an array offset */
+5:
+ /*
+ * Handle lpsizes
+ * r9 is get_paca()->context.low_slices_psize, r11 is index
+ */
+ ld r9,PACALOWSLICESPSIZE(r13)
+ mr r11,r10
+6:
+ sldi r11,r11,2 /* index * 4 */
+ /* Extract the psize and multiply to get an array offset */
srd r9,r9,r11
andi. r9,r9,0xf
mulli r9,r9,MMUPSIZEDEFSIZE
@@ -209,7 +235,11 @@ _GLOBAL(slb_allocate_user)
*/
slb_finish_load:
ASM_VSID_SCRAMBLE(r10,r9,256M)
- rldimi r11,r10,SLB_VSID_SHIFT,16 /* combine VSID and flags */
+ /*
+ * bits above VSID_BITS_256M need to be ignored from r10
+ * also combine VSID and flags
+ */
+ rldimi r11,r10,SLB_VSID_SHIFT,(64 - (SLB_VSID_SHIFT + VSID_BITS_256M))
/* r3 = EA, r11 = VSID data */
/*
@@ -252,10 +282,10 @@ _GLOBAL(slb_compare_rr_to_size)
bge 1f
/* still room in the slb cache */
- sldi r11,r3,1 /* r11 = offset * sizeof(u16) */
- rldicl r10,r10,36,28 /* get low 16 bits of the ESID */
- add r11,r11,r13 /* r11 = (u16 *)paca + offset */
- sth r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */
+ sldi r11,r3,2 /* r11 = offset * sizeof(u32) */
+ srdi r10,r10,28 /* get the 36 bits of the ESID */
+ add r11,r11,r13 /* r11 = (u32 *)paca + offset */
+ stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */
addi r3,r3,1 /* offset++ */
b 2f
1: /* offset >= SLB_CACHE_ENTRIES */
@@ -273,7 +303,11 @@ _GLOBAL(slb_compare_rr_to_size)
slb_finish_load_1T:
srdi r10,r10,40-28 /* get 1T ESID */
ASM_VSID_SCRAMBLE(r10,r9,1T)
- rldimi r11,r10,SLB_VSID_SHIFT_1T,16 /* combine VSID and flags */
+ /*
+ * bits above VSID_BITS_1T need to be ignored from r10
+ * also combine VSID and flags
+ */
+ rldimi r11,r10,SLB_VSID_SHIFT_1T,(64 - (SLB_VSID_SHIFT_1T + VSID_BITS_1T))
li r10,MMU_SEGSIZE_1T
rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 73709f7ce92c..5829d2a950d4 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -34,6 +34,11 @@
#include <asm/mmu.h>
#include <asm/spu.h>
+/* some sanity checks */
+#if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE
+#error PGTABLE_RANGE exceeds slice_mask high_slices size
+#endif
+
static DEFINE_SPINLOCK(slice_convert_lock);
@@ -42,7 +47,7 @@ int _slice_debug = 1;
static void slice_print_mask(const char *label, struct slice_mask mask)
{
- char *p, buf[16 + 3 + 16 + 1];
+ char *p, buf[16 + 3 + 64 + 1];
int i;
if (!_slice_debug)
@@ -54,7 +59,7 @@ static void slice_print_mask(const char *label, struct slice_mask mask)
*(p++) = '-';
*(p++) = ' ';
for (i = 0; i < SLICE_NUM_HIGH; i++)
- *(p++) = (mask.high_slices & (1 << i)) ? '1' : '0';
+ *(p++) = (mask.high_slices & (1ul << i)) ? '1' : '0';
*(p++) = 0;
printk(KERN_DEBUG "%s:%s\n", label, buf);
@@ -84,8 +89,8 @@ static struct slice_mask slice_range_to_mask(unsigned long start,
}
if ((start + len) > SLICE_LOW_TOP)
- ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1))
- - (1u << GET_HIGH_SLICE_INDEX(start));
+ ret.high_slices = (1ul << (GET_HIGH_SLICE_INDEX(end) + 1))
+ - (1ul << GET_HIGH_SLICE_INDEX(start));
return ret;
}
@@ -135,26 +140,31 @@ static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
for (i = 0; i < SLICE_NUM_HIGH; i++)
if (!slice_high_has_vma(mm, i))
- ret.high_slices |= 1u << i;
+ ret.high_slices |= 1ul << i;
return ret;
}
static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
{
+ unsigned char *hpsizes;
+ int index, mask_index;
struct slice_mask ret = { 0, 0 };
unsigned long i;
- u64 psizes;
+ u64 lpsizes;
- psizes = mm->context.low_slices_psize;
+ lpsizes = mm->context.low_slices_psize;
for (i = 0; i < SLICE_NUM_LOW; i++)
- if (((psizes >> (i * 4)) & 0xf) == psize)
+ if (((lpsizes >> (i * 4)) & 0xf) == psize)
ret.low_slices |= 1u << i;
- psizes = mm->context.high_slices_psize;
- for (i = 0; i < SLICE_NUM_HIGH; i++)
- if (((psizes >> (i * 4)) & 0xf) == psize)
- ret.high_slices |= 1u << i;
+ hpsizes = mm->context.high_slices_psize;
+ for (i = 0; i < SLICE_NUM_HIGH; i++) {
+ mask_index = i & 0x1;
+ index = i >> 1;
+ if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
+ ret.high_slices |= 1ul << i;
+ }
return ret;
}
@@ -183,8 +193,10 @@ static void slice_flush_segments(void *parm)
static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
{
+ int index, mask_index;
/* Write the new slice psize bits */
- u64 lpsizes, hpsizes;
+ unsigned char *hpsizes;
+ u64 lpsizes;
unsigned long i, flags;
slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
@@ -201,14 +213,18 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
lpsizes = (lpsizes & ~(0xful << (i * 4))) |
(((unsigned long)psize) << (i * 4));
- hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < SLICE_NUM_HIGH; i++)
- if (mask.high_slices & (1u << i))
- hpsizes = (hpsizes & ~(0xful << (i * 4))) |
- (((unsigned long)psize) << (i * 4));
-
+ /* Assign the value back */
mm->context.low_slices_psize = lpsizes;
- mm->context.high_slices_psize = hpsizes;
+
+ hpsizes = mm->context.high_slices_psize;
+ for (i = 0; i < SLICE_NUM_HIGH; i++) {
+ mask_index = i & 0x1;
+ index = i >> 1;
+ if (mask.high_slices & (1ul << i))
+ hpsizes[index] = (hpsizes[index] &
+ ~(0xf << (mask_index * 4))) |
+ (((unsigned long)psize) << (mask_index * 4));
+ }
slice_dbg(" lsps=%lx, hsps=%lx\n",
mm->context.low_slices_psize,
@@ -587,18 +603,19 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
{
- u64 psizes;
- int index;
+ unsigned char *hpsizes;
+ int index, mask_index;
if (addr < SLICE_LOW_TOP) {
- psizes = mm->context.low_slices_psize;
+ u64 lpsizes;
+ lpsizes = mm->context.low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
- } else {
- psizes = mm->context.high_slices_psize;
- index = GET_HIGH_SLICE_INDEX(addr);
+ return (lpsizes >> (index * 4)) & 0xf;
}
-
- return (psizes >> (index * 4)) & 0xf;
+ hpsizes = mm->context.high_slices_psize;
+ index = GET_HIGH_SLICE_INDEX(addr);
+ mask_index = index & 0x1;
+ return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf;
}
EXPORT_SYMBOL_GPL(get_slice_psize);
@@ -618,7 +635,9 @@ EXPORT_SYMBOL_GPL(get_slice_psize);
*/
void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
{
- unsigned long flags, lpsizes, hpsizes;
+ int index, mask_index;
+ unsigned char *hpsizes;
+ unsigned long flags, lpsizes;
unsigned int old_psize;
int i;
@@ -639,15 +658,21 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
lpsizes = (lpsizes & ~(0xful << (i * 4))) |
(((unsigned long)psize) << (i * 4));
+ /* Assign the value back */
+ mm->context.low_slices_psize = lpsizes;
hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < SLICE_NUM_HIGH; i++)
- if (((hpsizes >> (i * 4)) & 0xf) == old_psize)
- hpsizes = (hpsizes & ~(0xful << (i * 4))) |
- (((unsigned long)psize) << (i * 4));
+ for (i = 0; i < SLICE_NUM_HIGH; i++) {
+ mask_index = i & 0x1;
+ index = i >> 1;
+ if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize)
+ hpsizes[index] = (hpsizes[index] &
+ ~(0xf << (mask_index * 4))) |
+ (((unsigned long)psize) << (mask_index * 4));
+ }
+
+
- mm->context.low_slices_psize = lpsizes;
- mm->context.high_slices_psize = hpsizes;
slice_dbg(" lsps=%lx, hsps=%lx\n",
mm->context.low_slices_psize,
@@ -660,18 +685,27 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
void slice_set_psize(struct mm_struct *mm, unsigned long address,
unsigned int psize)
{
+ unsigned char *hpsizes;
unsigned long i, flags;
- u64 *p;
+ u64 *lpsizes;
spin_lock_irqsave(&slice_convert_lock, flags);
if (address < SLICE_LOW_TOP) {
i = GET_LOW_SLICE_INDEX(address);
- p = &mm->context.low_slices_psize;
+ lpsizes = &mm->context.low_slices_psize;
+ *lpsizes = (*lpsizes & ~(0xful << (i * 4))) |
+ ((unsigned long) psize << (i * 4));
} else {
+ int index, mask_index;
i = GET_HIGH_SLICE_INDEX(address);
- p = &mm->context.high_slices_psize;
+ hpsizes = mm->context.high_slices_psize;
+ mask_index = i & 0x1;
+ index = i >> 1;
+ hpsizes[index] = (hpsizes[index] &
+ ~(0xf << (mask_index * 4))) |
+ (((unsigned long)psize) << (mask_index * 4));
}
- *p = (*p & ~(0xful << (i * 4))) | ((unsigned long) psize << (i * 4));
+
spin_unlock_irqrestore(&slice_convert_lock, flags);
#ifdef CONFIG_SPU_BASE
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 31f18207970b..ae758b3ff72c 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -42,8 +42,9 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long pte, int huge)
{
+ unsigned long vpn;
struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
- unsigned long vsid, vaddr;
+ unsigned long vsid;
unsigned int psize;
int ssize;
real_pte_t rpte;
@@ -86,7 +87,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
ssize = mmu_kernel_ssize;
}
- vaddr = hpt_va(addr, vsid, ssize);
+ vpn = hpt_vpn(addr, vsid, ssize);
rpte = __real_pte(__pte(pte), ptep);
/*
@@ -96,7 +97,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
* and decide to use local invalidates instead...
*/
if (!batch->active) {
- flush_hash_page(vaddr, rpte, psize, ssize, 0);
+ flush_hash_page(vpn, rpte, psize, ssize, 0);
put_cpu_var(ppc64_tlb_batch);
return;
}
@@ -122,7 +123,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
batch->ssize = ssize;
}
batch->pte[i] = rpte;
- batch->vaddr[i] = vaddr;
+ batch->vpn[i] = vpn;
batch->index = ++i;
if (i >= PPC64_TLB_BATCH_NR)
__flush_tlb_pending(batch);
@@ -146,7 +147,7 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
if (cpumask_equal(mm_cpumask(batch->mm), tmp))
local = 1;
if (i == 1)
- flush_hash_page(batch->vaddr[0], batch->pte[0],
+ flush_hash_page(batch->vpn[0], batch->pte[0],
batch->psize, batch->ssize, local);
else
flush_hash_range(i, local);
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index 943c9d39aa16..0f6f83988b3d 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -88,7 +88,7 @@ static inline unsigned int beat_read_mask(unsigned hpte_group)
}
static long beat_lpar_hpte_insert(unsigned long hpte_group,
- unsigned long va, unsigned long pa,
+ unsigned long vpn, unsigned long pa,
unsigned long rflags, unsigned long vflags,
int psize, int ssize)
{
@@ -103,7 +103,7 @@ static long beat_lpar_hpte_insert(unsigned long hpte_group,
"rflags=%lx, vflags=%lx, psize=%d)\n",
hpte_group, va, pa, rflags, vflags, psize);
- hpte_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M) |
+ hpte_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M) |
vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(pa, psize) | rflags;
@@ -184,14 +184,14 @@ static void beat_lpar_hptab_clear(void)
*/
static long beat_lpar_hpte_updatepp(unsigned long slot,
unsigned long newpp,
- unsigned long va,
+ unsigned long vpn,
int psize, int ssize, int local)
{
unsigned long lpar_rc;
u64 dummy0, dummy1;
unsigned long want_v;
- want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+ want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
DBG_LOW(" update: "
"avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ",
@@ -220,15 +220,15 @@ static long beat_lpar_hpte_updatepp(unsigned long slot,
return 0;
}
-static long beat_lpar_hpte_find(unsigned long va, int psize)
+static long beat_lpar_hpte_find(unsigned long vpn, int psize)
{
unsigned long hash;
unsigned long i, j;
long slot;
unsigned long want_v, hpte_v;
- hash = hpt_hash(va, mmu_psize_defs[psize].shift, MMU_SEGSIZE_256M);
- want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+ hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, MMU_SEGSIZE_256M);
+ want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
for (j = 0; j < 2; j++) {
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -255,14 +255,15 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
unsigned long ea,
int psize, int ssize)
{
- unsigned long lpar_rc, slot, vsid, va;
+ unsigned long vpn;
+ unsigned long lpar_rc, slot, vsid;
u64 dummy0, dummy1;
vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
- va = (vsid << 28) | (ea & 0x0fffffff);
+ vpn = hpt_vpn(ea, vsid, MMU_SEGSIZE_256M);
raw_spin_lock(&beat_htab_lock);
- slot = beat_lpar_hpte_find(va, psize);
+ slot = beat_lpar_hpte_find(vpn, psize);
BUG_ON(slot == -1);
lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7,
@@ -272,7 +273,7 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
BUG_ON(lpar_rc != 0);
}
-static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
+static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
int psize, int ssize, int local)
{
unsigned long want_v;
@@ -282,7 +283,7 @@ static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
slot, va, psize, local);
- want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+ want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
raw_spin_lock_irqsave(&beat_htab_lock, flags);
dummy1 = beat_lpar_hpte_getword0(slot);
@@ -311,7 +312,7 @@ void __init hpte_init_beat(void)
}
static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
- unsigned long va, unsigned long pa,
+ unsigned long vpn, unsigned long pa,
unsigned long rflags, unsigned long vflags,
int psize, int ssize)
{
@@ -322,11 +323,11 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
return -1;
if (!(vflags & HPTE_V_BOLTED))
- DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
+ DBG_LOW("hpte_insert(group=%lx, vpn=%016lx, pa=%016lx, "
"rflags=%lx, vflags=%lx, psize=%d)\n",
- hpte_group, va, pa, rflags, vflags, psize);
+ hpte_group, vpn, pa, rflags, vflags, psize);
- hpte_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M) |
+ hpte_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M) |
vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(pa, psize) | rflags;
@@ -364,14 +365,14 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
*/
static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
unsigned long newpp,
- unsigned long va,
+ unsigned long vpn,
int psize, int ssize, int local)
{
unsigned long lpar_rc;
unsigned long want_v;
unsigned long pss;
- want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+ want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc;
DBG_LOW(" update: "
@@ -392,16 +393,16 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
return 0;
}
-static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long va,
+static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn,
int psize, int ssize, int local)
{
unsigned long want_v;
unsigned long lpar_rc;
unsigned long pss;
- DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
- slot, va, psize, local);
- want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+ DBG_LOW(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
+ slot, vpn, psize, local);
+ want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc;
lpar_rc = beat_invalidate_htab_entry3(0, slot, want_v, pss);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 5e75dcfe51b9..471aa3ccd9fd 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -34,14 +34,6 @@
#include "powernv.h"
#include "pci.h"
-struct resource_wrap {
- struct list_head link;
- resource_size_t size;
- resource_size_t align;
- struct pci_dev *dev; /* Set if it's a device */
- struct pci_bus *bus; /* Set if it's a bridge */
-};
-
static int __pe_printk(const char *level, const struct pnv_ioda_pe *pe,
struct va_format *vaf)
{
@@ -77,273 +69,6 @@ define_pe_printk_level(pe_err, KERN_ERR);
define_pe_printk_level(pe_warn, KERN_WARNING);
define_pe_printk_level(pe_info, KERN_INFO);
-
-/* Calculate resource usage & alignment requirement of a single
- * device. This will also assign all resources within the device
- * for a given type starting at 0 for the biggest one and then
- * assigning in decreasing order of size.
- */
-static void __devinit pnv_ioda_calc_dev(struct pci_dev *dev, unsigned int flags,
- resource_size_t *size,
- resource_size_t *align)
-{
- resource_size_t start;
- struct resource *r;
- int i;
-
- pr_devel(" -> CDR %s\n", pci_name(dev));
-
- *size = *align = 0;
-
- /* Clear the resources out and mark them all unset */
- for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
- r = &dev->resource[i];
- if (!(r->flags & flags))
- continue;
- if (r->start) {
- r->end -= r->start;
- r->start = 0;
- }
- r->flags |= IORESOURCE_UNSET;
- }
-
- /* We currently keep all memory resources together, we
- * will handle prefetch & 64-bit separately in the future
- * but for now we stick everybody in M32
- */
- start = 0;
- for (;;) {
- resource_size_t max_size = 0;
- int max_no = -1;
-
- /* Find next biggest resource */
- for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
- r = &dev->resource[i];
- if (!(r->flags & IORESOURCE_UNSET) ||
- !(r->flags & flags))
- continue;
- if (resource_size(r) > max_size) {
- max_size = resource_size(r);
- max_no = i;
- }
- }
- if (max_no < 0)
- break;
- r = &dev->resource[max_no];
- if (max_size > *align)
- *align = max_size;
- *size += max_size;
- r->start = start;
- start += max_size;
- r->end = r->start + max_size - 1;
- r->flags &= ~IORESOURCE_UNSET;
- pr_devel(" -> R%d %016llx..%016llx\n",
- max_no, r->start, r->end);
- }
- pr_devel(" <- CDR %s size=%llx align=%llx\n",
- pci_name(dev), *size, *align);
-}
-
-/* Allocate a resource "wrap" for a given device or bridge and
- * insert it at the right position in the sorted list
- */
-static void __devinit pnv_ioda_add_wrap(struct list_head *list,
- struct pci_bus *bus,
- struct pci_dev *dev,
- resource_size_t size,
- resource_size_t align)
-{
- struct resource_wrap *w1, *w = kzalloc(sizeof(*w), GFP_KERNEL);
-
- w->size = size;
- w->align = align;
- w->dev = dev;
- w->bus = bus;
-
- list_for_each_entry(w1, list, link) {
- if (w1->align < align) {
- list_add_tail(&w->link, &w1->link);
- return;
- }
- }
- list_add_tail(&w->link, list);
-}
-
-/* Offset device resources of a given type */
-static void __devinit pnv_ioda_offset_dev(struct pci_dev *dev,
- unsigned int flags,
- resource_size_t offset)
-{
- struct resource *r;
- int i;
-
- pr_devel(" -> ODR %s [%x] +%016llx\n", pci_name(dev), flags, offset);
-
- for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
- r = &dev->resource[i];
- if (r->flags & flags) {
- dev->resource[i].start += offset;
- dev->resource[i].end += offset;
- }
- }
-
- pr_devel(" <- ODR %s [%x] +%016llx\n", pci_name(dev), flags, offset);
-}
-
-/* Offset bus resources (& all children) of a given type */
-static void __devinit pnv_ioda_offset_bus(struct pci_bus *bus,
- unsigned int flags,
- resource_size_t offset)
-{
- struct resource *r;
- struct pci_dev *dev;
- struct pci_bus *cbus;
- int i;
-
- pr_devel(" -> OBR %s [%x] +%016llx\n",
- bus->self ? pci_name(bus->self) : "root", flags, offset);
-
- pci_bus_for_each_resource(bus, r, i) {
- if (r && (r->flags & flags)) {
- r->start += offset;
- r->end += offset;
- }
- }
- list_for_each_entry(dev, &bus->devices, bus_list)
- pnv_ioda_offset_dev(dev, flags, offset);
- list_for_each_entry(cbus, &bus->children, node)
- pnv_ioda_offset_bus(cbus, flags, offset);
-
- pr_devel(" <- OBR %s [%x]\n",
- bus->self ? pci_name(bus->self) : "root", flags);
-}
-
-/* This is the guts of our IODA resource allocation. This is called
- * recursively for each bus in the system. It calculates all the
- * necessary size and requirements for children and assign them
- * resources such that:
- *
- * - Each function fits in it's own contiguous set of IO/M32
- * segment
- *
- * - All segments behind a P2P bridge are contiguous and obey
- * alignment constraints of those bridges
- */
-static void __devinit pnv_ioda_calc_bus(struct pci_bus *bus, unsigned int flags,
- resource_size_t *size,
- resource_size_t *align)
-{
- struct pci_controller *hose = pci_bus_to_host(bus);
- struct pnv_phb *phb = hose->private_data;
- resource_size_t dev_size, dev_align, start;
- resource_size_t min_align, min_balign;
- struct pci_dev *cdev;
- struct pci_bus *cbus;
- struct list_head head;
- struct resource_wrap *w;
- unsigned int bres;
-
- *size = *align = 0;
-
- pr_devel("-> CBR %s [%x]\n",
- bus->self ? pci_name(bus->self) : "root", flags);
-
- /* Calculate alignment requirements based on the type
- * of resource we are working on
- */
- if (flags & IORESOURCE_IO) {
- bres = 0;
- min_align = phb->ioda.io_segsize;
- min_balign = 0x1000;
- } else {
- bres = 1;
- min_align = phb->ioda.m32_segsize;
- min_balign = 0x100000;
- }
-
- /* Gather all our children resources ordered by alignment */
- INIT_LIST_HEAD(&head);
-
- /* - Busses */
- list_for_each_entry(cbus, &bus->children, node) {
- pnv_ioda_calc_bus(cbus, flags, &dev_size, &dev_align);
- pnv_ioda_add_wrap(&head, cbus, NULL, dev_size, dev_align);
- }
-
- /* - Devices */
- list_for_each_entry(cdev, &bus->devices, bus_list) {
- pnv_ioda_calc_dev(cdev, flags, &dev_size, &dev_align);
- /* Align them to segment size */
- if (dev_align < min_align)
- dev_align = min_align;
- pnv_ioda_add_wrap(&head, NULL, cdev, dev_size, dev_align);
- }
- if (list_empty(&head))
- goto empty;
-
- /* Now we can do two things: assign offsets to them within that
- * level and get our total alignment & size requirements. The
- * assignment algorithm is going to be uber-trivial for now, we
- * can try to be smarter later at filling out holes.
- */
- if (bus->self) {
- /* No offset for downstream bridges */
- start = 0;
- } else {
- /* Offset from the root */
- if (flags & IORESOURCE_IO)
- /* Don't hand out IO 0 */
- start = hose->io_resource.start + 0x1000;
- else
- start = hose->mem_resources[0].start;
- }
- while(!list_empty(&head)) {
- w = list_first_entry(&head, struct resource_wrap, link);
- list_del(&w->link);
- if (w->size) {
- if (start) {
- start = ALIGN(start, w->align);
- if (w->dev)
- pnv_ioda_offset_dev(w->dev,flags,start);
- else if (w->bus)
- pnv_ioda_offset_bus(w->bus,flags,start);
- }
- if (w->align > *align)
- *align = w->align;
- }
- start += w->size;
- kfree(w);
- }
- *size = start;
-
- /* Align and setup bridge resources */
- *align = max_t(resource_size_t, *align,
- max_t(resource_size_t, min_align, min_balign));
- *size = ALIGN(*size,
- max_t(resource_size_t, min_align, min_balign));
- empty:
- /* Only setup P2P's, not the PHB itself */
- if (bus->self) {
- struct resource *res = bus->resource[bres];
-
- if (WARN_ON(res == NULL))
- return;
-
- /*
- * FIXME: We should probably export and call
- * pci_bridge_check_ranges() to properly re-initialize
- * the PCI portion of the flags here, and to detect
- * what the bridge actually supports.
- */
- res->start = 0;
- res->flags = (*size) ? flags : 0;
- res->end = (*size) ? (*size - 1) : 0;
- }
-
- pr_devel("<- CBR %s [%x] *size=%016llx *align=%016llx\n",
- bus->self ? pci_name(bus->self) : "root", flags,*size,*align);
-}
-
static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev)
{
struct device_node *np;
@@ -354,172 +79,6 @@ static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev)
return PCI_DN(np);
}
-static void __devinit pnv_ioda_setup_pe_segments(struct pci_dev *dev)
-{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- struct pnv_phb *phb = hose->private_data;
- struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
- unsigned int pe, i;
- resource_size_t pos;
- struct resource io_res;
- struct resource m32_res;
- struct pci_bus_region region;
- int rc;
-
- /* Anything not referenced in the device-tree gets PE#0 */
- pe = pdn ? pdn->pe_number : 0;
-
- /* Calculate the device min/max */
- io_res.start = m32_res.start = (resource_size_t)-1;
- io_res.end = m32_res.end = 0;
- io_res.flags = IORESOURCE_IO;
- m32_res.flags = IORESOURCE_MEM;
-
- for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
- struct resource *r = NULL;
- if (dev->resource[i].flags & IORESOURCE_IO)
- r = &io_res;
- if (dev->resource[i].flags & IORESOURCE_MEM)
- r = &m32_res;
- if (!r)
- continue;
- if (dev->resource[i].start < r->start)
- r->start = dev->resource[i].start;
- if (dev->resource[i].end > r->end)
- r->end = dev->resource[i].end;
- }
-
- /* Setup IO segments */
- if (io_res.start < io_res.end) {
- pcibios_resource_to_bus(dev, &region, &io_res);
- pos = region.start;
- i = pos / phb->ioda.io_segsize;
- while(i < phb->ioda.total_pe && pos <= region.end) {
- if (phb->ioda.io_segmap[i]) {
- pr_err("%s: Trying to use IO seg #%d which is"
- " already used by PE# %d\n",
- pci_name(dev), i,
- phb->ioda.io_segmap[i]);
- /* XXX DO SOMETHING TO DISABLE DEVICE ? */
- break;
- }
- phb->ioda.io_segmap[i] = pe;
- rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe,
- OPAL_IO_WINDOW_TYPE,
- 0, i);
- if (rc != OPAL_SUCCESS) {
- pr_err("%s: OPAL error %d setting up mapping"
- " for IO seg# %d\n",
- pci_name(dev), rc, i);
- /* XXX DO SOMETHING TO DISABLE DEVICE ? */
- break;
- }
- pos += phb->ioda.io_segsize;
- i++;
- };
- }
-
- /* Setup M32 segments */
- if (m32_res.start < m32_res.end) {
- pcibios_resource_to_bus(dev, &region, &m32_res);
- pos = region.start;
- i = pos / phb->ioda.m32_segsize;
- while(i < phb->ioda.total_pe && pos <= region.end) {
- if (phb->ioda.m32_segmap[i]) {
- pr_err("%s: Trying to use M32 seg #%d which is"
- " already used by PE# %d\n",
- pci_name(dev), i,
- phb->ioda.m32_segmap[i]);
- /* XXX DO SOMETHING TO DISABLE DEVICE ? */
- break;
- }
- phb->ioda.m32_segmap[i] = pe;
- rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe,
- OPAL_M32_WINDOW_TYPE,
- 0, i);
- if (rc != OPAL_SUCCESS) {
- pr_err("%s: OPAL error %d setting up mapping"
- " for M32 seg# %d\n",
- pci_name(dev), rc, i);
- /* XXX DO SOMETHING TO DISABLE DEVICE ? */
- break;
- }
- pos += phb->ioda.m32_segsize;
- i++;
- }
- }
-}
-
-/* Check if a resource still fits in the total IO or M32 range
- * for a given PHB
- */
-static int __devinit pnv_ioda_resource_fit(struct pci_controller *hose,
- struct resource *r)
-{
- struct resource *bounds;
-
- if (r->flags & IORESOURCE_IO)
- bounds = &hose->io_resource;
- else if (r->flags & IORESOURCE_MEM)
- bounds = &hose->mem_resources[0];
- else
- return 1;
-
- if (r->start >= bounds->start && r->end <= bounds->end)
- return 1;
- r->flags = 0;
- return 0;
-}
-
-static void __devinit pnv_ioda_update_resources(struct pci_bus *bus)
-{
- struct pci_controller *hose = pci_bus_to_host(bus);
- struct pci_bus *cbus;
- struct pci_dev *cdev;
- unsigned int i;
-
- /* We used to clear all device enables here. However it looks like
- * clearing MEM enable causes Obsidian (IPR SCS) to go bonkers,
- * and shoot fatal errors to the PHB which in turns fences itself
- * and we can't recover from that ... yet. So for now, let's leave
- * the enables as-is and hope for the best.
- */
-
- /* Check if bus resources fit in our IO or M32 range */
- for (i = 0; bus->self && (i < 2); i++) {
- struct resource *r = bus->resource[i];
- if (r && !pnv_ioda_resource_fit(hose, r))
- pr_err("%s: Bus %d resource %d disabled, no room\n",
- pci_name(bus->self), bus->number, i);
- }
-
- /* Update self if it's not a PHB */
- if (bus->self)
- pci_setup_bridge(bus);
-
- /* Update child devices */
- list_for_each_entry(cdev, &bus->devices, bus_list) {
- /* Check if resource fits, if not, disabled it */
- for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
- struct resource *r = &cdev->resource[i];
- if (!pnv_ioda_resource_fit(hose, r))
- pr_err("%s: Resource %d disabled, no room\n",
- pci_name(cdev), i);
- }
-
- /* Assign segments */
- pnv_ioda_setup_pe_segments(cdev);
-
- /* Update HW BARs */
- for (i = 0; i <= PCI_ROM_RESOURCE; i++)
- pci_update_resource(cdev, i);
- }
-
- /* Update child busses */
- list_for_each_entry(cbus, &bus->children, node)
- pnv_ioda_update_resources(cbus);
-}
-
static int __devinit pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
unsigned long pe;
@@ -547,7 +106,7 @@ static void __devinit pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
* but in the meantime, we need to protect them to avoid warnings
*/
#ifdef CONFIG_PCI_MSI
-static struct pnv_ioda_pe * __devinit __pnv_ioda_get_one_pe(struct pci_dev *dev)
+static struct pnv_ioda_pe * __devinit pnv_ioda_get_pe(struct pci_dev *dev)
{
struct pci_controller *hose = pci_bus_to_host(dev->bus);
struct pnv_phb *phb = hose->private_data;
@@ -559,19 +118,6 @@ static struct pnv_ioda_pe * __devinit __pnv_ioda_get_one_pe(struct pci_dev *dev)
return NULL;
return &phb->ioda.pe_array[pdn->pe_number];
}
-
-static struct pnv_ioda_pe * __devinit pnv_ioda_get_pe(struct pci_dev *dev)
-{
- struct pnv_ioda_pe *pe = __pnv_ioda_get_one_pe(dev);
-
- while (!pe && dev->bus->self) {
- dev = dev->bus->self;
- pe = __pnv_ioda_get_one_pe(dev);
- if (pe)
- pe = pe->bus_pe;
- }
- return pe;
-}
#endif /* CONFIG_PCI_MSI */
static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb,
@@ -588,7 +134,11 @@ static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb,
dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
parent = pe->pbus->self;
- count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
+ if (pe->flags & PNV_IODA_PE_BUS_ALL)
+ count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
+ else
+ count = 1;
+
switch(count) {
case 1: bcomp = OpalPciBusAll; break;
case 2: bcomp = OpalPciBus7Bits; break;
@@ -665,13 +215,13 @@ static void __devinit pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
{
struct pnv_ioda_pe *lpe;
- list_for_each_entry(lpe, &phb->ioda.pe_list, link) {
+ list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
if (lpe->dma_weight < pe->dma_weight) {
- list_add_tail(&pe->link, &lpe->link);
+ list_add_tail(&pe->dma_link, &lpe->dma_link);
return;
}
}
- list_add_tail(&pe->link, &phb->ioda.pe_list);
+ list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
}
static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
@@ -698,6 +248,7 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
return 10;
}
+#if 0
static struct pnv_ioda_pe * __devinit pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
struct pci_controller *hose = pci_bus_to_host(dev->bus);
@@ -766,6 +317,7 @@ static struct pnv_ioda_pe * __devinit pnv_ioda_setup_dev_PE(struct pci_dev *dev)
return pe;
}
+#endif /* Useful for SRIOV case */
static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
{
@@ -783,34 +335,33 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
pdn->pcidev = dev;
pdn->pe_number = pe->pe_number;
pe->dma_weight += pnv_ioda_dma_weight(dev);
- if (dev->subordinate)
+ if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
pnv_ioda_setup_same_PE(dev->subordinate, pe);
}
}
-static void __devinit pnv_ioda_setup_bus_PE(struct pci_dev *dev,
- struct pnv_ioda_pe *ppe)
+/*
+ * There're 2 types of PCI bus sensitive PEs: One that is compromised of
+ * single PCI bus. Another one that contains the primary PCI bus and its
+ * subordinate PCI devices and buses. The second type of PE is normally
+ * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports.
+ */
+static void __devinit pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose->private_data;
- struct pci_bus *bus = dev->subordinate;
struct pnv_ioda_pe *pe;
int pe_num;
- if (!bus) {
- pr_warning("%s: Bridge without a subordinate bus !\n",
- pci_name(dev));
- return;
- }
pe_num = pnv_ioda_alloc_pe(phb);
if (pe_num == IODA_INVALID_PE) {
- pr_warning("%s: Not enough PE# available, disabling bus\n",
- pci_name(dev));
+ pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
+ __func__, pci_domain_nr(bus), bus->number);
return;
}
pe = &phb->ioda.pe_array[pe_num];
- ppe->bus_pe = pe;
+ pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
pe->pbus = bus;
pe->pdev = NULL;
pe->tce32_seg = -1;
@@ -818,8 +369,12 @@ static void __devinit pnv_ioda_setup_bus_PE(struct pci_dev *dev,
pe->rid = bus->busn_res.start << 8;
pe->dma_weight = 0;
- pe_info(pe, "Secondary busses %pR associated with PE\n",
- &bus->busn_res);
+ if (all)
+ pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
+ bus->busn_res.start, bus->busn_res.end, pe_num);
+ else
+ pe_info(pe, "Secondary bus %d associated with PE#%d\n",
+ bus->busn_res.start, pe_num);
if (pnv_ioda_configure_pe(phb, pe)) {
/* XXX What do we do here ? */
@@ -832,6 +387,9 @@ static void __devinit pnv_ioda_setup_bus_PE(struct pci_dev *dev,
/* Associate it with all child devices */
pnv_ioda_setup_same_PE(bus, pe);
+ /* Put PE to the list */
+ list_add_tail(&pe->list, &phb->ioda.pe_list);
+
/* Account for one DMA PE if at least one DMA capable device exist
* below the bridge
*/
@@ -847,17 +405,33 @@ static void __devinit pnv_ioda_setup_bus_PE(struct pci_dev *dev,
static void __devinit pnv_ioda_setup_PEs(struct pci_bus *bus)
{
struct pci_dev *dev;
- struct pnv_ioda_pe *pe;
+
+ pnv_ioda_setup_bus_PE(bus, 0);
list_for_each_entry(dev, &bus->devices, bus_list) {
- pe = pnv_ioda_setup_dev_PE(dev);
- if (pe == NULL)
- continue;
- /* Leaving the PCIe domain ... single PE# */
- if (dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
- pnv_ioda_setup_bus_PE(dev, pe);
- else if (dev->subordinate)
- pnv_ioda_setup_PEs(dev->subordinate);
+ if (dev->subordinate) {
+ if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
+ pnv_ioda_setup_bus_PE(dev->subordinate, 1);
+ else
+ pnv_ioda_setup_PEs(dev->subordinate);
+ }
+ }
+}
+
+/*
+ * Configure PEs so that the downstream PCI buses and devices
+ * could have their associated PE#. Unfortunately, we didn't
+ * figure out the way to identify the PLX bridge yet. So we
+ * simply put the PCI bus and the subordinate behind the root
+ * port to PE# here. The game rule here is expected to be changed
+ * as soon as we can detected PLX bridge correctly.
+ */
+static void __devinit pnv_pci_ioda_setup_PEs(void)
+{
+ struct pci_controller *hose, *tmp;
+
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ pnv_ioda_setup_PEs(hose->bus);
}
}
@@ -999,7 +573,7 @@ static void __devinit pnv_ioda_setup_dma(struct pnv_phb *phb)
remaining = phb->ioda.tce32_count;
tw = phb->ioda.dma_weight;
base = 0;
- list_for_each_entry(pe, &phb->ioda.pe_list, link) {
+ list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
if (!pe->dma_weight)
continue;
if (!remaining) {
@@ -1108,34 +682,151 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
#endif /* CONFIG_PCI_MSI */
-/* This is the starting point of our IODA specific resource
- * allocation process
+/*
+ * This function is supposed to be called on basis of PE from top
+ * to bottom style. So the the I/O or MMIO segment assigned to
+ * parent PE could be overrided by its child PEs if necessary.
*/
-static void __devinit pnv_pci_ioda_fixup_phb(struct pci_controller *hose)
+static void __devinit pnv_ioda_setup_pe_seg(struct pci_controller *hose,
+ struct pnv_ioda_pe *pe)
{
- resource_size_t size, align;
- struct pci_bus *child;
+ struct pnv_phb *phb = hose->private_data;
+ struct pci_bus_region region;
+ struct resource *res;
+ int i, index;
+ int rc;
- /* Associate PEs per functions */
- pnv_ioda_setup_PEs(hose->bus);
+ /*
+ * NOTE: We only care PCI bus based PE for now. For PCI
+ * device based PE, for example SRIOV sensitive VF should
+ * be figured out later.
+ */
+ BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
+
+ pci_bus_for_each_resource(pe->pbus, res, i) {
+ if (!res || !res->flags ||
+ res->start > res->end)
+ continue;
+
+ if (res->flags & IORESOURCE_IO) {
+ region.start = res->start - phb->ioda.io_pci_base;
+ region.end = res->end - phb->ioda.io_pci_base;
+ index = region.start / phb->ioda.io_segsize;
+
+ while (index < phb->ioda.total_pe &&
+ region.start <= region.end) {
+ phb->ioda.io_segmap[index] = pe->pe_number;
+ rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+ pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
+ if (rc != OPAL_SUCCESS) {
+ pr_err("%s: OPAL error %d when mapping IO "
+ "segment #%d to PE#%d\n",
+ __func__, rc, index, pe->pe_number);
+ break;
+ }
+
+ region.start += phb->ioda.io_segsize;
+ index++;
+ }
+ } else if (res->flags & IORESOURCE_MEM) {
+ region.start = res->start -
+ hose->pci_mem_offset -
+ phb->ioda.m32_pci_base;
+ region.end = res->end -
+ hose->pci_mem_offset -
+ phb->ioda.m32_pci_base;
+ index = region.start / phb->ioda.m32_segsize;
+
+ while (index < phb->ioda.total_pe &&
+ region.start <= region.end) {
+ phb->ioda.m32_segmap[index] = pe->pe_number;
+ rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+ pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
+ if (rc != OPAL_SUCCESS) {
+ pr_err("%s: OPAL error %d when mapping M32 "
+ "segment#%d to PE#%d",
+ __func__, rc, index, pe->pe_number);
+ break;
+ }
+
+ region.start += phb->ioda.m32_segsize;
+ index++;
+ }
+ }
+ }
+}
- /* Calculate all resources */
- pnv_ioda_calc_bus(hose->bus, IORESOURCE_IO, &size, &align);
- pnv_ioda_calc_bus(hose->bus, IORESOURCE_MEM, &size, &align);
+static void __devinit pnv_pci_ioda_setup_seg(void)
+{
+ struct pci_controller *tmp, *hose;
+ struct pnv_phb *phb;
+ struct pnv_ioda_pe *pe;
- /* Apply then to HW */
- pnv_ioda_update_resources(hose->bus);
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ phb = hose->private_data;
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ pnv_ioda_setup_pe_seg(hose, pe);
+ }
+ }
+}
- /* Setup DMA */
- pnv_ioda_setup_dma(hose->private_data);
+static void __devinit pnv_pci_ioda_setup_DMA(void)
+{
+ struct pci_controller *hose, *tmp;
+ struct pnv_phb *phb;
- /* Configure PCI Express settings */
- list_for_each_entry(child, &hose->bus->children, node) {
- struct pci_dev *self = child->self;
- if (!self)
- continue;
- pcie_bus_configure_settings(child, self->pcie_mpss);
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ pnv_ioda_setup_dma(hose->private_data);
+
+ /* Mark the PHB initialization done */
+ phb = hose->private_data;
+ phb->initialized = 1;
+ }
+}
+
+static void __devinit pnv_pci_ioda_fixup(void)
+{
+ pnv_pci_ioda_setup_PEs();
+ pnv_pci_ioda_setup_seg();
+ pnv_pci_ioda_setup_DMA();
+}
+
+/*
+ * Returns the alignment for I/O or memory windows for P2P
+ * bridges. That actually depends on how PEs are segmented.
+ * For now, we return I/O or M32 segment size for PE sensitive
+ * P2P bridges. Otherwise, the default values (4KiB for I/O,
+ * 1MiB for memory) will be returned.
+ *
+ * The current PCI bus might be put into one PE, which was
+ * create against the parent PCI bridge. For that case, we
+ * needn't enlarge the alignment so that we can save some
+ * resources.
+ */
+static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
+ unsigned long type)
+{
+ struct pci_dev *bridge;
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ struct pnv_phb *phb = hose->private_data;
+ int num_pci_bridges = 0;
+
+ bridge = bus->self;
+ while (bridge) {
+ if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
+ num_pci_bridges++;
+ if (num_pci_bridges >= 2)
+ return 1;
+ }
+
+ bridge = bridge->bus->self;
}
+
+ /* We need support prefetchable memory window later */
+ if (type & IORESOURCE_MEM)
+ return phb->ioda.m32_segsize;
+
+ return phb->ioda.io_segsize;
}
/* Prevent enabling devices for which we couldn't properly
@@ -1143,10 +834,22 @@ static void __devinit pnv_pci_ioda_fixup_phb(struct pci_controller *hose)
*/
static int __devinit pnv_pci_enable_device_hook(struct pci_dev *dev)
{
- struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ struct pci_dn *pdn;
+
+ /* The function is probably called while the PEs have
+ * not be created yet. For example, resource reassignment
+ * during PCI probe period. We just skip the check if
+ * PEs isn't ready.
+ */
+ if (!phb->initialized)
+ return 0;
+ pdn = pnv_ioda_get_pdn(dev);
if (!pdn || pdn->pe_number == IODA_INVALID_PE)
return -EINVAL;
+
return 0;
}
@@ -1237,9 +940,9 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
/* Allocate aux data & arrays */
size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
m32map_off = size;
- size += phb->ioda.total_pe;
+ size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
iomap_off = size;
- size += phb->ioda.total_pe;
+ size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
pemap_off = size;
size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
aux = alloc_bootmem(size);
@@ -1250,6 +953,7 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
phb->ioda.pe_array = aux + pemap_off;
set_bit(0, phb->ioda.pe_alloc);
+ INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
INIT_LIST_HEAD(&phb->ioda.pe_list);
/* Calculate how many 32-bit TCE segments we have */
@@ -1298,14 +1002,17 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
/* Setup MSI support */
pnv_pci_init_ioda_msis(phb);
- /* We set both PCI_PROBE_ONLY and PCI_REASSIGN_ALL_RSRC. This is an
- * odd combination which essentially means that we skip all resource
- * fixups and assignments in the generic code, and do it all
- * ourselves here
+ /*
+ * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
+ * to let the PCI core do resource assignment. It's supposed
+ * that the PCI core will do correct I/O and MMIO alignment
+ * for the P2P bridge bars so that each PCI bus (excluding
+ * the child P2P bridges) can form individual PE.
*/
- ppc_md.pcibios_fixup_phb = pnv_pci_ioda_fixup_phb;
+ ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
- pci_add_flags(PCI_PROBE_ONLY | PCI_REASSIGN_ALL_RSRC);
+ ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
+ pci_add_flags(PCI_REASSIGN_ALL_RSRC);
/* Reset IODA tables to a clean state */
rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 8bc479634643..7cfb7c883deb 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -17,9 +17,14 @@ enum pnv_phb_model {
};
#define PNV_PCI_DIAG_BUF_SIZE 4096
+#define PNV_IODA_PE_DEV (1 << 0) /* PE has single PCI device */
+#define PNV_IODA_PE_BUS (1 << 1) /* PE has primary PCI bus */
+#define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */
/* Data associated with a PE, including IOMMU tracking etc.. */
struct pnv_ioda_pe {
+ unsigned long flags;
+
/* A PE can be associated with a single device or an
* entire bus (& children). In the former case, pdev
* is populated, in the later case, pbus is.
@@ -40,11 +45,6 @@ struct pnv_ioda_pe {
*/
unsigned int dma_weight;
- /* This is a PCI-E -> PCI-X bridge, this points to the
- * corresponding bus PE
- */
- struct pnv_ioda_pe *bus_pe;
-
/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
int tce32_seg;
int tce32_segcount;
@@ -59,7 +59,8 @@ struct pnv_ioda_pe {
int mve_number;
/* Link in list of PE#s */
- struct list_head link;
+ struct list_head dma_link;
+ struct list_head list;
};
struct pnv_phb {
@@ -68,6 +69,7 @@ struct pnv_phb {
enum pnv_phb_model model;
u64 opal_id;
void __iomem *regs;
+ int initialized;
spinlock_t lock;
#ifdef CONFIG_PCI_MSI
@@ -107,6 +109,11 @@ struct pnv_phb {
unsigned int *io_segmap;
struct pnv_ioda_pe *pe_array;
+ /* Sorted list of used PE's based
+ * on the sequence of creation
+ */
+ struct list_head pe_list;
+
/* Reverse map of PEs, will have to extend if
* we are to support more than 256 PEs, indexed
* bus { bus, devfn }
@@ -125,7 +132,7 @@ struct pnv_phb {
/* Sorted list of used PE's, sorted at
* boot for resource allocation purposes
*/
- struct list_head pe_list;
+ struct list_head pe_dma_list;
} ioda;
};
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index 3124cf791ebb..d00d7b0a3bda 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -43,7 +43,7 @@ enum ps3_lpar_vas_id {
static DEFINE_SPINLOCK(ps3_htab_lock);
-static long ps3_hpte_insert(unsigned long hpte_group, unsigned long va,
+static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn,
unsigned long pa, unsigned long rflags, unsigned long vflags,
int psize, int ssize)
{
@@ -61,7 +61,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long va,
*/
vflags &= ~HPTE_V_SECONDARY;
- hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
+ hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize) | rflags;
spin_lock_irqsave(&ps3_htab_lock, flags);
@@ -75,8 +75,8 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long va,
if (result) {
/* all entries bolted !*/
- pr_info("%s:result=%d va=%lx pa=%lx ix=%lx v=%llx r=%llx\n",
- __func__, result, va, pa, hpte_group, hpte_v, hpte_r);
+ pr_info("%s:result=%d vpn=%lx pa=%lx ix=%lx v=%llx r=%llx\n",
+ __func__, result, vpn, pa, hpte_group, hpte_v, hpte_r);
BUG();
}
@@ -107,7 +107,7 @@ static long ps3_hpte_remove(unsigned long hpte_group)
}
static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp,
- unsigned long va, int psize, int ssize, int local)
+ unsigned long vpn, int psize, int ssize, int local)
{
int result;
u64 hpte_v, want_v, hpte_rs;
@@ -115,7 +115,7 @@ static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp,
unsigned long flags;
long ret;
- want_v = hpte_encode_v(va, psize, ssize);
+ want_v = hpte_encode_v(vpn, psize, ssize);
spin_lock_irqsave(&ps3_htab_lock, flags);
@@ -125,8 +125,8 @@ static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp,
&hpte_rs);
if (result) {
- pr_info("%s: res=%d read va=%lx slot=%lx psize=%d\n",
- __func__, result, va, slot, psize);
+ pr_info("%s: res=%d read vpn=%lx slot=%lx psize=%d\n",
+ __func__, result, vpn, slot, psize);
BUG();
}
@@ -159,7 +159,7 @@ static void ps3_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
panic("ps3_hpte_updateboltedpp() not implemented");
}
-static void ps3_hpte_invalidate(unsigned long slot, unsigned long va,
+static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn,
int psize, int ssize, int local)
{
unsigned long flags;
@@ -170,8 +170,8 @@ static void ps3_hpte_invalidate(unsigned long slot, unsigned long va,
result = lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT, slot, 0, 0);
if (result) {
- pr_info("%s: res=%d va=%lx slot=%lx psize=%d\n",
- __func__, result, va, slot, psize);
+ pr_info("%s: res=%d vpn=%lx slot=%lx psize=%d\n",
+ __func__, result, vpn, slot, psize);
BUG();
}
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 18c168b752da..9a04322b1736 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -728,7 +728,7 @@ static void eeh_add_device_early(struct device_node *dn)
{
struct pci_controller *phb;
- if (!dn || !of_node_to_eeh_dev(dn))
+ if (!of_node_to_eeh_dev(dn))
return;
phb = of_node_to_eeh_dev(dn)->phb;
@@ -817,6 +817,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
/**
* eeh_remove_device - Undo EEH setup for the indicated pci device
* @dev: pci device to be removed
+ * @purge_pe: remove the PE or not
*
* This routine should be called when a device is removed from
* a running system (e.g. by hotplug or dlpar). It unregisters
@@ -824,7 +825,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
* this device will no longer be detected after this call; thus,
* i/o errors affecting this slot may leave this device unusable.
*/
-static void eeh_remove_device(struct pci_dev *dev)
+static void eeh_remove_device(struct pci_dev *dev, int purge_pe)
{
struct eeh_dev *edev;
@@ -843,7 +844,7 @@ static void eeh_remove_device(struct pci_dev *dev)
dev->dev.archdata.edev = NULL;
pci_dev_put(dev);
- eeh_rmv_from_parent_pe(edev);
+ eeh_rmv_from_parent_pe(edev, purge_pe);
eeh_addr_cache_rmv_dev(dev);
eeh_sysfs_remove_device(dev);
}
@@ -851,21 +852,22 @@ static void eeh_remove_device(struct pci_dev *dev)
/**
* eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
* @dev: PCI device
+ * @purge_pe: remove the corresponding PE or not
*
* This routine must be called when a device is removed from the
* running system through hotplug or dlpar. The corresponding
* PCI address cache will be removed.
*/
-void eeh_remove_bus_device(struct pci_dev *dev)
+void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe)
{
struct pci_bus *bus = dev->subordinate;
struct pci_dev *child, *tmp;
- eeh_remove_device(dev);
+ eeh_remove_device(dev, purge_pe);
if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
list_for_each_entry_safe(child, tmp, &bus->devices, bus_list)
- eeh_remove_bus_device(child);
+ eeh_remove_bus_device(child, purge_pe);
}
}
EXPORT_SYMBOL_GPL(eeh_remove_bus_device);
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 8370ce7d5931..a3fefb61097c 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -25,6 +25,7 @@
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
+#include <linux/module.h>
#include <linux/pci.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
@@ -47,6 +48,41 @@ static inline const char *eeh_pcid_name(struct pci_dev *pdev)
return "";
}
+/**
+ * eeh_pcid_get - Get the PCI device driver
+ * @pdev: PCI device
+ *
+ * The function is used to retrieve the PCI device driver for
+ * the indicated PCI device. Besides, we will increase the reference
+ * of the PCI device driver to prevent that being unloaded on
+ * the fly. Otherwise, kernel crash would be seen.
+ */
+static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
+{
+ if (!pdev || !pdev->driver)
+ return NULL;
+
+ if (!try_module_get(pdev->driver->driver.owner))
+ return NULL;
+
+ return pdev->driver;
+}
+
+/**
+ * eeh_pcid_put - Dereference on the PCI device driver
+ * @pdev: PCI device
+ *
+ * The function is called to do dereference on the PCI device
+ * driver of the indicated PCI device.
+ */
+static inline void eeh_pcid_put(struct pci_dev *pdev)
+{
+ if (!pdev || !pdev->driver)
+ return;
+
+ module_put(pdev->driver->driver.owner);
+}
+
#if 0
static void print_device_node_tree(struct pci_dn *pdn, int dent)
{
@@ -128,23 +164,24 @@ static void *eeh_report_error(void *data, void *userdata)
struct eeh_dev *edev = (struct eeh_dev *)data;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
enum pci_ers_result rc, *res = userdata;
- struct pci_driver *driver = dev->driver;
+ struct pci_driver *driver;
/* We might not have the associated PCI device,
* then we should continue for next one.
*/
if (!dev) return NULL;
-
dev->error_state = pci_channel_io_frozen;
- if (!driver)
- return NULL;
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
eeh_disable_irq(dev);
if (!driver->err_handler ||
- !driver->err_handler->error_detected)
+ !driver->err_handler->error_detected) {
+ eeh_pcid_put(dev);
return NULL;
+ }
rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
@@ -152,6 +189,7 @@ static void *eeh_report_error(void *data, void *userdata)
if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+ eeh_pcid_put(dev);
return NULL;
}
@@ -171,12 +209,14 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
- if (!dev) return NULL;
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
- if (!(driver = dev->driver) ||
- !driver->err_handler ||
- !driver->err_handler->mmio_enabled)
+ if (!driver->err_handler ||
+ !driver->err_handler->mmio_enabled) {
+ eeh_pcid_put(dev);
return NULL;
+ }
rc = driver->err_handler->mmio_enabled(dev);
@@ -184,6 +224,7 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+ eeh_pcid_put(dev);
return NULL;
}
@@ -204,16 +245,19 @@ static void *eeh_report_reset(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
- if (!dev || !(driver = dev->driver))
- return NULL;
-
+ if (!dev) return NULL;
dev->error_state = pci_channel_io_normal;
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
+
eeh_enable_irq(dev);
if (!driver->err_handler ||
- !driver->err_handler->slot_reset)
+ !driver->err_handler->slot_reset) {
+ eeh_pcid_put(dev);
return NULL;
+ }
rc = driver->err_handler->slot_reset(dev);
if ((*res == PCI_ERS_RESULT_NONE) ||
@@ -221,6 +265,7 @@ static void *eeh_report_reset(void *data, void *userdata)
if (*res == PCI_ERS_RESULT_DISCONNECT &&
rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+ eeh_pcid_put(dev);
return NULL;
}
@@ -240,20 +285,22 @@ static void *eeh_report_resume(void *data, void *userdata)
struct pci_driver *driver;
if (!dev) return NULL;
-
dev->error_state = pci_channel_io_normal;
- if (!(driver = dev->driver))
- return NULL;
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
eeh_enable_irq(dev);
if (!driver->err_handler ||
- !driver->err_handler->resume)
+ !driver->err_handler->resume) {
+ eeh_pcid_put(dev);
return NULL;
+ }
driver->err_handler->resume(dev);
+ eeh_pcid_put(dev);
return NULL;
}
@@ -272,20 +319,22 @@ static void *eeh_report_failure(void *data, void *userdata)
struct pci_driver *driver;
if (!dev) return NULL;
-
dev->error_state = pci_channel_io_perm_failure;
- if (!(driver = dev->driver))
- return NULL;
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
eeh_disable_irq(dev);
if (!driver->err_handler ||
- !driver->err_handler->error_detected)
+ !driver->err_handler->error_detected) {
+ eeh_pcid_put(dev);
return NULL;
+ }
driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
+ eeh_pcid_put(dev);
return NULL;
}
@@ -305,8 +354,14 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
/* pcibios will clear the counter; save the value */
cnt = pe->freeze_count;
+ /*
+ * We don't remove the corresponding PE instances because
+ * we need the information afterwords. The attached EEH
+ * devices are expected to be attached soon when calling
+ * into pcibios_add_pci_devices().
+ */
if (bus)
- pcibios_remove_pci_devices(bus);
+ __pcibios_remove_pci_devices(bus, 0);
/* Reset the pci controller. (Asserts RST#; resets config space).
* Reconfigure bridges and devices. Don't try to bring the system
diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c
index 904123c7657b..9d35543736ed 100644
--- a/arch/powerpc/platforms/pseries/eeh_pe.c
+++ b/arch/powerpc/platforms/pseries/eeh_pe.c
@@ -99,23 +99,19 @@ static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
{
struct eeh_pe *pe;
- eeh_lock();
-
list_for_each_entry(pe, &eeh_phb_pe, child) {
/*
* Actually, we needn't check the type since
* the PE for PHB has been determined when that
* was created.
*/
- if (pe->type == EEH_PE_PHB &&
+ if ((pe->type & EEH_PE_PHB) &&
pe->phb == phb) {
eeh_unlock();
return pe;
}
}
- eeh_unlock();
-
return NULL;
}
@@ -192,14 +188,21 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
return NULL;
}
+ eeh_lock();
+
/* Traverse root PE */
for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
eeh_pe_for_each_dev(pe, edev) {
ret = fn(edev, flag);
- if (ret) return ret;
+ if (ret) {
+ eeh_unlock();
+ return ret;
+ }
}
}
+ eeh_unlock();
+
return NULL;
}
@@ -219,7 +222,7 @@ static void *__eeh_pe_get(void *data, void *flag)
struct eeh_dev *edev = (struct eeh_dev *)flag;
/* Unexpected PHB PE */
- if (pe->type == EEH_PE_PHB)
+ if (pe->type & EEH_PE_PHB)
return NULL;
/* We prefer PE address */
@@ -251,9 +254,7 @@ static struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
struct eeh_pe *pe;
- eeh_lock();
pe = eeh_pe_traverse(root, __eeh_pe_get, edev);
- eeh_unlock();
return pe;
}
@@ -307,6 +308,8 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent;
+ eeh_lock();
+
/*
* Search the PE has been existing or not according
* to the PE address. If that has been existing, the
@@ -314,8 +317,9 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* components.
*/
pe = eeh_pe_get(edev);
- if (pe) {
+ if (pe && !(pe->type & EEH_PE_INVALID)) {
if (!edev->pe_config_addr) {
+ eeh_unlock();
pr_err("%s: PE with addr 0x%x already exists\n",
__func__, edev->config_addr);
return -EEXIST;
@@ -327,15 +331,36 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
/* Put the edev to PE */
list_add_tail(&edev->list, &pe->edevs);
+ eeh_unlock();
pr_debug("EEH: Add %s to Bus PE#%x\n",
edev->dn->full_name, pe->addr);
return 0;
+ } else if (pe && (pe->type & EEH_PE_INVALID)) {
+ list_add_tail(&edev->list, &pe->edevs);
+ edev->pe = pe;
+ /*
+ * We're running to here because of PCI hotplug caused by
+ * EEH recovery. We need clear EEH_PE_INVALID until the top.
+ */
+ parent = pe;
+ while (parent) {
+ if (!(parent->type & EEH_PE_INVALID))
+ break;
+ parent->type &= ~EEH_PE_INVALID;
+ parent = parent->parent;
+ }
+ eeh_unlock();
+ pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
+ edev->dn->full_name, pe->addr, pe->parent->addr);
+
+ return 0;
}
/* Create a new EEH PE */
pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
if (!pe) {
+ eeh_unlock();
pr_err("%s: out of memory!\n", __func__);
return -ENOMEM;
}
@@ -352,6 +377,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
if (!parent) {
parent = eeh_phb_pe_get(edev->phb);
if (!parent) {
+ eeh_unlock();
pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
__func__, edev->phb->global_number);
edev->pe = NULL;
@@ -368,6 +394,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
list_add_tail(&pe->child, &parent->child_list);
list_add_tail(&edev->list, &pe->edevs);
edev->pe = pe;
+ eeh_unlock();
pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
edev->dn->full_name, pe->addr, pe->parent->addr);
@@ -377,15 +404,17 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
/**
* eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
* @edev: EEH device
+ * @purge_pe: remove PE or not
*
* The PE hierarchy tree might be changed when doing PCI hotplug.
* Also, the PCI devices or buses could be removed from the system
* during EEH recovery. So we have to call the function remove the
* corresponding PE accordingly if necessary.
*/
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
+int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe)
{
- struct eeh_pe *pe, *parent;
+ struct eeh_pe *pe, *parent, *child;
+ int cnt;
if (!edev->pe) {
pr_warning("%s: No PE found for EEH device %s\n",
@@ -393,6 +422,8 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
return -EEXIST;
}
+ eeh_lock();
+
/* Remove the EEH device */
pe = edev->pe;
edev->pe = NULL;
@@ -406,18 +437,39 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
*/
while (1) {
parent = pe->parent;
- if (pe->type == EEH_PE_PHB)
+ if (pe->type & EEH_PE_PHB)
break;
- if (list_empty(&pe->edevs) &&
- list_empty(&pe->child_list)) {
- list_del(&pe->child);
- kfree(pe);
+ if (purge_pe) {
+ if (list_empty(&pe->edevs) &&
+ list_empty(&pe->child_list)) {
+ list_del(&pe->child);
+ kfree(pe);
+ } else {
+ break;
+ }
+ } else {
+ if (list_empty(&pe->edevs)) {
+ cnt = 0;
+ list_for_each_entry(child, &pe->child_list, child) {
+ if (!(pe->type & EEH_PE_INVALID)) {
+ cnt++;
+ break;
+ }
+ }
+
+ if (!cnt)
+ pe->type |= EEH_PE_INVALID;
+ else
+ break;
+ }
}
pe = parent;
}
+ eeh_unlock();
+
return 0;
}
@@ -463,7 +515,9 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
*/
void eeh_pe_state_mark(struct eeh_pe *pe, int state)
{
+ eeh_lock();
eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
+ eeh_unlock();
}
/**
@@ -497,7 +551,9 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
*/
void eeh_pe_state_clear(struct eeh_pe *pe, int state)
{
+ eeh_lock();
eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
+ eeh_unlock();
}
/**
@@ -559,6 +615,10 @@ static void *eeh_restore_one_device_bars(void *data, void *flag)
*/
void eeh_pe_restore_bars(struct eeh_pe *pe)
{
+ /*
+ * We needn't take the EEH lock since eeh_pe_dev_traverse()
+ * will take that.
+ */
eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL);
}
@@ -578,14 +638,18 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
struct eeh_dev *edev;
struct pci_dev *pdev;
- if (pe->type == EEH_PE_PHB) {
+ eeh_lock();
+
+ if (pe->type & EEH_PE_PHB) {
bus = pe->phb->bus;
- } else if (pe->type == EEH_PE_BUS) {
+ } else if (pe->type & EEH_PE_BUS) {
edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
pdev = eeh_dev_to_pci_dev(edev);
if (pdev)
bus = pdev->bus;
}
+ eeh_unlock();
+
return bus;
}
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 177055d0186b..0da39fed355a 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -107,9 +107,9 @@ void vpa_init(int cpu)
}
static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
- unsigned long va, unsigned long pa,
- unsigned long rflags, unsigned long vflags,
- int psize, int ssize)
+ unsigned long vpn, unsigned long pa,
+ unsigned long rflags, unsigned long vflags,
+ int psize, int ssize)
{
unsigned long lpar_rc;
unsigned long flags;
@@ -117,11 +117,11 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long hpte_v, hpte_r;
if (!(vflags & HPTE_V_BOLTED))
- pr_devel("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
- "rflags=%lx, vflags=%lx, psize=%d)\n",
- hpte_group, va, pa, rflags, vflags, psize);
+ pr_devel("hpte_insert(group=%lx, vpn=%016lx, "
+ "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n",
+ hpte_group, vpn, pa, rflags, vflags, psize);
- hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
+ hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(pa, psize) | rflags;
if (!(vflags & HPTE_V_BOLTED))
@@ -226,22 +226,6 @@ static void pSeries_lpar_hptab_clear(void)
}
/*
- * This computes the AVPN and B fields of the first dword of a HPTE,
- * for use when we want to match an existing PTE. The bottom 7 bits
- * of the returned value are zero.
- */
-static inline unsigned long hpte_encode_avpn(unsigned long va, int psize,
- int ssize)
-{
- unsigned long v;
-
- v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
- v <<= HPTE_V_AVPN_SHIFT;
- v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
- return v;
-}
-
-/*
* NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
* the low 3 bits of flags happen to line up. So no transform is needed.
* We can probably optimize here and assume the high bits of newpp are
@@ -249,14 +233,14 @@ static inline unsigned long hpte_encode_avpn(unsigned long va, int psize,
*/
static long pSeries_lpar_hpte_updatepp(unsigned long slot,
unsigned long newpp,
- unsigned long va,
+ unsigned long vpn,
int psize, int ssize, int local)
{
unsigned long lpar_rc;
unsigned long flags = (newpp & 7) | H_AVPN;
unsigned long want_v;
- want_v = hpte_encode_avpn(va, psize, ssize);
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
want_v, slot, flags, psize);
@@ -294,15 +278,15 @@ static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
return dword0;
}
-static long pSeries_lpar_hpte_find(unsigned long va, int psize, int ssize)
+static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize)
{
unsigned long hash;
unsigned long i;
long slot;
unsigned long want_v, hpte_v;
- hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize);
- want_v = hpte_encode_avpn(va, psize, ssize);
+ hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
/* Bolted entries are always in the primary group */
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -322,12 +306,13 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
unsigned long ea,
int psize, int ssize)
{
- unsigned long lpar_rc, slot, vsid, va, flags;
+ unsigned long vpn;
+ unsigned long lpar_rc, slot, vsid, flags;
vsid = get_kernel_vsid(ea, ssize);
- va = hpt_va(ea, vsid, ssize);
+ vpn = hpt_vpn(ea, vsid, ssize);
- slot = pSeries_lpar_hpte_find(va, psize, ssize);
+ slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
BUG_ON(slot == -1);
flags = newpp & 7;
@@ -336,17 +321,17 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
BUG_ON(lpar_rc != H_SUCCESS);
}
-static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
+static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
int psize, int ssize, int local)
{
unsigned long want_v;
unsigned long lpar_rc;
unsigned long dummy1, dummy2;
- pr_devel(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
- slot, va, psize, local);
+ pr_devel(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
+ slot, vpn, psize, local);
- want_v = hpte_encode_avpn(va, psize, ssize);
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2);
if (lpar_rc == H_NOT_FOUND)
return;
@@ -357,15 +342,16 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
static void pSeries_lpar_hpte_removebolted(unsigned long ea,
int psize, int ssize)
{
- unsigned long slot, vsid, va;
+ unsigned long vpn;
+ unsigned long slot, vsid;
vsid = get_kernel_vsid(ea, ssize);
- va = hpt_va(ea, vsid, ssize);
+ vpn = hpt_vpn(ea, vsid, ssize);
- slot = pSeries_lpar_hpte_find(va, psize, ssize);
+ slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
BUG_ON(slot == -1);
- pSeries_lpar_hpte_invalidate(slot, va, psize, ssize, 0);
+ pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0);
}
/* Flag bits for H_BULK_REMOVE */
@@ -381,12 +367,12 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
*/
static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
{
+ unsigned long vpn;
unsigned long i, pix, rc;
unsigned long flags = 0;
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
unsigned long param[9];
- unsigned long va;
unsigned long hash, index, shift, hidx, slot;
real_pte_t pte;
int psize, ssize;
@@ -398,21 +384,21 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
ssize = batch->ssize;
pix = 0;
for (i = 0; i < number; i++) {
- va = batch->vaddr[i];
+ vpn = batch->vpn[i];
pte = batch->pte[i];
- pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
- hash = hpt_hash(va, shift, ssize);
+ pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+ hash = hpt_hash(vpn, shift, ssize);
hidx = __rpte_to_hidx(pte, index);
if (hidx & _PTEIDX_SECONDARY)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
- pSeries_lpar_hpte_invalidate(slot, va, psize,
+ pSeries_lpar_hpte_invalidate(slot, vpn, psize,
ssize, local);
} else {
param[pix] = HBR_REQUEST | HBR_AVPN | slot;
- param[pix+1] = hpte_encode_avpn(va, psize,
+ param[pix+1] = hpte_encode_avpn(vpn, psize,
ssize);
pix += 2;
if (pix == 8) {
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 3ccebc83dc02..261a577a3dd2 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -65,27 +65,43 @@ pcibios_find_pci_bus(struct device_node *dn)
EXPORT_SYMBOL_GPL(pcibios_find_pci_bus);
/**
- * pcibios_remove_pci_devices - remove all devices under this bus
+ * __pcibios_remove_pci_devices - remove all devices under this bus
+ * @bus: the indicated PCI bus
+ * @purge_pe: destroy the PE on removal of PCI devices
*
* Remove all of the PCI devices under this bus both from the
* linux pci device tree, and from the powerpc EEH address cache.
+ * By default, the corresponding PE will be destroied during the
+ * normal PCI hotplug path. For PCI hotplug during EEH recovery,
+ * the corresponding PE won't be destroied and deallocated.
*/
-void pcibios_remove_pci_devices(struct pci_bus *bus)
+void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe)
{
- struct pci_dev *dev, *tmp;
+ struct pci_dev *dev, *tmp;
struct pci_bus *child_bus;
/* First go down child busses */
list_for_each_entry(child_bus, &bus->children, node)
- pcibios_remove_pci_devices(child_bus);
+ __pcibios_remove_pci_devices(child_bus, purge_pe);
pr_debug("PCI: Removing devices on bus %04x:%02x\n",
- pci_domain_nr(bus), bus->number);
+ pci_domain_nr(bus), bus->number);
list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
pr_debug(" * Removing %s...\n", pci_name(dev));
- eeh_remove_bus_device(dev);
- pci_stop_and_remove_bus_device(dev);
- }
+ eeh_remove_bus_device(dev, purge_pe);
+ pci_stop_and_remove_bus_device(dev);
+ }
+}
+
+/**
+ * pcibios_remove_pci_devices - remove all devices under this bus
+ *
+ * Remove all of the PCI devices under this bus both from the
+ * linux pci device tree, and from the powerpc EEH address cache.
+ */
+void pcibios_remove_pci_devices(struct pci_bus *bus)
+{
+ __pcibios_remove_pci_devices(bus, 1);
}
EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 987f441525cb..3a56a639a92e 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -60,6 +60,8 @@ static cpumask_t cpus_in_xmon = CPU_MASK_NONE;
static unsigned long xmon_taken = 1;
static int xmon_owner;
static int xmon_gate;
+#else
+#define xmon_owner 0
#endif /* CONFIG_SMP */
static unsigned long in_xmon __read_mostly = 0;
@@ -202,7 +204,13 @@ Commands:\n\
di dump instructions\n\
df dump float values\n\
dd dump double values\n\
- dl dump the kernel log buffer\n\
+ dl dump the kernel log buffer\n"
+#ifdef CONFIG_PPC64
+ "\
+ dp[#] dump paca for current cpu, or cpu #\n\
+ dpa dump paca for all possible cpus\n"
+#endif
+ "\
dr dump stream of raw bytes\n\
e print exception information\n\
f flush cache\n\
@@ -2009,6 +2017,95 @@ static void xmon_rawdump (unsigned long adrs, long ndump)
printf("\n");
}
+#ifdef CONFIG_PPC64
+static void dump_one_paca(int cpu)
+{
+ struct paca_struct *p;
+
+ if (setjmp(bus_error_jmp) != 0) {
+ printf("*** Error dumping paca for cpu 0x%x!\n", cpu);
+ return;
+ }
+
+ catch_memory_errors = 1;
+ sync();
+
+ p = &paca[cpu];
+
+ printf("paca for cpu 0x%x @ %p:\n", cpu, p);
+
+ printf(" %-*s = %s\n", 16, "possible", cpu_possible(cpu) ? "yes" : "no");
+ printf(" %-*s = %s\n", 16, "present", cpu_present(cpu) ? "yes" : "no");
+ printf(" %-*s = %s\n", 16, "online", cpu_online(cpu) ? "yes" : "no");
+
+#define DUMP(paca, name, format) \
+ printf(" %-*s = %#-*"format"\t(0x%lx)\n", 16, #name, 18, paca->name, \
+ offsetof(struct paca_struct, name));
+
+ DUMP(p, lock_token, "x");
+ DUMP(p, paca_index, "x");
+ DUMP(p, kernel_toc, "lx");
+ DUMP(p, kernelbase, "lx");
+ DUMP(p, kernel_msr, "lx");
+#ifdef CONFIG_PPC_STD_MMU_64
+ DUMP(p, stab_real, "lx");
+ DUMP(p, stab_addr, "lx");
+#endif
+ DUMP(p, emergency_sp, "p");
+ DUMP(p, data_offset, "lx");
+ DUMP(p, hw_cpu_id, "x");
+ DUMP(p, cpu_start, "x");
+ DUMP(p, kexec_state, "x");
+ DUMP(p, __current, "p");
+ DUMP(p, kstack, "lx");
+ DUMP(p, stab_rr, "lx");
+ DUMP(p, saved_r1, "lx");
+ DUMP(p, trap_save, "x");
+ DUMP(p, soft_enabled, "x");
+ DUMP(p, irq_happened, "x");
+ DUMP(p, io_sync, "x");
+ DUMP(p, irq_work_pending, "x");
+ DUMP(p, nap_state_lost, "x");
+
+#undef DUMP
+
+ catch_memory_errors = 0;
+ sync();
+}
+
+static void dump_all_pacas(void)
+{
+ int cpu;
+
+ if (num_possible_cpus() == 0) {
+ printf("No possible cpus, use 'dp #' to dump individual cpus\n");
+ return;
+ }
+
+ for_each_possible_cpu(cpu)
+ dump_one_paca(cpu);
+}
+
+static void dump_pacas(void)
+{
+ unsigned long num;
+ int c;
+
+ c = inchar();
+ if (c == 'a') {
+ dump_all_pacas();
+ return;
+ }
+
+ termch = c; /* Put c back, it wasn't 'a' */
+
+ if (scanhex(&num))
+ dump_one_paca(num);
+ else
+ dump_one_paca(xmon_owner);
+}
+#endif
+
#define isxdigit(c) (('0' <= (c) && (c) <= '9') \
|| ('a' <= (c) && (c) <= 'f') \
|| ('A' <= (c) && (c) <= 'F'))
@@ -2018,6 +2115,14 @@ dump(void)
int c;
c = inchar();
+
+#ifdef CONFIG_PPC64
+ if (c == 'p') {
+ dump_pacas();
+ return;
+ }
+#endif
+
if ((isxdigit(c) && c != 'f' && c != 'd') || c == '\n')
termch = c;
scanhex((void *)&adrs);