From a74f350b5ce6d8dd1847aa1191ea177fff025567 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 2 Mar 2013 04:06:30 +0000 Subject: powerpc: Remove unused BITOP_LE_SWIZZLE macro The BITOP_LE_SWIZZLE macro was used in the little-endian bitops functions for powerpc. But these functions were converted to generic bitops and the BITOP_LE_SWIZZLE is not used anymore. Signed-off-by: Akinobu Mita Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/bitops.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index ef918a2328bb..08bd299c75b1 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -52,8 +52,6 @@ #define smp_mb__before_clear_bit() smp_mb() #define smp_mb__after_clear_bit() smp_mb() -#define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) - /* Macro for generating the ***_bits() functions */ #define DEFINE_BITOP(fn, op, prefix, postfix) \ static __inline__ void fn(unsigned long mask, \ -- cgit v1.2.3 From 8170a83f15eeca9a84ff895f1a89b58918425a3f Mon Sep 17 00:00:00 2001 From: Tony Breeds Date: Mon, 4 Mar 2013 15:57:30 +0000 Subject: powerpc: Wireup the kcmp syscall to sys_ni Since kmp takes 2 unsigned long args there should be a compat wrapper. Since one isn't provided I think it's safer just to hook this up to not implemented. If we need it later we can do it properly then. Signed-off-by: Tony Breeds Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/systbl.h | 1 + arch/powerpc/include/asm/unistd.h | 2 +- arch/powerpc/include/uapi/asm/unistd.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 535b6d8a41cc..ebbec52d21bd 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -358,3 +358,4 @@ SYSCALL_SPU(setns) COMPAT_SYS(process_vm_readv) COMPAT_SYS(process_vm_writev) SYSCALL(finit_module) +SYSCALL(ni_syscall) /* sys_kcmp */ diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index f25b5c45c435..1487f0f12293 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include -#define __NR_syscalls 354 +#define __NR_syscalls 355 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 8c478c6c6b1e..74cb4d72d673 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -376,6 +376,7 @@ #define __NR_process_vm_readv 351 #define __NR_process_vm_writev 352 #define __NR_finit_module 353 +#define __NR_kcmp 354 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ -- cgit v1.2.3 From fa759e9b0984748cf626aac59bca60bdab42c644 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 4 Mar 2013 19:45:51 +0000 Subject: powerpc: Add DSCR FSCR register bit definition This sets the DSCR (Data Stream Control Register) in the FSCR (Facility Status & Control Register). Also harmonise TAR (Target Address Register) FSCR bit definition too. 
Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/reg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e66586122030..c9c67fc888c9 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -266,7 +266,8 @@ #define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */ #define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ #define SPRN_FSCR 0x099 /* Facility Status & Control Register */ -#define FSCR_TAR (1<<8) /* Enable Target Adress Register */ +#define FSCR_TAR (1 << (63-55)) /* Enable Target Address Register */ +#define FSCR_DSCR (1 << (63-61)) /* Enable Data Stream Control Register */ #define SPRN_TAR 0x32f /* Target Address Register */ #define SPRN_LPCR 0x13E /* LPAR Control Register */ #define LPCR_VPM0 (1ul << (63-0)) -- cgit v1.2.3 From e39d1a471484662620651cd9520250d33843f235 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Mar 2013 03:34:53 +0000 Subject: powerpc: Make VSID_BITS* dependency explicit VSID_BITS and VSID_BITS_1T depends on the context bits and user esid bits. Make the dependency explicit Acked-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt CC: [v3.8] --- arch/powerpc/include/asm/mmu-hash64.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 2fdb47a19efd..5f8c2bd58818 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -381,21 +381,22 @@ extern void slb_set_size(u16 size); * hash collisions. */ +#define CONTEXT_BITS 19 +#define USER_ESID_BITS 18 +#define USER_ESID_BITS_1T 6 + /* * This should be computed such that protovosid * vsid_mulitplier * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus */ #define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */ -#define VSID_BITS_256M 38 +#define VSID_BITS_256M (CONTEXT_BITS + USER_ESID_BITS + 1) #define VSID_MODULUS_256M ((1UL< Date: Wed, 13 Mar 2013 03:34:54 +0000 Subject: powerpc: Update kernel VSID range This patch change the kernel VSID range so that we limit VSID_BITS to 37. This enables us to support 64TB with 65 bit VA (37+28). Without this patch we have boot hangs on platforms that only support 65 bit VA. With this patch we now have proto vsid generated as below: We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated from mmu context id and effective segment id of the address. 
For user processes max context id is limited to ((1ul << 19) - 5) for kernel space, we use the top 4 context ids to map address as below 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ] 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ] 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ] 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ] Acked-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Tested-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt CC: [v3.8] --- arch/powerpc/include/asm/mmu-hash64.h | 115 +++++++++++++++++----------------- arch/powerpc/kernel/exceptions-64s.S | 34 +++++++--- arch/powerpc/mm/hash_utils_64.c | 20 ++++-- arch/powerpc/mm/mmu_context_hash64.c | 11 +--- arch/powerpc/mm/slb_low.S | 50 +++++++-------- arch/powerpc/mm/tlb_hash64.c | 2 +- 6 files changed, 126 insertions(+), 106 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 5f8c2bd58818..a32461f9d825 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -343,17 +343,16 @@ extern void slb_set_size(u16 size); /* * VSID allocation (256MB segment) * - * We first generate a 38-bit "proto-VSID". For kernel addresses this - * is equal to the ESID | 1 << 37, for user addresses it is: - * (context << USER_ESID_BITS) | (esid & ((1U << USER_ESID_BITS) - 1) + * We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated + * from mmu context id and effective segment id of the address. * - * This splits the proto-VSID into the below range - * 0 - (2^(CONTEXT_BITS + USER_ESID_BITS) - 1) : User proto-VSID range - * 2^(CONTEXT_BITS + USER_ESID_BITS) - 2^(VSID_BITS) : Kernel proto-VSID range - * - * We also have CONTEXT_BITS + USER_ESID_BITS = VSID_BITS - 1 - * That is, we assign half of the space to user processes and half - * to the kernel. + * For user processes max context id is limited to ((1ul << 19) - 5) + * for kernel space, we use the top 4 context ids to map address as below + * NOTE: each context only support 64TB now. + * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ] + * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ] + * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ] + * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ] * * The proto-VSIDs are then scrambled into real VSIDs with the * multiplicative hash: @@ -363,38 +362,45 @@ extern void slb_set_size(u16 size); * VSID_MULTIPLIER is prime, so in particular it is * co-prime to VSID_MODULUS, making this a 1:1 scrambling function. * Because the modulus is 2^n-1 we can compute it efficiently without - * a divide or extra multiply (see below). - * - * This scheme has several advantages over older methods: + * a divide or extra multiply (see below). The scramble function gives + * robust scattering in the hash table (at least based on some initial + * results). * - * - We have VSIDs allocated for every kernel address - * (i.e. everything above 0xC000000000000000), except the very top - * segment, which simplifies several things. + * We also consider VSID 0 special. We use VSID 0 for slb entries mapping + * bad address. This enables us to consolidate bad address handling in + * hash_page. * - * - We allow for USER_ESID_BITS significant bits of ESID and - * CONTEXT_BITS bits of context for user addresses. - * i.e. 64T (46 bits) of address space for up to half a million contexts. 
- * - * - The scramble function gives robust scattering in the hash - * table (at least based on some initial results). The previous - * method was more susceptible to pathological cases giving excessive - * hash collisions. + * We also need to avoid the last segment of the last context, because that + * would give a protovsid of 0x1fffffffff. That will result in a VSID 0 + * because of the modulo operation in vsid scramble. But the vmemmap + * (which is what uses region 0xf) will never be close to 64TB in size + * (it's 56 bytes per page of system memory). */ #define CONTEXT_BITS 19 #define USER_ESID_BITS 18 #define USER_ESID_BITS_1T 6 +/* + * 256MB segment + * The proto-VSID space has 2^(CONTEX_BITS + USER_ESID_BITS) - 1 segments + * available for user + kernel mapping. The top 4 contexts are used for + * kernel mapping. Each segment contains 2^28 bytes. Each + * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts + * (19 == 37 + 28 - 46). + */ +#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 5) + /* * This should be computed such that protovosid * vsid_mulitplier * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus */ #define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */ -#define VSID_BITS_256M (CONTEXT_BITS + USER_ESID_BITS + 1) +#define VSID_BITS_256M (CONTEXT_BITS + USER_ESID_BITS) #define VSID_MODULUS_256M ((1UL<= \ * 2^36-1, then r3+1 has the 2^36 bit set. So, if r3+1 has \ * the bit clear, r3 already has the answer we want, if it \ @@ -514,34 +521,6 @@ typedef struct { }) #endif /* 1 */ -/* - * This is only valid for addresses >= PAGE_OFFSET - * The proto-VSID space is divided into two class - * User: 0 to 2^(CONTEXT_BITS + USER_ESID_BITS) -1 - * kernel: 2^(CONTEXT_BITS + USER_ESID_BITS) to 2^(VSID_BITS) - 1 - * - * With KERNEL_START at 0xc000000000000000, the proto vsid for - * the kernel ends up with 0xc00000000 (36 bits). With 64TB - * support we need to have kernel proto-VSID in the - * [2^37 to 2^38 - 1] range due to the increased USER_ESID_BITS. - */ -static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) -{ - unsigned long proto_vsid; - /* - * We need to make sure proto_vsid for the kernel is - * >= 2^(CONTEXT_BITS + USER_ESID_BITS[_1T]) - */ - if (ssize == MMU_SEGSIZE_256M) { - proto_vsid = ea >> SID_SHIFT; - proto_vsid |= (1UL << (CONTEXT_BITS + USER_ESID_BITS)); - return vsid_scramble(proto_vsid, 256M); - } - proto_vsid = ea >> SID_SHIFT_1T; - proto_vsid |= (1UL << (CONTEXT_BITS + USER_ESID_BITS_1T)); - return vsid_scramble(proto_vsid, 1T); -} - /* Returns the segment size indicator for a user address */ static inline int user_segment_size(unsigned long addr) { @@ -551,10 +530,15 @@ static inline int user_segment_size(unsigned long addr) return MMU_SEGSIZE_256M; } -/* This is only valid for user addresses (which are below 2^44) */ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, int ssize) { + /* + * Bad address. 
We return VSID 0 for that + */ + if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) + return 0; + if (ssize == MMU_SEGSIZE_256M) return vsid_scramble((context << USER_ESID_BITS) | (ea >> SID_SHIFT), 256M); @@ -562,6 +546,25 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, | (ea >> SID_SHIFT_1T), 1T); } +/* + * This is only valid for addresses >= PAGE_OFFSET + * + * For kernel space, we use the top 4 context ids to map address as below + * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ] + * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ] + * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ] + * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ] + */ +static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) +{ + unsigned long context; + + /* + * kernel take the top 4 context from the available range + */ + context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1; + return get_vsid(context, ea, ssize); +} #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_MMU_HASH64_H_ */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 87ef8f5ee5bc..b112359ea7a8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1452,20 +1452,36 @@ do_ste_alloc: _GLOBAL(do_stab_bolted) stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */ + mfspr r11,SPRN_DAR /* ea */ + /* + * check for bad kernel/user address + * (ea & ~REGION_MASK) >= PGTABLE_RANGE + */ + rldicr. r9,r11,4,(63 - 46 - 4) + li r9,0 /* VSID = 0 for bad address */ + bne- 0f + + /* + * Calculate VSID: + * This is the kernel vsid, we take the top for context from + * the range. context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 + * Here we know that (ea >> 60) == 0xc + */ + lis r9,(MAX_USER_CONTEXT + 1)@ha + addi r9,r9,(MAX_USER_CONTEXT + 1)@l + + srdi r10,r11,SID_SHIFT + rldimi r10,r9,USER_ESID_BITS,0 /* proto vsid */ + ASM_VSID_SCRAMBLE(r10, r9, 256M) + rldic r9,r10,12,16 /* r9 = vsid << 12 */ + +0: /* Hash to the primary group */ ld r10,PACASTABVIRT(r13) - mfspr r11,SPRN_DAR - srdi r11,r11,28 + srdi r11,r11,SID_SHIFT rldimi r10,r11,7,52 /* r10 = first ste of the group */ - /* Calculate VSID */ - /* This is a kernel address, so protovsid = ESID | 1 << 37 */ - li r9,0x1 - rldimi r11,r9,(CONTEXT_BITS + USER_ESID_BITS),0 - ASM_VSID_SCRAMBLE(r11, r9, 256M) - rldic r9,r11,12,16 /* r9 = vsid << 12 */ - /* Search the primary group for a free entry */ 1: ld r11,0(r10) /* Test valid bit of the current ste */ andi. r11,r11,0x80 diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 6ec6c1997b3a..f410c3e12c1e 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -195,6 +195,11 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, unsigned long vpn = hpt_vpn(vaddr, vsid, ssize); unsigned long tprot = prot; + /* + * If we hit a bad address return error. 
+ */ + if (!vsid) + return -1; /* Make kernel text executable */ if (overlaps_kernel_text(vaddr, vaddr + step)) tprot &= ~HPTE_R_N; @@ -924,11 +929,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", ea, access, trap); - if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) { - DBG_LOW(" out of pgtable range !\n"); - return 1; - } - /* Get region & vsid */ switch (REGION_ID(ea)) { case USER_REGION_ID: @@ -959,6 +959,11 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) } DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid); + /* Bad address. */ + if (!vsid) { + DBG_LOW("Bad address!\n"); + return 1; + } /* Get pgdir */ pgdir = mm->pgd; if (pgdir == NULL) @@ -1128,6 +1133,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Get VSID */ ssize = user_segment_size(ea); vsid = get_vsid(mm->context.id, ea, ssize); + if (!vsid) + return; /* Hash doesn't like irqs */ local_irq_save(flags); @@ -1235,6 +1242,9 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + /* Don't create HPTE entries for bad address */ + if (!vsid) + return; ret = ppc_md.hpte_insert(hpteg, vpn, __pa(vaddr), mode, HPTE_V_BOLTED, mmu_linear_psize, mmu_kernel_ssize); diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index 40bc5b0ace54..d1d1b92c5b99 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -29,15 +29,6 @@ static DEFINE_SPINLOCK(mmu_context_lock); static DEFINE_IDA(mmu_context_ida); -/* - * 256MB segment - * The proto-VSID space has 2^(CONTEX_BITS + USER_ESID_BITS) - 1 segments - * available for user mappings. Each segment contains 2^28 bytes. Each - * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts - * (19 == 37 + 28 - 46). - */ -#define MAX_CONTEXT ((1UL << CONTEXT_BITS) - 1) - int __init_new_context(void) { int index; @@ -56,7 +47,7 @@ again: else if (err) return err; - if (index > MAX_CONTEXT) { + if (index > MAX_USER_CONTEXT) { spin_lock(&mmu_context_lock); ida_remove(&mmu_context_ida, index); spin_unlock(&mmu_context_lock); diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 1a16ca227757..77aafaa1ab09 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -31,10 +31,15 @@ * No other registers are examined or changed. */ _GLOBAL(slb_allocate_realmode) - /* r3 = faulting address */ + /* + * check for bad kernel/user address + * (ea & ~REGION_MASK) >= PGTABLE_RANGE + */ + rldicr. r9,r3,4,(63 - 46 - 4) + bne- 8f srdi r9,r3,60 /* get region */ - srdi r10,r3,28 /* get esid */ + srdi r10,r3,SID_SHIFT /* get esid */ cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */ /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */ @@ -56,12 +61,14 @@ _GLOBAL(slb_allocate_realmode) */ _GLOBAL(slb_miss_kernel_load_linear) li r11,0 - li r9,0x1 /* - * for 1T we shift 12 bits more. slb_finish_load_1T will do - * the necessary adjustment + * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 + * r9 = region id. 
*/ - rldimi r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0 + addis r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha + addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l + + BEGIN_FTR_SECTION b slb_finish_load END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) @@ -91,24 +98,19 @@ _GLOBAL(slb_miss_kernel_load_vmemmap) _GLOBAL(slb_miss_kernel_load_io) li r11,0 6: - li r9,0x1 /* - * for 1T we shift 12 bits more. slb_finish_load_1T will do - * the necessary adjustment + * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 + * r9 = region id. */ - rldimi r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0 + addis r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha + addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l + BEGIN_FTR_SECTION b slb_finish_load END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) b slb_finish_load_1T -0: /* user address: proto-VSID = context << 15 | ESID. First check - * if the address is within the boundaries of the user region - */ - srdi. r9,r10,USER_ESID_BITS - bne- 8f /* invalid ea bits set */ - - +0: /* when using slices, we extract the psize off the slice bitmaps * and then we need to get the sllp encoding off the mmu_psize_defs * array. @@ -164,15 +166,13 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) ld r9,PACACONTEXTID(r13) BEGIN_FTR_SECTION cmpldi r10,0x1000 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) - rldimi r10,r9,USER_ESID_BITS,0 -BEGIN_FTR_SECTION bge slb_finish_load_1T END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) b slb_finish_load 8: /* invalid EA */ li r10,0 /* BAD_VSID */ + li r9,0 /* BAD_VSID */ li r11,SLB_VSID_USER /* flags don't much matter */ b slb_finish_load @@ -221,8 +221,6 @@ _GLOBAL(slb_allocate_user) /* get context to calculate proto-VSID */ ld r9,PACACONTEXTID(r13) - rldimi r10,r9,USER_ESID_BITS,0 - /* fall through slb_finish_load */ #endif /* __DISABLED__ */ @@ -231,9 +229,10 @@ _GLOBAL(slb_allocate_user) /* * Finish loading of an SLB entry and return * - * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET + * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET */ slb_finish_load: + rldimi r10,r9,USER_ESID_BITS,0 ASM_VSID_SCRAMBLE(r10,r9,256M) /* * bits above VSID_BITS_256M need to be ignored from r10 @@ -298,10 +297,11 @@ _GLOBAL(slb_compare_rr_to_size) /* * Finish loading of a 1T SLB entry (for the kernel linear mapping) and return. * - * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9 + * r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9 */ slb_finish_load_1T: - srdi r10,r10,40-28 /* get 1T ESID */ + srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */ + rldimi r10,r9,USER_ESID_BITS_1T,0 ASM_VSID_SCRAMBLE(r10,r9,1T) /* * bits above VSID_BITS_1T need to be ignored from r10 diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 0d82ef50dc3f..023ec8a13f38 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -82,11 +82,11 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, if (!is_kernel_addr(addr)) { ssize = user_segment_size(addr); vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); } else { vsid = get_kernel_vsid(addr, mmu_kernel_ssize); ssize = mmu_kernel_ssize; } + WARN_ON(vsid == 0); vpn = hpt_vpn(addr, vsid, ssize); rpte = __real_pte(__pte(pte), ptep); -- cgit v1.2.3 From af81d7878c641629f2693ae3fdaf74b4af14dfca Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Mar 2013 03:34:55 +0000 Subject: powerpc: Rename USER_ESID_BITS* to ESID_BITS* Now we use ESID_BITS of kernel address to build proto vsid. 
So rename USER_ESIT_BITS to ESID_BITS Acked-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt CC: [v3.8] --- arch/powerpc/include/asm/mmu-hash64.h | 16 ++++++++-------- arch/powerpc/kernel/exceptions-64s.S | 2 +- arch/powerpc/kvm/book3s_64_mmu_host.c | 4 ++-- arch/powerpc/mm/pgtable_64.c | 2 +- arch/powerpc/mm/slb_low.S | 4 ++-- 5 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index a32461f9d825..b59e06f507ea 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -378,12 +378,12 @@ extern void slb_set_size(u16 size); */ #define CONTEXT_BITS 19 -#define USER_ESID_BITS 18 -#define USER_ESID_BITS_1T 6 +#define ESID_BITS 18 +#define ESID_BITS_1T 6 /* * 256MB segment - * The proto-VSID space has 2^(CONTEX_BITS + USER_ESID_BITS) - 1 segments + * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments * available for user + kernel mapping. The top 4 contexts are used for * kernel mapping. Each segment contains 2^28 bytes. Each * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts @@ -396,15 +396,15 @@ extern void slb_set_size(u16 size); * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus */ #define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */ -#define VSID_BITS_256M (CONTEXT_BITS + USER_ESID_BITS) +#define VSID_BITS_256M (CONTEXT_BITS + ESID_BITS) #define VSID_MODULUS_256M ((1UL<> SID_SHIFT), 256M); - return vsid_scramble((context << USER_ESID_BITS_1T) + return vsid_scramble((context << ESID_BITS_1T) | (ea >> SID_SHIFT_1T), 1T); } diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index b112359ea7a8..200afa5bcfb7 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1472,7 +1472,7 @@ _GLOBAL(do_stab_bolted) addi r9,r9,(MAX_USER_CONTEXT + 1)@l srdi r10,r11,SID_SHIFT - rldimi r10,r9,USER_ESID_BITS,0 /* proto vsid */ + rldimi r10,r9,ESID_BITS,0 /* proto vsid */ ASM_VSID_SCRAMBLE(r10, r9, 256M) rldic r9,r10,12,16 /* r9 = vsid << 12 */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index ead58e317294..5d7d29a313eb 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -326,8 +326,8 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) vcpu3s->context_id[0] = err; vcpu3s->proto_vsid_max = ((vcpu3s->context_id[0] + 1) - << USER_ESID_BITS) - 1; - vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS; + << ESID_BITS) - 1; + vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << ESID_BITS; vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first; kvmppc_mmu_hpte_init(vcpu); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e212a271c7a4..654258f165ae 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -61,7 +61,7 @@ #endif #ifdef CONFIG_PPC_STD_MMU_64 -#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT)) +#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT)) #error TASK_SIZE_USER64 exceeds user VSID range #endif #endif diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 77aafaa1ab09..17aa6dfceb34 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -232,7 +232,7 @@ _GLOBAL(slb_allocate_user) * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> 
PAGE_OFFSET */ slb_finish_load: - rldimi r10,r9,USER_ESID_BITS,0 + rldimi r10,r9,ESID_BITS,0 ASM_VSID_SCRAMBLE(r10,r9,256M) /* * bits above VSID_BITS_256M need to be ignored from r10 @@ -301,7 +301,7 @@ _GLOBAL(slb_compare_rr_to_size) */ slb_finish_load_1T: srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */ - rldimi r10,r9,USER_ESID_BITS_1T,0 + rldimi r10,r9,ESID_BITS_1T,0 ASM_VSID_SCRAMBLE(r10,r9,1T) /* * bits above VSID_BITS_1T need to be ignored from r10 -- cgit v1.2.3 From ee761f629d598579594d7e1eb8c552f3c5f71e4d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Mar 2013 22:49:32 +0100 Subject: arch: Consolidate tsk_is_polling() Move it to a common place. Preparatory patch for implementing set/clear for the idle need_resched poll implementation. Signed-off-by: Thomas Gleixner Cc: Linus Torvalds Cc: Rusty Russell Cc: Paul McKenney Cc: Peter Zijlstra Reviewed-by: Cc: Srivatsa S. Bhat Cc: Magnus Damm Link: http://lkml.kernel.org/r/20130321215233.446034505@linutronix.de Signed-off-by: Thomas Gleixner --- arch/alpha/include/asm/thread_info.h | 2 -- arch/ia64/include/asm/thread_info.h | 2 -- arch/metag/include/asm/thread_info.h | 2 -- arch/microblaze/include/asm/thread_info.h | 1 - arch/mn10300/include/asm/thread_info.h | 2 -- arch/openrisc/include/asm/thread_info.h | 2 -- arch/parisc/include/asm/thread_info.h | 2 -- arch/powerpc/include/asm/thread_info.h | 2 -- arch/sh/include/asm/thread_info.h | 2 -- arch/sparc/include/asm/thread_info_32.h | 2 -- arch/sparc/include/asm/thread_info_64.h | 2 -- arch/tile/include/asm/thread_info.h | 2 -- arch/x86/include/asm/thread_info.h | 2 -- include/linux/sched.h | 20 ++++++++++++++++++++ kernel/sched/core.c | 5 ----- 15 files changed, 20 insertions(+), 30 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 1f8c72959fb6..52cd2a4a3ff4 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -95,8 +95,6 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define TS_POLLING 0x0010 /* idle task polling need_resched, skip sending interrupt */ -#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) - #ifndef __ASSEMBLY__ #define HAVE_SET_RESTORE_SIGMASK 1 static inline void set_restore_sigmask(void) diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index 020d655ed082..cade13dd0299 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -131,8 +131,6 @@ struct thread_info { #define TS_POLLING 1 /* true if in idle loop and not sleeping */ #define TS_RESTORE_SIGMASK 2 /* restore signal mask in do_signal() */ -#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) - #ifndef __ASSEMBLY__ #define HAVE_SET_RESTORE_SIGMASK 1 static inline void set_restore_sigmask(void) diff --git a/arch/metag/include/asm/thread_info.h b/arch/metag/include/asm/thread_info.h index 0ecd34d8b5f6..7c4a33006142 100644 --- a/arch/metag/include/asm/thread_info.h +++ b/arch/metag/include/asm/thread_info.h @@ -150,6 +150,4 @@ static inline int kstack_end(void *addr) #define _TIF_WORK_MASK (_TIF_ALLWORK_MASK & ~(_TIF_SYSCALL_TRACE | \ _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP)) -#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) - #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h index 008f30433d22..de26ea6373de 100644 --- 
a/arch/microblaze/include/asm/thread_info.h +++ b/arch/microblaze/include/asm/thread_info.h @@ -182,7 +182,6 @@ static inline bool test_and_clear_restore_sigmask(void) ti->status &= ~TS_RESTORE_SIGMASK; return true; } -#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) #endif #endif /* __KERNEL__ */ diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h index f90062b0622d..224b4262486d 100644 --- a/arch/mn10300/include/asm/thread_info.h +++ b/arch/mn10300/include/asm/thread_info.h @@ -165,8 +165,6 @@ void arch_release_thread_info(struct thread_info *ti); #define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */ #define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */ -#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) - #endif /* __KERNEL__ */ #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h index 07f3212422ad..d797acc901e4 100644 --- a/arch/openrisc/include/asm/thread_info.h +++ b/arch/openrisc/include/asm/thread_info.h @@ -128,8 +128,6 @@ register struct thread_info *current_thread_info_reg asm("r10"); /* For OpenRISC, this is anything in the LSW other than syscall trace */ #define _TIF_WORK_MASK (0xff & ~(_TIF_SYSCALL_TRACE|_TIF_SINGLESTEP)) -#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) - #endif /* __KERNEL__ */ #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index d1fb79a36f3d..6182832e5b6c 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -77,8 +77,6 @@ struct thread_info { #define _TIF_SYSCALL_TRACE_MASK (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | \ _TIF_BLOCKSTEP) -#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) - #endif /* __KERNEL__ */ #endif /* _ASM_PARISC_THREAD_INFO_H */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 406b7b9a1341..8ceea14d6fe4 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -182,8 +182,6 @@ static inline bool test_thread_local_flags(unsigned int flags) #define is_32bit_task() (1) #endif -#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) - #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index 7d5ac4e48485..45a93669289d 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -207,8 +207,6 @@ static inline bool test_and_clear_restore_sigmask(void) return true; } -#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) - #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h index 25849ae3e900..dd3807599bb9 100644 --- a/arch/sparc/include/asm/thread_info_32.h +++ b/arch/sparc/include/asm/thread_info_32.h @@ -132,8 +132,6 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define _TIF_DO_NOTIFY_RESUME_MASK (_TIF_NOTIFY_RESUME | \ _TIF_SIGPENDING) -#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) - #endif /* __KERNEL__ */ #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index 269bd92313df..d5e504251079 100644 --- a/arch/sparc/include/asm/thread_info_64.h 
+++ b/arch/sparc/include/asm/thread_info_64.h @@ -256,8 +256,6 @@ static inline bool test_and_clear_restore_sigmask(void) return true; } -#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) - #define thread32_stack_is_64bit(__SP) (((__SP) & 0x1) != 0) #define test_thread_64bit_stack(__SP) \ ((test_thread_flag(TIF_32BIT) && !thread32_stack_is_64bit(__SP)) ? \ diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index e9c670d7a7fe..ccc8ef37235c 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -153,8 +153,6 @@ extern void _cpu_idle(void); #define TS_POLLING 0x0004 /* in idle loop but not sleeping */ #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal */ -#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) - #ifndef __ASSEMBLY__ #define HAVE_SET_RESTORE_SIGMASK 1 static inline void set_restore_sigmask(void) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 2cd056e3ada3..a1df6e84691f 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -241,8 +241,6 @@ static inline struct thread_info *current_thread_info(void) skip sending interrupt */ #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ -#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) - #ifndef __ASSEMBLY__ #define HAVE_SET_RESTORE_SIGMASK 1 static inline void set_restore_sigmask(void) diff --git a/include/linux/sched.h b/include/linux/sched.h index d35d2b6ddbfb..6709a5813f27 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2621,6 +2621,26 @@ static inline int spin_needbreak(spinlock_t *lock) #endif } +/* + * Idle thread specific functions to determine the need_resched + * polling state. We have two versions, one based on TS_POLLING in + * thread_info.status and one based on TIF_POLLING_NRFLAG in + * thread_info.flags + */ +#ifdef TS_POLLING +static inline int tsk_is_polling(struct task_struct *p) +{ + return task_thread_info(p)->status & TS_POLLING; +} +#elif defined(TIF_POLLING_NRFLAG) +static inline int tsk_is_polling(struct task_struct *p) +{ + return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); +} +#else +static inline int tsk_is_polling(struct task_struct *p) { return 0; } +#endif + /* * Thread group CPU time accounting. */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7f12624a393c..243a20c5cf91 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -512,11 +512,6 @@ static inline void init_hrtick(void) * the target CPU. */ #ifdef CONFIG_SMP - -#ifndef tsk_is_polling -#define tsk_is_polling(t) 0 -#endif - void resched_task(struct task_struct *p) { int cpu; -- cgit v1.2.3 From f15706b79d6f71e016cd06afa21ee31500029067 Mon Sep 17 00:00:00 2001 From: Anton Arapov Date: Wed, 3 Apr 2013 18:00:34 +0200 Subject: uretprobes/powerpc: Hijack return address Hijack the return address and replace it with a trampoline address. PowerPC implementation. 
Signed-off-by: Anton Arapov Acked-by: Ananth N Mavinakayanahalli Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- arch/powerpc/include/asm/uprobes.h | 1 + arch/powerpc/kernel/uprobes.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index b532060d0916..23016020915e 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -51,4 +51,5 @@ extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); +extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs); #endif /* _ASM_UPROBES_H */ diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index 2ecdbe304f46..59f419b935f2 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -192,3 +192,16 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) return false; } + +unsigned long +arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs) +{ + unsigned long orig_ret_vaddr; + + orig_ret_vaddr = regs->link; + + /* Replace the return addr with trampoline addr */ + regs->link = trampoline_vaddr; + + return orig_ret_vaddr; +} -- cgit v1.2.3 From 106c992a5ebef28193cf5958e49ceff5e4aebb04 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 29 Apr 2013 15:07:23 -0700 Subject: mm/hugetlb: add more arch-defined huge_pte functions Commit abf09bed3cce ("s390/mm: implement software dirty bits") introduced another difference in the pte layout vs. the pmd layout on s390, thoroughly breaking the s390 support for hugetlbfs. This requires replacing some more pte_xxx functions in mm/hugetlbfs.c with a huge_pte_xxx version. This patch introduces those huge_pte_xxx functions and their generic implementation in asm-generic/hugetlb.h, which will now be included on all architectures supporting hugetlbfs apart from s390. This change will be a no-op for those architectures. [akpm@linux-foundation.org: fix warning] Signed-off-by: Gerald Schaefer Cc: Mel Gorman Cc: Hugh Dickins Cc: Hillf Danton Acked-by: Michal Hocko [for !s390 parts] Cc: Tony Luck Cc: Fenghua Yu Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Paul Mundt Cc: "David S. Miller" Cc: Chris Metcalf Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/hugetlb.h | 1 + arch/mips/include/asm/hugetlb.h | 1 + arch/powerpc/include/asm/hugetlb.h | 1 + arch/s390/include/asm/hugetlb.h | 56 +++++++++++++++++++++- arch/s390/include/asm/pgtable.h | 95 ++++++++++++++++---------------------- arch/s390/mm/hugetlbpage.c | 2 +- arch/sh/include/asm/hugetlb.h | 1 + arch/sparc/include/asm/hugetlb.h | 1 + arch/tile/include/asm/hugetlb.h | 1 + arch/x86/include/asm/hugetlb.h | 1 + include/asm-generic/hugetlb.h | 40 ++++++++++++++++ mm/hugetlb.c | 24 +++++----- 12 files changed, 156 insertions(+), 68 deletions(-) create mode 100644 include/asm-generic/hugetlb.h (limited to 'arch/powerpc/include/asm') diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h index 94eaa5bd5d0c..aa910054b8e7 100644 --- a/arch/ia64/include/asm/hugetlb.h +++ b/arch/ia64/include/asm/hugetlb.h @@ -2,6 +2,7 @@ #define _ASM_IA64_HUGETLB_H #include +#include void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h index ef99db994c2f..fe0d15d32660 100644 --- a/arch/mips/include/asm/hugetlb.h +++ b/arch/mips/include/asm/hugetlb.h @@ -10,6 +10,7 @@ #define __ASM_HUGETLB_H #include +#include static inline int is_hugepage_only_range(struct mm_struct *mm, diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 62e11a32c4c2..4fcbd6b14a3a 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -3,6 +3,7 @@ #ifdef CONFIG_HUGETLB_PAGE #include +#include extern struct kmem_cache *hugepte_cache; diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 593753ee07f3..bd90359d6d22 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -114,7 +114,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, #define huge_ptep_set_wrprotect(__mm, __addr, __ptep) \ ({ \ pte_t __pte = huge_ptep_get(__ptep); \ - if (pte_write(__pte)) { \ + if (huge_pte_write(__pte)) { \ huge_ptep_invalidate(__mm, __addr, __ptep); \ set_huge_pte_at(__mm, __addr, __ptep, \ huge_pte_wrprotect(__pte)); \ @@ -127,4 +127,58 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, huge_ptep_invalidate(vma->vm_mm, address, ptep); } +static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) +{ + pte_t pte; + pmd_t pmd; + + pmd = mk_pmd_phys(page_to_phys(page), pgprot); + pte_val(pte) = pmd_val(pmd); + return pte; +} + +static inline int huge_pte_write(pte_t pte) +{ + pmd_t pmd; + + pmd_val(pmd) = pte_val(pte); + return pmd_write(pmd); +} + +static inline int huge_pte_dirty(pte_t pte) +{ + /* No dirty bit in the segment table entry. */ + return 0; +} + +static inline pte_t huge_pte_mkwrite(pte_t pte) +{ + pmd_t pmd; + + pmd_val(pmd) = pte_val(pte); + pte_val(pte) = pmd_val(pmd_mkwrite(pmd)); + return pte; +} + +static inline pte_t huge_pte_mkdirty(pte_t pte) +{ + /* No dirty bit in the segment table entry. 
*/ + return pte; +} + +static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) +{ + pmd_t pmd; + + pmd_val(pmd) = pte_val(pte); + pte_val(pte) = pmd_val(pmd_modify(pmd, newprot)); + return pte; +} + +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pmd_clear((pmd_t *) ptep); +} + #endif /* _ASM_S390_HUGETLB_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4a64c0e5428f..b4622915bd15 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -424,6 +424,13 @@ extern unsigned long MODULES_END; #define __S110 PAGE_RW #define __S111 PAGE_RW +/* + * Segment entry (large page) protection definitions. + */ +#define SEGMENT_NONE __pgprot(_HPAGE_TYPE_NONE) +#define SEGMENT_RO __pgprot(_HPAGE_TYPE_RO) +#define SEGMENT_RW __pgprot(_HPAGE_TYPE_RW) + static inline int mm_exclusive(struct mm_struct *mm) { return likely(mm == current->active_mm && @@ -914,26 +921,6 @@ static inline pte_t pte_mkspecial(pte_t pte) #ifdef CONFIG_HUGETLB_PAGE static inline pte_t pte_mkhuge(pte_t pte) { - /* - * PROT_NONE needs to be remapped from the pte type to the ste type. - * The HW invalid bit is also different for pte and ste. The pte - * invalid bit happens to be the same as the ste _SEGMENT_ENTRY_LARGE - * bit, so we don't have to clear it. - */ - if (pte_val(pte) & _PAGE_INVALID) { - if (pte_val(pte) & _PAGE_SWT) - pte_val(pte) |= _HPAGE_TYPE_NONE; - pte_val(pte) |= _SEGMENT_ENTRY_INV; - } - /* - * Clear SW pte bits, there are no SW bits in a segment table entry. - */ - pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX | _PAGE_SWC | - _PAGE_SWR | _PAGE_SWW); - /* - * Also set the change-override bit because we don't need dirty bit - * tracking for hugetlbfs pages. 
- */ pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); return pte; } @@ -1278,31 +1265,7 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) } } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - -#define SEGMENT_NONE __pgprot(_HPAGE_TYPE_NONE) -#define SEGMENT_RO __pgprot(_HPAGE_TYPE_RO) -#define SEGMENT_RW __pgprot(_HPAGE_TYPE_RW) - -#define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); - -#define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); - -static inline int pmd_trans_splitting(pmd_t pmd) -{ - return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; -} - -static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t entry) -{ - if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1) - pmd_val(entry) |= _SEGMENT_ENTRY_CO; - *pmdp = entry; -} - +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) { /* @@ -1323,10 +1286,11 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return pmd; } -static inline pmd_t pmd_mkhuge(pmd_t pmd) +static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) { - pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; - return pmd; + pmd_t __pmd; + pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); + return __pmd; } static inline pmd_t pmd_mkwrite(pmd_t pmd) @@ -1336,6 +1300,34 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd) pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO; return pmd; } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +#define __HAVE_ARCH_PGTABLE_DEPOSIT +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); + +#define __HAVE_ARCH_PGTABLE_WITHDRAW +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); + +static inline int pmd_trans_splitting(pmd_t pmd) +{ + return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t entry) +{ + if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1) + pmd_val(entry) |= _SEGMENT_ENTRY_CO; + *pmdp = entry; +} + +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; + return pmd; +} static inline pmd_t pmd_wrprotect(pmd_t pmd) { @@ -1432,13 +1424,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, } } -static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) -{ - pmd_t __pmd; - pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); - return __pmd; -} - #define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot)) #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 532525ec88c1..121089d57802 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -39,7 +39,7 @@ int arch_prepare_hugepage(struct page *page) if (!ptep) return -ENOMEM; - pte = mk_pte(page, PAGE_RW); + pte_val(pte) = addr; for (i = 0; i < PTRS_PER_PTE; i++) { set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte); pte_val(pte) += PAGE_SIZE; diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h index b3808c7d67b2..699255d6d1c6 100644 --- a/arch/sh/include/asm/hugetlb.h +++ b/arch/sh/include/asm/hugetlb.h @@ -3,6 +3,7 @@ #include #include +#include static inline int is_hugepage_only_range(struct mm_struct *mm, diff --git 
a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index 7eb57d245044..e4cab465b81f 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -2,6 +2,7 @@ #define _ASM_SPARC64_HUGETLB_H #include +#include void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h index 0f885af2b621..3257733003f8 100644 --- a/arch/tile/include/asm/hugetlb.h +++ b/arch/tile/include/asm/hugetlb.h @@ -16,6 +16,7 @@ #define _ASM_TILE_HUGETLB_H #include +#include static inline int is_hugepage_only_range(struct mm_struct *mm, diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index bdd35dbd0605..a8091216963b 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -2,6 +2,7 @@ #define _ASM_X86_HUGETLB_H #include +#include static inline int is_hugepage_only_range(struct mm_struct *mm, diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h new file mode 100644 index 000000000000..d06079c774a0 --- /dev/null +++ b/include/asm-generic/hugetlb.h @@ -0,0 +1,40 @@ +#ifndef _ASM_GENERIC_HUGETLB_H +#define _ASM_GENERIC_HUGETLB_H + +static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) +{ + return mk_pte(page, pgprot); +} + +static inline int huge_pte_write(pte_t pte) +{ + return pte_write(pte); +} + +static inline int huge_pte_dirty(pte_t pte) +{ + return pte_dirty(pte); +} + +static inline pte_t huge_pte_mkwrite(pte_t pte) +{ + return pte_mkwrite(pte); +} + +static inline pte_t huge_pte_mkdirty(pte_t pte) +{ + return pte_mkdirty(pte); +} + +static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) +{ + return pte_modify(pte, newprot); +} + +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pte_clear(mm, addr, ptep); +} + +#endif /* _ASM_GENERIC_HUGETLB_H */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 1a12f5b9a0ab..73b864a32017 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2247,10 +2247,11 @@ static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, pte_t entry; if (writable) { - entry = - pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); + entry = huge_pte_mkwrite(huge_pte_mkdirty(mk_huge_pte(page, + vma->vm_page_prot))); } else { - entry = huge_pte_wrprotect(mk_pte(page, vma->vm_page_prot)); + entry = huge_pte_wrprotect(mk_huge_pte(page, + vma->vm_page_prot)); } entry = pte_mkyoung(entry); entry = pte_mkhuge(entry); @@ -2264,7 +2265,7 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma, { pte_t entry; - entry = pte_mkwrite(pte_mkdirty(huge_ptep_get(ptep))); + entry = huge_pte_mkwrite(huge_pte_mkdirty(huge_ptep_get(ptep))); if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1)) update_mmu_cache(vma, address, ptep); } @@ -2379,7 +2380,7 @@ again: * HWPoisoned hugepage is already unmapped and dropped reference */ if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) { - pte_clear(mm, address, ptep); + huge_pte_clear(mm, address, ptep); continue; } @@ -2403,7 +2404,7 @@ again: pte = huge_ptep_get_and_clear(mm, address, ptep); tlb_remove_tlb_entry(tlb, ptep, address); - if (pte_dirty(pte)) + if (huge_pte_dirty(pte)) set_page_dirty(page); page_remove_rmap(page); @@ -2856,7 +2857,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, * page now as it is used to determine if a reservation has been * consumed. 
*/ - if ((flags & FAULT_FLAG_WRITE) && !pte_write(entry)) { + if ((flags & FAULT_FLAG_WRITE) && !huge_pte_write(entry)) { if (vma_needs_reservation(h, vma, address) < 0) { ret = VM_FAULT_OOM; goto out_mutex; @@ -2886,12 +2887,12 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (flags & FAULT_FLAG_WRITE) { - if (!pte_write(entry)) { + if (!huge_pte_write(entry)) { ret = hugetlb_cow(mm, vma, address, ptep, entry, pagecache_page); goto out_page_table_lock; } - entry = pte_mkdirty(entry); + entry = huge_pte_mkdirty(entry); } entry = pte_mkyoung(entry); if (huge_ptep_set_access_flags(vma, address, ptep, entry, @@ -2972,7 +2973,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, * directly from any kind of swap entries. */ if (absent || is_swap_pte(huge_ptep_get(pte)) || - ((flags & FOLL_WRITE) && !pte_write(huge_ptep_get(pte)))) { + ((flags & FOLL_WRITE) && + !huge_pte_write(huge_ptep_get(pte)))) { int ret; spin_unlock(&mm->page_table_lock); @@ -3042,7 +3044,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, } if (!huge_pte_none(huge_ptep_get(ptep))) { pte = huge_ptep_get_and_clear(mm, address, ptep); - pte = pte_mkhuge(pte_modify(pte, newprot)); + pte = pte_mkhuge(huge_pte_modify(pte, newprot)); pte = arch_make_huge_pte(pte, vma, NULL, 0); set_huge_pte_at(mm, address, ptep, pte); pages++; -- cgit v1.2.3
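A note for readers of the FSCR commit above: the Power ISA numbers register bits from the most significant end (MSB-0), so "bit 55" of the 64-bit FSCR is written as 1 << (63 - 55) in conventional LSB-0 C, which is why the patch replaces the old literal (1<<8) with an expression of the same value. The fragment below only illustrates that convention; FSCR_BIT() is a hypothetical helper for this note, not something the patch adds.

/* Illustration of MSB-0 (IBM) bit numbering; FSCR_BIT() is hypothetical. */
#define FSCR_BIT(ibm_bit)	(1ULL << (63 - (ibm_bit)))

#define FSCR_TAR_EXAMPLE	FSCR_BIT(55)	/* == 1 << 8, the value the old literal used */
#define FSCR_DSCR_EXAMPLE	FSCR_BIT(61)	/* == 1 << 2, the new DSCR facility bit */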
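The three mmu-hash64.h commits above (making VSID_BITS explicit, moving the kernel into the top context ids, and the USER_ESID_BITS -> ESID_BITS rename) all revolve around one piece of arithmetic: a proto-VSID is built from the context id and the ESID, scrambled with a multiplicative hash, and the top four context ids are reserved for the kernel regions 0xc-0xf. The stand-alone C sketch below restates that flow for 256MB segments as a reading aid; it is not the kernel code. The constants are the ones the patches define, SID_SHIFT is assumed to be 28 (a 256MB segment), the bad-address check that returns VSID 0 is omitted, and a plain modulo stands in for the 2^n - 1 trick used by vsid_scramble()/ASM_VSID_SCRAMBLE().

#include <stdio.h>

#define CONTEXT_BITS		19
#define ESID_BITS		18
#define SID_SHIFT		28			/* 256MB segment, assumed */
#define VSID_BITS_256M		(CONTEXT_BITS + ESID_BITS)	/* 37 */
#define VSID_MULTIPLIER_256M	12538073ULL		/* 24-bit prime */
#define VSID_MODULUS_256M	((1ULL << VSID_BITS_256M) - 1)
#define MAX_USER_CONTEXT	((1ULL << CONTEXT_BITS) - 5)

/* Plain modulo; the kernel avoids the divide with the 2^n - 1 trick. */
static unsigned long long vsid_scramble(unsigned long long proto_vsid)
{
	return (proto_vsid * VSID_MULTIPLIER_256M) % VSID_MODULUS_256M;
}

/* proto-VSID = 19-bit context above the 18-bit ESID, then scramble. */
static unsigned long long get_vsid(unsigned long long context,
				   unsigned long long ea)
{
	return vsid_scramble((context << ESID_BITS) | (ea >> SID_SHIFT));
}

/* Kernel regions 0xc..0xf take the top four context ids. */
static unsigned long long get_kernel_vsid(unsigned long long ea)
{
	unsigned long long context = MAX_USER_CONTEXT + ((ea >> 60) - 0xc) + 1;

	return get_vsid(context, ea);
}

int main(void)
{
	/* Linear mapping: context comes out as 0x7fffc, per the commit log. */
	printf("kernel vsid: 0x%llx\n",
	       get_kernel_vsid(0xc000000000000000ULL));
	/* A user address in context 1, first segment above 256MB. */
	printf("user vsid:   0x%llx\n",
	       get_vsid(1, 0x0000000010000000ULL));
	return 0;
}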