diff options
Diffstat (limited to 'arch/s390/include')
-rw-r--r-- | arch/s390/include/asm/bitops.h | 45 | ||||
-rw-r--r-- | arch/s390/include/asm/cmpxchg.h | 1 | ||||
-rw-r--r-- | arch/s390/include/asm/delay.h | 8 | ||||
-rw-r--r-- | arch/s390/include/asm/elf.h | 12 | ||||
-rw-r--r-- | arch/s390/include/asm/hugetlb.h | 17 | ||||
-rw-r--r-- | arch/s390/include/asm/irq.h | 9 | ||||
-rw-r--r-- | arch/s390/include/asm/lowcore.h | 4 | ||||
-rw-r--r-- | arch/s390/include/asm/mmu.h | 9 | ||||
-rw-r--r-- | arch/s390/include/asm/mmu_context.h | 6 | ||||
-rw-r--r-- | arch/s390/include/asm/page.h | 60 | ||||
-rw-r--r-- | arch/s390/include/asm/percpu.h | 68 | ||||
-rw-r--r-- | arch/s390/include/asm/pgalloc.h | 57 | ||||
-rw-r--r-- | arch/s390/include/asm/pgtable.h | 607 | ||||
-rw-r--r-- | arch/s390/include/asm/processor.h | 1 | ||||
-rw-r--r-- | arch/s390/include/asm/s390_ext.h | 17 | ||||
-rw-r--r-- | arch/s390/include/asm/suspend.h | 10 | ||||
-rw-r--r-- | arch/s390/include/asm/tlb.h | 62 | ||||
-rw-r--r-- | arch/s390/include/asm/tlbflush.h | 13 | ||||
-rw-r--r-- | arch/s390/include/asm/topology.h | 4 | ||||
-rw-r--r-- | arch/s390/include/asm/uaccess.h | 11 | ||||
-rw-r--r-- | arch/s390/include/asm/unistd.h | 4 |
21 files changed, 530 insertions, 495 deletions
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index e1c8f3a49884..667c6e9f6a34 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -621,6 +621,7 @@ static inline unsigned long find_first_zero_bit(const unsigned long *addr, bits = __ffz_word(bytes*8, __load_ulong_be(addr, bytes)); return (bits < size) ? bits : size; } +#define find_first_zero_bit find_first_zero_bit /** * find_first_bit - find the first set bit in a memory region @@ -641,6 +642,7 @@ static inline unsigned long find_first_bit(const unsigned long * addr, bits = __ffs_word(bytes*8, __load_ulong_be(addr, bytes)); return (bits < size) ? bits : size; } +#define find_first_bit find_first_bit /** * find_next_zero_bit - find the first zero bit in a memory region @@ -677,6 +679,7 @@ static inline int find_next_zero_bit (const unsigned long * addr, } return offset + find_first_zero_bit(p, size); } +#define find_next_zero_bit find_next_zero_bit /** * find_next_bit - find the first set bit in a memory region @@ -713,6 +716,7 @@ static inline int find_next_bit (const unsigned long * addr, } return offset + find_first_bit(p, size); } +#define find_next_bit find_next_bit /* * Every architecture must define this function. It's the fastest @@ -742,41 +746,6 @@ static inline int sched_find_first_bit(unsigned long *b) * 23 22 21 20 19 18 17 16 31 30 29 28 27 26 25 24 */ -static inline void __set_bit_le(unsigned long nr, void *addr) -{ - __set_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr); -} - -static inline void __clear_bit_le(unsigned long nr, void *addr) -{ - __clear_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr); -} - -static inline int __test_and_set_bit_le(unsigned long nr, void *addr) -{ - return __test_and_set_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr); -} - -static inline int test_and_set_bit_le(unsigned long nr, void *addr) -{ - return test_and_set_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr); -} - -static inline int __test_and_clear_bit_le(unsigned long nr, void *addr) -{ - return __test_and_clear_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr); -} - -static inline int test_and_clear_bit_le(unsigned long nr, void *addr) -{ - return test_and_clear_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr); -} - -static inline int test_bit_le(unsigned long nr, const void *addr) -{ - return test_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr); -} - static inline int find_first_zero_bit_le(void *vaddr, unsigned int size) { unsigned long bytes, bits; @@ -787,6 +756,7 @@ static inline int find_first_zero_bit_le(void *vaddr, unsigned int size) bits = __ffz_word(bytes*8, __load_ulong_le(vaddr, bytes)); return (bits < size) ? bits : size; } +#define find_first_zero_bit_le find_first_zero_bit_le static inline int find_next_zero_bit_le(void *vaddr, unsigned long size, unsigned long offset) @@ -816,6 +786,7 @@ static inline int find_next_zero_bit_le(void *vaddr, unsigned long size, } return offset + find_first_zero_bit_le(p, size); } +#define find_next_zero_bit_le find_next_zero_bit_le static inline unsigned long find_first_bit_le(void *vaddr, unsigned long size) { @@ -827,6 +798,7 @@ static inline unsigned long find_first_bit_le(void *vaddr, unsigned long size) bits = __ffs_word(bytes*8, __load_ulong_le(vaddr, bytes)); return (bits < size) ? bits : size; } +#define find_first_bit_le find_first_bit_le static inline int find_next_bit_le(void *vaddr, unsigned long size, unsigned long offset) @@ -856,6 +828,9 @@ static inline int find_next_bit_le(void *vaddr, unsigned long size, } return offset + find_first_bit_le(p, size); } +#define find_next_bit_le find_next_bit_le + +#include <asm-generic/bitops/le.h> #define ext2_set_bit_atomic(lock, nr, addr) \ test_and_set_bit_le(nr, addr) diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 7488e52efa97..81d7908416cf 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -167,7 +167,6 @@ static inline unsigned long __cmpxchg(void *ptr, unsigned long old, #ifdef CONFIG_64BIT #define cmpxchg64(ptr, o, n) \ ({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg((ptr), (o), (n)); \ }) #else /* CONFIG_64BIT */ diff --git a/arch/s390/include/asm/delay.h b/arch/s390/include/asm/delay.h index 8a096b83f51f..0e3b35f96be1 100644 --- a/arch/s390/include/asm/delay.h +++ b/arch/s390/include/asm/delay.h @@ -14,10 +14,12 @@ #ifndef _S390_DELAY_H #define _S390_DELAY_H -extern void __udelay(unsigned long long usecs); -extern void udelay_simple(unsigned long long usecs); -extern void __delay(unsigned long loops); +void __ndelay(unsigned long long nsecs); +void __udelay(unsigned long long usecs); +void udelay_simple(unsigned long long usecs); +void __delay(unsigned long loops); +#define ndelay(n) __ndelay((unsigned long long) (n)) #define udelay(n) __udelay((unsigned long long) (n)) #define mdelay(n) __udelay((unsigned long long) (n) * 1000) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 10c029cfcc7d..64b61bf72e93 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -196,18 +196,6 @@ do { \ } while (0) #endif /* __s390x__ */ -/* - * An executable for which elf_read_implies_exec() returns TRUE will - * have the READ_IMPLIES_EXEC personality flag set automatically. - */ -#define elf_read_implies_exec(ex, executable_stack) \ -({ \ - if (current->mm->context.noexec && \ - executable_stack != EXSTACK_DISABLE_X) \ - disable_noexec(current->mm, current); \ - current->mm->context.noexec == 0; \ -}) - #define STACK_RND_MASK 0x7ffUL #define ARCH_DLINFO \ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index b56403c2df28..799ed0f1643d 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -111,21 +111,10 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm, { pmd_t *pmdp = (pmd_t *) ptep; - if (!MACHINE_HAS_IDTE) { - __pmd_csp(pmdp); - if (mm->context.noexec) { - pmdp = get_shadow_table(pmdp); - __pmd_csp(pmdp); - } - return; - } - - __pmd_idte(address, pmdp); - if (mm->context.noexec) { - pmdp = get_shadow_table(pmdp); + if (MACHINE_HAS_IDTE) __pmd_idte(address, pmdp); - } - return; + else + __pmd_csp(pmdp); } #define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index db14a311f1d2..ba7b01c726a3 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -2,6 +2,7 @@ #define _ASM_IRQ_H #include <linux/hardirq.h> +#include <linux/types.h> enum interruption_class { EXTERNAL_INTERRUPT, @@ -15,6 +16,7 @@ enum interruption_class { EXTINT_VRT, EXTINT_SCP, EXTINT_IUC, + EXTINT_CPM, IOINT_QAI, IOINT_QDI, IOINT_DAS, @@ -30,4 +32,11 @@ enum interruption_class { NR_IRQS, }; +typedef void (*ext_int_handler_t)(unsigned int, unsigned int, unsigned long); + +int register_external_interrupt(u16 code, ext_int_handler_t handler); +int unregister_external_interrupt(u16 code, ext_int_handler_t handler); +void service_subclass_irq_register(void); +void service_subclass_irq_unregister(void); + #endif /* _ASM_IRQ_H */ diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 65e172f8209d..228cf0b295db 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -124,7 +124,7 @@ struct _lowcore { /* Address space pointer. */ __u32 kernel_asce; /* 0x02ac */ __u32 user_asce; /* 0x02b0 */ - __u32 user_exec_asce; /* 0x02b4 */ + __u32 current_pid; /* 0x02b4 */ /* SMP info area */ __u32 cpu_nr; /* 0x02b8 */ @@ -255,7 +255,7 @@ struct _lowcore { /* Address space pointer. */ __u64 kernel_asce; /* 0x0310 */ __u64 user_asce; /* 0x0318 */ - __u64 user_exec_asce; /* 0x0320 */ + __u64 current_pid; /* 0x0320 */ /* SMP info area */ __u32 cpu_nr; /* 0x0328 */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 78522cdefdd4..82d0847896a0 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -5,19 +5,18 @@ typedef struct { atomic_t attach_count; unsigned int flush_mm; spinlock_t list_lock; - struct list_head crst_list; struct list_head pgtable_list; unsigned long asce_bits; unsigned long asce_limit; unsigned long vdso_base; - int noexec; - int has_pgste; /* The mmu context has extended page tables */ - int alloc_pgste; /* cloned contexts will have extended page tables */ + /* Cloned contexts will be created with extended page tables. */ + unsigned int alloc_pgste:1; + /* The mmu context has extended page tables. */ + unsigned int has_pgste:1; } mm_context_t; #define INIT_MM_CONTEXT(name) \ .context.list_lock = __SPIN_LOCK_UNLOCKED(name.context.list_lock), \ - .context.crst_list = LIST_HEAD_INIT(name.context.crst_list), \ .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), #endif diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 8c277caa8d3a..5682f160ff82 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -35,11 +35,9 @@ static inline int init_new_context(struct task_struct *tsk, * and if has_pgste is set, it will create extended page * tables. */ - mm->context.noexec = 0; mm->context.has_pgste = 1; mm->context.alloc_pgste = 1; } else { - mm->context.noexec = (user_mode == SECONDARY_SPACE_MODE); mm->context.has_pgste = 0; mm->context.alloc_pgste = 0; } @@ -63,10 +61,8 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd); if (user_mode != HOME_SPACE_MODE) { /* Load primary space page table origin. */ - pgd = mm->context.noexec ? get_shadow_table(pgd) : pgd; - S390_lowcore.user_exec_asce = mm->context.asce_bits | __pa(pgd); asm volatile(LCTL_OPCODE" 1,1,%0\n" - : : "m" (S390_lowcore.user_exec_asce) ); + : : "m" (S390_lowcore.user_asce) ); } else /* Load home space page table origin. */ asm volatile(LCTL_OPCODE" 13,13,%0" diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 3c987e9ec8d6..accb372ddc7e 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -90,6 +90,7 @@ static inline void copy_page(void *to, void *from) */ typedef struct { unsigned long pgprot; } pgprot_t; +typedef struct { unsigned long pgste; } pgste_t; typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pud; } pud_t; @@ -97,18 +98,21 @@ typedef struct { unsigned long pgd; } pgd_t; typedef pte_t *pgtable_t; #define pgprot_val(x) ((x).pgprot) +#define pgste_val(x) ((x).pgste) #define pte_val(x) ((x).pte) #define pmd_val(x) ((x).pmd) #define pud_val(x) ((x).pud) #define pgd_val(x) ((x).pgd) +#define __pgste(x) ((pgste_t) { (x) } ) #define __pte(x) ((pte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) +#define __pud(x) ((pud_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) -static inline void -page_set_storage_key(unsigned long addr, unsigned int skey, int mapped) +static inline void page_set_storage_key(unsigned long addr, + unsigned char skey, int mapped) { if (!mapped) asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0" @@ -117,15 +121,59 @@ page_set_storage_key(unsigned long addr, unsigned int skey, int mapped) asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); } -static inline unsigned int -page_get_storage_key(unsigned long addr) +static inline unsigned char page_get_storage_key(unsigned long addr) { - unsigned int skey; + unsigned char skey; - asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr), "0" (0)); + asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr)); return skey; } +static inline int page_reset_referenced(unsigned long addr) +{ + unsigned int ipm; + + asm volatile( + " rrbe 0,%1\n" + " ipm %0\n" + : "=d" (ipm) : "a" (addr) : "cc"); + return !!(ipm & 0x20000000); +} + +/* Bits int the storage key */ +#define _PAGE_CHANGED 0x02 /* HW changed bit */ +#define _PAGE_REFERENCED 0x04 /* HW referenced bit */ +#define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */ +#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */ + +/* + * Test and clear dirty bit in storage key. + * We can't clear the changed bit atomically. This is a potential + * race against modification of the referenced bit. This function + * should therefore only be called if it is not mapped in any + * address space. + */ +#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY +static inline int page_test_and_clear_dirty(unsigned long pfn, int mapped) +{ + unsigned char skey; + + skey = page_get_storage_key(pfn << PAGE_SHIFT); + if (!(skey & _PAGE_CHANGED)) + return 0; + page_set_storage_key(pfn << PAGE_SHIFT, skey & ~_PAGE_CHANGED, mapped); + return 1; +} + +/* + * Test and clear referenced bit in storage key. + */ +#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG +static inline int page_test_and_clear_young(unsigned long pfn) +{ + return page_reset_referenced(pfn << PAGE_SHIFT); +} + struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index f7ad8719d02d..5325c89a5843 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -1,6 +1,9 @@ #ifndef __ARCH_S390_PERCPU__ #define __ARCH_S390_PERCPU__ +#include <linux/preempt.h> +#include <asm/cmpxchg.h> + /* * s390 uses its own implementation for per cpu data, the offset of * the cpu local data area is cached in the cpu's lowcore memory. @@ -16,6 +19,71 @@ #define ARCH_NEEDS_WEAK_PER_CPU #endif +#define arch_irqsafe_cpu_to_op(pcp, val, op) \ +do { \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ old__, new__, prev__; \ + pcp_op_T__ *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + prev__ = *ptr__; \ + do { \ + old__ = prev__; \ + new__ = old__ op (val); \ + switch (sizeof(*ptr__)) { \ + case 8: \ + prev__ = cmpxchg64(ptr__, old__, new__); \ + break; \ + default: \ + prev__ = cmpxchg(ptr__, old__, new__); \ + } \ + } while (prev__ != old__); \ + preempt_enable(); \ +} while (0) + +#define irqsafe_cpu_add_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) +#define irqsafe_cpu_add_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) +#define irqsafe_cpu_add_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) +#define irqsafe_cpu_add_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) + +#define irqsafe_cpu_and_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) +#define irqsafe_cpu_and_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) +#define irqsafe_cpu_and_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) +#define irqsafe_cpu_and_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) + +#define irqsafe_cpu_or_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) +#define irqsafe_cpu_or_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) +#define irqsafe_cpu_or_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) +#define irqsafe_cpu_or_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) + +#define irqsafe_cpu_xor_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) +#define irqsafe_cpu_xor_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) +#define irqsafe_cpu_xor_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) +#define irqsafe_cpu_xor_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) + +#define arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ ret__; \ + pcp_op_T__ *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + switch (sizeof(*ptr__)) { \ + case 8: \ + ret__ = cmpxchg64(ptr__, oval, nval); \ + break; \ + default: \ + ret__ = cmpxchg(ptr__, oval, nval); \ + } \ + preempt_enable(); \ + ret__; \ +}) + +#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) +#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) +#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) +#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) + #include <asm-generic/percpu.h> #endif /* __ARCH_S390_PERCPU__ */ diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 082eb4e50e8b..f6314af3b354 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -19,14 +19,13 @@ #define check_pgt_cache() do {} while (0) -unsigned long *crst_table_alloc(struct mm_struct *, int); +unsigned long *crst_table_alloc(struct mm_struct *); void crst_table_free(struct mm_struct *, unsigned long *); void crst_table_free_rcu(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mm_struct *, unsigned long *); -void disable_noexec(struct mm_struct *, struct task_struct *); static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { @@ -50,9 +49,6 @@ static inline void clear_table(unsigned long *s, unsigned long val, size_t n) static inline void crst_table_init(unsigned long *crst, unsigned long entry) { clear_table(crst, entry, sizeof(unsigned long)*2048); - crst = get_shadow_table(crst); - if (crst) - clear_table(crst, entry, sizeof(unsigned long)*2048); } #ifndef __s390x__ @@ -69,10 +65,7 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) #define pmd_free(mm, x) do { } while (0) #define pgd_populate(mm, pgd, pud) BUG() -#define pgd_populate_kernel(mm, pgd, pud) BUG() - #define pud_populate(mm, pud, pmd) BUG() -#define pud_populate_kernel(mm, pud, pmd) BUG() #else /* __s390x__ */ @@ -90,7 +83,7 @@ void crst_table_downgrade(struct mm_struct *, unsigned long limit); static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { - unsigned long *table = crst_table_alloc(mm, mm->context.noexec); + unsigned long *table = crst_table_alloc(mm); if (table) crst_table_init(table, _REGION3_ENTRY_EMPTY); return (pud_t *) table; @@ -99,43 +92,21 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) { - unsigned long *table = crst_table_alloc(mm, mm->context.noexec); + unsigned long *table = crst_table_alloc(mm); if (table) crst_table_init(table, _SEGMENT_ENTRY_EMPTY); return (pmd_t *) table; } #define pmd_free(mm, pmd) crst_table_free(mm, (unsigned long *) pmd) -static inline void pgd_populate_kernel(struct mm_struct *mm, - pgd_t *pgd, pud_t *pud) -{ - pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud); -} - static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) { - pgd_populate_kernel(mm, pgd, pud); - if (mm->context.noexec) { - pgd = get_shadow_table(pgd); - pud = get_shadow_table(pud); - pgd_populate_kernel(mm, pgd, pud); - } -} - -static inline void pud_populate_kernel(struct mm_struct *mm, - pud_t *pud, pmd_t *pmd) -{ - pud_val(*pud) = _REGION3_ENTRY | __pa(pmd); + pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud); } static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - pud_populate_kernel(mm, pud, pmd); - if (mm->context.noexec) { - pud = get_shadow_table(pud); - pmd = get_shadow_table(pmd); - pud_populate_kernel(mm, pud, pmd); - } + pud_val(*pud) = _REGION3_ENTRY | __pa(pmd); } #endif /* __s390x__ */ @@ -143,29 +114,19 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { spin_lock_init(&mm->context.list_lock); - INIT_LIST_HEAD(&mm->context.crst_list); INIT_LIST_HEAD(&mm->context.pgtable_list); - return (pgd_t *) - crst_table_alloc(mm, user_mode == SECONDARY_SPACE_MODE); + return (pgd_t *) crst_table_alloc(mm); } #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) -static inline void pmd_populate_kernel(struct mm_struct *mm, - pmd_t *pmd, pte_t *pte) -{ - pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte); -} - static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) { - pmd_populate_kernel(mm, pmd, pte); - if (mm->context.noexec) { - pmd = get_shadow_table(pmd); - pmd_populate_kernel(mm, pmd, pte + PTRS_PER_PTE); - } + pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte); } +#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte) + #define pmd_pgtable(pmd) \ (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 02ace3491c51..e4efacfe1b63 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -31,9 +31,8 @@ #ifndef __ASSEMBLY__ #include <linux/sched.h> #include <linux/mm_types.h> -#include <asm/bitops.h> #include <asm/bug.h> -#include <asm/processor.h> +#include <asm/page.h> extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); extern void paging_init(void); @@ -243,11 +242,13 @@ extern unsigned long VMALLOC_START; /* Software bits in the page table entry */ #define _PAGE_SWT 0x001 /* SW pte type bit t */ #define _PAGE_SWX 0x002 /* SW pte type bit x */ -#define _PAGE_SPECIAL 0x004 /* SW associated with special page */ +#define _PAGE_SWC 0x004 /* SW pte changed bit (for KVM) */ +#define _PAGE_SWR 0x008 /* SW pte referenced bit (for KVM) */ +#define _PAGE_SPECIAL 0x010 /* SW associated with special page */ #define __HAVE_ARCH_PTE_SPECIAL /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_SWC | _PAGE_SWR) /* Six different types of pages. */ #define _PAGE_TYPE_EMPTY 0x400 @@ -256,8 +257,6 @@ extern unsigned long VMALLOC_START; #define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */ #define _PAGE_TYPE_RO 0x200 #define _PAGE_TYPE_RW 0x000 -#define _PAGE_TYPE_EX_RO 0x202 -#define _PAGE_TYPE_EX_RW 0x002 /* * Only four types for huge pages, using the invalid bit and protection bit @@ -287,8 +286,6 @@ extern unsigned long VMALLOC_START; * _PAGE_TYPE_FILE 11?1 -> 11?1 * _PAGE_TYPE_RO 0100 -> 1100 * _PAGE_TYPE_RW 0000 -> 1000 - * _PAGE_TYPE_EX_RO 0110 -> 1110 - * _PAGE_TYPE_EX_RW 0010 -> 1010 * * pte_none is true for bits combinations 1000, 1010, 1100, 1110 * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001 @@ -297,14 +294,17 @@ extern unsigned long VMALLOC_START; */ /* Page status table bits for virtualization */ -#define RCP_PCL_BIT 55 -#define RCP_HR_BIT 54 -#define RCP_HC_BIT 53 -#define RCP_GR_BIT 50 -#define RCP_GC_BIT 49 - -/* User dirty bit for KVM's migration feature */ -#define KVM_UD_BIT 47 +#define RCP_ACC_BITS 0xf000000000000000UL +#define RCP_FP_BIT 0x0800000000000000UL +#define RCP_PCL_BIT 0x0080000000000000UL +#define RCP_HR_BIT 0x0040000000000000UL +#define RCP_HC_BIT 0x0020000000000000UL +#define RCP_GR_BIT 0x0004000000000000UL +#define RCP_GC_BIT 0x0002000000000000UL + +/* User dirty / referenced bit for KVM's migration feature */ +#define KVM_UR_BIT 0x0000800000000000UL +#define KVM_UC_BIT 0x0000400000000000UL #ifndef __s390x__ @@ -377,85 +377,54 @@ extern unsigned long VMALLOC_START; #define _ASCE_USER_BITS (_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \ _ASCE_ALT_EVENT) -/* Bits int the storage key */ -#define _PAGE_CHANGED 0x02 /* HW changed bit */ -#define _PAGE_REFERENCED 0x04 /* HW referenced bit */ - /* * Page protection definitions. */ #define PAGE_NONE __pgprot(_PAGE_TYPE_NONE) #define PAGE_RO __pgprot(_PAGE_TYPE_RO) #define PAGE_RW __pgprot(_PAGE_TYPE_RW) -#define PAGE_EX_RO __pgprot(_PAGE_TYPE_EX_RO) -#define PAGE_EX_RW __pgprot(_PAGE_TYPE_EX_RW) #define PAGE_KERNEL PAGE_RW #define PAGE_COPY PAGE_RO /* - * Dependent on the EXEC_PROTECT option s390 can do execute protection. - * Write permission always implies read permission. In theory with a - * primary/secondary page table execute only can be implemented but - * it would cost an additional bit in the pte to distinguish all the - * different pte types. To avoid that execute permission currently - * implies read permission as well. + * On s390 the page table entry has an invalid bit and a read-only bit. + * Read permission implies execute permission and write permission + * implies read permission. */ /*xwr*/ #define __P000 PAGE_NONE #define __P001 PAGE_RO #define __P010 PAGE_RO #define __P011 PAGE_RO -#define __P100 PAGE_EX_RO -#define __P101 PAGE_EX_RO -#define __P110 PAGE_EX_RO -#define __P111 PAGE_EX_RO +#define __P100 PAGE_RO +#define __P101 PAGE_RO +#define __P110 PAGE_RO +#define __P111 PAGE_RO #define __S000 PAGE_NONE #define __S001 PAGE_RO #define __S010 PAGE_RW #define __S011 PAGE_RW -#define __S100 PAGE_EX_RO -#define __S101 PAGE_EX_RO -#define __S110 PAGE_EX_RW -#define __S111 PAGE_EX_RW - -#ifndef __s390x__ -# define PxD_SHADOW_SHIFT 1 -#else /* __s390x__ */ -# define PxD_SHADOW_SHIFT 2 -#endif /* __s390x__ */ +#define __S100 PAGE_RO +#define __S101 PAGE_RO +#define __S110 PAGE_RW +#define __S111 PAGE_RW -static inline void *get_shadow_table(void *table) +static inline int mm_exclusive(struct mm_struct *mm) { - unsigned long addr, offset; - struct page *page; - - addr = (unsigned long) table; - offset = addr & ((PAGE_SIZE << PxD_SHADOW_SHIFT) - 1); - page = virt_to_page((void *)(addr ^ offset)); - return (void *)(addr_t)(page->index ? (page->index | offset) : 0UL); + return likely(mm == current->active_mm && + atomic_read(&mm->context.attach_count) <= 1); } -/* - * Certain architectures need to do special things when PTEs - * within a page table are directly modified. Thus, the following - * hook is made available. - */ -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t entry) +static inline int mm_has_pgste(struct mm_struct *mm) { - *ptep = entry; - if (mm->context.noexec) { - if (!(pte_val(entry) & _PAGE_INVALID) && - (pte_val(entry) & _PAGE_SWX)) - pte_val(entry) |= _PAGE_RO; - else - pte_val(entry) = _PAGE_TYPE_EMPTY; - ptep[PTRS_PER_PTE] = entry; - } +#ifdef CONFIG_PGSTE + if (unlikely(mm->context.has_pgste)) + return 1; +#endif + return 0; } - /* * pgd/pmd/pte query functions */ @@ -568,52 +537,127 @@ static inline int pte_special(pte_t pte) } #define __HAVE_ARCH_PTE_SAME -#define pte_same(a,b) (pte_val(a) == pte_val(b)) +static inline int pte_same(pte_t a, pte_t b) +{ + return pte_val(a) == pte_val(b); +} -static inline void rcp_lock(pte_t *ptep) +static inline pgste_t pgste_get_lock(pte_t *ptep) { + unsigned long new = 0; #ifdef CONFIG_PGSTE - unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); + unsigned long old; + preempt_disable(); - while (test_and_set_bit(RCP_PCL_BIT, pgste)) - ; + asm( + " lg %0,%2\n" + "0: lgr %1,%0\n" + " nihh %0,0xff7f\n" /* clear RCP_PCL_BIT in old */ + " oihh %1,0x0080\n" /* set RCP_PCL_BIT in new */ + " csg %0,%1,%2\n" + " jl 0b\n" + : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) + : "Q" (ptep[PTRS_PER_PTE]) : "cc"); #endif + return __pgste(new); } -static inline void rcp_unlock(pte_t *ptep) +static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - clear_bit(RCP_PCL_BIT, pgste); + asm( + " nihh %1,0xff7f\n" /* clear RCP_PCL_BIT */ + " stg %1,%0\n" + : "=Q" (ptep[PTRS_PER_PTE]) + : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc"); preempt_enable(); #endif } -/* forward declaration for SetPageUptodate in page-flags.h*/ -static inline void page_clear_dirty(struct page *page, int mapped); -#include <linux/page-flags.h> - -static inline void ptep_rcp_copy(pte_t *ptep) +static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - struct page *page = virt_to_page(pte_val(*ptep)); - unsigned int skey; - unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - - skey = page_get_storage_key(page_to_phys(page)); - if (skey & _PAGE_CHANGED) { - set_bit_simple(RCP_GC_BIT, pgste); - set_bit_simple(KVM_UD_BIT, pgste); + unsigned long address, bits; + unsigned char skey; + + address = pte_val(*ptep) & PAGE_MASK; + skey = page_get_storage_key(address); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + /* Clear page changed & referenced bit in the storage key */ + if (bits) { + skey ^= bits; + page_set_storage_key(address, skey, 1); } - if (skey & _PAGE_REFERENCED) - set_bit_simple(RCP_GR_BIT, pgste); - if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) { - SetPageDirty(page); - set_bit_simple(KVM_UD_BIT, pgste); - } - if (test_and_clear_bit_simple(RCP_HR_BIT, pgste)) - SetPageReferenced(page); + /* Transfer page changed & referenced bit to guest bits in pgste */ + pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ + /* Get host changed & referenced bits from pgste */ + bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52; + /* Clear host bits in pgste. */ + pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT); + pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT); + /* Copy page access key and fetch protection bit to pgste */ + pgste_val(pgste) |= + (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + /* Transfer changed and referenced to kvm user bits */ + pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */ + /* Transfer changed & referenced to pte sofware bits */ + pte_val(*ptep) |= bits << 1; /* _PAGE_SWR & _PAGE_SWC */ #endif + return pgste; + +} + +static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + int young; + + young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); + /* Transfer page referenced bit to pte software bit (host view) */ + if (young || (pgste_val(pgste) & RCP_HR_BIT)) + pte_val(*ptep) |= _PAGE_SWR; + /* Clear host referenced bit in pgste. */ + pgste_val(pgste) &= ~RCP_HR_BIT; + /* Transfer page referenced bit to guest bit in pgste */ + pgste_val(pgste) |= (unsigned long) young << 50; /* set RCP_GR_BIT */ +#endif + return pgste; + +} + +static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + unsigned long address; + unsigned long okey, nkey; + + address = pte_val(*ptep) & PAGE_MASK; + okey = nkey = page_get_storage_key(address); + nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); + /* Set page access key and fetch protection bit from pgste */ + nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; + if (okey != nkey) + page_set_storage_key(address, nkey, 1); +#endif +} + +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + pgste_t pgste; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste_set_pte(ptep, pgste); + *ptep = entry; + pgste_set_unlock(ptep, pgste); + } else + *ptep = entry; } /* @@ -627,19 +671,19 @@ static inline int pte_write(pte_t pte) static inline int pte_dirty(pte_t pte) { - /* A pte is neither clean nor dirty on s/390. The dirty bit - * is in the storage key. See page_test_and_clear_dirty for - * details. - */ +#ifdef CONFIG_PGSTE + if (pte_val(pte) & _PAGE_SWC) + return 1; +#endif return 0; } static inline int pte_young(pte_t pte) { - /* A pte is neither young nor old on s/390. The young bit - * is in the storage key. See page_test_and_clear_young for - * details. - */ +#ifdef CONFIG_PGSTE + if (pte_val(pte) & _PAGE_SWR) + return 1; +#endif return 0; } @@ -647,64 +691,30 @@ static inline int pte_young(pte_t pte) * pgd/pmd/pte modification functions */ -#ifndef __s390x__ - -#define pgd_clear(pgd) do { } while (0) -#define pud_clear(pud) do { } while (0) - -#else /* __s390x__ */ - -static inline void pgd_clear_kernel(pgd_t * pgd) +static inline void pgd_clear(pgd_t *pgd) { +#ifdef __s390x__ if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; +#endif } -static inline void pgd_clear(pgd_t * pgd) -{ - pgd_t *shadow = get_shadow_table(pgd); - - pgd_clear_kernel(pgd); - if (shadow) - pgd_clear_kernel(shadow); -} - -static inline void pud_clear_kernel(pud_t *pud) +static inline void pud_clear(pud_t *pud) { +#ifdef __s390x__ if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) pud_val(*pud) = _REGION3_ENTRY_EMPTY; +#endif } -static inline void pud_clear(pud_t *pud) -{ - pud_t *shadow = get_shadow_table(pud); - - pud_clear_kernel(pud); - if (shadow) - pud_clear_kernel(shadow); -} - -#endif /* __s390x__ */ - -static inline void pmd_clear_kernel(pmd_t * pmdp) +static inline void pmd_clear(pmd_t *pmdp) { pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; } -static inline void pmd_clear(pmd_t *pmd) -{ - pmd_t *shadow = get_shadow_table(pmd); - - pmd_clear_kernel(pmd); - if (shadow) - pmd_clear_kernel(shadow); -} - static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_val(*ptep) = _PAGE_TYPE_EMPTY; - if (mm->context.noexec) - pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY; } /* @@ -734,35 +744,27 @@ static inline pte_t pte_mkwrite(pte_t pte) static inline pte_t pte_mkclean(pte_t pte) { - /* The only user of pte_mkclean is the fork() code. - We must *not* clear the *physical* page dirty bit - just because fork() wants to clear the dirty bit in - *one* of the page's mappings. So we just do nothing. */ +#ifdef CONFIG_PGSTE + pte_val(pte) &= ~_PAGE_SWC; +#endif return pte; } static inline pte_t pte_mkdirty(pte_t pte) { - /* We do not explicitly set the dirty bit because the - * sske instruction is slow. It is faster to let the - * next instruction set the dirty bit. - */ return pte; } static inline pte_t pte_mkold(pte_t pte) { - /* S/390 doesn't keep its dirty/referenced bit in the pte. - * There is no point in clearing the real referenced bit. - */ +#ifdef CONFIG_PGSTE + pte_val(pte) &= ~_PAGE_SWR; +#endif return pte; } static inline pte_t pte_mkyoung(pte_t pte) { - /* S/390 doesn't keep its dirty/referenced bit in the pte. - * There is no point in setting the real referenced bit. - */ return pte; } @@ -800,62 +802,60 @@ static inline pte_t pte_mkhuge(pte_t pte) } #endif -#ifdef CONFIG_PGSTE /* - * Get (and clear) the user dirty bit for a PTE. + * Get (and clear) the user dirty bit for a pte. */ -static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, - pte_t *ptep) +static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, + pte_t *ptep) { - int dirty; - unsigned long *pgste; - struct page *page; - unsigned int skey; - - if (!mm->context.has_pgste) - return -EINVAL; - rcp_lock(ptep); - pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - page = virt_to_page(pte_val(*ptep)); - skey = page_get_storage_key(page_to_phys(page)); - if (skey & _PAGE_CHANGED) { - set_bit_simple(RCP_GC_BIT, pgste); - set_bit_simple(KVM_UD_BIT, pgste); + pgste_t pgste; + int dirty = 0; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_update_all(ptep, pgste); + dirty = !!(pgste_val(pgste) & KVM_UC_BIT); + pgste_val(pgste) &= ~KVM_UC_BIT; + pgste_set_unlock(ptep, pgste); + return dirty; } - if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) { - SetPageDirty(page); - set_bit_simple(KVM_UD_BIT, pgste); - } - dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste); - if (skey & _PAGE_CHANGED) - page_clear_dirty(page, 1); - rcp_unlock(ptep); return dirty; } -#endif + +/* + * Get (and clear) the user referenced bit for a pte. + */ +static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, + pte_t *ptep) +{ + pgste_t pgste; + int young = 0; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_update_young(ptep, pgste); + young = !!(pgste_val(pgste) & KVM_UR_BIT); + pgste_val(pgste) &= ~KVM_UR_BIT; + pgste_set_unlock(ptep, pgste); + } + return young; +} #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { -#ifdef CONFIG_PGSTE - unsigned long physpage; - int young; - unsigned long *pgste; + pgste_t pgste; + pte_t pte; - if (!vma->vm_mm->context.has_pgste) - return 0; - physpage = pte_val(*ptep) & PAGE_MASK; - pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - - young = ((page_get_storage_key(physpage) & _PAGE_REFERENCED) != 0); - rcp_lock(ptep); - if (young) - set_bit_simple(RCP_GR_BIT, pgste); - young |= test_and_clear_bit_simple(RCP_HR_BIT, pgste); - rcp_unlock(ptep); - return young; -#endif + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_update_young(ptep, pgste); + pte = *ptep; + *ptep = pte_mkold(pte); + pgste_set_unlock(ptep, pgste); + return pte_young(pte); + } return 0; } @@ -867,10 +867,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, * On s390 reference bits are in storage key and never in TLB * With virtualization we handle the reference bit, without we * we can simply return */ -#ifdef CONFIG_PGSTE return ptep_test_and_clear_young(vma, address, ptep); -#endif - return 0; } static inline void __ptep_ipte(unsigned long address, pte_t *ptep) @@ -890,25 +887,6 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep) } } -static inline void ptep_invalidate(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - if (mm->context.has_pgste) { - rcp_lock(ptep); - __ptep_ipte(address, ptep); - ptep_rcp_copy(ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; - rcp_unlock(ptep); - return; - } - __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; - if (mm->context.noexec) { - __ptep_ipte(address, ptep + PTRS_PER_PTE); - pte_val(*(ptep + PTRS_PER_PTE)) = _PAGE_TYPE_EMPTY; - } -} - /* * This is hard to understand. ptep_get_and_clear and ptep_clear_flush * both clear the TLB for the unmapped pte. The reason is that @@ -923,24 +901,72 @@ static inline void ptep_invalidate(struct mm_struct *mm, * is a nop. */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR -#define ptep_get_and_clear(__mm, __address, __ptep) \ -({ \ - pte_t __pte = *(__ptep); \ - (__mm)->context.flush_mm = 1; \ - if (atomic_read(&(__mm)->context.attach_count) > 1 || \ - (__mm) != current->active_mm) \ - ptep_invalidate(__mm, __address, __ptep); \ - else \ - pte_clear((__mm), (__address), (__ptep)); \ - __pte; \ -}) +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte; + + mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + pgste = pgste_get_lock(ptep); + + pte = *ptep; + if (!mm_exclusive(mm)) + __ptep_ipte(address, ptep); + pte_val(*ptep) = _PAGE_TYPE_EMPTY; + + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste); + pgste_set_unlock(ptep, pgste); + } + return pte; +} + +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, + unsigned long address, + pte_t *ptep) +{ + pte_t pte; + + mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + pgste_get_lock(ptep); + + pte = *ptep; + if (!mm_exclusive(mm)) + __ptep_ipte(address, ptep); + return pte; +} + +static inline void ptep_modify_prot_commit(struct mm_struct *mm, + unsigned long address, + pte_t *ptep, pte_t pte) +{ + *ptep = pte; + if (mm_has_pgste(mm)) + pgste_set_unlock(ptep, *(pgste_t *)(ptep + PTRS_PER_PTE)); +} #define __HAVE_ARCH_PTEP_CLEAR_FLUSH static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - pte_t pte = *ptep; - ptep_invalidate(vma->vm_mm, address, ptep); + pgste_t pgste; + pte_t pte; + + if (mm_has_pgste(vma->vm_mm)) + pgste = pgste_get_lock(ptep); + + pte = *ptep; + __ptep_ipte(address, ptep); + pte_val(*ptep) = _PAGE_TYPE_EMPTY; + + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_update_all(&pte, pgste); + pgste_set_unlock(ptep, pgste); + } return pte; } @@ -953,76 +979,67 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, - unsigned long addr, + unsigned long address, pte_t *ptep, int full) { - pte_t pte = *ptep; + pgste_t pgste; + pte_t pte; + + if (mm_has_pgste(mm)) + pgste = pgste_get_lock(ptep); + + pte = *ptep; + if (!full) + __ptep_ipte(address, ptep); + pte_val(*ptep) = _PAGE_TYPE_EMPTY; - if (full) - pte_clear(mm, addr, ptep); - else - ptep_invalidate(mm, addr, ptep); + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste); + pgste_set_unlock(ptep, pgste); + } return pte; } #define __HAVE_ARCH_PTEP_SET_WRPROTECT -#define ptep_set_wrprotect(__mm, __addr, __ptep) \ -({ \ - pte_t __pte = *(__ptep); \ - if (pte_write(__pte)) { \ - (__mm)->context.flush_mm = 1; \ - if (atomic_read(&(__mm)->context.attach_count) > 1 || \ - (__mm) != current->active_mm) \ - ptep_invalidate(__mm, __addr, __ptep); \ - set_pte_at(__mm, __addr, __ptep, pte_wrprotect(__pte)); \ - } \ -}) +static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte = *ptep; -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -#define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ -({ \ - int __changed = !pte_same(*(__ptep), __entry); \ - if (__changed) { \ - ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \ - set_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \ - } \ - __changed; \ -}) + if (pte_write(pte)) { + mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + pgste = pgste_get_lock(ptep); -/* - * Test and clear dirty bit in storage key. - * We can't clear the changed bit atomically. This is a potential - * race against modification of the referenced bit. This function - * should therefore only be called if it is not mapped in any - * address space. - */ -#define __HAVE_ARCH_PAGE_TEST_DIRTY -static inline int page_test_dirty(struct page *page) -{ - return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0; -} + if (!mm_exclusive(mm)) + __ptep_ipte(address, ptep); + *ptep = pte_wrprotect(pte); -#define __HAVE_ARCH_PAGE_CLEAR_DIRTY -static inline void page_clear_dirty(struct page *page, int mapped) -{ - page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, mapped); + if (mm_has_pgste(mm)) + pgste_set_unlock(ptep, pgste); + } + return pte; } -/* - * Test and clear referenced bit in storage key. - */ -#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG -static inline int page_test_and_clear_young(struct page *page) +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +static inline int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty) { - unsigned long physpage = page_to_phys(page); - int ccode; - - asm volatile( - " rrbe 0,%1\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode) : "a" (physpage) : "cc" ); - return ccode & 2; + pgste_t pgste; + + if (pte_same(*ptep, entry)) + return 0; + if (mm_has_pgste(vma->vm_mm)) + pgste = pgste_get_lock(ptep); + + __ptep_ipte(address, ptep); + *ptep = entry; + + if (mm_has_pgste(vma->vm_mm)) + pgste_set_unlock(ptep, pgste); + return 1; } /* diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 2c79b6416271..1300c3025334 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -84,6 +84,7 @@ struct thread_struct { struct per_event per_event; /* Cause of the last PER trap */ /* pfault_wait is used to block the process on a pfault event */ unsigned long pfault_wait; + struct list_head list; }; typedef struct thread_struct thread_struct; diff --git a/arch/s390/include/asm/s390_ext.h b/arch/s390/include/asm/s390_ext.h deleted file mode 100644 index 080876d5f196..000000000000 --- a/arch/s390/include/asm/s390_ext.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright IBM Corp. 1999,2010 - * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>, - * Martin Schwidefsky <schwidefsky@de.ibm.com>, - */ - -#ifndef _S390_EXTINT_H -#define _S390_EXTINT_H - -#include <linux/types.h> - -typedef void (*ext_int_handler_t)(unsigned int, unsigned int, unsigned long); - -int register_external_interrupt(__u16 code, ext_int_handler_t handler); -int unregister_external_interrupt(__u16 code, ext_int_handler_t handler); - -#endif /* _S390_EXTINT_H */ diff --git a/arch/s390/include/asm/suspend.h b/arch/s390/include/asm/suspend.h deleted file mode 100644 index dc75c616eafe..000000000000 --- a/arch/s390/include/asm/suspend.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __ASM_S390_SUSPEND_H -#define __ASM_S390_SUSPEND_H - -static inline int arch_prepare_suspend(void) -{ - return 0; -} - -#endif - diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 9074a54c4d10..77eee5477a52 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -29,65 +29,77 @@ #include <asm/smp.h> #include <asm/tlbflush.h> -#ifndef CONFIG_SMP -#define TLB_NR_PTRS 1 -#else -#define TLB_NR_PTRS 508 -#endif - struct mmu_gather { struct mm_struct *mm; unsigned int fullmm; unsigned int nr_ptes; unsigned int nr_pxds; - void *array[TLB_NR_PTRS]; + unsigned int max; + void **array; + void *local[8]; }; -DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); - -static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, - unsigned int full_mm_flush) +static inline void __tlb_alloc_page(struct mmu_gather *tlb) { - struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); + unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); + if (addr) { + tlb->array = (void *) addr; + tlb->max = PAGE_SIZE / sizeof(void *); + } +} + +static inline void tlb_gather_mmu(struct mmu_gather *tlb, + struct mm_struct *mm, + unsigned int full_mm_flush) +{ tlb->mm = mm; + tlb->max = ARRAY_SIZE(tlb->local); + tlb->array = tlb->local; tlb->fullmm = full_mm_flush; - tlb->nr_ptes = 0; - tlb->nr_pxds = TLB_NR_PTRS; if (tlb->fullmm) __tlb_flush_mm(mm); - return tlb; + else + __tlb_alloc_page(tlb); + tlb->nr_ptes = 0; + tlb->nr_pxds = tlb->max; } -static inline void tlb_flush_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) +static inline void tlb_flush_mmu(struct mmu_gather *tlb) { - if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < TLB_NR_PTRS)) + if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < tlb->max)) __tlb_flush_mm(tlb->mm); while (tlb->nr_ptes > 0) page_table_free_rcu(tlb->mm, tlb->array[--tlb->nr_ptes]); - while (tlb->nr_pxds < TLB_NR_PTRS) + while (tlb->nr_pxds < tlb->max) crst_table_free_rcu(tlb->mm, tlb->array[tlb->nr_pxds++]); } static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - tlb_flush_mmu(tlb, start, end); + tlb_flush_mmu(tlb); rcu_table_freelist_finish(); /* keep the page table cache within bounds */ check_pgt_cache(); - put_cpu_var(mmu_gathers); + if (tlb->array != tlb->local) + free_pages((unsigned long) tlb->array, 0); } /* * Release the page cache reference for a pte removed by - * tlb_ptep_clear_flush. In both flush modes the tlb fo a page cache page + * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page * has already been freed, so just do free_page_and_swap_cache. */ +static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + free_page_and_swap_cache(page); + return 1; /* avoid calling tlb_flush_mmu */ +} + static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) { free_page_and_swap_cache(page); @@ -103,7 +115,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, if (!tlb->fullmm) { tlb->array[tlb->nr_ptes++] = pte; if (tlb->nr_ptes >= tlb->nr_pxds) - tlb_flush_mmu(tlb, 0, 0); + tlb_flush_mmu(tlb); } else page_table_free(tlb->mm, (unsigned long *) pte); } @@ -124,7 +136,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, if (!tlb->fullmm) { tlb->array[--tlb->nr_pxds] = pmd; if (tlb->nr_ptes >= tlb->nr_pxds) - tlb_flush_mmu(tlb, 0, 0); + tlb_flush_mmu(tlb); } else crst_table_free(tlb->mm, (unsigned long *) pmd); #endif @@ -146,7 +158,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, if (!tlb->fullmm) { tlb->array[--tlb->nr_pxds] = pud; if (tlb->nr_ptes >= tlb->nr_pxds) - tlb_flush_mmu(tlb, 0, 0); + tlb_flush_mmu(tlb); } else crst_table_free(tlb->mm, (unsigned long *) pud); #endif diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 29d5d6d4becc..b7a4f2eb0057 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -50,7 +50,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm) /* * If the process only ran on the local cpu, do a local flush. */ - local_cpumask = cpumask_of_cpu(smp_processor_id()); + cpumask_copy(&local_cpumask, cpumask_of(smp_processor_id())); if (cpumask_equal(mm_cpumask(mm), &local_cpumask)) __tlb_flush_local(); else @@ -80,16 +80,11 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) * on all cpus instead of doing a local flush if the mm * only ran on the local cpu. */ - if (MACHINE_HAS_IDTE) { - if (mm->context.noexec) - __tlb_flush_idte((unsigned long) - get_shadow_table(mm->pgd) | - mm->context.asce_bits); + if (MACHINE_HAS_IDTE) __tlb_flush_idte((unsigned long) mm->pgd | mm->context.asce_bits); - return; - } - __tlb_flush_full(mm); + else + __tlb_flush_full(mm); } static inline void __tlb_flush_mm_cond(struct mm_struct * mm) diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index c5338834ddbd..005d77d8ae2a 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -7,7 +7,7 @@ extern unsigned char cpu_core_id[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; -static inline const struct cpumask *cpu_coregroup_mask(unsigned int cpu) +static inline const struct cpumask *cpu_coregroup_mask(int cpu) { return &cpu_core_map[cpu]; } @@ -21,7 +21,7 @@ static inline const struct cpumask *cpu_coregroup_mask(unsigned int cpu) extern unsigned char cpu_book_id[NR_CPUS]; extern cpumask_t cpu_book_map[NR_CPUS]; -static inline const struct cpumask *cpu_book_mask(unsigned int cpu) +static inline const struct cpumask *cpu_book_mask(int cpu) { return &cpu_book_map[cpu]; } diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 2d9ea11f919a..2b23885e81e9 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -49,12 +49,13 @@ #define segment_eq(a,b) ((a).ar4 == (b).ar4) +#define __access_ok(addr, size) \ +({ \ + __chk_user_ptr(addr); \ + 1; \ +}) -static inline int __access_ok(const void __user *addr, unsigned long size) -{ - return 1; -} -#define access_ok(type,addr,size) __access_ok(addr,size) +#define access_ok(type, addr, size) __access_ok(addr, size) /* * The exception table consists of pairs of addresses: the first is the diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index e82152572377..404bdb9671b4 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -276,7 +276,8 @@ #define __NR_open_by_handle_at 336 #define __NR_clock_adjtime 337 #define __NR_syncfs 338 -#define NR_syscalls 339 +#define __NR_setns 339 +#define NR_syscalls 340 /* * There are some system calls that are not present on 64 bit, some @@ -385,6 +386,7 @@ /* Ignore system calls that are also reachable via sys_socket */ #define __IGNORE_recvmmsg +#define __IGNORE_sendmmsg #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR |