From d5e842c4b79cc8e454c4fbbc1ce6a43d43184367 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky
Date: Wed, 11 Feb 2009 10:37:28 +0100
Subject: [S390] vdso: fix per cpu vdso pointer in lowcore

The vdso_per_cpu_data entry in the lowcore structure uses __u32
instead of __u64. If the data page is above 4GB the pointer is
truncated and the kernel crashes.

Reported-by: Mijo Safradin
Signed-off-by: Martin Schwidefsky
---
 arch/s390/include/asm/lowcore.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/s390/include')

diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index ffdef5fe8587..f3720defdd16 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -384,8 +384,8 @@ struct _lowcore
 	__u32	panic_magic;			/* 0xe00 */

 	/* Per cpu primary space access list */
-	__u8	pad_0xe04[0xe3c-0xe04];		/* 0xe04 */
-	__u32	vdso_per_cpu_data;		/* 0xe3c */
+	__u8	pad_0xe04[0xe38-0xe04];		/* 0xe04 */
+	__u64	vdso_per_cpu_data;		/* 0xe38 */

 	__u32	paste[16];			/* 0xe40 */
 	__u8	pad13[0x11b8-0xe80];		/* 0xe80 */
--
cgit v1.2.3


From d5cd0343d2878b66e25e044f644563c6bf708833 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger
Date: Thu, 19 Feb 2009 15:19:00 +0100
Subject: [S390] Fix timeval regression on s390

commit aa5e97ce4bbc9d5daeec16b1d15bb3f6b7b4f4d4
("[PATCH] improve precision of process accounting")
introduced a timing regression:

-bash-3.2# time ls

real	0m0.006s
user	0m1.754s
sys	0m1.094s

The problem was introduced by an error in cputime_to_timeval.
Cputime is now 1/4096 microsecond, therefore we have to divide the
remainder by 4096 to get the microseconds.

Signed-off-by: Christian Borntraeger
Signed-off-by: Martin Schwidefsky
---
 arch/s390/include/asm/cputime.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/s390/include')

diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 521726430afa..95b0f7db3c69 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -145,7 +145,7 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value)
 	value->tv_usec = rp.subreg.even / 4096;
 	value->tv_sec = rp.subreg.odd;
 #else
-	value->tv_usec = cputime % 4096000000ULL;
+	value->tv_usec = (cputime % 4096000000ULL) / 4096;
 	value->tv_sec = cputime / 4096000000ULL;
 #endif
 }
--
cgit v1.2.3
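
A quick illustration of the arithmetic behind the cputime fix above: one
cputime unit is 1/4096 microsecond, so 4096000000 units make one second, and
the remainder of that division still has to be divided by 4096 to yield
microseconds. The following stand-alone C sketch mirrors the corrected
non-__s390x__ branch; it is demo code with made-up names (demo_timeval,
demo_cputime_to_timeval), not taken from the kernel:

    #include <stdio.h>

    /* Demo of the corrected conversion: one cputime unit is 1/4096 microsecond,
     * so 4096000000ULL units make up one second. */
    struct demo_timeval {
    	unsigned long long tv_sec;
    	unsigned long long tv_usec;
    };

    static void demo_cputime_to_timeval(unsigned long long cputime,
    				    struct demo_timeval *value)
    {
    	value->tv_usec = (cputime % 4096000000ULL) / 4096; /* remainder -> microseconds */
    	value->tv_sec = cputime / 4096000000ULL;           /* whole seconds */
    }

    int main(void)
    {
    	struct demo_timeval tv;

    	/* 6 seconds plus 1754 microseconds, expressed in 1/4096 microsecond units. */
    	demo_cputime_to_timeval(6 * 4096000000ULL + 1754 * 4096ULL, &tv);
    	printf("%llus %lluus\n", tv.tv_sec, tv.tv_usec); /* prints "6s 1754us" */
    	return 0;
    }
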
From 23d75d9cadd79bc9fd6553857d57c679cf18d4cb Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Thu, 19 Feb 2009 15:19:01 +0100
Subject: [S390] fix "mem=" handling in case of standby memory

Standby memory detected via the sclp interface always gets registered
with add_memory calls, without considering the limit that the "mem="
kernel parameter implies. So fix this and only register standby memory
that is below the specified limit.

This fixes zfcpdump since it uses "mem=32M": if approximately 2GB of
standby memory is present, all of usable memory would otherwise be
consumed by the struct pages needed for the standby memory.

Signed-off-by: Heiko Carstens
Signed-off-by: Martin Schwidefsky
---
 arch/s390/include/asm/setup.h | 2 ++
 arch/s390/kernel/setup.c      | 9 +++++++--
 drivers/s390/char/sclp_cmd.c  | 5 +++++
 3 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'arch/s390/include')

diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 2bd9faeb3919..e8bd6ac22c99 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -43,6 +43,8 @@ struct mem_chunk {

 extern struct mem_chunk memory_chunk[];
 extern unsigned long real_memory_size;
+extern int memory_end_set;
+extern unsigned long memory_end;

 void detect_memory_layout(struct mem_chunk chunk[]);

diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index d825f4950e4e..c5cfb6185eac 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -82,7 +82,9 @@ char elf_platform[ELF_PLATFORM_SIZE];

 struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS];
 volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */
-static unsigned long __initdata memory_end;
+
+int __initdata memory_end_set;
+unsigned long __initdata memory_end;

 /*
  * This is set up by the setup-routine at boot-time
@@ -281,6 +283,7 @@ void (*pm_power_off)(void) = machine_power_off;
 static int __init early_parse_mem(char *p)
 {
 	memory_end = memparse(p, &p);
+	memory_end_set = 1;
 	return 0;
 }
 early_param("mem", early_parse_mem);
@@ -508,8 +511,10 @@ static void __init setup_memory_end(void)
 	int i;

 #if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE)
-	if (ipl_info.type == IPL_TYPE_FCP_DUMP)
+	if (ipl_info.type == IPL_TYPE_FCP_DUMP) {
 		memory_end = ZFCPDUMP_HSA_SIZE;
+		memory_end_set = 1;
+	}
 #endif
 	memory_size = 0;
 	memory_end &= PAGE_MASK;
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index 506390496416..77ab6e34a100 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include

 #include "sclp.h"

@@ -474,6 +475,10 @@ static void __init add_memory_merged(u16 rn)
 		goto skip_add;
 	if (start + size > VMEM_MAX_PHYS)
 		size = VMEM_MAX_PHYS - start;
+	if (memory_end_set && (start >= memory_end))
+		goto skip_add;
+	if (memory_end_set && (start + size > memory_end))
+		size = memory_end - start;
 	add_memory(0, start, size);
 skip_add:
 	first_rn = rn;
--
cgit v1.2.3
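
The checks added to add_memory_merged() above implement a simple clamping
rule: a standby memory region that starts at or beyond the "mem=" limit is
skipped entirely, and a region that straddles the limit is shrunk so it ends
exactly at the limit. A simplified user-space C sketch of that rule, with a
hypothetical helper name (clamp_to_memory_end) and not taken from the kernel:

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustration only: mirrors the skip/trim decision added by the patch above.
     * Returns false if the region should be skipped entirely, otherwise trims
     * *size so that start + *size never exceeds the configured limit. */
    static bool clamp_to_memory_end(unsigned long start, unsigned long *size,
    				int memory_end_set, unsigned long memory_end)
    {
    	if (!memory_end_set)
    		return true;			/* no "mem=" limit given */
    	if (start >= memory_end)
    		return false;			/* region lies entirely above the limit */
    	if (start + *size > memory_end)
    		*size = memory_end - start;	/* region straddles the limit: trim it */
    	return true;
    }

    int main(void)
    {
    	unsigned long size = 256UL << 20;	/* 256MB standby region starting at 0 */

    	/* With "mem=32M" (limit 32MB) the region is kept but trimmed to 32MB. */
    	if (clamp_to_memory_end(0, &size, 1, 32UL << 20))
    		printf("register %lu MB\n", size >> 20);	/* prints "register 32 MB" */
    	return 0;
    }
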
From f55d63854e426e95d7858c2662c2353262a5af70 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Wed, 18 Mar 2009 13:27:33 +0100
Subject: [S390] topology: define SD_MC_INIT to fix performance regression

The default values for SD_MC_INIT cause an additional cpu usage of up
to 40% on some network benchmarks compared to the plain SD_CPU_INIT
values. So just define SD_MC_INIT to SD_CPU_INIT. More tuning needs
to be done.

Signed-off-by: Heiko Carstens
Signed-off-by: Martin Schwidefsky
---
 arch/s390/include/asm/topology.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/s390/include')

diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index c93eb50e1d09..c979c3b56ab0 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -30,6 +30,8 @@ static inline void s390_init_cpu_topology(void)
 };
 #endif

+#define SD_MC_INIT SD_CPU_INIT
+
 #include

 #endif /* _ASM_S390_TOPOLOGY_H */
--
cgit v1.2.3


From f481bfafd36e621d6cbc62d4b25f74811410aef7 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky
Date: Wed, 18 Mar 2009 13:27:36 +0100
Subject: [S390] make page table walking more robust

Make page table walking on s390 more robust. The current code requires
that the pgd/pud/pmd/pte loop is only done for address ranges that are
below the end address of the last vma of the address space. But this is
not always true, e.g. the generic page table walker does not guarantee
this. Change TASK_SIZE/TASK_SIZE_OF to reflect the current size of the
address space. This makes the generic page table walker happy but it
breaks the upgrade of a 3 level page table to a 4 level page table. To
make the upgrade work again another fix is required.

Signed-off-by: Martin Schwidefsky
---
 arch/s390/include/asm/processor.h | 5 ++---
 arch/s390/mm/mmap.c               | 4 ++--
 arch/s390/mm/pgtable.c            | 2 ++
 3 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'arch/s390/include')

diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 066b99502e09..db4523fe38ac 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -61,7 +61,7 @@ extern void print_cpu_info(struct cpuinfo_S390 *);
 extern int get_cpu_capability(unsigned int *);

 /*
- * User space process size: 2GB for 31 bit, 4TB for 64 bit.
+ * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
  */
 #ifndef __s390x__
@@ -70,8 +70,7 @@ extern int get_cpu_capability(unsigned int *);

 #else /* __s390x__ */

-#define TASK_SIZE_OF(tsk)	(test_tsk_thread_flag(tsk,TIF_31BIT) ? \
-					(1UL << 31) : (1UL << 53))
+#define TASK_SIZE_OF(tsk)	((tsk)->mm->context.asce_limit)
 #define TASK_UNMAPPED_BASE	(test_thread_flag(TIF_31BIT) ? \
 					(1UL << 30) : (1UL << 41))
 #define TASK_SIZE		TASK_SIZE_OF(current)
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 5932a824547a..346dd0c5cbde 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -35,7 +35,7 @@
  * Leave an at least ~128 MB hole.
  */
 #define MIN_GAP (128*1024*1024)
-#define MAX_GAP (TASK_SIZE/6*5)
+#define MAX_GAP (STACK_TOP/6*5)

 static inline unsigned long mmap_base(void)
 {
@@ -46,7 +46,7 @@ static inline unsigned long mmap_base(void)
 	else if (gap > MAX_GAP)
 		gap = MAX_GAP;

-	return TASK_SIZE - (gap & PAGE_MASK);
+	return STACK_TOP - (gap & PAGE_MASK);
 }

 static inline int mmap_is_legacy(void)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 0767827540b1..6b6ddc4ea02b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -117,6 +117,7 @@ repeat:
 		crst_table_init(table, entry);
 		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
 		mm->pgd = (pgd_t *) table;
+		mm->task_size = mm->context.asce_limit;
 		table = NULL;
 	}
 	spin_unlock(&mm->page_table_lock);
@@ -154,6 +155,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
 			BUG();
 	}
 	mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
+	mm->task_size = mm->context.asce_limit;
 	crst_table_free(mm, (unsigned long *) pgd);
 }
 	update_mm(mm, current);
--
cgit v1.2.3


From 0fb1d9bcbcf701a45835aa150c57ca54ea685bfa Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky
Date: Wed, 18 Mar 2009 13:27:37 +0100
Subject: [S390] make page table upgrade work again

Now that TASK_SIZE gives the current size of the address space, the
upgrade of a 64 bit process from 3 to 4 levels of page table needs to
use the arch_mmap_check hook to catch large mmap lengths. The
get_unmapped_area* functions need to check for -ENOMEM from the
arch_get_unmapped_area* functions, upgrade the page table and retry.

Signed-off-by: Martin Schwidefsky
---
 arch/s390/include/asm/mman.h |  5 +++++
 arch/s390/mm/mmap.c          | 44 ++++++++++++++++++++++++++++--------------
 2 files changed, 35 insertions(+), 14 deletions(-)

(limited to 'arch/s390/include')

diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
index 7839767d837e..da01432e8f44 100644
--- a/arch/s390/include/asm/mman.h
+++ b/arch/s390/include/asm/mman.h
@@ -22,4 +22,9 @@
 #define MCL_CURRENT	1	/* lock all current mappings */
 #define MCL_FUTURE	2	/* lock all future mappings */

+#if defined(__KERNEL__) && !defined(__ASSEMBLY__) && defined(CONFIG_64BIT)
+int s390_mmap_check(unsigned long addr, unsigned long len);
+#define arch_mmap_check(addr,len,flags)	s390_mmap_check(addr,len)
+#endif
+
 #endif /* __S390_MMAN_H__ */
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 346dd0c5cbde..e008d236cc15 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -89,42 +89,58 @@ EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);

 #else

+int s390_mmap_check(unsigned long addr, unsigned long len)
+{
+	if (!test_thread_flag(TIF_31BIT) &&
+	    len >= TASK_SIZE && TASK_SIZE < (1UL << 53))
+		return crst_table_upgrade(current->mm, 1UL << 53);
+	return 0;
+}
+
 static unsigned long
 s390_get_unmapped_area(struct file *filp, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
+	unsigned long area;
 	int rc;

-	addr = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
-	if (addr & ~PAGE_MASK)
-		return addr;
-	if (unlikely(mm->context.asce_limit < addr + len)) {
-		rc = crst_table_upgrade(mm, addr + len);
+	area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
+	if (!(area & ~PAGE_MASK))
+		return area;
+	if (area == -ENOMEM &&
+	    !test_thread_flag(TIF_31BIT) && TASK_SIZE < (1UL << 53)) {
+		/* Upgrade the page table to 4 levels and retry. */
+		rc = crst_table_upgrade(mm, 1UL << 53);
 		if (rc)
 			return (unsigned long) rc;
+		area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
 	}
-	return addr;
+	return area;
 }

 static unsigned long
-s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
 		const unsigned long len, const unsigned long pgoff,
 		const unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	unsigned long addr = addr0;
+	unsigned long area;
 	int rc;

-	addr = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
-	if (addr & ~PAGE_MASK)
-		return addr;
-	if (unlikely(mm->context.asce_limit < addr + len)) {
-		rc = crst_table_upgrade(mm, addr + len);
+	area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
+	if (!(area & ~PAGE_MASK))
+		return area;
+	if (area == -ENOMEM &&
+	    !test_thread_flag(TIF_31BIT) && TASK_SIZE < (1UL << 53)) {
+		/* Upgrade the page table to 4 levels and retry. */
+		rc = crst_table_upgrade(mm, 1UL << 53);
 		if (rc)
 			return (unsigned long) rc;
+		area = arch_get_unmapped_area_topdown(filp, addr, len,
+						      pgoff, flags);
 	}
-	return addr;
+	return area;
 }

 /*
  * This function, called very early during the creation of a new
--
cgit v1.2.3
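
The mmap.c change above follows one simple pattern: call the generic
arch_get_unmapped_area* helper first, and only if it fails with -ENOMEM while
the process is still running on a 3 level page table, upgrade to 4 levels and
retry once. The following stand-alone, user-space C sketch condenses that
control flow; all names and constants are made up, it assumes a 64 bit
unsigned long, and it is not kernel code:

    #include <errno.h>
    #include <stdio.h>

    #define LIMIT_3LEVEL (1UL << 42)	/* demo stand-in for the 3-level address space limit */
    #define LIMIT_4LEVEL (1UL << 53)	/* demo stand-in for the 4-level address space limit */

    static unsigned long task_size = LIMIT_3LEVEL;

    /* Fake allocator: returns an arbitrary page-aligned address if the request
     * fits below the current limit, otherwise fails with -ENOMEM. */
    static unsigned long fake_get_area(unsigned long len)
    {
    	return len < task_size ? 0x10000UL : (unsigned long) -ENOMEM;
    }

    /* Mirrors the control flow of s390_get_unmapped_area() after the patch:
     * try once, and on -ENOMEM upgrade to 4 levels and retry exactly once. */
    static unsigned long get_area_with_upgrade(unsigned long len)
    {
    	unsigned long area = fake_get_area(len);

    	if (!(area & 4095UL))
    		return area;			/* page aligned, i.e. success */
    	if (area == (unsigned long) -ENOMEM && task_size < LIMIT_4LEVEL) {
    		task_size = LIMIT_4LEVEL;	/* "upgrade" the page table */
    		area = fake_get_area(len);	/* and retry the allocation */
    	}
    	return area;
    }

    int main(void)
    {
    	/* A 32TB request does not fit below 1UL << 42 but does after the upgrade. */
    	printf("%#lx\n", get_area_with_upgrade(1UL << 45));
    	return 0;
    }
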