From 5cfba5df8c76851ab311a2818a5e688f20833cac Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 1 Sep 2008 14:12:51 +0100 Subject: S390: Update comments about why we don't use <asm-generic/statfs.h> Signed-off-by: David Woodhouse --- arch/s390/include/asm/statfs.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/statfs.h b/arch/s390/include/asm/statfs.h index 099a45579190..06cc70307ece 100644 --- a/arch/s390/include/asm/statfs.h +++ b/arch/s390/include/asm/statfs.h @@ -12,19 +12,16 @@ #ifndef __s390x__ #include <asm-generic/statfs.h> #else +/* + * We can't use <asm-generic/statfs.h> because in 64-bit mode + * we mix ints of different sizes in our struct statfs. + */ #ifndef __KERNEL_STRICT_NAMES - #include - typedef __kernel_fsid_t fsid_t; - #endif -/* - * This is ugly -- we're already 64-bit clean, so just duplicate the - * definitions. - */ struct statfs { int f_type; int f_bsize; -- cgit v1.2.3 From e545a6140b698b2494daf0b32107bdcc5e901390 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sun, 7 Sep 2008 16:57:22 +0200 Subject: kernel/cpu.c: create a CPU_STARTING cpu_chain notifier Right now, there is no notifier that is called on a new cpu, before the new cpu begins processing interrupts/softirqs. Various kernel functions would need that notification, e.g. kvm works around this by calling smp_call_function_single(), rcu polls cpu_online_map. The patch adds a CPU_STARTING notification. It also adds a helper function that sends the message to all cpu_chain handlers. Tested on x86-64. All other archs are untested. Especially on sparc, I'm not sure if I got it right. 
Signed-off-by: Manfred Spraul Signed-off-by: Ingo Molnar --- arch/alpha/kernel/smp.c | 3 +++ arch/arm/kernel/smp.c | 1 + arch/cris/arch-v32/kernel/smp.c | 1 + arch/ia64/kernel/smpboot.c | 1 + arch/m32r/kernel/smpboot.c | 2 ++ arch/mips/kernel/smp.c | 2 ++ arch/powerpc/kernel/smp.c | 1 + arch/s390/kernel/smp.c | 2 ++ arch/sh/kernel/smp.c | 2 ++ arch/sparc/kernel/sun4d_smp.c | 1 + arch/sparc/kernel/sun4m_smp.c | 2 ++ arch/um/kernel/smp.c | 1 + arch/x86/kernel/smpboot.c | 1 + arch/x86/mach-voyager/voyager_smp.c | 2 ++ include/linux/cpu.h | 1 + include/linux/notifier.h | 10 +++++++++- kernel/cpu.c | 19 +++++++++++++++++++ 17 files changed, 51 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 83df541650fc..06b6fdab639f 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -149,6 +149,9 @@ smp_callin(void) atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; + /* inform the notifiers about the new cpu */ + notify_cpu_starting(cpuid); + /* Must have completely accurate bogos. */ local_irq_enable(); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index e9842f6767f9..e42a749a56dd 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -277,6 +277,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) /* * Enable local interrupts. 
*/ + notify_cpu_starting(cpu); local_irq_enable(); local_fiq_enable(); diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index 952a24b2f5a9..52e16c6436f9 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -178,6 +178,7 @@ void __init smp_callin(void) unmask_irq(IPI_INTR_VECT); unmask_irq(TIMER0_INTR_VECT); preempt_disable(); + notify_cpu_starting(cpu); local_irq_enable(); cpu_set(cpu, cpu_online_map); diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index bcea81e432fd..333b58f218d0 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -401,6 +401,7 @@ smp_callin (void) spin_lock(&vector_lock); /* Setup the per cpu irq handling data structures */ __setup_vector_irq(cpuid); + notify_cpu_starting(cpuid); cpu_set(cpuid, cpu_online_map); per_cpu(cpu_state, cpuid) = CPU_ONLINE; spin_unlock(&vector_lock); diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c index 2c03ac1d005f..fc2994811f15 100644 --- a/arch/m32r/kernel/smpboot.c +++ b/arch/m32r/kernel/smpboot.c @@ -498,6 +498,8 @@ static void __init smp_online(void) { int cpu_id = smp_processor_id(); + notify_cpu_starting(cpu_id); + local_irq_enable(); /* Get our bogomips. 
*/ diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 4410f172b8ab..7b59cfb7e602 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -121,6 +121,8 @@ asmlinkage __cpuinit void start_secondary(void) cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; + notify_cpu_starting(cpu); + mp_ops->smp_finish(); set_cpu_sibling_map(cpu); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 5337ca7bb649..c27b10a1bd79 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -453,6 +453,7 @@ int __devinit start_secondary(void *unused) secondary_cpu_time_init(); ipi_call_lock(); + notify_cpu_starting(cpu); cpu_set(cpu, cpu_online_map); /* Update sibling maps */ base = cpu_first_thread_in_core(cpu); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 00b9b4dec5eb..9e8b1f9b8f4d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -585,6 +585,8 @@ int __cpuinit start_secondary(void *cpuvoid) /* Enable pfault pseudo page faults on this cpu. 
*/ pfault_init(); + /* call cpu notifiers */ + notify_cpu_starting(smp_processor_id()); /* Mark this cpu as online */ spin_lock(&call_lock); cpu_set(smp_processor_id(), cpu_online_map); diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 60c50841143e..001778f9adaf 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -82,6 +82,8 @@ asmlinkage void __cpuinit start_secondary(void) preempt_disable(); + notify_cpu_starting(smp_processor_id()); + local_irq_enable(); calibrate_delay(); diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index 69596402a500..446767e8f569 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -88,6 +88,7 @@ void __init smp4d_callin(void) local_flush_cache_all(); local_flush_tlb_all(); + notify_cpu_starting(cpuid); /* * Unblock the master CPU _only_ when the scheduler state * of all secondary CPUs will be up-to-date, so after diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index a14a76ac7f36..9964890dc1db 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c @@ -71,6 +71,8 @@ void __cpuinit smp4m_callin(void) local_flush_cache_all(); local_flush_tlb_all(); + notify_cpu_starting(cpuid); + /* Get our local ticker going. */ smp_setup_percpu_timer(); diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c index be2d50c3aa95..045772142844 100644 --- a/arch/um/kernel/smp.c +++ b/arch/um/kernel/smp.c @@ -85,6 +85,7 @@ static int idle_proc(void *cpup) while (!cpu_isset(cpu, smp_commenced_mask)) cpu_relax(); + notify_cpu_starting(cpu); cpu_set(cpu, cpu_online_map); default_idle(); return 0; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7985c5b3f916..0b8261c3cac2 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -257,6 +257,7 @@ static void __cpuinit smp_callin(void) end_local_APIC_setup(); map_cpu_to_logical_apicid(); + notify_cpu_starting(cpuid); /* * Get our bogomips. 
* diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index ee0fba092157..199a5f4a873c 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -448,6 +448,8 @@ static void __init start_secondary(void *unused) VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid)); + notify_cpu_starting(cpuid); + /* enable interrupts */ local_irq_enable(); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d7faf8808497..c2747ac2ae43 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -69,6 +69,7 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb) #endif int cpu_up(unsigned int cpu); +void notify_cpu_starting(unsigned int cpu); extern void cpu_hotplug_init(void); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); diff --git a/include/linux/notifier.h b/include/linux/notifier.h index da2698b0fdd1..b86fa2ffca0c 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -213,9 +213,16 @@ static inline int notifier_to_errno(int ret) #define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */ #define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ #define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task, - * not handling interrupts, soon dead */ + * not handling interrupts, soon dead. + * Called on the dying cpu, interrupts + * are already disabled. Must not + * sleep, must not fail */ #define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug * lock is dropped */ +#define CPU_STARTING 0x000A /* CPU (unsigned)v soon running. + * Called on the new cpu, just before + * enabling interrupts. 
Must not sleep, + * must not fail */ /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend * operation in progress @@ -229,6 +236,7 @@ static inline int notifier_to_errno(int ret) #define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) #define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) #define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN) +#define CPU_STARTING_FROZEN (CPU_STARTING | CPU_TASKS_FROZEN) /* Hibernation and suspend events */ #define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */ diff --git a/kernel/cpu.c b/kernel/cpu.c index f17e9854c246..dc45f2459efb 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -453,6 +453,25 @@ out: } #endif /* CONFIG_PM_SLEEP_SMP */ +/** + * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers + * @cpu: cpu that just started + * + * This function calls the cpu_chain notifiers with CPU_STARTING. + * It must be called by the arch code on the new cpu, before the new cpu + * enables interrupts and before the "boot" cpu returns from __cpu_up(). + */ +void notify_cpu_starting(unsigned int cpu) +{ + unsigned long val = CPU_STARTING; + +#ifdef CONFIG_PM_SLEEP_SMP + if (cpu_isset(cpu, frozen_cpus)) + val = CPU_STARTING_FROZEN; +#endif /* CONFIG_PM_SLEEP_SMP */ + raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu); +} + #endif /* CONFIG_SMP */ /* -- cgit v1.2.3 From d3d238c7744d08c36a114a59cb537d4c0c6c9a86 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 3 Oct 2008 21:54:59 +0200 Subject: [S390] nohz: Fix __udelay. This fixes a regression that came with 934b2857cc576ae53c92a66e63fce7ddcfa74691 ("[S390] nohz/sclp: disable timer on synchronous waits."). If udelay() gets called from a disabled context it sets the clock comparator to a value where it expects the next interrupt. When the interrupt happens, the clock comparator does not get reset and therefore the interrupt condition doesn't get cleared. The result is an endless timer interrupt loop. 
In addition this patch fixes also the following: rcutorture reveals that our __udelay implementation is still buggy, since it might schedule tasklets, but prevents their execution: NOHZ: local_softirq_pending 42 NOHZ: local_softirq_pending 02 NOHZ: local_softirq_pending 142 NOHZ: local_softirq_pending 02 To fix this we make sure that only the clock comparator interrupt is enabled when the enabled wait psw is loaded. Also no code gets called anymore which might schedule tasklets. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/time.c | 2 ++ arch/s390/lib/delay.c | 88 ++++++++++++++++++++++++++++++------------------- 2 files changed, 56 insertions(+), 34 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index ca114fe46ffb..06acb1a18bbc 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -169,6 +169,8 @@ void init_cpu_timer(void) static void clock_comparator_interrupt(__u16 code) { + if (S390_lowcore.clock_comparator == -1ULL) + set_clock_comparator(S390_lowcore.clock_comparator); } static void etr_timing_alert(struct etr_irq_parm *); diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index fc6ab6094df8..0953cee05efc 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -1,14 +1,9 @@ /* - * arch/s390/lib/delay.c * Precise Delay Loops for S390 * - * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * - * Derived from "arch/i386/lib/delay.c" - * Copyright (C) 1993 Linus Torvalds - * Copyright (C) 1997 Martin Mares + * Copyright IBM Corp. 1999,2008 + * Author(s): Martin Schwidefsky , + * Heiko Carstens , */ #include @@ -29,30 +24,31 @@ void __delay(unsigned long loops) asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1)); } -/* - * Waits for 'usecs' microseconds using the TOD clock comparator. 
- */ -void __udelay(unsigned long usecs) +static void __udelay_disabled(unsigned long usecs) { - u64 end, time, old_cc = 0; - unsigned long flags, cr0, mask, dummy; - int irq_context; + unsigned long mask, cr0, cr0_saved; + u64 clock_saved; - irq_context = in_interrupt(); - if (!irq_context) - local_bh_disable(); - local_irq_save(flags); - if (raw_irqs_disabled_flags(flags)) { - old_cc = local_tick_disable(); - S390_lowcore.clock_comparator = -1ULL; - __ctl_store(cr0, 0, 0); - dummy = (cr0 & 0xffff00e0) | 0x00000800; - __ctl_load(dummy , 0, 0); - mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; - } else - mask = psw_kernel_bits | PSW_MASK_WAIT | - PSW_MASK_EXT | PSW_MASK_IO; + clock_saved = local_tick_disable(); + set_clock_comparator(get_clock() + ((u64) usecs << 12)); + __ctl_store(cr0_saved, 0, 0); + cr0 = (cr0_saved & 0xffff00e0) | 0x00000800; + __ctl_load(cr0 , 0, 0); + mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; + trace_hardirqs_on(); + __load_psw_mask(mask); + local_irq_disable(); + __ctl_load(cr0_saved, 0, 0); + local_tick_enable(clock_saved); + set_clock_comparator(S390_lowcore.clock_comparator); +} +static void __udelay_enabled(unsigned long usecs) +{ + unsigned long mask; + u64 end, time; + + mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT | PSW_MASK_IO; end = get_clock() + ((u64) usecs << 12); do { time = end < S390_lowcore.clock_comparator ? @@ -62,13 +58,37 @@ void __udelay(unsigned long usecs) __load_psw_mask(mask); local_irq_disable(); } while (get_clock() < end); + set_clock_comparator(S390_lowcore.clock_comparator); +} - if (raw_irqs_disabled_flags(flags)) { - __ctl_load(cr0, 0, 0); - local_tick_enable(old_cc); +/* + * Waits for 'usecs' microseconds using the TOD clock comparator. 
+ */ +void __udelay(unsigned long usecs) +{ + unsigned long flags; + + preempt_disable(); + local_irq_save(flags); + if (in_irq()) { + __udelay_disabled(usecs); + goto out; + } + if (in_softirq()) { + if (raw_irqs_disabled_flags(flags)) + __udelay_disabled(usecs); + else + __udelay_enabled(usecs); + goto out; } - if (!irq_context) + if (raw_irqs_disabled_flags(flags)) { + local_bh_disable(); + __udelay_disabled(usecs); _local_bh_enable(); - set_clock_comparator(S390_lowcore.clock_comparator); + goto out; + } + __udelay_enabled(usecs); +out: local_irq_restore(flags); + preempt_enable(); } -- cgit v1.2.3 From 7a0f475513fa573bc8e072021960313da32f0ee3 Mon Sep 17 00:00:00 2001 From: Klaus-Dieter Wacker Date: Fri, 10 Oct 2008 21:33:18 +0200 Subject: [S390] qdio enhanced SIGA (iqdio) support. Add support for z10 HiperSockets multiwrite SBALs on output queues. This is used on LPAR with EDDP enabled devices. Signed-off-by: Klaus-Dieter Wacker Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/qdio.h | 8 +++++++- drivers/s390/cio/qdio.h | 3 +++ drivers/s390/cio/qdio_main.c | 24 +++++++++++++++++++----- 3 files changed, 29 insertions(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 6813772171f2..4734c3f05354 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -299,7 +299,13 @@ struct qdio_ssqd_desc { u8 mbccnt; u16 qdioac2; u64 sch_token; - u64:64; + u8 mro; + u8 mri; + u8:8; + u8 sbalic; + u16:16; + u8:8; + u8 mmwc; } __attribute__ ((packed)); /* params are: ccw_device, qdio_error, queue_number, diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index af867731a5f4..e3ea1d5f2810 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -203,6 +203,9 @@ struct qdio_output_q { /* PCIs are enabled for the queue */ int pci_out_enabled; + /* IQDIO: output multiple buffers (enhanced SIGA) */ + int use_enh_siga; + /* timer to check for more outbound 
work */ struct timer_list timer; }; diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 719066ec0c01..a50682d2a0fa 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -316,6 +316,9 @@ static inline int qdio_do_siga_output(struct qdio_q *q, unsigned int *busy_bit) unsigned int fc = 0; unsigned long schid; + if (q->u.out.use_enh_siga) { + fc = 3; + } if (!is_qebsm(q)) schid = *((u32 *)&q->irq_ptr->schid); else { @@ -1449,6 +1452,8 @@ int qdio_establish(struct qdio_initialize *init_data) } qdio_setup_ssqd_info(irq_ptr); + sprintf(dbf_text, "qDmmwc%2x", irq_ptr->ssqd_desc.mmwc); + QDIO_DBF_TEXT2(0, setup, dbf_text); sprintf(dbf_text, "qib ac%2x", irq_ptr->qib.ac); QDIO_DBF_TEXT2(0, setup, dbf_text); @@ -1621,12 +1626,21 @@ static void handle_outbound(struct qdio_q *q, unsigned int callflags, if (multicast_outbound(q)) qdio_kick_outbound_q(q); else - /* - * One siga-w per buffer required for unicast - * HiperSockets. - */ - while (count--) + if ((q->irq_ptr->ssqd_desc.mmwc > 1) && + (count > 1) && + (count <= q->irq_ptr->ssqd_desc.mmwc)) { + /* exploit enhanced SIGA */ + q->u.out.use_enh_siga = 1; qdio_kick_outbound_q(q); + } else { + /* + * One siga-w per buffer required for unicast + * HiperSockets. + */ + q->u.out.use_enh_siga = 0; + while (count--) + qdio_kick_outbound_q(q); + } goto out; } -- cgit v1.2.3 From d86730bb9597b02bff59a3a5a01c0094d71a265f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 10 Oct 2008 21:33:19 +0200 Subject: [S390] s390: use sys_pause for 31bit pause entry point sys32_pause is a useless copy of the generic sys_pause. (and it's certainly not there for old sparc32 binaries..) 
Signed-off-by: Christoph Hellwig Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/compat_linux.c | 8 -------- arch/s390/kernel/compat_linux.h | 1 - arch/s390/kernel/compat_wrapper.S | 2 -- arch/s390/kernel/syscalls.S | 2 +- 4 files changed, 1 insertion(+), 12 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index d7f22226fc4e..98e246dc0233 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -608,14 +608,6 @@ asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, struct time return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); } -/* These are here just in case some old sparc32 binary calls it. */ -asmlinkage long sys32_pause(void) -{ - current->state = TASK_INTERRUPTIBLE; - schedule(); - return -ERESTARTNOHAND; -} - asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count, u32 poshi, u32 poslo) { diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 20723a062017..05f8516366ab 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -206,7 +206,6 @@ long sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz); long sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz); -long sys32_pause(void); long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count, u32 poshi, u32 poslo); long sys32_pwrite64(unsigned int fd, const char __user *ubuf, diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 328a20e880b5..ee51ca9e23b5 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -128,8 +128,6 @@ sys32_alarm_wrapper: llgfr %r2,%r2 # unsigned int jg sys_alarm # branch to system call -#sys32_pause_wrapper # void - .globl compat_sys_utime_wrapper compat_sys_utime_wrapper: llgtr %r2,%r2 # char * diff --git a/arch/s390/kernel/syscalls.S 
b/arch/s390/kernel/syscalls.S index c66d35e55142..3ae303914b42 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -37,7 +37,7 @@ SYSCALL(sys_stime,sys_ni_syscall,sys32_stime_wrapper) /* 25 old stime syscall * SYSCALL(sys_ptrace,sys_ptrace,sys32_ptrace_wrapper) SYSCALL(sys_alarm,sys_alarm,sys32_alarm_wrapper) NI_SYSCALL /* old fstat syscall */ -SYSCALL(sys_pause,sys_pause,sys32_pause) +SYSCALL(sys_pause,sys_pause,sys_pause) SYSCALL(sys_utime,sys_utime,compat_sys_utime_wrapper) /* 30 */ NI_SYSCALL /* old stty syscall */ NI_SYSCALL /* old gtty syscall */ -- cgit v1.2.3 From 753c4dd6a2fa2af81f5d809d610d29f2d9dd9bc1 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 10 Oct 2008 21:33:20 +0200 Subject: [S390] ptrace changes * System call parameter and result access functions * Add tracehook calls * Split syscall_trace into two functions do_syscall_trace_enter and do_syscall_trace_exit Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + arch/s390/include/asm/ptrace.h | 1 + arch/s390/include/asm/syscall.h | 80 +++++++++++++++++++++++++++++++++++++ arch/s390/include/asm/thread_info.h | 2 + arch/s390/kernel/entry.S | 50 ++++++++++++++++++----- arch/s390/kernel/entry64.S | 42 ++++++++++++++----- arch/s390/kernel/ptrace.c | 61 +++++++++++++++------------- arch/s390/kernel/signal.c | 13 ++++++ 8 files changed, 202 insertions(+), 48 deletions(-) create mode 100644 arch/s390/include/asm/syscall.h (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8d41908e2513..4c03049e7db9 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -74,6 +74,7 @@ config S390 select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_KVM if 64BIT + select HAVE_ARCH_TRACEHOOK source "init/Kconfig" diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index af2c9ac28a07..a7226f8143fb 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -490,6 +490,7 @@ extern void 
user_disable_single_step(struct task_struct *); #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0) #define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN) +#define user_stack_pointer(regs)((regs)->gprs[15]) #define regs_return_value(regs)((regs)->gprs[2]) #define profile_pc(regs) instruction_pointer(regs) extern void show_regs(struct pt_regs * regs); diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h new file mode 100644 index 000000000000..6e623971fbb9 --- /dev/null +++ b/arch/s390/include/asm/syscall.h @@ -0,0 +1,80 @@ +/* + * Access to user system call parameters and results + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ + +#ifndef _ASM_SYSCALL_H +#define _ASM_SYSCALL_H 1 + +#include + +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + if (regs->trap != __LC_SVC_OLD_PSW) + return -1; + return regs->gprs[2]; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + regs->gprs[2] = regs->orig_gpr2; +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + return (regs->gprs[2] >= -4096UL) ? -regs->gprs[2] : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->gprs[2]; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + regs->gprs[2] = error ? 
-error : val; +} + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + BUG_ON(i + n > 6); +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_31BIT)) { + if (i + n == 6) + args[--n] = (u32) regs->args[0]; + while (n-- > 0) + args[n] = (u32) regs->gprs[2 + i + n]; + } +#endif + if (i + n == 6) + args[--n] = regs->args[0]; + memcpy(args, &regs->gprs[2 + i], n * sizeof(args[0])); +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ + BUG_ON(i + n > 6); + if (i + n == 6) + regs->args[0] = args[--n]; + memcpy(&regs->gprs[2 + i], args, n * sizeof(args[0])); +} + +#endif /* _ASM_SYSCALL_H */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 91a8f93ad355..ea40a9d690fc 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -86,6 +86,7 @@ static inline struct thread_info *current_thread_info(void) * thread information flags bit numbers */ #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_RESTART_SVC 4 /* restart svc with new svc number */ @@ -100,6 +101,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 20 /* restore signal mask in do_signal() */ #define _TIF_SYSCALL_TRACE (1< #include #include +#include #include #include @@ -639,40 +640,44 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, } #endif -asmlinkage void -syscall_trace(struct pt_regs *regs, int entryexit) +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { - if (unlikely(current->audit_context) && entryexit) - audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]), 
regs->gprs[2]); - - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - goto out; - if (!(current->ptrace & PT_PTRACED)) - goto out; - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); + long ret; /* - * If the debuffer has set an invalid system call number, - * we prepare to skip the system call restart handling. + * The sysc_tracesys code in entry.S stored the system + * call number to gprs[2]. */ - if (!entryexit && regs->gprs[2] >= NR_syscalls) + ret = regs->gprs[2]; + if (test_thread_flag(TIF_SYSCALL_TRACE) && + (tracehook_report_syscall_entry(regs) || + regs->gprs[2] >= NR_syscalls)) { + /* + * Tracing decided this syscall should not happen or the + * debugger stored an invalid system call number. Skip + * the system call and the system call restart handling. + */ regs->trap = -1; - - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; + ret = -1; } - out: - if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(test_thread_flag(TIF_31BIT)?AUDIT_ARCH_S390:AUDIT_ARCH_S390X, - regs->gprs[2], regs->orig_gpr2, regs->gprs[3], - regs->gprs[4], regs->gprs[5]); + + if (unlikely(current->audit_context)) + audit_syscall_entry(test_thread_flag(TIF_31BIT) ? 
+ AUDIT_ARCH_S390 : AUDIT_ARCH_S390X, + regs->gprs[2], regs->orig_gpr2, + regs->gprs[3], regs->gprs[4], + regs->gprs[5]); + return ret; +} + +asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) +{ + if (unlikely(current->audit_context)) + audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]), + regs->gprs[2]); + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, 0); } /* diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index b97682040215..4f7fc3059a8e 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -507,6 +508,12 @@ void do_signal(struct pt_regs *regs) */ if (current->thread.per_info.single_step) set_thread_flag(TIF_SINGLE_STEP); + + /* + * Let tracing know that we've done the handler setup. + */ + tracehook_signal_handler(signr, &info, &ka, regs, + test_thread_flag(TIF_SINGLE_STEP)); } return; } @@ -526,3 +533,9 @@ void do_signal(struct pt_regs *regs) set_thread_flag(TIF_RESTART_SVC); } } + +void do_notify_resume(struct pt_regs *regs) +{ + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); +} -- cgit v1.2.3 From b2300b9efe1b8174833e17f37e975c9da00c388a Mon Sep 17 00:00:00 2001 From: Hongjie Yang Date: Fri, 10 Oct 2008 21:33:21 +0200 Subject: [S390] dcssblk: add >2G DCSSs support and stacked contiguous DCSSs support. The DCSS block device driver is modified to add >2G DCSSs support and allow a DCSS block device to map to a set of contiguous DCSSs. The extmem code is also modified to use new Diagnose x'64' subcodes for >2G DCSSs. 
Signed-off-by: Hongjie Yang Signed-off-by: Martin Schwidefsky --- arch/s390/mm/extmem.c | 251 +++++++++++++++++---- drivers/s390/block/dcssblk.c | 515 ++++++++++++++++++++++++++++++++----------- 2 files changed, 596 insertions(+), 170 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index f231f5ec74b6..580fc64cc735 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -43,20 +43,40 @@ #define DCSS_FINDSEG 0x0c #define DCSS_LOADNOLY 0x10 #define DCSS_SEGEXT 0x18 +#define DCSS_LOADSHRX 0x20 +#define DCSS_LOADNSRX 0x24 +#define DCSS_FINDSEGX 0x2c +#define DCSS_SEGEXTX 0x38 #define DCSS_FINDSEGA 0x0c struct qrange { - unsigned int start; // 3byte start address, 1 byte type - unsigned int end; // 3byte end address, 1 byte reserved + unsigned long start; /* last byte type */ + unsigned long end; /* last byte reserved */ }; struct qout64 { + unsigned long segstart; + unsigned long segend; + int segcnt; + int segrcnt; + struct qrange range[6]; +}; + +#ifdef CONFIG_64BIT +struct qrange_old { + unsigned int start; /* last byte type */ + unsigned int end; /* last byte reserved */ +}; + +/* output area format for the Diag x'64' old subcode x'18' */ +struct qout64_old { int segstart; int segend; int segcnt; int segrcnt; - struct qrange range[6]; + struct qrange_old range[6]; }; +#endif struct qin64 { char qopcode; @@ -86,6 +106,55 @@ static DEFINE_MUTEX(dcss_lock); static LIST_HEAD(dcss_list); static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC", "EW/EN-MIXED" }; +static int loadshr_scode, loadnsr_scode, findseg_scode; +static int segext_scode, purgeseg_scode; +static int scode_set; + +/* set correct Diag x'64' subcodes. 
*/ +static int +dcss_set_subcodes(void) +{ +#ifdef CONFIG_64BIT + char *name = kmalloc(8 * sizeof(char), GFP_DMA); + unsigned long rx, ry; + int rc; + + if (name == NULL) + return -ENOMEM; + + rx = (unsigned long) name; + ry = DCSS_FINDSEGX; + + strcpy(name, "dummy"); + asm volatile( + " diag %0,%1,0x64\n" + "0: ipm %2\n" + " srl %2,28\n" + " j 2f\n" + "1: la %2,3\n" + "2:\n" + EX_TABLE(0b, 1b) + : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc"); + + kfree(name); + /* Diag x'64' new subcodes are supported, set to new subcodes */ + if (rc != 3) { + loadshr_scode = DCSS_LOADSHRX; + loadnsr_scode = DCSS_LOADNSRX; + purgeseg_scode = DCSS_PURGESEG; + findseg_scode = DCSS_FINDSEGX; + segext_scode = DCSS_SEGEXTX; + return 0; + } +#endif + /* Diag x'64' new subcodes are not supported, set to old subcodes */ + loadshr_scode = DCSS_LOADNOLY; + loadnsr_scode = DCSS_LOADNSR; + purgeseg_scode = DCSS_PURGESEG; + findseg_scode = DCSS_FINDSEG; + segext_scode = DCSS_SEGEXT; + return 0; +} /* * Create the 8 bytes, ebcdic VM segment name from @@ -135,25 +204,45 @@ segment_by_name (char *name) * Perform a function on a dcss segment. 
*/ static inline int -dcss_diag (__u8 func, void *parameter, +dcss_diag(int *func, void *parameter, unsigned long *ret1, unsigned long *ret2) { unsigned long rx, ry; int rc; + if (scode_set == 0) { + rc = dcss_set_subcodes(); + if (rc < 0) + return rc; + scode_set = 1; + } rx = (unsigned long) parameter; - ry = (unsigned long) func; - asm volatile( + ry = (unsigned long) *func; + #ifdef CONFIG_64BIT - " sam31\n" - " diag %0,%1,0x64\n" - " sam64\n" + /* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */ + if (*func > DCSS_SEGEXT) + asm volatile( + " diag %0,%1,0x64\n" + " ipm %2\n" + " srl %2,28\n" + : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc"); + /* 31-bit Diag x'64' old subcode, switch to 31-bit addressing mode */ + else + asm volatile( + " sam31\n" + " diag %0,%1,0x64\n" + " sam64\n" + " ipm %2\n" + " srl %2,28\n" + : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc"); #else + asm volatile( " diag %0,%1,0x64\n" -#endif " ipm %2\n" " srl %2,28\n" : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc"); +#endif *ret1 = rx; *ret2 = ry; return rc; @@ -190,14 +279,45 @@ query_segment_type (struct dcss_segment *seg) qin->qoutlen = sizeof(struct qout64); memcpy (qin->qname, seg->dcss_name, 8); - diag_cc = dcss_diag (DCSS_SEGEXT, qin, &dummy, &vmrc); + diag_cc = dcss_diag(&segext_scode, qin, &dummy, &vmrc); + if (diag_cc < 0) { + rc = diag_cc; + goto out_free; + } if (diag_cc > 1) { PRINT_WARN ("segment_type: diag returned error %ld\n", vmrc); rc = dcss_diag_translate_rc (vmrc); goto out_free; } +#ifdef CONFIG_64BIT + /* Only old format of output area of Diagnose x'64' is supported, + copy data for the new format. 
*/ + if (segext_scode == DCSS_SEGEXT) { + struct qout64_old *qout_old; + qout_old = kzalloc(sizeof(struct qout64_old), GFP_DMA); + if (qout_old == NULL) { + rc = -ENOMEM; + goto out_free; + } + memcpy(qout_old, qout, sizeof(struct qout64_old)); + qout->segstart = (unsigned long) qout_old->segstart; + qout->segend = (unsigned long) qout_old->segend; + qout->segcnt = qout_old->segcnt; + qout->segrcnt = qout_old->segrcnt; + + if (qout->segcnt > 6) + qout->segrcnt = 6; + for (i = 0; i < qout->segrcnt; i++) { + qout->range[i].start = + (unsigned long) qout_old->range[i].start; + qout->range[i].end = + (unsigned long) qout_old->range[i].end; + } + kfree(qout_old); + } +#endif if (qout->segcnt > 6) { rc = -ENOTSUPP; goto out_free; @@ -268,6 +388,30 @@ segment_type (char* name) return seg.vm_segtype; } +/* + * check if segment collides with other segments that are currently loaded + * returns 1 if this is the case, 0 if no collision was found + */ +static int +segment_overlaps_others (struct dcss_segment *seg) +{ + struct list_head *l; + struct dcss_segment *tmp; + + BUG_ON(!mutex_is_locked(&dcss_lock)); + list_for_each(l, &dcss_list) { + tmp = list_entry(l, struct dcss_segment, list); + if ((tmp->start_addr >> 20) > (seg->end >> 20)) + continue; + if ((tmp->end >> 20) < (seg->start_addr >> 20)) + continue; + if (seg == tmp) + continue; + return 1; + } + return 0; +} + /* * real segment loading function, called from segment_load */ @@ -276,7 +420,8 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long { struct dcss_segment *seg = kmalloc(sizeof(struct dcss_segment), GFP_DMA); - int dcss_command, rc, diag_cc; + int rc, diag_cc; + unsigned long start_addr, end_addr, dummy; if (seg == NULL) { rc = -ENOMEM; @@ -287,6 +432,13 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long if (rc < 0) goto out_free; + if (loadshr_scode == DCSS_LOADSHRX) { + if (segment_overlaps_others(seg)) { + rc = -EBUSY; + goto out_free; + } + 
} + rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1); if (rc) @@ -316,20 +468,28 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long } if (do_nonshared) - dcss_command = DCSS_LOADNSR; + diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name, + &start_addr, &end_addr); else - dcss_command = DCSS_LOADNOLY; - - diag_cc = dcss_diag(dcss_command, seg->dcss_name, - &seg->start_addr, &seg->end); + diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name, + &start_addr, &end_addr); + if (diag_cc < 0) { + dcss_diag(&purgeseg_scode, seg->dcss_name, + &dummy, &dummy); + rc = diag_cc; + goto out_resource; + } if (diag_cc > 1) { PRINT_WARN ("segment_load: could not load segment %s - " - "diag returned error (%ld)\n",name,seg->end); - rc = dcss_diag_translate_rc (seg->end); - dcss_diag(DCSS_PURGESEG, seg->dcss_name, - &seg->start_addr, &seg->end); + "diag returned error (%ld)\n", + name, end_addr); + rc = dcss_diag_translate_rc(end_addr); + dcss_diag(&purgeseg_scode, seg->dcss_name, + &dummy, &dummy); goto out_resource; } + seg->start_addr = start_addr; + seg->end = end_addr; seg->do_nonshared = do_nonshared; atomic_set(&seg->ref_count, 1); list_add(&seg->list, &dcss_list); @@ -423,8 +583,8 @@ int segment_modify_shared (char *name, int do_nonshared) { struct dcss_segment *seg; - unsigned long dummy; - int dcss_command, rc, diag_cc; + unsigned long start_addr, end_addr, dummy; + int rc, diag_cc; mutex_lock(&dcss_lock); seg = segment_by_name (name); @@ -445,38 +605,51 @@ segment_modify_shared (char *name, int do_nonshared) goto out_unlock; } release_resource(seg->res); - if (do_nonshared) { - dcss_command = DCSS_LOADNSR; + if (do_nonshared) seg->res->flags &= ~IORESOURCE_READONLY; - } else { - dcss_command = DCSS_LOADNOLY; + else if (seg->vm_segtype == SEG_TYPE_SR || seg->vm_segtype == SEG_TYPE_ER) seg->res->flags |= IORESOURCE_READONLY; - } + if (request_resource(&iomem_resource, seg->res)) { PRINT_WARN("segment_modify_shared: could not 
reload segment %s" " - overlapping resources\n", name); rc = -EBUSY; kfree(seg->res); - goto out_del; + goto out_del_mem; + } + + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); + if (do_nonshared) + diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name, + &start_addr, &end_addr); + else + diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name, + &start_addr, &end_addr); + if (diag_cc < 0) { + rc = diag_cc; + goto out_del_res; } - dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy); - diag_cc = dcss_diag(dcss_command, seg->dcss_name, - &seg->start_addr, &seg->end); if (diag_cc > 1) { PRINT_WARN ("segment_modify_shared: could not reload segment %s" - " - diag returned error (%ld)\n",name,seg->end); - rc = dcss_diag_translate_rc (seg->end); - goto out_del; + " - diag returned error (%ld)\n", + name, end_addr); + rc = dcss_diag_translate_rc(end_addr); + goto out_del_res; } + seg->start_addr = start_addr; + seg->end = end_addr; seg->do_nonshared = do_nonshared; rc = 0; goto out_unlock; - out_del: + out_del_res: + release_resource(seg->res); + kfree(seg->res); + out_del_mem: vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); list_del(&seg->list); - dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy); + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); kfree(seg); out_unlock: mutex_unlock(&dcss_lock); @@ -510,7 +683,7 @@ segment_unload(char *name) kfree(seg->res); vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); list_del(&seg->list); - dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy); + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); kfree(seg); out_unlock: mutex_unlock(&dcss_lock); @@ -545,7 +718,7 @@ segment_save(char *name) endpfn = (seg->end) >> PAGE_SHIFT; sprintf(cmd1, "DEFSEG %s", name); for (i=0; i<seg->segcnt; i++) { - sprintf(cmd1+strlen(cmd1), " %X-%X %s", + sprintf(cmd1+strlen(cmd1), " %lX-%lX %s", seg->range[i].start >> PAGE_SHIFT, seg->range[i].end >> PAGE_SHIFT, 
segtype_string[seg->range[i].start & 0xff]); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index ea4272c8c677..a7ff167d5b81 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -31,7 +31,6 @@ #define PRINT_WARN(x...) printk(KERN_WARNING DCSSBLK_NAME " warning: " x) #define PRINT_ERR(x...) printk(KERN_ERR DCSSBLK_NAME " error: " x) - static int dcssblk_open(struct inode *inode, struct file *filp); static int dcssblk_release(struct inode *inode, struct file *filp); static int dcssblk_make_request(struct request_queue *q, struct bio *bio); @@ -48,6 +47,30 @@ static struct block_device_operations dcssblk_devops = { .direct_access = dcssblk_direct_access, }; +struct dcssblk_dev_info { + struct list_head lh; + struct device dev; + char segment_name[BUS_ID_SIZE]; + atomic_t use_count; + struct gendisk *gd; + unsigned long start; + unsigned long end; + int segment_type; + unsigned char save_pending; + unsigned char is_shared; + struct request_queue *dcssblk_queue; + int num_of_segments; + struct list_head seg_list; +}; + +struct segment_info { + struct list_head lh; + char segment_name[BUS_ID_SIZE]; + unsigned long start; + unsigned long end; + int segment_type; +}; + static ssize_t dcssblk_add_store(struct device * dev, struct device_attribute *attr, const char * buf, size_t count); static ssize_t dcssblk_remove_store(struct device * dev, struct device_attribute *attr, const char * buf, @@ -58,30 +81,20 @@ static ssize_t dcssblk_save_show(struct device *dev, struct device_attribute *at static ssize_t dcssblk_shared_store(struct device * dev, struct device_attribute *attr, const char * buf, size_t count); static ssize_t dcssblk_shared_show(struct device *dev, struct device_attribute *attr, char *buf); +static ssize_t dcssblk_seglist_show(struct device *dev, + struct device_attribute *attr, + char *buf); static DEVICE_ATTR(add, S_IWUSR, NULL, dcssblk_add_store); static DEVICE_ATTR(remove, S_IWUSR, NULL, 
dcssblk_remove_store); -static DEVICE_ATTR(save, S_IWUSR | S_IRUGO, dcssblk_save_show, +static DEVICE_ATTR(save, S_IWUSR | S_IRUSR, dcssblk_save_show, dcssblk_save_store); -static DEVICE_ATTR(shared, S_IWUSR | S_IRUGO, dcssblk_shared_show, +static DEVICE_ATTR(shared, S_IWUSR | S_IRUSR, dcssblk_shared_show, dcssblk_shared_store); +static DEVICE_ATTR(seglist, S_IRUSR, dcssblk_seglist_show, NULL); static struct device *dcssblk_root_dev; -struct dcssblk_dev_info { - struct list_head lh; - struct device dev; - char segment_name[BUS_ID_SIZE]; - atomic_t use_count; - struct gendisk *gd; - unsigned long start; - unsigned long end; - int segment_type; - unsigned char save_pending; - unsigned char is_shared; - struct request_queue *dcssblk_queue; -}; - static LIST_HEAD(dcssblk_devices); static struct rw_semaphore dcssblk_devices_sem; @@ -91,8 +104,15 @@ static struct rw_semaphore dcssblk_devices_sem; static void dcssblk_release_segment(struct device *dev) { - PRINT_DEBUG("segment release fn called for %s\n", dev_name(dev)); - kfree(container_of(dev, struct dcssblk_dev_info, dev)); + struct dcssblk_dev_info *dev_info; + struct segment_info *entry, *temp; + + dev_info = container_of(dev, struct dcssblk_dev_info, dev); + list_for_each_entry_safe(entry, temp, &dev_info->seg_list, lh) { + list_del(&entry->lh); + kfree(entry); + } + kfree(dev_info); module_put(THIS_MODULE); } @@ -142,6 +162,169 @@ dcssblk_get_device_by_name(char *name) return NULL; } +/* + * get the struct segment_info from seg_list + * for the given name. + * down_read(&dcssblk_devices_sem) must be held. + */ +static struct segment_info * +dcssblk_get_segment_by_name(char *name) +{ + struct dcssblk_dev_info *dev_info; + struct segment_info *entry; + + list_for_each_entry(dev_info, &dcssblk_devices, lh) { + list_for_each_entry(entry, &dev_info->seg_list, lh) { + if (!strcmp(name, entry->segment_name)) + return entry; + } + } + return NULL; +} + +/* + * get the highest address of the multi-segment block. 
+ */ +static unsigned long +dcssblk_find_highest_addr(struct dcssblk_dev_info *dev_info) +{ + unsigned long highest_addr; + struct segment_info *entry; + + highest_addr = 0; + list_for_each_entry(entry, &dev_info->seg_list, lh) { + if (highest_addr < entry->end) + highest_addr = entry->end; + } + return highest_addr; +} + +/* + * get the lowest address of the multi-segment block. + */ +static unsigned long +dcssblk_find_lowest_addr(struct dcssblk_dev_info *dev_info) +{ + int set_first; + unsigned long lowest_addr; + struct segment_info *entry; + + set_first = 0; + lowest_addr = 0; + list_for_each_entry(entry, &dev_info->seg_list, lh) { + if (set_first == 0) { + lowest_addr = entry->start; + set_first = 1; + } else { + if (lowest_addr > entry->start) + lowest_addr = entry->start; + } + } + return lowest_addr; +} + +/* + * Check continuity of segments. + */ +static int +dcssblk_is_continuous(struct dcssblk_dev_info *dev_info) +{ + int i, j, rc; + struct segment_info *sort_list, *entry, temp; + + if (dev_info->num_of_segments <= 1) + return 0; + + sort_list = kzalloc( + sizeof(struct segment_info) * dev_info->num_of_segments, + GFP_KERNEL); + if (sort_list == NULL) + return -ENOMEM; + i = 0; + list_for_each_entry(entry, &dev_info->seg_list, lh) { + memcpy(&sort_list[i], entry, sizeof(struct segment_info)); + i++; + } + + /* sort segments */ + for (i = 0; i < dev_info->num_of_segments; i++) + for (j = 0; j < dev_info->num_of_segments; j++) + if (sort_list[j].start > sort_list[i].start) { + memcpy(&temp, &sort_list[i], + sizeof(struct segment_info)); + memcpy(&sort_list[i], &sort_list[j], + sizeof(struct segment_info)); + memcpy(&sort_list[j], &temp, + sizeof(struct segment_info)); + } + + /* check continuity */ + for (i = 0; i < dev_info->num_of_segments - 1; i++) { + if ((sort_list[i].end + 1) != sort_list[i+1].start) { + PRINT_ERR("Segment %s is not contiguous with " + "segment %s\n", + sort_list[i].segment_name, + sort_list[i+1].segment_name); + rc = -EINVAL; + goto 
out; + } + /* EN and EW are allowed in a block device */ + if (sort_list[i].segment_type != sort_list[i+1].segment_type) { + if (!(sort_list[i].segment_type & SEGMENT_EXCLUSIVE) || + (sort_list[i].segment_type == SEG_TYPE_ER) || + !(sort_list[i+1].segment_type & + SEGMENT_EXCLUSIVE) || + (sort_list[i+1].segment_type == SEG_TYPE_ER)) { + PRINT_ERR("Segment %s has different type from " + "segment %s\n", + sort_list[i].segment_name, + sort_list[i+1].segment_name); + rc = -EINVAL; + goto out; + } + } + } + rc = 0; +out: + kfree(sort_list); + return rc; +} + +/* + * Load a segment + */ +static int +dcssblk_load_segment(char *name, struct segment_info **seg_info) +{ + int rc; + + /* already loaded? */ + down_read(&dcssblk_devices_sem); + *seg_info = dcssblk_get_segment_by_name(name); + up_read(&dcssblk_devices_sem); + if (*seg_info != NULL) + return -EEXIST; + + /* get a struct segment_info */ + *seg_info = kzalloc(sizeof(struct segment_info), GFP_KERNEL); + if (*seg_info == NULL) + return -ENOMEM; + + strcpy((*seg_info)->segment_name, name); + + /* load the segment */ + rc = segment_load(name, SEGMENT_SHARED, + &(*seg_info)->start, &(*seg_info)->end); + if (rc < 0) { + segment_warning(rc, (*seg_info)->segment_name); + kfree(*seg_info); + } else { + INIT_LIST_HEAD(&(*seg_info)->lh); + (*seg_info)->segment_type = rc; + } + return rc; +} + static void dcssblk_unregister_callback(struct device *dev) { device_unregister(dev); @@ -165,6 +348,7 @@ static ssize_t dcssblk_shared_store(struct device *dev, struct device_attribute *attr, const char *inbuf, size_t count) { struct dcssblk_dev_info *dev_info; + struct segment_info *entry, *temp; int rc; if ((count > 1) && (inbuf[1] != '\n') && (inbuf[1] != '\0')) @@ -172,46 +356,46 @@ dcssblk_shared_store(struct device *dev, struct device_attribute *attr, const ch down_write(&dcssblk_devices_sem); dev_info = container_of(dev, struct dcssblk_dev_info, dev); if (atomic_read(&dev_info->use_count)) { - PRINT_ERR("share: segment %s is 
busy!\n", - dev_info->segment_name); rc = -EBUSY; goto out; } if (inbuf[0] == '1') { - // reload segment in shared mode - rc = segment_modify_shared(dev_info->segment_name, - SEGMENT_SHARED); - if (rc < 0) { - BUG_ON(rc == -EINVAL); - if (rc != -EAGAIN) - goto removeseg; - } else { - dev_info->is_shared = 1; - switch (dev_info->segment_type) { - case SEG_TYPE_SR: - case SEG_TYPE_ER: - case SEG_TYPE_SC: - set_disk_ro(dev_info->gd,1); + /* reload segments in shared mode */ + list_for_each_entry(entry, &dev_info->seg_list, lh) { + rc = segment_modify_shared(entry->segment_name, + SEGMENT_SHARED); + if (rc < 0) { + BUG_ON(rc == -EINVAL); + if (rc != -EAGAIN) + goto removeseg; } } + dev_info->is_shared = 1; + switch (dev_info->segment_type) { + case SEG_TYPE_SR: + case SEG_TYPE_ER: + case SEG_TYPE_SC: + set_disk_ro(dev_info->gd, 1); + } } else if (inbuf[0] == '0') { - // reload segment in exclusive mode + /* reload segments in exclusive mode */ if (dev_info->segment_type == SEG_TYPE_SC) { PRINT_ERR("Segment type SC (%s) cannot be loaded in " - "non-shared mode\n", dev_info->segment_name); + "non-shared mode\n", dev_info->segment_name); rc = -EINVAL; goto out; } - rc = segment_modify_shared(dev_info->segment_name, - SEGMENT_EXCLUSIVE); - if (rc < 0) { - BUG_ON(rc == -EINVAL); - if (rc != -EAGAIN) - goto removeseg; - } else { - dev_info->is_shared = 0; - set_disk_ro(dev_info->gd, 0); + list_for_each_entry(entry, &dev_info->seg_list, lh) { + rc = segment_modify_shared(entry->segment_name, + SEGMENT_EXCLUSIVE); + if (rc < 0) { + BUG_ON(rc == -EINVAL); + if (rc != -EAGAIN) + goto removeseg; + } } + dev_info->is_shared = 0; + set_disk_ro(dev_info->gd, 0); } else { rc = -EINVAL; goto out; @@ -220,8 +404,14 @@ dcssblk_shared_store(struct device *dev, struct device_attribute *attr, const ch goto out; removeseg: - PRINT_ERR("Could not reload segment %s, removing it now!\n", - dev_info->segment_name); + PRINT_ERR("Could not reload segment(s) of the device %s, removing " + 
"segment(s) now!\n", + dev_info->segment_name); + temp = entry; + list_for_each_entry(entry, &dev_info->seg_list, lh) { + if (entry != temp) + segment_unload(entry->segment_name); + } list_del(&dev_info->lh); del_gendisk(dev_info->gd); @@ -254,6 +444,7 @@ static ssize_t dcssblk_save_store(struct device *dev, struct device_attribute *attr, const char *inbuf, size_t count) { struct dcssblk_dev_info *dev_info; + struct segment_info *entry; if ((count > 1) && (inbuf[1] != '\n') && (inbuf[1] != '\0')) return -EINVAL; @@ -263,14 +454,16 @@ dcssblk_save_store(struct device *dev, struct device_attribute *attr, const char if (inbuf[0] == '1') { if (atomic_read(&dev_info->use_count) == 0) { // device is idle => we save immediately - PRINT_INFO("Saving segment %s\n", + PRINT_INFO("Saving segment(s) of the device %s\n", dev_info->segment_name); - segment_save(dev_info->segment_name); + list_for_each_entry(entry, &dev_info->seg_list, lh) { + segment_save(entry->segment_name); + } } else { // device is busy => we save it when it becomes // idle in dcssblk_release - PRINT_INFO("Segment %s is currently busy, it will " - "be saved when it becomes idle...\n", + PRINT_INFO("Device %s is currently busy, segment(s) " + "will be saved when it becomes idle...\n", dev_info->segment_name); dev_info->save_pending = 1; } @@ -279,7 +472,8 @@ dcssblk_save_store(struct device *dev, struct device_attribute *attr, const char // device is busy & the user wants to undo his save // request dev_info->save_pending = 0; - PRINT_INFO("Pending save for segment %s deactivated\n", + PRINT_INFO("Pending save for segment(s) of the device " + "%s deactivated\n", dev_info->segment_name); } } else { @@ -290,67 +484,124 @@ dcssblk_save_store(struct device *dev, struct device_attribute *attr, const char return count; } +/* + * device attribute for showing all segments in a device + */ +static ssize_t +dcssblk_seglist_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int i; + + struct 
dcssblk_dev_info *dev_info; + struct segment_info *entry; + + down_read(&dcssblk_devices_sem); + dev_info = container_of(dev, struct dcssblk_dev_info, dev); + i = 0; + buf[0] = '\0'; + list_for_each_entry(entry, &dev_info->seg_list, lh) { + strcpy(&buf[i], entry->segment_name); + i += strlen(entry->segment_name); + buf[i] = '\n'; + i++; + } + up_read(&dcssblk_devices_sem); + return i; +} + /* * device attribute for adding devices */ static ssize_t dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - int rc, i; + int rc, i, j, num_of_segments; struct dcssblk_dev_info *dev_info; + struct segment_info *seg_info, *temp; char *local_buf; unsigned long seg_byte_size; dev_info = NULL; + seg_info = NULL; if (dev != dcssblk_root_dev) { rc = -EINVAL; goto out_nobuf; } + if ((count < 1) || (buf[0] == '\0') || (buf[0] == '\n')) { + rc = -ENAMETOOLONG; + goto out_nobuf; + } + local_buf = kmalloc(count + 1, GFP_KERNEL); if (local_buf == NULL) { rc = -ENOMEM; goto out_nobuf; } + /* * parse input */ + num_of_segments = 0; for (i = 0; ((buf[i] != '\0') && (buf[i] != '\n') && i < count); i++) { - local_buf[i] = toupper(buf[i]); + for (j = i; (buf[j] != ':') && + (buf[j] != '\0') && + (buf[j] != '\n') && + j < count; j++) { + local_buf[j-i] = toupper(buf[j]); + } + local_buf[j-i] = '\0'; + if (((j - i) == 0) || ((j - i) > 8)) { + rc = -ENAMETOOLONG; + goto seg_list_del; + } + + rc = dcssblk_load_segment(local_buf, &seg_info); + if (rc < 0) + goto seg_list_del; + /* + * get a struct dcssblk_dev_info + */ + if (num_of_segments == 0) { + dev_info = kzalloc(sizeof(struct dcssblk_dev_info), + GFP_KERNEL); + if (dev_info == NULL) { + rc = -ENOMEM; + goto out; + } + strcpy(dev_info->segment_name, local_buf); + dev_info->segment_type = seg_info->segment_type; + INIT_LIST_HEAD(&dev_info->seg_list); + } + list_add_tail(&seg_info->lh, &dev_info->seg_list); + num_of_segments++; + i = j; + + if ((buf[j] == '\0') || (buf[j] == '\n')) + break; } - 
local_buf[i] = '\0'; - if ((i == 0) || (i > 8)) { + + /* no trailing colon at the end of the input */ + if ((i > 0) && (buf[i-1] == ':')) { rc = -ENAMETOOLONG; - goto out; - } - /* - * already loaded? - */ - down_read(&dcssblk_devices_sem); - dev_info = dcssblk_get_device_by_name(local_buf); - up_read(&dcssblk_devices_sem); - if (dev_info != NULL) { - PRINT_WARN("Segment %s already loaded!\n", local_buf); - rc = -EEXIST; - goto out; - } - /* - * get a struct dcssblk_dev_info - */ - dev_info = kzalloc(sizeof(struct dcssblk_dev_info), GFP_KERNEL); - if (dev_info == NULL) { - rc = -ENOMEM; - goto out; + goto seg_list_del; } + strlcpy(local_buf, buf, i + 1); + dev_info->num_of_segments = num_of_segments; + rc = dcssblk_is_continuous(dev_info); + if (rc < 0) + goto seg_list_del; + + dev_info->start = dcssblk_find_lowest_addr(dev_info); + dev_info->end = dcssblk_find_highest_addr(dev_info); - strcpy(dev_info->segment_name, local_buf); - dev_set_name(&dev_info->dev, local_buf); + dev_set_name(&dev_info->dev, dev_info->segment_name); dev_info->dev.release = dcssblk_release_segment; INIT_LIST_HEAD(&dev_info->lh); - dev_info->gd = alloc_disk(DCSSBLK_MINORS_PER_DISK); if (dev_info->gd == NULL) { rc = -ENOMEM; - goto free_dev_info; + goto seg_list_del; } dev_info->gd->major = dcssblk_major; dev_info->gd->fops = &dcssblk_devops; @@ -360,65 +611,52 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char dev_info->gd->driverfs_dev = &dev_info->dev; blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request); blk_queue_hardsect_size(dev_info->dcssblk_queue, 4096); - /* - * load the segment - */ - rc = segment_load(local_buf, SEGMENT_SHARED, - &dev_info->start, &dev_info->end); - if (rc < 0) { - segment_warning(rc, dev_info->segment_name); - goto dealloc_gendisk; - } + seg_byte_size = (dev_info->end - dev_info->start + 1); set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors - PRINT_INFO("Loaded segment %s, size = %lu Byte, " + 
PRINT_INFO("Loaded segment(s) %s, size = %lu Byte, " "capacity = %lu (512 Byte) sectors\n", local_buf, seg_byte_size, seg_byte_size >> 9); - dev_info->segment_type = rc; dev_info->save_pending = 0; dev_info->is_shared = 1; dev_info->dev.parent = dcssblk_root_dev; /* - * get minor, add to list + *get minor, add to list */ down_write(&dcssblk_devices_sem); - if (dcssblk_get_device_by_name(local_buf)) { - up_write(&dcssblk_devices_sem); + if (dcssblk_get_segment_by_name(local_buf)) { rc = -EEXIST; - goto unload_seg; + goto release_gd; } rc = dcssblk_assign_free_minor(dev_info); - if (rc) { - up_write(&dcssblk_devices_sem); - PRINT_ERR("No free minor number available! " - "Unloading segment...\n"); - goto unload_seg; - } + if (rc) + goto release_gd; sprintf(dev_info->gd->disk_name, "dcssblk%d", MINOR(disk_devt(dev_info->gd))); list_add_tail(&dev_info->lh, &dcssblk_devices); if (!try_module_get(THIS_MODULE)) { rc = -ENODEV; - goto list_del; + goto dev_list_del; } /* * register the device */ rc = device_register(&dev_info->dev); if (rc) { - PRINT_ERR("Segment %s could not be registered RC=%d\n", - local_buf, rc); module_put(THIS_MODULE); - goto list_del; + goto dev_list_del; } get_device(&dev_info->dev); rc = device_create_file(&dev_info->dev, &dev_attr_shared); if (rc) goto unregister_dev; rc = device_create_file(&dev_info->dev, &dev_attr_save); + if (rc) + goto unregister_dev; + rc = device_create_file(&dev_info->dev, &dev_attr_seglist); if (rc) goto unregister_dev; @@ -434,7 +672,6 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char set_disk_ro(dev_info->gd,0); break; } - PRINT_DEBUG("Segment %s loaded successfully\n", local_buf); up_write(&dcssblk_devices_sem); rc = count; goto out; @@ -445,20 +682,27 @@ unregister_dev: dev_info->gd->queue = NULL; put_disk(dev_info->gd); device_unregister(&dev_info->dev); - segment_unload(dev_info->segment_name); + list_for_each_entry(seg_info, &dev_info->seg_list, lh) { + 
segment_unload(seg_info->segment_name); + } put_device(&dev_info->dev); up_write(&dcssblk_devices_sem); goto out; -list_del: +dev_list_del: list_del(&dev_info->lh); - up_write(&dcssblk_devices_sem); -unload_seg: - segment_unload(local_buf); -dealloc_gendisk: +release_gd: blk_cleanup_queue(dev_info->dcssblk_queue); dev_info->gd->queue = NULL; put_disk(dev_info->gd); -free_dev_info: + up_write(&dcssblk_devices_sem); +seg_list_del: + if (dev_info == NULL) + goto out; + list_for_each_entry_safe(seg_info, temp, &dev_info->seg_list, lh) { + list_del(&seg_info->lh); + segment_unload(seg_info->segment_name); + kfree(seg_info); + } kfree(dev_info); out: kfree(local_buf); @@ -473,6 +717,7 @@ static ssize_t dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct dcssblk_dev_info *dev_info; + struct segment_info *entry; int rc, i; char *local_buf; @@ -499,26 +744,28 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch dev_info = dcssblk_get_device_by_name(local_buf); if (dev_info == NULL) { up_write(&dcssblk_devices_sem); - PRINT_WARN("Segment %s is not loaded!\n", local_buf); + PRINT_WARN("Device %s is not loaded!\n", local_buf); rc = -ENODEV; goto out_buf; } if (atomic_read(&dev_info->use_count) != 0) { up_write(&dcssblk_devices_sem); - PRINT_WARN("Segment %s is in use!\n", local_buf); + PRINT_WARN("Device %s is in use!\n", local_buf); rc = -EBUSY; goto out_buf; } - list_del(&dev_info->lh); + list_del(&dev_info->lh); del_gendisk(dev_info->gd); blk_cleanup_queue(dev_info->dcssblk_queue); dev_info->gd->queue = NULL; put_disk(dev_info->gd); device_unregister(&dev_info->dev); - segment_unload(dev_info->segment_name); - PRINT_DEBUG("Segment %s unloaded successfully\n", - dev_info->segment_name); + + /* unload all related segments */ + list_for_each_entry(entry, &dev_info->seg_list, lh) + segment_unload(entry->segment_name); + put_device(&dev_info->dev); up_write(&dcssblk_devices_sem); @@ -550,6 
+797,7 @@ static int dcssblk_release(struct inode *inode, struct file *filp) { struct dcssblk_dev_info *dev_info; + struct segment_info *entry; int rc; dev_info = inode->i_bdev->bd_disk->private_data; @@ -560,9 +808,11 @@ dcssblk_release(struct inode *inode, struct file *filp) down_write(&dcssblk_devices_sem); if (atomic_dec_and_test(&dev_info->use_count) && (dev_info->save_pending)) { - PRINT_INFO("Segment %s became idle and is being saved now\n", + PRINT_INFO("Device %s became idle and is being saved now\n", dev_info->segment_name); - segment_save(dev_info->segment_name); + list_for_each_entry(entry, &dev_info->seg_list, lh) { + segment_save(entry->segment_name); + } dev_info->save_pending = 0; } up_write(&dcssblk_devices_sem); @@ -602,7 +852,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio) case SEG_TYPE_SC: /* cannot write to these segments */ if (bio_data_dir(bio) == WRITE) { - PRINT_WARN("rejecting write to ro segment %s\n", + PRINT_WARN("rejecting write to ro device %s\n", dev_name(&dev_info->dev)); goto fail; } @@ -658,7 +908,7 @@ static void dcssblk_check_params(void) { int rc, i, j, k; - char buf[9]; + char buf[DCSSBLK_PARM_LEN + 1]; struct dcssblk_dev_info *dev_info; for (i = 0; (i < DCSSBLK_PARM_LEN) && (dcssblk_segments[i] != '\0'); @@ -666,15 +916,16 @@ dcssblk_check_params(void) for (j = i; (dcssblk_segments[j] != ',') && (dcssblk_segments[j] != '\0') && (dcssblk_segments[j] != '(') && - (j - i) < 8; j++) + (j < DCSSBLK_PARM_LEN); j++) { buf[j-i] = dcssblk_segments[j]; } buf[j-i] = '\0'; rc = dcssblk_add_store(dcssblk_root_dev, NULL, buf, j-i); if ((rc >= 0) && (dcssblk_segments[j] == '(')) { - for (k = 0; buf[k] != '\0'; k++) + for (k = 0; (buf[k] != ':') && (buf[k] != '\0'); k++) buf[k] = toupper(buf[k]); + buf[k] = '\0'; if (!strncmp(&dcssblk_segments[j], "(local)", 7)) { down_read(&dcssblk_devices_sem); dev_info = dcssblk_get_device_by_name(buf); @@ -741,10 +992,12 @@ module_exit(dcssblk_exit); module_param_string(segments, 
dcssblk_segments, DCSSBLK_PARM_LEN, 0444); MODULE_PARM_DESC(segments, "Name of DCSS segment(s) to be loaded, " - "comma-separated list, each name max. 8 chars.\n" - "Adding \"(local)\" to segment name equals echoing 0 to " - "/sys/devices/dcssblk//shared after loading " - "the segment - \n" - "e.g. segments=\"mydcss1,mydcss2,mydcss3(local)\""); + "comma-separated list, names in each set separated " + "by commas are separated by colons, each set contains " + "names of contiguous segments and each name max. 8 chars.\n" + "Adding \"(local)\" to the end of each set equals echoing 0 " + "to /sys/devices/dcssblk//shared after loading " + "the contiguous segments - \n" + "e.g. segments=\"mydcss1,mydcss2:mydcss3,mydcss4(local)\""); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 5a0d0e65379256b4da2c9092e197a2c761f51c01 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 10 Oct 2008 21:33:22 +0200 Subject: [S390] Move private simple udelay function to arch/s390/lib/delay.c. Move cio's private simple udelay function to lib/delay.c and turn it into something much more readable. So we have all implementations at one place. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/delay.h | 1 + arch/s390/lib/delay.c | 13 +++++++++++++ drivers/s390/cio/cio.c | 17 ++--------------- 3 files changed, 16 insertions(+), 15 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/delay.h b/arch/s390/include/asm/delay.h index 78357314c450..a356c958e260 100644 --- a/arch/s390/include/asm/delay.h +++ b/arch/s390/include/asm/delay.h @@ -15,6 +15,7 @@ #define _S390_DELAY_H extern void __udelay(unsigned long usecs); +extern void udelay_simple(unsigned long usecs); extern void __delay(unsigned long loops); #define udelay(n) __udelay(n) diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index 0953cee05efc..6ccb9fab055a 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -92,3 +92,16 @@ out: local_irq_restore(flags); preempt_enable(); } + +/* + * Simple udelay variant. To be used on startup and reboot + * when the interrupt handler isn't working. + */ +void udelay_simple(unsigned long usecs) +{ + u64 end; + + end = get_clock() + ((u64) usecs << 12); + while (get_clock() < end) + cpu_relax(); +} diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index c0cb72547256..3db2c386546f 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -859,19 +859,6 @@ __disable_subchannel_easy(struct subchannel_id schid, struct schib *schib) return -EBUSY; /* uhm... 
*/ } -/* we can't use the normal udelay here, since it enables external interrupts */ - -static void udelay_reset(unsigned long usecs) -{ - uint64_t start_cc, end_cc; - - asm volatile ("STCK %0" : "=m" (start_cc)); - do { - cpu_relax(); - asm volatile ("STCK %0" : "=m" (end_cc)); - } while (((end_cc - start_cc)/4096) < usecs); -} - static int __clear_io_subchannel_easy(struct subchannel_id schid) { @@ -887,7 +874,7 @@ __clear_io_subchannel_easy(struct subchannel_id schid) if (schid_equal(&ti.schid, &schid)) return 0; } - udelay_reset(100); + udelay_simple(100); } return -EBUSY; } @@ -895,7 +882,7 @@ __clear_io_subchannel_easy(struct subchannel_id schid) static void __clear_chsc_subchannel_easy(void) { /* It seems we can only wait for a bit here :/ */ - udelay_reset(100); + udelay_simple(100); } static int pgm_check_occured; -- cgit v1.2.3 From ab1d848fd6a9151b02c6cbf4bddce6e24707b094 Mon Sep 17 00:00:00 2001 From: Nigel Hislop Date: Fri, 10 Oct 2008 21:33:25 +0200 Subject: [S390] Add ioctl support for EMC Symmetrix Subsystem Control I/O EMC Symmetrix Subsystem Control I/O through CKD dasd requires a specific parameter list sent to the array via a Perform Subsystem Function CCW. The Symmetrix response is retrieved from the array via a Read Subsystem Data CCW. 
Signed-off-by: Nigel Hislop Signed-off-by: Hannes Reinecke Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/dasd.h | 13 ++++++ drivers/s390/block/dasd_eckd.c | 101 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/dasd.h b/arch/s390/include/asm/dasd.h index 3f002e13d024..55b2b80cdf6e 100644 --- a/arch/s390/include/asm/dasd.h +++ b/arch/s390/include/asm/dasd.h @@ -3,6 +3,8 @@ * Author(s)......: Holger Smolinski * Bugreports.to..: * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 + * EMC Symmetrix ioctl Copyright EMC Corporation, 2008 + * Author.........: Nigel Hislop * * This file is the interface of the DASD device driver, which is exported to user space * any future changes wrt the API will result in a change of the APIVERSION reported @@ -202,6 +204,16 @@ typedef struct attrib_data_t { #define DASD_SEQ_PRESTAGE 0x4 #define DASD_REC_ACCESS 0x5 +/* + * Perform EMC Symmetrix I/O + */ +typedef struct dasd_symmio_parms { + unsigned char reserved[8]; /* compat with older releases */ + unsigned long long psf_data; /* char * cast to u64 */ + unsigned long long rssd_result; /* char * cast to u64 */ + int psf_data_len; + int rssd_result_len; +} __attribute__ ((packed)) dasd_symmio_parms_t; /******************************************************************************** * SECTION: Definition of IOCTLs @@ -247,6 +259,7 @@ typedef struct attrib_data_t { /* Set Attributes (cache operations) */ #define BIODASDSATTR _IOW(DASD_IOCTL_LETTER,2,attrib_data_t) +#define BIODASDSYMMIO _IOWR(DASD_IOCTL_LETTER, 240, dasd_symmio_parms_t) #endif /* DASD_H */ diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 8095629bc493..49f9d221e23d 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -6,6 +6,8 @@ * Martin Schwidefsky * Bugreports.to..: * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 + * EMC 
Symmetrix ioctl Copyright EMC Corporation, 2008 + * Author.........: Nigel Hislop * */ @@ -2083,6 +2085,103 @@ dasd_eckd_set_attrib(struct dasd_device *device, void __user *argp) return 0; } +/* + * Issue syscall I/O to EMC Symmetrix array. + * CCWs are PSF and RSSD + */ +static int dasd_symm_io(struct dasd_device *device, void __user *argp) +{ + struct dasd_symmio_parms usrparm; + char *psf_data, *rssd_result; + struct dasd_ccw_req *cqr; + struct ccw1 *ccw; + int rc; + + /* Copy parms from caller */ + rc = -EFAULT; + if (copy_from_user(&usrparm, argp, sizeof(usrparm))) + goto out; +#ifndef CONFIG_64BIT + /* Make sure pointers are sane even on 31 bit. */ + if ((usrparm.psf_data >> 32) != 0 || (usrparm.rssd_result >> 32) != 0) { + rc = -EINVAL; + goto out; + } +#endif + /* alloc I/O data area */ + psf_data = kzalloc(usrparm.psf_data_len, GFP_KERNEL | GFP_DMA); + rssd_result = kzalloc(usrparm.rssd_result_len, GFP_KERNEL | GFP_DMA); + if (!psf_data || !rssd_result) { + rc = -ENOMEM; + goto out_free; + } + + /* get syscall header from user space */ + rc = -EFAULT; + if (copy_from_user(psf_data, + (void __user *)(unsigned long) usrparm.psf_data, + usrparm.psf_data_len)) + goto out_free; + + /* sanity check on syscall header */ + if (psf_data[0] != 0x17 && psf_data[1] != 0xce) { + rc = -EINVAL; + goto out_free; + } + + /* setup CCWs for PSF + RSSD */ + cqr = dasd_smalloc_request("ECKD", 2 , 0, device); + if (IS_ERR(cqr)) { + DEV_MESSAGE(KERN_WARNING, device, "%s", + "Could not allocate initialization request"); + rc = PTR_ERR(cqr); + goto out_free; + } + + cqr->startdev = device; + cqr->memdev = device; + cqr->retries = 3; + cqr->expires = 10 * HZ; + cqr->buildclk = get_clock(); + cqr->status = DASD_CQR_FILLED; + + /* Build the ccws */ + ccw = cqr->cpaddr; + + /* PSF ccw */ + ccw->cmd_code = DASD_ECKD_CCW_PSF; + ccw->count = usrparm.psf_data_len; + ccw->flags |= CCW_FLAG_CC; + ccw->cda = (__u32)(addr_t) psf_data; + + ccw++; + + /* RSSD ccw */ + ccw->cmd_code = 
DASD_ECKD_CCW_RSSD; + ccw->count = usrparm.rssd_result_len; + ccw->flags = CCW_FLAG_SLI ; + ccw->cda = (__u32)(addr_t) rssd_result; + + rc = dasd_sleep_on(cqr); + if (rc) + goto out_sfree; + + rc = -EFAULT; + if (copy_to_user((void __user *)(unsigned long) usrparm.rssd_result, + rssd_result, usrparm.rssd_result_len)) + goto out_sfree; + rc = 0; + +out_sfree: + dasd_sfree_request(cqr, cqr->memdev); +out_free: + kfree(rssd_result); + kfree(psf_data); +out: + DBF_DEV_EVENT(DBF_WARNING, device, "Symmetrix ioctl: rc=%d", rc); + return rc; +} + static int dasd_eckd_ioctl(struct dasd_block *block, unsigned int cmd, void __user *argp) { @@ -2101,6 +2200,8 @@ dasd_eckd_ioctl(struct dasd_block *block, unsigned int cmd, void __user *argp) return dasd_eckd_reserve(device); case BIODASDSLCK: return dasd_eckd_steal_lock(device); + case BIODASDSYMMIO: + return dasd_symm_io(device, argp); default: return -ENOIOCTLCMD; } -- cgit v1.2.3 From 15e86b0c752d50e910b2cca6e83ce74c4440d06c Mon Sep 17 00:00:00 2001 From: Florian Funke Date: Fri, 10 Oct 2008 21:33:26 +0200 Subject: [S390] introduce dirty bit for kvm live migration This patch defines a dirty bit in the PGSTE that can be used to implement dirty pages logging for KVM's live migration. The bit is set in the ptep_rcp_copy function, which is called to save dirty and referenced information from the storage key in the PGSTE. The bit can be tested and reset by KVM using the kvm_s390_test_and_clear_page_dirty function that is introduced by this patch. 
Acked-by: Carsten Otte Signed-off-by: Florian Funke Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 45 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 0bdb704ae051..1a928f84afd6 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -281,6 +281,9 @@ extern char empty_zero_page[PAGE_SIZE]; #define RCP_GR_BIT 50 #define RCP_GC_BIT 49 +/* User dirty bit for KVM's migration feature */ +#define KVM_UD_BIT 47 + #ifndef __s390x__ /* Bits in the segment table address-space-control-element */ @@ -575,12 +578,16 @@ static inline void ptep_rcp_copy(pte_t *ptep) unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); skey = page_get_storage_key(page_to_phys(page)); - if (skey & _PAGE_CHANGED) + if (skey & _PAGE_CHANGED) { set_bit_simple(RCP_GC_BIT, pgste); + set_bit_simple(KVM_UD_BIT, pgste); + } if (skey & _PAGE_REFERENCED) set_bit_simple(RCP_GR_BIT, pgste); - if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) + if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) { SetPageDirty(page); + set_bit_simple(KVM_UD_BIT, pgste); + } if (test_and_clear_bit_simple(RCP_HR_BIT, pgste)) SetPageReferenced(page); #endif @@ -744,6 +751,40 @@ static inline pte_t pte_mkspecial(pte_t pte) return pte; } +#ifdef CONFIG_PGSTE +/* + * Get (and clear) the user dirty bit for a PTE. 
+ */ +static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, + pte_t *ptep) +{ + int dirty; + unsigned long *pgste; + struct page *page; + unsigned int skey; + + if (!mm->context.pgstes) + return -EINVAL; + rcp_lock(ptep); + pgste = (unsigned long *) (ptep + PTRS_PER_PTE); + page = virt_to_page(pte_val(*ptep)); + skey = page_get_storage_key(page_to_phys(page)); + if (skey & _PAGE_CHANGED) { + set_bit_simple(RCP_GC_BIT, pgste); + set_bit_simple(KVM_UD_BIT, pgste); + } + if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) { + SetPageDirty(page); + set_bit_simple(KVM_UD_BIT, pgste); + } + dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste); + if (skey & _PAGE_CHANGED) + page_clear_dirty(page); + rcp_unlock(ptep); + return dirty; +} +#endif + #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) -- cgit v1.2.3 From 4a672cfa3a7fcbc6f2adc558f34148be1096c561 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 10 Oct 2008 21:33:29 +0200 Subject: [S390] fix initialization of stp chsc_sstpc returns -EIO on error and 0 on success but stp_reset checks against 1 instead of 0. chsc_sstpc used to return 1 on success, one call location has not been updated .. 
Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 06acb1a18bbc..b94e9e3b694a 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -1356,7 +1356,7 @@ static void __init stp_reset(void) stp_page = alloc_bootmem_pages(PAGE_SIZE); rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); - if (rc == 1) + if (rc == 0) set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); else if (stp_online) { printk(KERN_WARNING "Running on non STP capable machine.\n"); -- cgit v1.2.3 From a447c0932445f92ce6f4c1bd020f62c5097a7842 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 13 Oct 2008 10:46:57 +0100 Subject: vfs: Use const for kernel parser table This is a much better version of a previous patch to make the parser tables constant. Rather than changing the typedef, we put the "const" in all the various places where it's required, allowing the __initconst exception for nfsroot which was the cause of the previous trouble. This was posted for review some time ago and I believe it's been in -mm since then.
Signed-off-by: Steven Whitehouse Cc: Alexander Viro Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- arch/s390/hypfs/inode.c | 2 +- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- drivers/usb/core/inode.c | 2 +- fs/9p/v9fs.c | 2 +- fs/adfs/super.c | 2 +- fs/affs/super.c | 2 +- fs/afs/super.c | 2 +- fs/autofs/inode.c | 2 +- fs/autofs4/inode.c | 2 +- fs/befs/linuxvfs.c | 2 +- fs/devpts/inode.c | 2 +- fs/ecryptfs/main.c | 2 +- fs/ext2/super.c | 2 +- fs/ext3/super.c | 2 +- fs/ext4/super.c | 2 +- fs/fat/inode.c | 6 +++--- fs/fuse/inode.c | 2 +- fs/gfs2/mount.c | 2 +- fs/hfs/super.c | 2 +- fs/hfsplus/options.c | 2 +- fs/hpfs/super.c | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/isofs/inode.c | 2 +- fs/jfs/super.c | 2 +- fs/nfs/nfsroot.c | 2 +- fs/nfs/super.c | 6 +++--- fs/ocfs2/super.c | 2 +- fs/omfs/inode.c | 2 +- fs/ubifs/super.c | 2 +- fs/udf/super.c | 2 +- fs/ufs/super.c | 4 ++-- fs/xfs/linux-2.6/xfs_super.c | 2 +- include/linux/parser.h | 2 +- lib/parser.c | 2 +- net/9p/client.c | 2 +- net/9p/trans_fd.c | 2 +- security/selinux/hooks.c | 2 +- 38 files changed, 43 insertions(+), 43 deletions(-) (limited to 'arch/s390') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 690ca7b0dcf6..2c8b8091250f 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -659,7 +659,7 @@ enum { Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err, }; -static match_table_t spufs_tokens = { +static const match_table_t spufs_tokens = { { Opt_uid, "uid=%d" }, { Opt_gid, "gid=%d" }, { Opt_mode, "mode=%o" }, diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 7383781f3e6a..36313801cd5c 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -219,7 +219,7 @@ static int hypfs_release(struct inode *inode, struct file *filp) enum { opt_uid, opt_gid, opt_err }; -static match_table_t hypfs_tokens = { +static const match_table_t hypfs_tokens = { 
{opt_uid, "uid=%u"}, {opt_gid, "gid=%u"}, {opt_err, NULL} diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index ed7c5f72cb8b..5b8b533f2908 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1683,7 +1683,7 @@ enum { SRP_OPT_SERVICE_ID), }; -static match_table_t srp_opt_tokens = { +static const match_table_t srp_opt_tokens = { { SRP_OPT_ID_EXT, "id_ext=%s" }, { SRP_OPT_IOC_GUID, "ioc_guid=%s" }, { SRP_OPT_DGID, "dgid=%s" }, diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index db410e92c80d..77fa7a080801 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -97,7 +97,7 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_devuid, "devuid=%u"}, {Opt_devgid, "devgid=%u"}, {Opt_devmode, "devmode=%o"}, diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 047c791427aa..c061c3f18e7c 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -55,7 +55,7 @@ enum { Opt_err }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_debug, "debug=%x"}, {Opt_dfltuid, "dfltuid=%u"}, {Opt_dfltgid, "dfltgid=%u"}, diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 26f3b43726bb..7f83a46f2b7e 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -157,7 +157,7 @@ static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt) enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_err}; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_ownmask, "ownmask=%o"}, diff --git a/fs/affs/super.c b/fs/affs/super.c index 3a89094f93d0..8989c93193ed 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -135,7 +135,7 @@ enum { Opt_verbose, Opt_volume, Opt_ignore, Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_bs, "bs=%u"}, {Opt_mode, "mode=%o"}, {Opt_mufs, "mufs"}, diff --git a/fs/afs/super.c b/fs/afs/super.c index 
250d8c4d66e4..aee239a048cb 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -64,7 +64,7 @@ enum { afs_opt_vol, }; -static match_table_t afs_options_list = { +static const match_table_t afs_options_list = { { afs_opt_cell, "cell=%s" }, { afs_opt_rwpath, "rwpath" }, { afs_opt_vol, "vol=%s" }, diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index dda510d31f84..b70eea1e8c59 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -59,7 +59,7 @@ static const struct super_operations autofs_sops = { enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto}; -static match_table_t autofs_tokens = { +static const match_table_t autofs_tokens = { {Opt_fd, "fd=%u"}, {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 7bb3e5ba0537..45d55819203d 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -213,7 +213,7 @@ static const struct super_operations autofs4_sops = { enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, Opt_indirect, Opt_direct, Opt_offset}; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_fd, "fd=%u"}, {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 740f53672a8a..9286b2af893a 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -650,7 +650,7 @@ enum { Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err, }; -static match_table_t befs_tokens = { +static const match_table_t befs_tokens = { {Opt_uid, "uid=%d"}, {Opt_gid, "gid=%d"}, {Opt_charset, "iocharset=%s"}, diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index a70d5d0890c7..4a714f6c1bed 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -49,7 +49,7 @@ enum { Opt_err }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_mode, "mode=%o"}, diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 448dfd597b5f..8ebe9a5d1d99 100644 --- 
a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -211,7 +211,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, ecryptfs_opt_encrypted_view, ecryptfs_opt_err }; -static match_table_t tokens = { +static const match_table_t tokens = { {ecryptfs_opt_sig, "sig=%s"}, {ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"}, {ecryptfs_opt_cipher, "cipher=%s"}, diff --git a/fs/ext2/super.c b/fs/ext2/super.c index fd88c7b43e66..647cd888ac87 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -393,7 +393,7 @@ enum { Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_bsd_df, "bsddf"}, {Opt_minix_df, "minixdf"}, {Opt_grpid, "grpid"}, diff --git a/fs/ext3/super.c b/fs/ext3/super.c index f38a5afc39a1..399a96a6c556 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -760,7 +760,7 @@ enum { Opt_grpquota }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_bsd_df, "bsddf"}, {Opt_minix_df, "minixdf"}, {Opt_grpid, "grpid"}, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fb940c22ab0d..dea8f13c2fd9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -919,7 +919,7 @@ enum { Opt_inode_readahead_blks }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_bsd_df, "bsddf"}, {Opt_minix_df, "minixdf"}, {Opt_grpid, "grpid"}, diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 80ff3381fa21..d12cdf2a0406 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -855,7 +855,7 @@ enum { Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, }; -static match_table_t fat_tokens = { +static const match_table_t fat_tokens = { {Opt_check_r, "check=relaxed"}, {Opt_check_s, "check=strict"}, {Opt_check_n, "check=normal"}, @@ -890,14 +890,14 @@ static match_table_t fat_tokens = { {Opt_tz_utc, "tz=UTC"}, {Opt_err, NULL}, }; -static match_table_t msdos_tokens = { +static const match_table_t msdos_tokens = { {Opt_nodots, 
"nodots"}, {Opt_nodots, "dotsOK=no"}, {Opt_dots, "dots"}, {Opt_dots, "dotsOK=yes"}, {Opt_err, NULL} }; -static match_table_t vfat_tokens = { +static const match_table_t vfat_tokens = { {Opt_charset, "iocharset=%s"}, {Opt_shortname_lower, "shortname=lower"}, {Opt_shortname_win95, "shortname=win95"}, diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d2249f174e20..6a84388cacff 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -354,7 +354,7 @@ enum { OPT_ERR }; -static match_table_t tokens = { +static const match_table_t tokens = { {OPT_FD, "fd=%u"}, {OPT_ROOTMODE, "rootmode=%o"}, {OPT_USER_ID, "user_id=%u"}, diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index df48333e6f01..f96eb90a2cfa 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c @@ -46,7 +46,7 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_lockproto, "lockproto=%s"}, {Opt_locktable, "locktable=%s"}, {Opt_hostdata, "hostdata=%s"}, diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4abb1047c689..3c7c7637719c 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -173,7 +173,7 @@ enum { opt_err }; -static match_table_t tokens = { +static const match_table_t tokens = { { opt_uid, "uid=%u" }, { opt_gid, "gid=%u" }, { opt_umask, "umask=%o" }, diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 9997cbf8beb5..9699c56d323f 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -25,7 +25,7 @@ enum { opt_force, opt_err }; -static match_table_t tokens = { +static const match_table_t tokens = { { opt_creator, "creator=%s" }, { opt_type, "type=%s" }, { opt_umask, "umask=%o" }, diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index b8ae9c90ada0..29ad461d568f 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -215,7 +215,7 @@ enum { Opt_timeshift, Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_help, "help"}, {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c 
index 3f58923fb39b..61edc701b0e6 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -57,7 +57,7 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_size, "size=%s"}, {Opt_nr_inodes, "nr_inodes=%s"}, {Opt_mode, "mode=%o"}, diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 26948a6033b6..3f8af0f1505b 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -310,7 +310,7 @@ enum { Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_norock, "norock"}, {Opt_nojoliet, "nojoliet"}, {Opt_unhide, "unhide"}, diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 3630718be395..0dae345e481b 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -199,7 +199,7 @@ enum { Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_integrity, "integrity"}, {Opt_nointegrity, "nointegrity"}, {Opt_iocharset, "iocharset=%s"}, diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 46763d1cd397..8478fc25daee 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -127,7 +127,7 @@ enum { Opt_err }; -static match_table_t __initdata tokens = { +static match_table_t __initconst tokens = { {Opt_port, "port=%u"}, {Opt_rsize, "rsize=%u"}, {Opt_wsize, "wsize=%u"}, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e9b20173fef3..ffb697416cb1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -98,7 +98,7 @@ enum { Opt_err }; -static match_table_t nfs_mount_option_tokens = { +static const match_table_t nfs_mount_option_tokens = { { Opt_userspace, "bg" }, { Opt_userspace, "fg" }, { Opt_userspace, "retry=%s" }, @@ -163,7 +163,7 @@ enum { Opt_xprt_err }; -static match_table_t nfs_xprt_protocol_tokens = { +static const match_table_t nfs_xprt_protocol_tokens = { { Opt_xprt_udp, "udp" }, { Opt_xprt_tcp, "tcp" }, { Opt_xprt_rdma, "rdma" }, @@ -180,7 +180,7 @@ enum { Opt_sec_err }; -static 
match_table_t nfs_secflavor_tokens = { +static const match_table_t nfs_secflavor_tokens = { { Opt_sec_none, "none" }, { Opt_sec_none, "null" }, { Opt_sec_sys, "sys" }, diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 88255d3f52b4..70334d85aff1 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -157,7 +157,7 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_barrier, "barrier=%u"}, {Opt_err_panic, "errors=panic"}, {Opt_err_ro, "errors=remount-ro"}, diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index d29047b1b9b0..cbf047a847c5 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -346,7 +346,7 @@ enum { Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_umask, "umask=%o"}, diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 3f4902060c7a..9a9220333b3b 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -848,7 +848,7 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_fast_unmount, "fast_unmount"}, {Opt_norm_unmount, "norm_unmount"}, {Opt_err, NULL}, diff --git a/fs/udf/super.c b/fs/udf/super.c index 5698bbf83bbf..e25e7010627b 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -369,7 +369,7 @@ enum { Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_novrs, "novrs"}, {Opt_nostrict, "nostrict"}, {Opt_bs, "bs=%u"}, diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 3141969b456d..e65212dfb60e 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -309,7 +309,7 @@ enum { Opt_err }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_type_old, "ufstype=old"}, {Opt_type_sunx86, "ufstype=sunx86"}, {Opt_type_sun, "ufstype=sun"}, @@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs) { struct 
ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; - struct match_token *tp = tokens; + const struct match_token *tp = tokens; while (tp->token != Opt_onerror_panic && tp->token != mval) ++tp; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 18d3c8487835..7227b2efef22 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -158,7 +158,7 @@ enum { Opt_barrier, Opt_nobarrier, Opt_err }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_barrier, "barrier"}, {Opt_nobarrier, "nobarrier"}, {Opt_err, NULL} diff --git a/include/linux/parser.h b/include/linux/parser.h index 7dcd05075756..ea2281e726f6 100644 --- a/include/linux/parser.h +++ b/include/linux/parser.h @@ -25,7 +25,7 @@ typedef struct { char *to; } substring_t; -int match_token(char *, match_table_t table, substring_t args[]); +int match_token(char *, const match_table_t table, substring_t args[]); int match_int(substring_t *, int *result); int match_octal(substring_t *, int *result); int match_hex(substring_t *, int *result); diff --git a/lib/parser.c b/lib/parser.c index 4f0cbc03e0e8..b00d02059a5f 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -100,7 +100,7 @@ static int match_one(char *s, const char *p, substring_t args[]) * format identifiers which will be taken into account when matching the * tokens, and whose locations will be returned in the @args array. 
*/ -int match_token(char *s, match_table_t table, substring_t args[]) +int match_token(char *s, const match_table_t table, substring_t args[]) { const struct match_token *p; diff --git a/net/9p/client.c b/net/9p/client.c index 10e320307ec0..e053e06028a5 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -52,7 +52,7 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_msize, "msize=%u"}, {Opt_legacy, "noextend"}, {Opt_trans, "trans=%s"}, diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index d652baf5ff91..6dabbdb66651 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -86,7 +86,7 @@ enum { Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_port, "port=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 88f19536efad..576e51199079 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -325,7 +325,7 @@ enum { Opt_rootcontext = 4, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_context, CONTEXT_STR "%s"}, {Opt_fscontext, FSCONTEXT_STR "%s"}, {Opt_defcontext, DEFCONTEXT_STR "%s"}, -- cgit v1.2.3 From a0046b6db1c514149585e11895cd8434e0eafa79 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 29 Aug 2008 13:29:45 +0200 Subject: KVM: s390: Make facility bits future-proof Heiko Carstens pointed out that it's safer to activate working facilities instead of disabling problematic facilities. The new code uses the host facility bits and masks them with known good ones.
Signed-off-by: Christian Borntraeger Signed-off-by: Avi Kivity --- arch/s390/kvm/priv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index d1faf5c54405..cce40ff2913b 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -157,8 +157,8 @@ static int handle_stfl(struct kvm_vcpu *vcpu) int rc; vcpu->stat.instruction_stfl++; - facility_list &= ~(1UL<<24); /* no stfle */ - facility_list &= ~(1UL<<23); /* no large pages */ + /* only pass the facility bits, which we can handle */ + facility_list &= 0xfe00fff3; rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), &facility_list, sizeof(facility_list)); -- cgit v1.2.3 From 20766c083e6ab3c33125f07c7ffe39914c106d98 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 29 Aug 2008 13:30:56 +0200 Subject: KVM: s390: change help text of guest Kconfig The current help text for CONFIG_S390_GUEST is not very helpful. Let's add more text. Signed-off-by: Christian Borntraeger Signed-off-by: Avi Kivity --- arch/s390/Kconfig | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4c03049e7db9..bc581d8a7cd9 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -565,13 +565,16 @@ config ZFCPDUMP Refer to for more details on this. config S390_GUEST -bool "s390 guest support (EXPERIMENTAL)" +bool "s390 guest support for KVM (EXPERIMENTAL)" depends on 64BIT && EXPERIMENTAL select VIRTIO select VIRTIO_RING select VIRTIO_CONSOLE help - Select this option if you want to run the kernel under s390 linux + Select this option if you want to run the kernel as a guest under + the KVM hypervisor. This will add detection for KVM as well as a + virtio transport. If KVM is detected, the virtio console will be + the default console.
endmenu source "net/Kconfig" -- cgit v1.2.3 From 0b59268285ca6cdc46191f2995bf632088e3e277 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 16 Oct 2008 15:39:57 +0200 Subject: [PATCH] remove unused ibcs2/PER_SVR4 in SET_PERSONALITY The SET_PERSONALITY macro is always called with a second argument of 0. Remove the ibcs argument and the various tests to set the PER_SVR4 personality. Signed-off-by: Martin Schwidefsky --- arch/alpha/include/asm/elf.h | 4 ++-- arch/arm/include/asm/elf.h | 2 +- arch/avr32/include/asm/elf.h | 2 +- arch/blackfin/include/asm/elf.h | 2 +- arch/h8300/include/asm/elf.h | 2 +- arch/ia64/ia32/binfmt_elf32.c | 2 +- arch/ia64/ia32/ia32priv.h | 4 ++-- arch/ia64/include/asm/elf.h | 2 +- arch/m68knommu/include/asm/elf.h | 2 +- arch/mips/include/asm/elf.h | 10 +++------- arch/parisc/kernel/binfmt_elf32.c | 2 +- arch/powerpc/include/asm/elf.h | 4 ++-- arch/s390/include/asm/elf.h | 8 +++----- arch/sh/include/asm/elf.h | 2 +- arch/sparc/include/asm/elf_32.h | 2 +- arch/sparc/include/asm/elf_64.h | 6 ++---- fs/binfmt_elf.c | 6 +++--- include/asm-cris/elf.h | 2 +- include/asm-frv/elf.h | 2 +- include/asm-m32r/elf.h | 2 +- include/asm-m68k/elf.h | 2 +- include/asm-mn10300/elf.h | 2 +- include/asm-parisc/elf.h | 2 +- include/asm-um/elf-i386.h | 2 +- include/asm-um/elf-ppc.h | 2 +- include/asm-um/elf-x86_64.h | 2 +- include/asm-x86/elf.h | 4 ++-- include/asm-xtensa/elf.h | 2 +- 28 files changed, 39 insertions(+), 47 deletions(-) (limited to 'arch/s390') diff --git a/arch/alpha/include/asm/elf.h b/arch/alpha/include/asm/elf.h index fc1002ea1e0c..5c75c1b2352a 100644 --- a/arch/alpha/include/asm/elf.h +++ b/arch/alpha/include/asm/elf.h @@ -144,9 +144,9 @@ extern int dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task); : amask (AMASK_CIX) ? "ev6" : "ev67"); \ }) -#define SET_PERSONALITY(EX, IBCS2) \ +#define SET_PERSONALITY(EX) \ set_personality(((EX).e_flags & EF_ALPHA_32BIT) \ - ? PER_LINUX_32BIT : (IBCS2) ? PER_SVR4 : PER_LINUX) + ? 
PER_LINUX_32BIT : PER_LINUX) extern int alpha_l1i_cacheshape; extern int alpha_l1d_cacheshape; diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 5be016980c19..a58378c343b9 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -107,6 +107,6 @@ extern int arm_elf_read_implies_exec(const struct elf32_hdr *, int); #define ELF_PLAT_INIT(_r, load_addr) (_r)->ARM_r0 = 0 extern void elf_set_personality(const struct elf32_hdr *); -#define SET_PERSONALITY(ex, ibcs2) elf_set_personality(&(ex)) +#define SET_PERSONALITY(ex) elf_set_personality(&(ex)) #endif diff --git a/arch/avr32/include/asm/elf.h b/arch/avr32/include/asm/elf.h index 64ce40ee1d58..d5d1d41c600a 100644 --- a/arch/avr32/include/asm/elf.h +++ b/arch/avr32/include/asm/elf.h @@ -103,6 +103,6 @@ typedef struct user_fpu_struct elf_fpregset_t; #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX_32BIT) +#define SET_PERSONALITY(ex) set_personality(PER_LINUX_32BIT) #endif /* __ASM_AVR32_ELF_H */ diff --git a/arch/blackfin/include/asm/elf.h b/arch/blackfin/include/asm/elf.h index 67a03a8a353e..cdbfcfc30f6a 100644 --- a/arch/blackfin/include/asm/elf.h +++ b/arch/blackfin/include/asm/elf.h @@ -122,6 +122,6 @@ do { \ #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX) +#define SET_PERSONALITY(ex) set_personality(PER_LINUX) #endif diff --git a/arch/h8300/include/asm/elf.h b/arch/h8300/include/asm/elf.h index a8b57d1f4128..94e2284c8816 100644 --- a/arch/h8300/include/asm/elf.h +++ b/arch/h8300/include/asm/elf.h @@ -55,7 +55,7 @@ typedef unsigned long elf_fpregset_t; #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX) +#define SET_PERSONALITY(ex) set_personality(PER_LINUX) #define R_H8_NONE 0 #define R_H8_DIR32 1 diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c index 4f0c30c38e99..f92bdaac8976 100644 --- 
a/arch/ia64/ia32/binfmt_elf32.c +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -41,7 +41,7 @@ randomize_stack_top(unsigned long stack_top); #define elf_map elf32_map #undef SET_PERSONALITY -#define SET_PERSONALITY(ex, ibcs2) elf32_set_personality() +#define SET_PERSONALITY(ex) elf32_set_personality() #define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack)) diff --git a/arch/ia64/ia32/ia32priv.h b/arch/ia64/ia32/ia32priv.h index dd0c53687a96..0f15349c3c6b 100644 --- a/arch/ia64/ia32/ia32priv.h +++ b/arch/ia64/ia32/ia32priv.h @@ -332,8 +332,8 @@ void ia64_elf32_init(struct pt_regs *regs); #define ELF_PLATFORM NULL #ifdef __KERNEL__ -# define SET_PERSONALITY(EX,IBCS2) \ - (current->personality = (IBCS2) ? PER_SVR4 : PER_LINUX) +# define SET_PERSONALITY(EX) \ + (current->personality = PER_LINUX) #endif #define IA32_EFLAG 0x200 diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h index 2acb6b6543c9..86eddee029cb 100644 --- a/arch/ia64/include/asm/elf.h +++ b/arch/ia64/include/asm/elf.h @@ -202,7 +202,7 @@ extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst); relevant until we have real hardware to play with... 
*/ #define ELF_PLATFORM NULL -#define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX) +#define SET_PERSONALITY(ex) set_personality(PER_LINUX) #define elf_read_implies_exec(ex, executable_stack) \ ((executable_stack!=EXSTACK_DISABLE_X) && ((ex).e_flags & EF_IA_64_LINUX_EXECUTABLE_STACK) != 0) diff --git a/arch/m68knommu/include/asm/elf.h b/arch/m68knommu/include/asm/elf.h index 27f0ec70fba8..b8046837f384 100644 --- a/arch/m68knommu/include/asm/elf.h +++ b/arch/m68knommu/include/asm/elf.h @@ -105,6 +105,6 @@ typedef struct user_m68kfp_struct elf_fpregset_t; #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX) +#define SET_PERSONALITY(ex) set_personality(PER_LINUX) #endif diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h index f69f7acba637..a8eac1697b3d 100644 --- a/arch/mips/include/asm/elf.h +++ b/arch/mips/include/asm/elf.h @@ -247,10 +247,8 @@ extern struct mips_abi mips_abi_n32; #ifdef CONFIG_32BIT -#define SET_PERSONALITY(ex, ibcs2) \ +#define SET_PERSONALITY(ex) \ do { \ - if (ibcs2) \ - set_personality(PER_SVR4); \ set_personality(PER_LINUX); \ \ current->thread.abi = &mips_abi; \ @@ -296,7 +294,7 @@ do { \ #define __SET_PERSONALITY32(ex) do { } while (0) #endif -#define SET_PERSONALITY(ex, ibcs2) \ +#define SET_PERSONALITY(ex) \ do { \ clear_thread_flag(TIF_32BIT_REGS); \ clear_thread_flag(TIF_32BIT_ADDR); \ @@ -306,9 +304,7 @@ do { \ else \ current->thread.abi = &mips_abi; \ \ - if (ibcs2) \ - set_personality(PER_SVR4); \ - else if (current->personality != PER_LINUX32) \ + if (current->personality != PER_LINUX32) \ set_personality(PER_LINUX); \ } while (0) diff --git a/arch/parisc/kernel/binfmt_elf32.c b/arch/parisc/kernel/binfmt_elf32.c index ecb10a4f63c6..f61692d2b557 100644 --- a/arch/parisc/kernel/binfmt_elf32.c +++ b/arch/parisc/kernel/binfmt_elf32.c @@ -85,7 +85,7 @@ struct elf_prpsinfo32 * could set a processor dependent flag in the thread_struct. 
*/ -#define SET_PERSONALITY(ex, ibcs2) \ +#define SET_PERSONALITY(ex) \ set_thread_flag(TIF_32BIT); \ current->thread.map_base = DEFAULT_MAP_BASE32; \ current->thread.task_size = DEFAULT_TASK_SIZE32 \ diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 64c6ee22eefd..d812929390e4 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -232,7 +232,7 @@ typedef elf_vrregset_t elf_fpxregset_t; #endif /* __powerpc64__ */ #ifdef __powerpc64__ -# define SET_PERSONALITY(ex, ibcs2) \ +# define SET_PERSONALITY(ex) \ do { \ unsigned long new_flags = 0; \ if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ @@ -256,7 +256,7 @@ do { \ # define elf_read_implies_exec(ex, exec_stk) (test_thread_flag(TIF_32BIT) ? \ (exec_stk != EXSTACK_DISABLE_X) : 0) #else -# define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX) +# define SET_PERSONALITY(ex) set_personality(PER_LINUX) #endif /* __powerpc64__ */ extern int dcache_bsize; diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 3cad56923815..261785ab5b22 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -166,13 +166,11 @@ extern char elf_platform[]; #define ELF_PLATFORM (elf_platform) #ifndef __s390x__ -#define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX) +#define SET_PERSONALITY(ex) set_personality(PER_LINUX) #else /* __s390x__ */ -#define SET_PERSONALITY(ex, ibcs2) \ +#define SET_PERSONALITY(ex) \ do { \ - if (ibcs2) \ - set_personality(PER_SVR4); \ - else if (current->personality != PER_LINUX32) \ + if (current->personality != PER_LINUX32) \ set_personality(PER_LINUX); \ if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ set_thread_flag(TIF_31BIT); \ diff --git a/arch/sh/include/asm/elf.h b/arch/sh/include/asm/elf.h index f01449a8d378..ee02db110f0d 100644 --- a/arch/sh/include/asm/elf.h +++ b/arch/sh/include/asm/elf.h @@ -189,7 +189,7 @@ do { \ } while (0) #endif -#define SET_PERSONALITY(ex, 
ibcs2) set_personality(PER_LINUX_32BIT) +#define SET_PERSONALITY(ex) set_personality(PER_LINUX_32BIT) struct task_struct; extern int dump_task_regs (struct task_struct *, elf_gregset_t *); extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *); diff --git a/arch/sparc/include/asm/elf_32.h b/arch/sparc/include/asm/elf_32.h index b7ab60547827..381a1b5256d6 100644 --- a/arch/sparc/include/asm/elf_32.h +++ b/arch/sparc/include/asm/elf_32.h @@ -137,6 +137,6 @@ typedef struct { #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX) +#define SET_PERSONALITY(ex) set_personality(PER_LINUX) #endif /* !(__ASMSPARC_ELF_H) */ diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h index 0818a1308f4e..425c2f9be6d5 100644 --- a/arch/sparc/include/asm/elf_64.h +++ b/arch/sparc/include/asm/elf_64.h @@ -195,7 +195,7 @@ static inline unsigned int sparc64_elf_hwcap(void) #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex, ibcs2) \ +#define SET_PERSONALITY(ex) \ do { unsigned long new_flags = current_thread_info()->flags; \ new_flags &= _TIF_32BIT; \ if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ @@ -208,9 +208,7 @@ do { unsigned long new_flags = current_thread_info()->flags; \ else \ clear_thread_flag(TIF_ABI_PENDING); \ /* flush_thread will update pgd cache */ \ - if (ibcs2) \ - set_personality(PER_SVR4); \ - else if (current->personality != PER_LINUX32) \ + if (current->personality != PER_LINUX32) \ set_personality(PER_LINUX); \ } while (0) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 655ed8d30a86..c76afa26edf7 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -683,7 +683,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) * switch really is going to happen - do this in * flush_thread(). 
- akpm */ - SET_PERSONALITY(loc->elf_ex, 0); + SET_PERSONALITY(loc->elf_ex); interpreter = open_exec(elf_interpreter); retval = PTR_ERR(interpreter); @@ -734,7 +734,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) goto out_free_dentry; } else { /* Executables without an interpreter also need a personality */ - SET_PERSONALITY(loc->elf_ex, 0); + SET_PERSONALITY(loc->elf_ex); } /* Flush all traces of the currently running executable */ @@ -748,7 +748,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* Do this immediately, since STACK_TOP as used in setup_arg_pages may depend on the personality. */ - SET_PERSONALITY(loc->elf_ex, 0); + SET_PERSONALITY(loc->elf_ex); if (elf_read_implies_exec(loc->elf_ex, executable_stack)) current->personality |= READ_IMPLIES_EXEC; diff --git a/include/asm-cris/elf.h b/include/asm-cris/elf.h index 001f64ad11e8..f0d17fbc81ba 100644 --- a/include/asm-cris/elf.h +++ b/include/asm-cris/elf.h @@ -88,6 +88,6 @@ typedef unsigned long elf_fpregset_t; #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX) +#define SET_PERSONALITY(ex) set_personality(PER_LINUX) #endif diff --git a/include/asm-frv/elf.h b/include/asm-frv/elf.h index 9fb946bb7dc9..7279ec07d62e 100644 --- a/include/asm-frv/elf.h +++ b/include/asm-frv/elf.h @@ -137,6 +137,6 @@ do { \ #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX) +#define SET_PERSONALITY(ex) set_personality(PER_LINUX) #endif diff --git a/include/asm-m32r/elf.h b/include/asm-m32r/elf.h index 67bcd77494a5..0cc34c94bf2b 100644 --- a/include/asm-m32r/elf.h +++ b/include/asm-m32r/elf.h @@ -129,6 +129,6 @@ typedef elf_fpreg_t elf_fpregset_t; intent than poking at uname or /proc/cpuinfo. 
*/ #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX) +#define SET_PERSONALITY(ex) set_personality(PER_LINUX) #endif /* _ASM_M32R__ELF_H */ diff --git a/include/asm-m68k/elf.h b/include/asm-m68k/elf.h index 14ea42152b97..0b0f49eb876b 100644 --- a/include/asm-m68k/elf.h +++ b/include/asm-m68k/elf.h @@ -114,6 +114,6 @@ typedef struct user_m68kfp_struct elf_fpregset_t; #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX) +#define SET_PERSONALITY(ex) set_personality(PER_LINUX) #endif diff --git a/include/asm-mn10300/elf.h b/include/asm-mn10300/elf.h index 256a70466ca4..bf09f8bb392e 100644 --- a/include/asm-mn10300/elf.h +++ b/include/asm-mn10300/elf.h @@ -141,7 +141,7 @@ do { \ #define ELF_PLATFORM (NULL) #ifdef __KERNEL__ -#define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX) +#define SET_PERSONALITY(ex) set_personality(PER_LINUX) #endif #endif /* _ASM_ELF_H */ diff --git a/include/asm-parisc/elf.h b/include/asm-parisc/elf.h index d0a4a8262818..7fa675799e6d 100644 --- a/include/asm-parisc/elf.h +++ b/include/asm-parisc/elf.h @@ -236,7 +236,7 @@ typedef unsigned long elf_greg_t; #define ELF_PLATFORM ("PARISC\0") -#define SET_PERSONALITY(ex, ibcs2) \ +#define SET_PERSONALITY(ex) \ current->personality = PER_LINUX; \ current->thread.map_base = DEFAULT_MAP_BASE; \ current->thread.task_size = DEFAULT_TASK_SIZE \ diff --git a/include/asm-um/elf-i386.h b/include/asm-um/elf-i386.h index 23d6893e8617..d0da9d7c5371 100644 --- a/include/asm-um/elf-i386.h +++ b/include/asm-um/elf-i386.h @@ -86,7 +86,7 @@ extern long elf_aux_hwcap; extern char * elf_aux_platform; #define ELF_PLATFORM (elf_aux_platform) -#define SET_PERSONALITY(ex, ibcs2) do { } while (0) +#define SET_PERSONALITY(ex) do { } while (0) extern unsigned long vsyscall_ehdr; extern unsigned long vsyscall_end; diff --git a/include/asm-um/elf-ppc.h b/include/asm-um/elf-ppc.h index d3b90b7ac3e9..af9463cd8ce5 100644 --- 
a/include/asm-um/elf-ppc.h +++ b/include/asm-um/elf-ppc.h @@ -5,7 +5,7 @@ extern long elf_aux_hwcap; #define ELF_HWCAP (elf_aux_hwcap) -#define SET_PERSONALITY(ex, ibcs2) do ; while(0) +#define SET_PERSONALITY(ex) do ; while(0) #define ELF_EXEC_PAGESIZE 4096 diff --git a/include/asm-um/elf-x86_64.h b/include/asm-um/elf-x86_64.h index 3b2d5224a7e1..6e8a9195e952 100644 --- a/include/asm-um/elf-x86_64.h +++ b/include/asm-um/elf-x86_64.h @@ -114,6 +114,6 @@ extern long elf_aux_hwcap; #define ELF_PLATFORM "x86_64" -#define SET_PERSONALITY(ex, ibcs2) do ; while(0) +#define SET_PERSONALITY(ex) do ; while(0) #endif diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h index 5c4745bec906..26bc15f01e78 100644 --- a/include/asm-x86/elf.h +++ b/include/asm-x86/elf.h @@ -186,7 +186,7 @@ do { \ set_fs(USER_DS); \ } while (0) -#define COMPAT_SET_PERSONALITY(ex, ibcs2) \ +#define COMPAT_SET_PERSONALITY(ex) \ do { \ if (test_thread_flag(TIF_IA32)) \ clear_thread_flag(TIF_ABI_PENDING); \ @@ -267,7 +267,7 @@ extern int force_personality32; For the moment, we have only optimizations for the Intel generations, but that could change... */ -#define SET_PERSONALITY(ex, ibcs2) set_personality_64bit() +#define SET_PERSONALITY(ex) set_personality_64bit() /* * An executable for which elf_read_implies_exec() returns TRUE will diff --git a/include/asm-xtensa/elf.h b/include/asm-xtensa/elf.h index ca6e5101a2cb..c3f53e755ca5 100644 --- a/include/asm-xtensa/elf.h +++ b/include/asm-xtensa/elf.h @@ -189,7 +189,7 @@ typedef struct { #endif } elf_xtregs_t; -#define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX_32BIT) +#define SET_PERSONALITY(ex) set_personality(PER_LINUX_32BIT) struct task_struct; -- cgit v1.2.3 From f7a5000f7a8924e9c5fad1801616601d6dc65a17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Oct 2008 22:02:05 -0700 Subject: compat: move cp_compat_stat to common code struct stat / compat_stat is the same on all architectures, so cp_compat_stat should be, too. 
Turns out it is, except that various architectures have slightly different implementations, and some use high2lowuid/high2lowgid or the direct assignment instead of the SET_UID/SET_GID that expands to the correct one anyway. This patch replaces the arch-specific cp_compat_stat implementations with a common one based on the x86-64 one. Signed-off-by: Christoph Hellwig Acked-by: David S. Miller [ sparc bits ] Acked-by: Kyle McMartin [ parisc bits ] Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/ia32/sys_ia32.c | 35 ----------------------------- arch/mips/kernel/linux32.c | 35 ----------------------------- arch/parisc/kernel/sys_parisc32.c | 47 --------------------------------------- arch/powerpc/kernel/sys_ppc32.c | 36 ------------------------------ arch/s390/kernel/compat_linux.c | 35 ----------------------------- arch/sparc64/kernel/sys_sparc32.c | 35 ----------------------------- arch/x86/ia32/sys_ia32.c | 35 ----------------------------- fs/compat.c | 39 ++++++++++++++++++++++++++++++++ include/linux/compat.h | 1 - 9 files changed, 39 insertions(+), 259 deletions(-) (limited to 'arch/s390') diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index bf196cbb3796..2362a8eefb30 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -118,41 +118,6 @@ sys32_execve (char __user *name, compat_uptr_t __user *argv, compat_uptr_t __use return error; } -int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) -{ - compat_ino_t ino; - int err; - - if ((u64) stat->size > MAX_NON_LFS || - !old_valid_dev(stat->dev) || - !old_valid_dev(stat->rdev)) - return -EOVERFLOW; - - ino = stat->ino; - if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino) - return -EOVERFLOW; - - if (clear_user(ubuf, sizeof(*ubuf))) - return -EFAULT; - - err = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev); - err |= __put_user(ino, &ubuf->st_ino); - err |= __put_user(stat->mode, &ubuf->st_mode); - err |= __put_user(stat->nlink, &ubuf->st_nlink); - err
|= __put_user(high2lowuid(stat->uid), &ubuf->st_uid); - err |= __put_user(high2lowgid(stat->gid), &ubuf->st_gid); - err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev); - err |= __put_user(stat->size, &ubuf->st_size); - err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime); - err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec); - err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime); - err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec); - err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime); - err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec); - err |= __put_user(stat->blksize, &ubuf->st_blksize); - err |= __put_user(stat->blocks, &ubuf->st_blocks); - return err; -} #if PAGE_SHIFT > IA32_PAGE_SHIFT diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 2fefb14414b7..89223a9bff2c 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -63,41 +63,6 @@ #define merge_64(r1, r2) ((((r2) & 0xffffffffUL) << 32) + ((r1) & 0xffffffffUL)) #endif -/* - * Revalidate the inode. This is required for proper NFS attribute caching. 
- */ - -int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf) -{ - struct compat_stat tmp; - - if (!new_valid_dev(stat->dev) || !new_valid_dev(stat->rdev)) - return -EOVERFLOW; - - memset(&tmp, 0, sizeof(tmp)); - tmp.st_dev = new_encode_dev(stat->dev); - tmp.st_ino = stat->ino; - if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) - return -EOVERFLOW; - tmp.st_mode = stat->mode; - tmp.st_nlink = stat->nlink; - SET_UID(tmp.st_uid, stat->uid); - SET_GID(tmp.st_gid, stat->gid); - tmp.st_rdev = new_encode_dev(stat->rdev); - tmp.st_size = stat->size; - tmp.st_atime = stat->atime.tv_sec; - tmp.st_mtime = stat->mtime.tv_sec; - tmp.st_ctime = stat->ctime.tv_sec; -#ifdef STAT_HAVE_NSEC - tmp.st_atime_nsec = stat->atime.tv_nsec; - tmp.st_mtime_nsec = stat->mtime.tv_nsec; - tmp.st_ctime_nsec = stat->ctime.tv_nsec; -#endif - tmp.st_blocks = stat->blocks; - tmp.st_blksize = stat->blksize; - return copy_to_user(statbuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; -} - asmlinkage unsigned long sys32_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 71efd6a28e2a..2c3af17e049c 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -237,53 +237,6 @@ int sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user return do_sys_settimeofday(tv ? &kts : NULL, tz ? 
&ktz : NULL); } -int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf) -{ - compat_ino_t ino; - int err; - - if (stat->size > MAX_NON_LFS || !new_valid_dev(stat->dev) || - !new_valid_dev(stat->rdev)) - return -EOVERFLOW; - - ino = stat->ino; - if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino) - return -EOVERFLOW; - - err = put_user(new_encode_dev(stat->dev), &statbuf->st_dev); - err |= put_user(ino, &statbuf->st_ino); - err |= put_user(stat->mode, &statbuf->st_mode); - err |= put_user(stat->nlink, &statbuf->st_nlink); - err |= put_user(0, &statbuf->st_reserved1); - err |= put_user(0, &statbuf->st_reserved2); - err |= put_user(new_encode_dev(stat->rdev), &statbuf->st_rdev); - err |= put_user(stat->size, &statbuf->st_size); - err |= put_user(stat->atime.tv_sec, &statbuf->st_atime); - err |= put_user(stat->atime.tv_nsec, &statbuf->st_atime_nsec); - err |= put_user(stat->mtime.tv_sec, &statbuf->st_mtime); - err |= put_user(stat->mtime.tv_nsec, &statbuf->st_mtime_nsec); - err |= put_user(stat->ctime.tv_sec, &statbuf->st_ctime); - err |= put_user(stat->ctime.tv_nsec, &statbuf->st_ctime_nsec); - err |= put_user(stat->blksize, &statbuf->st_blksize); - err |= put_user(stat->blocks, &statbuf->st_blocks); - err |= put_user(0, &statbuf->__unused1); - err |= put_user(0, &statbuf->__unused2); - err |= put_user(0, &statbuf->__unused3); - err |= put_user(0, &statbuf->__unused4); - err |= put_user(0, &statbuf->__unused5); - err |= put_user(0, &statbuf->st_fstype); /* not avail */ - err |= put_user(0, &statbuf->st_realdev); /* not avail */ - err |= put_user(0, &statbuf->st_basemode); /* not avail */ - err |= put_user(0, &statbuf->st_spareshort); - err |= put_user(stat->uid, &statbuf->st_uid); - err |= put_user(stat->gid, &statbuf->st_gid); - err |= put_user(0, &statbuf->st_spare4[0]); - err |= put_user(0, &statbuf->st_spare4[1]); - err |= put_user(0, &statbuf->st_spare4[2]); - - return err; -} - /*** copied from mips64 ***/ /* * Ooo, nasty. 
We need here to frob 32-bit unsigned longs to diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index ff7de7b0797e..d00599bb24a1 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -61,42 +61,6 @@ asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp, return compat_sys_select((int)n, inp, outp, exp, compat_ptr(tvp_x)); } -int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf) -{ - compat_ino_t ino; - long err; - - if (stat->size > MAX_NON_LFS || !new_valid_dev(stat->dev) || - !new_valid_dev(stat->rdev)) - return -EOVERFLOW; - - ino = stat->ino; - if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino) - return -EOVERFLOW; - - err = access_ok(VERIFY_WRITE, statbuf, sizeof(*statbuf)) ? 0 : -EFAULT; - err |= __put_user(new_encode_dev(stat->dev), &statbuf->st_dev); - err |= __put_user(ino, &statbuf->st_ino); - err |= __put_user(stat->mode, &statbuf->st_mode); - err |= __put_user(stat->nlink, &statbuf->st_nlink); - err |= __put_user(stat->uid, &statbuf->st_uid); - err |= __put_user(stat->gid, &statbuf->st_gid); - err |= __put_user(new_encode_dev(stat->rdev), &statbuf->st_rdev); - err |= __put_user(stat->size, &statbuf->st_size); - err |= __put_user(stat->atime.tv_sec, &statbuf->st_atime); - err |= __put_user(stat->atime.tv_nsec, &statbuf->st_atime_nsec); - err |= __put_user(stat->mtime.tv_sec, &statbuf->st_mtime); - err |= __put_user(stat->mtime.tv_nsec, &statbuf->st_mtime_nsec); - err |= __put_user(stat->ctime.tv_sec, &statbuf->st_ctime); - err |= __put_user(stat->ctime.tv_nsec, &statbuf->st_ctime_nsec); - err |= __put_user(stat->blksize, &statbuf->st_blksize); - err |= __put_user(stat->blocks, &statbuf->st_blocks); - err |= __put_user(0, &statbuf->__unused4[0]); - err |= __put_user(0, &statbuf->__unused4[1]); - - return err; -} - /* Note: it is necessary to treat option as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion 
(sign extension) between the register representation of a signed int (msr in 32-bit mode) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 98e246dc0233..9b471d785ec1 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -362,41 +362,6 @@ asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned return sys_ftruncate(fd, (high << 32) | low); } -int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf) -{ - compat_ino_t ino; - int err; - - if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) - return -EOVERFLOW; - - ino = stat->ino; - if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino) - return -EOVERFLOW; - - err = put_user(old_encode_dev(stat->dev), &statbuf->st_dev); - err |= put_user(stat->ino, &statbuf->st_ino); - err |= put_user(stat->mode, &statbuf->st_mode); - err |= put_user(stat->nlink, &statbuf->st_nlink); - err |= put_user(high2lowuid(stat->uid), &statbuf->st_uid); - err |= put_user(high2lowgid(stat->gid), &statbuf->st_gid); - err |= put_user(old_encode_dev(stat->rdev), &statbuf->st_rdev); - err |= put_user(stat->size, &statbuf->st_size); - err |= put_user(stat->atime.tv_sec, &statbuf->st_atime); - err |= put_user(stat->atime.tv_nsec, &statbuf->st_atime_nsec); - err |= put_user(stat->mtime.tv_sec, &statbuf->st_mtime); - err |= put_user(stat->mtime.tv_nsec, &statbuf->st_mtime_nsec); - err |= put_user(stat->ctime.tv_sec, &statbuf->st_ctime); - err |= put_user(stat->ctime.tv_nsec, &statbuf->st_ctime_nsec); - err |= put_user(stat->blksize, &statbuf->st_blksize); - err |= put_user(stat->blocks, &statbuf->st_blocks); -/* fixme - err |= put_user(0, &statbuf->__unused4[0]); - err |= put_user(0, &statbuf->__unused4[1]); -*/ - return err; -} - asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval) { diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index 
3320c9d0075f..73a33dc3bcca 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -148,41 +148,6 @@ asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned return sys_ftruncate(fd, (high << 32) | low); } -int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf) -{ - compat_ino_t ino; - int err; - - if (stat->size > MAX_NON_LFS || !old_valid_dev(stat->dev) || - !old_valid_dev(stat->rdev)) - return -EOVERFLOW; - - ino = stat->ino; - if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino) - return -EOVERFLOW; - - err = put_user(old_encode_dev(stat->dev), &statbuf->st_dev); - err |= put_user(stat->ino, &statbuf->st_ino); - err |= put_user(stat->mode, &statbuf->st_mode); - err |= put_user(stat->nlink, &statbuf->st_nlink); - err |= put_user(high2lowuid(stat->uid), &statbuf->st_uid); - err |= put_user(high2lowgid(stat->gid), &statbuf->st_gid); - err |= put_user(old_encode_dev(stat->rdev), &statbuf->st_rdev); - err |= put_user(stat->size, &statbuf->st_size); - err |= put_user(stat->atime.tv_sec, &statbuf->st_atime); - err |= put_user(stat->atime.tv_nsec, &statbuf->st_atime_nsec); - err |= put_user(stat->mtime.tv_sec, &statbuf->st_mtime); - err |= put_user(stat->mtime.tv_nsec, &statbuf->st_mtime_nsec); - err |= put_user(stat->ctime.tv_sec, &statbuf->st_ctime); - err |= put_user(stat->ctime.tv_nsec, &statbuf->st_ctime_nsec); - err |= put_user(stat->blksize, &statbuf->st_blksize); - err |= put_user(stat->blocks, &statbuf->st_blocks); - err |= put_user(0, &statbuf->__unused4[0]); - err |= put_user(0, &statbuf->__unused4[1]); - - return err; -} - static int cp_compat_stat64(struct kstat *stat, struct compat_stat64 __user *statbuf) { diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index beda4232ce69..4d3ad8d78a4d 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -49,41 +49,6 @@ #define AA(__x) ((unsigned long)(__x)) -int cp_compat_stat(struct kstat *kbuf, struct 
compat_stat __user *ubuf) -{ - compat_ino_t ino; - - typeof(ubuf->st_uid) uid = 0; - typeof(ubuf->st_gid) gid = 0; - SET_UID(uid, kbuf->uid); - SET_GID(gid, kbuf->gid); - if (!old_valid_dev(kbuf->dev) || !old_valid_dev(kbuf->rdev)) - return -EOVERFLOW; - if (kbuf->size >= 0x7fffffff) - return -EOVERFLOW; - ino = kbuf->ino; - if (sizeof(ino) < sizeof(kbuf->ino) && ino != kbuf->ino) - return -EOVERFLOW; - if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct compat_stat)) || - __put_user(old_encode_dev(kbuf->dev), &ubuf->st_dev) || - __put_user(ino, &ubuf->st_ino) || - __put_user(kbuf->mode, &ubuf->st_mode) || - __put_user(kbuf->nlink, &ubuf->st_nlink) || - __put_user(uid, &ubuf->st_uid) || - __put_user(gid, &ubuf->st_gid) || - __put_user(old_encode_dev(kbuf->rdev), &ubuf->st_rdev) || - __put_user(kbuf->size, &ubuf->st_size) || - __put_user(kbuf->atime.tv_sec, &ubuf->st_atime) || - __put_user(kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) || - __put_user(kbuf->mtime.tv_sec, &ubuf->st_mtime) || - __put_user(kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) || - __put_user(kbuf->ctime.tv_sec, &ubuf->st_ctime) || - __put_user(kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) || - __put_user(kbuf->blksize, &ubuf->st_blksize) || - __put_user(kbuf->blocks, &ubuf->st_blocks)) - return -EFAULT; - return 0; -} asmlinkage long sys32_truncate64(char __user *filename, unsigned long offset_low, diff --git a/fs/compat.c b/fs/compat.c index aae13d31612f..5f9ec449c799 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -137,6 +137,45 @@ asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval _ return compat_sys_futimesat(AT_FDCWD, filename, t); } +static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) +{ + compat_ino_t ino = stat->ino; + typeof(ubuf->st_uid) uid = 0; + typeof(ubuf->st_gid) gid = 0; + int err; + + SET_UID(uid, stat->uid); + SET_GID(gid, stat->gid); + + if ((u64) stat->size > MAX_NON_LFS || + !old_valid_dev(stat->dev) || + !old_valid_dev(stat->rdev)) 
+ return -EOVERFLOW; + if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino) + return -EOVERFLOW; + + if (clear_user(ubuf, sizeof(*ubuf))) + return -EFAULT; + + err = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev); + err |= __put_user(ino, &ubuf->st_ino); + err |= __put_user(stat->mode, &ubuf->st_mode); + err |= __put_user(stat->nlink, &ubuf->st_nlink); + err |= __put_user(uid, &ubuf->st_uid); + err |= __put_user(gid, &ubuf->st_gid); + err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev); + err |= __put_user(stat->size, &ubuf->st_size); + err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime); + err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec); + err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime); + err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec); + err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime); + err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec); + err |= __put_user(stat->blksize, &ubuf->st_blksize); + err |= __put_user(stat->blocks, &ubuf->st_blocks); + return err; +} + asmlinkage long compat_sys_newstat(char __user * filename, struct compat_stat __user *statbuf) { diff --git a/include/linux/compat.h b/include/linux/compat.h index cf8d11cad5ae..999dddd8d939 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -78,7 +78,6 @@ typedef struct { compat_sigset_word sig[_COMPAT_NSIG_WORDS]; } compat_sigset_t; -extern int cp_compat_stat(struct kstat *, struct compat_stat __user *); extern int get_compat_timespec(struct timespec *, const struct compat_timespec __user *); extern int put_compat_timespec(const struct timespec *, struct compat_timespec __user *); -- cgit v1.2.3 From b418da16dd44810e5d5a22bba377cca80512a524 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Oct 2008 22:02:06 -0700 Subject: compat: generic compat get/settimeofday Nothing arch specific in get/settimeofday. 
The details of the timeval conversion varied a little from arch to arch, but all with the same results. Also add an extern declaration for sys_tz to linux/time.h because externs in .c files are frowned upon. I'll kill the externs in various other files in a separate patch. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Christoph Hellwig Acked-by: David S. Miller [ sparc bits ] Cc: "Luck, Tony" Cc: Ralf Baechle Acked-by: Kyle McMartin Cc: Matthew Wilcox Cc: Grant Grundler Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/ia32/ia32_entry.S | 4 +-- arch/ia64/ia32/sys_ia32.c | 56 ------------------------------- arch/mips/kernel/linux32.c | 66 ------------------------------------- arch/mips/kernel/scall64-n32.S | 4 +-- arch/mips/kernel/scall64-o32.S | 4 +-- arch/parisc/kernel/sys_parisc32.c | 58 --------------------------------- arch/parisc/kernel/syscall_table.S | 4 +-- arch/powerpc/kernel/sys_ppc32.c | 63 ----------------------------------- arch/s390/kernel/compat_linux.c | 67 -------------------------------------- arch/s390/kernel/compat_linux.h | 4 --- arch/s390/kernel/compat_wrapper.S | 12 +++---- arch/s390/kernel/syscalls.S | 4 +-- arch/sparc64/kernel/sys_sparc32.c | 62 ----------------------------------- arch/sparc64/kernel/systbls.S | 4 +-- arch/x86/ia32/ia32entry.S | 4 +-- arch/x86/ia32/sys_ia32.c | 64 ------------------------------------ include/linux/compat.h | 5 +++ include/linux/time.h | 2 ++ kernel/compat.c | 58 +++++++++++++++++++++++++++++++++ 19 files changed, 85 insertions(+), 460 deletions(-) (limited to 'arch/s390') diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index ff88c48c5d19..53505bb04771 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -251,8 +251,8 @@ ia32_syscall_table: data8 compat_sys_setrlimit /* 75 */ data8
compat_sys_old_getrlimit data8 compat_sys_getrusage - data8 sys32_gettimeofday - data8 sys32_settimeofday + data8 compat_sys_gettimeofday + data8 compat_sys_settimeofday data8 sys32_getgroups16 /* 80 */ data8 sys32_setgroups16 data8 sys32_old_select diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 2362a8eefb30..f4430bb4bbdc 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1113,68 +1113,12 @@ sys32_pipe (int __user *fd) return retval; } -static inline long -get_tv32 (struct timeval *o, struct compat_timeval __user *i) -{ - return (!access_ok(VERIFY_READ, i, sizeof(*i)) || - (__get_user(o->tv_sec, &i->tv_sec) | __get_user(o->tv_usec, &i->tv_usec))); -} - -static inline long -put_tv32 (struct compat_timeval __user *o, struct timeval *i) -{ - return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || - (__put_user(i->tv_sec, &o->tv_sec) | __put_user(i->tv_usec, &o->tv_usec))); -} - asmlinkage unsigned long sys32_alarm (unsigned int seconds) { return alarm_setitimer(seconds); } -/* Translations due to time_t size differences. Which affects all - sorts of things, like timeval and itimerval. */ - -extern struct timezone sys_tz; - -asmlinkage long -sys32_gettimeofday (struct compat_timeval __user *tv, struct timezone __user *tz) -{ - if (tv) { - struct timeval ktv; - do_gettimeofday(&ktv); - if (put_tv32(tv, &ktv)) - return -EFAULT; - } - if (tz) { - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - return 0; -} - -asmlinkage long -sys32_settimeofday (struct compat_timeval __user *tv, struct timezone __user *tz) -{ - struct timeval ktv; - struct timespec kts; - struct timezone ktz; - - if (tv) { - if (get_tv32(&ktv, tv)) - return -EFAULT; - kts.tv_sec = ktv.tv_sec; - kts.tv_nsec = ktv.tv_usec * 1000; - } - if (tz) { - if (copy_from_user(&ktz, tz, sizeof(ktz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &kts : NULL, tz ? 
&ktz : NULL); -} - struct sel_arg_struct { unsigned int n; unsigned int inp; diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 89223a9bff2c..aa2c55e3b55f 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -133,72 +133,6 @@ asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long __dummy, return sys_ftruncate(fd, merge_64(a2, a3)); } -static inline long -get_tv32(struct timeval *o, struct compat_timeval __user *i) -{ - return (!access_ok(VERIFY_READ, i, sizeof(*i)) || - (__get_user(o->tv_sec, &i->tv_sec) | - __get_user(o->tv_usec, &i->tv_usec))); -} - -static inline long -put_tv32(struct compat_timeval __user *o, struct timeval *i) -{ - return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || - (__put_user(i->tv_sec, &o->tv_sec) | - __put_user(i->tv_usec, &o->tv_usec))); -} - -extern struct timezone sys_tz; - -asmlinkage int -sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - if (tv) { - struct timeval ktv; - do_gettimeofday(&ktv); - if (put_tv32(tv, &ktv)) - return -EFAULT; - } - if (tz) { - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - return 0; -} - -static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i) -{ - long usec; - - if (!access_ok(VERIFY_READ, i, sizeof(*i))) - return -EFAULT; - if (__get_user(o->tv_sec, &i->tv_sec)) - return -EFAULT; - if (__get_user(usec, &i->tv_usec)) - return -EFAULT; - o->tv_nsec = usec * 1000; - return 0; -} - -asmlinkage int -sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - struct timespec kts; - struct timezone ktz; - - if (tv) { - if (get_ts32(&kts, tv)) - return -EFAULT; - } - if (tz) { - if (copy_from_user(&ktz, tz, sizeof(ktz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &kts : NULL, tz ? 
&ktz : NULL); -} - asmlinkage int sys32_llseek(unsigned int fd, unsigned int offset_high, unsigned int offset_low, loff_t __user * result, unsigned int origin) diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 324c5499dec2..e266b3aa6560 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -214,7 +214,7 @@ EXPORT(sysn32_call_table) PTR sys_fchown PTR sys_lchown PTR sys_umask - PTR sys32_gettimeofday + PTR compat_sys_gettimeofday PTR compat_sys_getrlimit /* 6095 */ PTR compat_sys_getrusage PTR compat_sys_sysinfo @@ -279,7 +279,7 @@ EXPORT(sysn32_call_table) PTR sys_chroot PTR sys_sync PTR sys_acct - PTR sys32_settimeofday + PTR compat_sys_settimeofday PTR compat_sys_mount /* 6160 */ PTR sys_umount PTR sys_swapon diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 85fedac99a57..6c7ef8313ebd 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -283,8 +283,8 @@ sys_call_table: PTR compat_sys_setrlimit /* 4075 */ PTR compat_sys_getrlimit PTR compat_sys_getrusage - PTR sys32_gettimeofday - PTR sys32_settimeofday + PTR compat_sys_gettimeofday + PTR compat_sys_settimeofday PTR sys_getgroups /* 4080 */ PTR sys_setgroups PTR sys_ni_syscall /* old_select */ diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 2c3af17e049c..0838155b7a88 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -179,64 +179,6 @@ asmlinkage long sys32_sched_rr_get_interval(pid_t pid, return ret; } -static int -put_compat_timeval(struct compat_timeval __user *u, struct timeval *t) -{ - struct compat_timeval t32; - t32.tv_sec = t->tv_sec; - t32.tv_usec = t->tv_usec; - return copy_to_user(u, &t32, sizeof t32); -} - -static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i) -{ - long usec; - - if (__get_user(o->tv_sec, &i->tv_sec)) - return -EFAULT; - if (__get_user(usec, &i->tv_usec)) - 
return -EFAULT; - o->tv_nsec = usec * 1000; - return 0; -} - -asmlinkage int -sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - extern void do_gettimeofday(struct timeval *tv); - - if (tv) { - struct timeval ktv; - do_gettimeofday(&ktv); - if (put_compat_timeval(tv, &ktv)) - return -EFAULT; - } - if (tz) { - extern struct timezone sys_tz; - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - return 0; -} - -asmlinkage -int sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - struct timespec kts; - struct timezone ktz; - - if (tv) { - if (get_ts32(&kts, tv)) - return -EFAULT; - } - if (tz) { - if (copy_from_user(&ktz, tz, sizeof(ktz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); -} - /*** copied from mips64 ***/ /* * Ooo, nasty. We need here to frob 32-bit unsigned longs to diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 6b5ac38f5a99..c7e59f548817 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -149,8 +149,8 @@ ENTRY_COMP(getrlimit) ENTRY_COMP(getrusage) /* struct timeval and timezone are maybe?? consistent wide and narrow */ - ENTRY_DIFF(gettimeofday) - ENTRY_DIFF(settimeofday) + ENTRY_COMP(gettimeofday) + ENTRY_COMP(settimeofday) ENTRY_SAME(getgroups) /* 80 */ ENTRY_SAME(setgroups) /* struct socketaddr... 
*/ diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index d00599bb24a1..bb1cfcfdbbbb 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -71,69 +71,6 @@ asmlinkage long compat_sys_sysfs(u32 option, u32 arg1, u32 arg2) return sys_sysfs((int)option, arg1, arg2); } -static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i) -{ - long usec; - - if (!access_ok(VERIFY_READ, i, sizeof(*i))) - return -EFAULT; - if (__get_user(o->tv_sec, &i->tv_sec)) - return -EFAULT; - if (__get_user(usec, &i->tv_usec)) - return -EFAULT; - o->tv_nsec = usec * 1000; - return 0; -} - -static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i) -{ - return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || - (__put_user(i->tv_sec, &o->tv_sec) | - __put_user(i->tv_usec, &o->tv_usec))); -} - - - - -/* Translations due to time_t size differences. Which affects all - sorts of things, like timeval and itimerval. */ -extern struct timezone sys_tz; - -asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - if (tv) { - struct timeval ktv; - do_gettimeofday(&ktv); - if (put_tv32(tv, &ktv)) - return -EFAULT; - } - if (tz) { - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - - return 0; -} - - - -asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - struct timespec kts; - struct timezone ktz; - - if (tv) { - if (get_ts32(&kts, tv)) - return -EFAULT; - } - if (tz) { - if (copy_from_user(&ktz, tz, sizeof(ktz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &kts : NULL, tz ? 
&ktz : NULL); -} - #ifdef CONFIG_SYSVIPC long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t ptr, u32 fifth) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 9b471d785ec1..4646382af34f 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -279,22 +279,6 @@ asmlinkage long sys32_getegid16(void) return high2lowgid(current->egid); } -/* 32-bit timeval and related flotsam. */ - -static inline long get_tv32(struct timeval *o, struct compat_timeval __user *i) -{ - return (!access_ok(VERIFY_READ, o, sizeof(*o)) || - (__get_user(o->tv_sec, &i->tv_sec) || - __get_user(o->tv_usec, &i->tv_usec))); -} - -static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i) -{ - return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || - (__put_user(i->tv_sec, &o->tv_sec) || - __put_user(i->tv_usec, &o->tv_usec))); -} - /* * sys32_ipc() is the de-multiplexer for the SysV IPC calls in 32bit emulation. * @@ -522,57 +506,6 @@ sys32_delete_module(const char __user *name_user, unsigned int flags) #endif /* CONFIG_MODULES */ -/* Translations due to time_t size differences. Which affects all - sorts of things, like timeval and itimerval. 
*/ - -extern struct timezone sys_tz; - -asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - if (tv) { - struct timeval ktv; - do_gettimeofday(&ktv); - if (put_tv32(tv, &ktv)) - return -EFAULT; - } - if (tz) { - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - return 0; -} - -static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i) -{ - long usec; - - if (!access_ok(VERIFY_READ, i, sizeof(*i))) - return -EFAULT; - if (__get_user(o->tv_sec, &i->tv_sec)) - return -EFAULT; - if (__get_user(usec, &i->tv_usec)) - return -EFAULT; - o->tv_nsec = usec * 1000; - return 0; -} - -asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - struct timespec kts; - struct timezone ktz; - - if (tv) { - if (get_ts32(&kts, tv)) - return -EFAULT; - } - if (tz) { - if (copy_from_user(&ktz, tz, sizeof(ktz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &kts : NULL, tz ? 
&ktz : NULL); -} - asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count, u32 poshi, u32 poslo) { diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 05f8516366ab..836a28842900 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -202,10 +202,6 @@ long sys32_execve(void); long sys32_init_module(void __user *umod, unsigned long len, const char __user *uargs); long sys32_delete_module(const char __user *name_user, unsigned int flags); -long sys32_gettimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz); -long sys32_settimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz); long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count, u32 poshi, u32 poslo); long sys32_pwrite64(unsigned int fd, const char __user *ubuf, diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index ee51ca9e23b5..fc2c97197a53 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -332,17 +332,17 @@ compat_sys_getrusage_wrapper: llgtr %r3,%r3 # struct rusage_emu31 * jg compat_sys_getrusage # branch to system call - .globl sys32_gettimeofday_wrapper -sys32_gettimeofday_wrapper: + .globl compat_sys_gettimeofday_wrapper +compat_sys_gettimeofday_wrapper: llgtr %r2,%r2 # struct timeval_emu31 * llgtr %r3,%r3 # struct timezone * - jg sys32_gettimeofday # branch to system call + jg compat_sys_gettimeofday # branch to system call - .globl sys32_settimeofday_wrapper -sys32_settimeofday_wrapper: + .globl compat_sys_settimeofday_wrapper +compat_sys_settimeofday_wrapper: llgtr %r2,%r2 # struct timeval_emu31 * llgtr %r3,%r3 # struct timezone * - jg sys32_settimeofday # branch to system call + jg compat_sys_settimeofday # branch to system call .globl sys32_getgroups16_wrapper sys32_getgroups16_wrapper: diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 3ae303914b42..2d61787949d5 
100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -86,8 +86,8 @@ SYSCALL(sys_sethostname,sys_sethostname,sys32_sethostname_wrapper) SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit_wrapper) /* 75 */ SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit_wrapper) SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage_wrapper) -SYSCALL(sys_gettimeofday,sys_gettimeofday,sys32_gettimeofday_wrapper) -SYSCALL(sys_settimeofday,sys_settimeofday,sys32_settimeofday_wrapper) +SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday_wrapper) +SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday_wrapper) SYSCALL(sys_getgroups16,sys_ni_syscall,sys32_getgroups16_wrapper) /* 80 old getgroups16 syscall */ SYSCALL(sys_setgroups16,sys_ni_syscall,sys32_setgroups16_wrapper) /* old setgroups16 syscall */ NI_SYSCALL /* old select syscall */ diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index 73a33dc3bcca..e800503879e4 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -58,15 +58,6 @@ #include #include -/* 32-bit timeval and related flotsam. */ - -static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i) -{ - return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || - (__put_user(i->tv_sec, &o->tv_sec) | - __put_user(i->tv_usec, &o->tv_usec))); -} - #ifdef CONFIG_SYSVIPC asmlinkage long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t ptr, u32 fifth) { @@ -487,59 +478,6 @@ asmlinkage long sys32_delete_module(const char __user *name_user) #endif /* CONFIG_MODULES */ -/* Translations due to time_t size differences. Which affects all - sorts of things, like timeval and itimerval. 
*/ - -extern struct timezone sys_tz; - -asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz) -{ - if (tv) { - struct timeval ktv; - do_gettimeofday(&ktv); - if (put_tv32(tv, &ktv)) - return -EFAULT; - } - if (tz) { - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - return 0; -} - -static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i) -{ - long usec; - - if (!access_ok(VERIFY_READ, i, sizeof(*i))) - return -EFAULT; - if (__get_user(o->tv_sec, &i->tv_sec)) - return -EFAULT; - if (__get_user(usec, &i->tv_usec)) - return -EFAULT; - o->tv_nsec = usec * 1000; - return 0; -} - -asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz) -{ - struct timespec kts; - struct timezone ktz; - - if (tv) { - if (get_ts32(&kts, tv)) - return -EFAULT; - } - if (tz) { - if (copy_from_user(&ktz, tz, sizeof(ktz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &kts : NULL, tz ? 
&ktz : NULL); -} - asmlinkage compat_ssize_t sys32_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S index 5daee4b04dd5..b2fa4c163638 100644 --- a/arch/sparc64/kernel/systbls.S +++ b/arch/sparc64/kernel/systbls.S @@ -41,8 +41,8 @@ sys_call_table32: /*100*/ .word sys32_getpriority, sys32_rt_sigreturn, sys32_rt_sigaction, sys32_rt_sigprocmask, sys32_rt_sigpending .word compat_sys_rt_sigtimedwait, sys32_rt_sigqueueinfo, compat_sys_rt_sigsuspend, sys_setresuid, sys_getresuid /*110*/ .word sys_setresgid, sys_getresgid, sys_setregid, sys_nis_syscall, sys_nis_syscall - .word sys32_getgroups, sys32_gettimeofday, sys32_getrusage, sys_nis_syscall, sys_getcwd -/*120*/ .word compat_sys_readv, compat_sys_writev, sys32_settimeofday, sys_fchown16, sys_fchmod + .word sys32_getgroups, compat_sys_gettimeofday, sys32_getrusage, sys_nis_syscall, sys_getcwd +/*120*/ .word compat_sys_readv, compat_sys_writev, compat_sys_settimeofday, sys_fchown16, sys_fchmod .word sys_nis_syscall, sys_setreuid16, sys_setregid16, sys_rename, sys_truncate /*130*/ .word sys_ftruncate, sys_flock, compat_sys_lstat64, sys_nis_syscall, sys_nis_syscall .word sys_nis_syscall, sys32_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64 diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index eb4314768bf7..256b00b61892 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -571,8 +571,8 @@ ia32_sys_call_table: .quad compat_sys_setrlimit /* 75 */ .quad compat_sys_old_getrlimit /* old_getrlimit */ .quad compat_sys_getrusage - .quad sys32_gettimeofday - .quad sys32_settimeofday + .quad compat_sys_gettimeofday + .quad compat_sys_settimeofday .quad sys_getgroups16 /* 80 */ .quad sys_setgroups16 .quad sys32_old_select diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 4d3ad8d78a4d..2e09dcd3c0a6 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -367,75 
+367,11 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, return 0; } -static inline long get_tv32(struct timeval *o, struct compat_timeval __user *i) -{ - int err = -EFAULT; - - if (access_ok(VERIFY_READ, i, sizeof(*i))) { - err = __get_user(o->tv_sec, &i->tv_sec); - err |= __get_user(o->tv_usec, &i->tv_usec); - } - return err; -} - -static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i) -{ - int err = -EFAULT; - - if (access_ok(VERIFY_WRITE, o, sizeof(*o))) { - err = __put_user(i->tv_sec, &o->tv_sec); - err |= __put_user(i->tv_usec, &o->tv_usec); - } - return err; -} - asmlinkage long sys32_alarm(unsigned int seconds) { return alarm_setitimer(seconds); } -/* - * Translations due to time_t size differences. Which affects all - * sorts of things, like timeval and itimerval. - */ -asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz) -{ - if (tv) { - struct timeval ktv; - - do_gettimeofday(&ktv); - if (put_tv32(tv, &ktv)) - return -EFAULT; - } - if (tz) { - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - return 0; -} - -asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz) -{ - struct timeval ktv; - struct timespec kts; - struct timezone ktz; - - if (tv) { - if (get_tv32(&ktv, tv)) - return -EFAULT; - kts.tv_sec = ktv.tv_sec; - kts.tv_nsec = ktv.tv_usec * NSEC_PER_USEC; - } - if (tz) { - if (copy_from_user(&ktz, tz, sizeof(ktz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &kts : NULL, tz ? 
&ktz : NULL); -} - struct sel_arg_struct { unsigned int n; unsigned int inp; diff --git a/include/linux/compat.h b/include/linux/compat.h index 999dddd8d939..f061a1ea1b74 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -234,6 +234,11 @@ extern int get_compat_itimerspec(struct itimerspec *dst, extern int put_compat_itimerspec(struct compat_itimerspec __user *dst, const struct itimerspec *src); +asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz); +asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz); + asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); extern int compat_printk(const char *fmt, ...); diff --git a/include/linux/time.h b/include/linux/time.h index e15206a7e82e..51e883df0fa5 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -29,6 +29,8 @@ struct timezone { #ifdef __KERNEL__ +extern struct timezone sys_tz; + /* Parameters used to convert the timespec values: */ #define MSEC_PER_SEC 1000L #define USEC_PER_MSEC 1000L diff --git a/kernel/compat.c b/kernel/compat.c index 32c254a8ab9a..143990e48cb9 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -26,6 +26,64 @@ #include +/* + * Note that the native side is already converted to a timespec, because + * that's what we want anyway. + */ +static int compat_get_timeval(struct timespec *o, + struct compat_timeval __user *i) +{ + long usec; + + if (get_user(o->tv_sec, &i->tv_sec) || + get_user(usec, &i->tv_usec)) + return -EFAULT; + o->tv_nsec = usec * 1000; + return 0; +} + +static int compat_put_timeval(struct compat_timeval __user *o, + struct timeval *i) +{ + return (put_user(i->tv_sec, &o->tv_sec) || + put_user(i->tv_usec, &o->tv_usec)) ? 
-EFAULT : 0; +} + +asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz) +{ + if (tv) { + struct timeval ktv; + do_gettimeofday(&ktv); + if (compat_put_timeval(tv, &ktv)) + return -EFAULT; + } + if (tz) { + if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) + return -EFAULT; + } + + return 0; +} + +asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz) +{ + struct timespec kts; + struct timezone ktz; + + if (tv) { + if (compat_get_timeval(&kts, tv)) + return -EFAULT; + } + if (tz) { + if (copy_from_user(&ktz, tz, sizeof(ktz))) + return -EFAULT; + } + + return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); +} + int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts) { return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) || -- cgit v1.2.3 From 71088785c6bc68fddb450063d57b1bd1c78e0ea1 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sat, 18 Oct 2008 20:25:58 -0700 Subject: mm: cleanup to make remove_memory() arch-neutral There is nothing architecture specific about remove_memory(). remove_memory() function is common for all architectures which support hotplug memory remove. Instead of duplicating it in every architecture, collapse them into arch neutral function. 
[akpm@linux-foundation.org: fix the export] Signed-off-by: Badari Pulavarty Cc: Yasunori Goto Cc: Gary Hade Cc: Mel Gorman Cc: Yasunori Goto Cc: "Luck, Tony" Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/init.c | 17 ----------------- arch/powerpc/mm/mem.c | 17 ----------------- arch/s390/mm/init.c | 11 ----------- mm/memory_hotplug.c | 12 +++++++++++- 4 files changed, 11 insertions(+), 46 deletions(-) (limited to 'arch/s390') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index f482a9098e32..054bcd9439aa 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -700,23 +700,6 @@ int arch_add_memory(int nid, u64 start, u64 size) return ret; } -#ifdef CONFIG_MEMORY_HOTREMOVE -int remove_memory(u64 start, u64 size) -{ - unsigned long start_pfn, end_pfn; - unsigned long timeout = 120 * HZ; - int ret; - start_pfn = start >> PAGE_SHIFT; - end_pfn = start_pfn + (size >> PAGE_SHIFT); - ret = offline_pages(start_pfn, end_pfn, timeout); - if (ret) - goto out; - /* we can free mem_map at this point */ -out: - return ret; -} -EXPORT_SYMBOL_GPL(remove_memory); -#endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 98d7bf99533a..b9e1a1da6e52 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -134,23 +134,6 @@ int arch_add_memory(int nid, u64 start, u64 size) return __add_pages(zone, start_pfn, nr_pages); } - -#ifdef CONFIG_MEMORY_HOTREMOVE -int remove_memory(u64 start, u64 size) -{ - unsigned long start_pfn, end_pfn; - int ret; - - start_pfn = start >> PAGE_SHIFT; - end_pfn = start_pfn + (size >> PAGE_SHIFT); - ret = offline_pages(start_pfn, end_pfn, 120 * HZ); - if (ret) - goto out; - /* Arch-specific calls go here - next patch */ -out: - return ret; -} -#endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTPLUG */ /* diff --git a/arch/s390/mm/init.c 
b/arch/s390/mm/init.c index 1169130a97ef..158b0d6d7046 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -189,14 +189,3 @@ int arch_add_memory(int nid, u64 start, u64 size) return rc; } #endif /* CONFIG_MEMORY_HOTPLUG */ - -#ifdef CONFIG_MEMORY_HOTREMOVE -int remove_memory(u64 start, u64 size) -{ - unsigned long start_pfn, end_pfn; - - start_pfn = PFN_DOWN(start); - end_pfn = start_pfn + PFN_DOWN(size); - return offline_pages(start_pfn, end_pfn, 120 * HZ); -} -#endif /* CONFIG_MEMORY_HOTREMOVE */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 89fee2dcb039..c299d083d8e2 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -849,10 +850,19 @@ failed_removal: return ret; } + +int remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn, end_pfn; + + start_pfn = PFN_DOWN(start); + end_pfn = start_pfn + PFN_DOWN(size); + return offline_pages(start_pfn, end_pfn, 120 * HZ); +} #else int remove_memory(u64 start, u64 size) { return -EINVAL; } -EXPORT_SYMBOL_GPL(remove_memory); #endif /* CONFIG_MEMORY_HOTREMOVE */ +EXPORT_SYMBOL_GPL(remove_memory); -- cgit v1.2.3 From 83224b08372be48d5fcefedc4886457da29130c8 Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Sat, 18 Oct 2008 20:27:18 -0700 Subject: container freezer: add TIF_FREEZE flag to all architectures This patch series introduces a cgroup subsystem that utilizes the swsusp freezer to freeze a group of tasks. It's immediately useful for batch job management scripts. It should also be useful in the future for implementing container checkpoint/restart. The freezer subsystem in the container filesystem defines a cgroup file named freezer.state. Reading freezer.state will return the current state of the cgroup. Writing "FROZEN" to the state file will freeze all tasks in the cgroup. Subsequently writing "RUNNING" will unfreeze the tasks in the cgroup. 
* Examples of usage : # mkdir /containers # mount -t cgroup -ofreezer freezer /containers # mkdir /containers/0 # echo $some_pid > /containers/0/tasks to get status of the freezer subsystem : # cat /containers/0/freezer.state RUNNING to freeze all tasks in the container : # echo FROZEN > /containers/0/freezer.state # cat /containers/0/freezer.state FREEZING # cat /containers/0/freezer.state FROZEN to unfreeze all tasks in the container : # echo RUNNING > /containers/0/freezer.state # cat /containers/0/freezer.state RUNNING This patch: This is the first step in making refrigerator() available to all architectures, even those without power management. The purpose of such a change is to be able to use the refrigerator() in a new control group subsystem which will implement a control group freezer. [akpm@linux-foundation.org: fix sparc] Signed-off-by: Cedric Le Goater Signed-off-by: Matt Helsley Acked-by: Pavel Machek Acked-by: Serge E. Hallyn Acked-by: Rafael J. Wysocki Acked-by: Nigel Cunningham Tested-by: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/thread_info.h | 2 ++ arch/avr32/include/asm/thread_info.h | 1 + arch/h8300/include/asm/thread_info.h | 2 ++ arch/m68knommu/include/asm/thread_info.h | 2 ++ arch/s390/include/asm/thread_info.h | 2 ++ arch/sparc/include/asm/thread_info_32.h | 2 ++ arch/sparc/include/asm/thread_info_64.h | 2 ++ include/asm-cris/thread_info.h | 2 ++ include/asm-m68k/thread_info.h | 1 + include/asm-parisc/thread_info.h | 2 ++ include/asm-um/thread_info.h | 2 ++ include/asm-xtensa/thread_info.h | 2 ++ 12 files changed, 22 insertions(+) (limited to 'arch/s390') diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 15fda4344424..d069526bd767 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -74,12 +74,14 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define TIF_UAC_SIGBUS 7 
#define TIF_MEMDIE 8 #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ +#define TIF_FREEZE 16 /* is freezing for suspend */ #define _TIF_SYSCALL_TRACE (1< Date: Sat, 18 Oct 2008 20:27:21 -0700 Subject: container freezer: implement freezer cgroup subsystem This patch implements a new freezer subsystem in the control groups framework. It provides a way to stop and resume execution of all tasks in a cgroup by writing in the cgroup filesystem. The freezer subsystem in the container filesystem defines a file named freezer.state. Writing "FROZEN" to the state file will freeze all tasks in the cgroup. Subsequently writing "RUNNING" will unfreeze the tasks in the cgroup. Reading will return the current state. * Examples of usage : # mkdir /containers # mount -t cgroup -ofreezer freezer /containers # mkdir /containers/0 # echo $some_pid > /containers/0/tasks to get status of the freezer subsystem : # cat /containers/0/freezer.state RUNNING to freeze all tasks in the container : # echo FROZEN > /containers/0/freezer.state # cat /containers/0/freezer.state FREEZING # cat /containers/0/freezer.state FROZEN to unfreeze all tasks in the container : # echo RUNNING > /containers/0/freezer.state # cat /containers/0/freezer.state RUNNING This is the basic mechanism which should do the right thing for user space tasks in a simple scenario. It's important to note that freezing can be incomplete. In that case we return EBUSY. This means that some tasks in the cgroup are busy doing something that prevents us from completely freezing the cgroup at this time. After EBUSY, the cgroup will remain partially frozen -- reflected by freezer.state reporting "FREEZING" when read. 
The state will remain "FREEZING" until one of these things happens: 1) Userspace cancels the freezing operation by writing "RUNNING" to the freezer.state file 2) Userspace retries the freezing operation by writing "FROZEN" to the freezer.state file (writing "FREEZING" is not legal and returns EIO) 3) The tasks that blocked the cgroup from entering the "FROZEN" state disappear from the cgroup's set of tasks. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: export thaw_process] Signed-off-by: Cedric Le Goater Signed-off-by: Matt Helsley Acked-by: Serge E. Hallyn Tested-by: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/Kconfig | 1 + arch/arm/Kconfig | 2 + arch/avr32/Kconfig | 2 + arch/blackfin/Kconfig | 3 + arch/cris/Kconfig | 2 + arch/frv/Kconfig | 2 + arch/h8300/Kconfig | 2 + arch/ia64/Kconfig | 2 + arch/m32r/Kconfig | 2 + arch/m68k/Kconfig | 2 + arch/m68knommu/Kconfig | 2 + arch/mips/Kconfig | 2 + arch/mn10300/Kconfig | 2 + arch/parisc/Kconfig | 2 + arch/powerpc/Kconfig | 2 + arch/s390/Kconfig | 2 + arch/sh/Kconfig | 2 + arch/sparc/Kconfig | 2 + arch/sparc64/Kconfig | 1 + arch/um/Kconfig | 2 + arch/x86/Kconfig | 1 + arch/xtensa/Kconfig | 1 + include/linux/cgroup_subsys.h | 6 + include/linux/freezer.h | 29 ++-- init/Kconfig | 7 + kernel/Kconfig.freezer | 2 + kernel/Makefile | 1 + kernel/cgroup_freezer.c | 366 ++++++++++++++++++++++++++++++++++++++++++ kernel/freezer.c | 32 ++++ kernel/power/Kconfig | 3 - 30 files changed, 465 insertions(+), 22 deletions(-) create mode 100644 kernel/Kconfig.freezer create mode 100644 kernel/cgroup_freezer.c (limited to 'arch/s390') diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index a0f642b6a4b9..6110197757a3 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -70,6 +70,7 @@ config AUTO_IRQ_AFFINITY default y source "init/Kconfig" +source "kernel/Kconfig.freezer" menu "System setup" diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 
4853f9df37bd..df39d20f7425 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -192,6 +192,8 @@ config VECTORS_BASE source "init/Kconfig" +source "kernel/Kconfig.freezer" + menu "System Type" choice diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index 7c239a916275..33a5b2969eb4 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -72,6 +72,8 @@ config GENERIC_BUG source "init/Kconfig" +source "kernel/Kconfig.freezer" + menu "System Type and features" source "kernel/time/Kconfig" diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index 8102c79aaa94..29e71ed6b8a7 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -64,8 +64,11 @@ config HARDWARE_PM depends on OPROFILE source "init/Kconfig" + source "kernel/Kconfig.preempt" +source "kernel/Kconfig.freezer" + menu "Blackfin Processor Options" comment "Processor and Board Settings" diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 9389d38f222f..07335e719bf8 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -62,6 +62,8 @@ config HZ source "init/Kconfig" +source "kernel/Kconfig.freezer" + menu "General setup" source "fs/Kconfig.binfmt" diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index a5aac1b07562..9d1552a9ee2c 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -66,6 +66,8 @@ mainmenu "Fujitsu FR-V Kernel Configuration" source "init/Kconfig" +source "kernel/Kconfig.freezer" + menu "Fujitsu FR-V system setup" diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index c7966746fbfe..bd1995403c67 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -90,6 +90,8 @@ config HZ source "init/Kconfig" +source "kernel/Kconfig.freezer" + source "arch/h8300/Kconfig.cpu" menu "Executable file formats" diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 3b7aa38254a8..912c57db2d21 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -7,6 +7,8 @@ mainmenu "IA-64 Linux Kernel Configuration" source "init/Kconfig" +source "kernel/Kconfig.freezer" + menu 
"Processor type and features" config IA64 diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 00289c178f89..dbaed4a63815 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -42,6 +42,8 @@ config HZ source "init/Kconfig" +source "kernel/Kconfig.freezer" + menu "Processor type and features" diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 677c93a490f6..836fb66f080d 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -62,6 +62,8 @@ mainmenu "Linux/68k Kernel Configuration" source "init/Kconfig" +source "kernel/Kconfig.freezer" + menu "Platform dependent setup" config EISA diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig index 0a8998315e5e..76b66feb74df 100644 --- a/arch/m68knommu/Kconfig +++ b/arch/m68knommu/Kconfig @@ -75,6 +75,8 @@ config NO_IOPORT source "init/Kconfig" +source "kernel/Kconfig.freezer" + menu "Processor type and features" choice diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index b905744d7915..5f149b030c0f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1885,6 +1885,8 @@ config PROBE_INITRD_HEADER add initrd or initramfs image to the kernel image. Otherwise, say N. 
+source "kernel/Kconfig.freezer" + menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)" config HW_HAS_EISA diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index dd557c9cf001..9a9f43358879 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -68,6 +68,8 @@ mainmenu "Matsushita MN10300/AM33 Kernel Configuration" source "init/Kconfig" +source "kernel/Kconfig.freezer" + menu "Matsushita MN10300 system setup" diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 8313fccced5e..2bd1f6ef5db0 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -90,6 +90,8 @@ config ARCH_MAY_HAVE_PC_FDC source "init/Kconfig" +source "kernel/Kconfig.freezer" + menu "Processor type and features" diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 380baa1780e9..9391199d9e77 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -230,6 +230,8 @@ config PPC_OF_PLATFORM_PCI source "init/Kconfig" +source "kernel/Kconfig.freezer" + source "arch/powerpc/sysdev/Kconfig" source "arch/powerpc/platforms/Kconfig" diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index bc581d8a7cd9..70b7645ce745 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -78,6 +78,8 @@ config S390 source "init/Kconfig" +source "kernel/Kconfig.freezer" + menu "Base setup" comment "Processor type and features" diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 5131d50f851a..2ed5713b7540 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -106,6 +106,8 @@ config IO_TRAPPED source "init/Kconfig" +source "kernel/Kconfig.freezer" + menu "System type" # diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 97671dac12a6..e594559c8dba 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -37,6 +37,8 @@ config HZ source "init/Kconfig" +source "kernel/Kconfig.freezer" + menu "General machine setup" config SMP diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 5446e2a499b1..035b15af90d8 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -96,6 
+96,7 @@ config GENERIC_HARDIRQS_NO__DO_IRQ def_bool y source "init/Kconfig" +source "kernel/Kconfig.freezer" menu "Processor type and features" diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 6976812cfb18..393bccfe1785 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -229,6 +229,8 @@ endmenu source "init/Kconfig" +source "kernel/Kconfig.freezer" + source "drivers/block/Kconfig" source "arch/um/Kconfig.char" diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bd3c2c53873e..49349ba77d80 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -193,6 +193,7 @@ config X86_TRAMPOLINE config KTIME_SCALAR def_bool X86_32 source "init/Kconfig" +source "kernel/Kconfig.freezer" menu "Processor type and features" diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 02e417d3d8e9..a213260b51e5 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -55,6 +55,7 @@ config HZ default 100 source "init/Kconfig" +source "kernel/Kconfig.freezer" menu "Processor type and features" diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index e2877454ec82..9c22396e8b50 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -48,3 +48,9 @@ SUBSYS(devices) #endif /* */ + +#ifdef CONFIG_CGROUP_FREEZER +SUBSYS(freezer) +#endif + +/* */ diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 17e3bb42dd3c..8f225339eee9 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -46,26 +46,11 @@ static inline bool should_send_signal(struct task_struct *p) /* * Wake up a frozen process - * - * task_lock() is taken to prevent the race with refrigerator() which may - * occur if the freezing of tasks fails. Namely, without the lock, if the - * freezing of tasks failed, thaw_tasks() might have run before a task in - * refrigerator() could call frozen_process(), in which case the task would be - * frozen and no one would thaw it. 
*/ -static inline int thaw_process(struct task_struct *p) -{ - task_lock(p); - if (frozen(p)) { - p->flags &= ~PF_FROZEN; - task_unlock(p); - wake_up_process(p); - return 1; - } - clear_freeze_flag(p); - task_unlock(p); - return 0; -} +extern int __thaw_process(struct task_struct *p); + +/* Takes and releases task alloc lock using task_lock() */ +extern int thaw_process(struct task_struct *p); extern void refrigerator(void); extern int freeze_processes(void); @@ -83,6 +68,12 @@ static inline int try_to_freeze(void) extern bool freeze_task(struct task_struct *p, bool sig_only); extern void cancel_freezing(struct task_struct *p); +#ifdef CONFIG_CGROUP_FREEZER +extern int cgroup_frozen(struct task_struct *task); +#else /* !CONFIG_CGROUP_FREEZER */ +static inline int cgroup_frozen(struct task_struct *task) { return 0; } +#endif /* !CONFIG_CGROUP_FREEZER */ + /* * The PF_FREEZER_SKIP flag should be set by a vfork parent right before it * calls wait_for_completion(&vfork) and reset right after it returns from this diff --git a/init/Kconfig b/init/Kconfig index 5ceff3249a2d..8828ed0b2051 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -299,6 +299,13 @@ config CGROUP_NS for instance virtual servers and checkpoint/restart jobs. +config CGROUP_FREEZER + bool "control group freezer subsystem" + depends on CGROUPS + help + Provides a way to freeze and unfreeze all tasks in a + cgroup. 
+ config CGROUP_DEVICE bool "Device controller for cgroups" depends on CGROUPS && EXPERIMENTAL diff --git a/kernel/Kconfig.freezer b/kernel/Kconfig.freezer new file mode 100644 index 000000000000..a3bb4cb52539 --- /dev/null +++ b/kernel/Kconfig.freezer @@ -0,0 +1,2 @@ +config FREEZER + def_bool PM_SLEEP || CGROUP_FREEZER diff --git a/kernel/Makefile b/kernel/Makefile index e8194d15d5f4..066550aa61c5 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_CGROUPS) += cgroup.o obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o +obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o obj-$(CONFIG_CPUSETS) += cpuset.o obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o obj-$(CONFIG_UTS_NS) += utsname.o diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c new file mode 100644 index 000000000000..b08722de610c --- /dev/null +++ b/kernel/cgroup_freezer.c @@ -0,0 +1,366 @@ +/* + * cgroup_freezer.c - control group freezer subsystem + * + * Copyright IBM Corporation, 2007 + * + * Author : Cedric Le Goater + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#include +#include +#include +#include +#include +#include + +enum freezer_state { + STATE_RUNNING = 0, + STATE_FREEZING, + STATE_FROZEN, +}; + +struct freezer { + struct cgroup_subsys_state css; + enum freezer_state state; + spinlock_t lock; /* protects _writes_ to state */ +}; + +static inline struct freezer *cgroup_freezer( + struct cgroup *cgroup) +{ + return container_of( + cgroup_subsys_state(cgroup, freezer_subsys_id), + struct freezer, css); +} + +static inline struct freezer *task_freezer(struct task_struct *task) +{ + return container_of(task_subsys_state(task, freezer_subsys_id), + struct freezer, css); +} + +int cgroup_frozen(struct task_struct *task) +{ + struct freezer *freezer; + enum freezer_state state; + + task_lock(task); + freezer = task_freezer(task); + state = freezer->state; + task_unlock(task); + + return state == STATE_FROZEN; +} + +/* + * cgroups_write_string() limits the size of freezer state strings to + * CGROUP_LOCAL_BUFFER_SIZE + */ +static const char *freezer_state_strs[] = { + "RUNNING", + "FREEZING", + "FROZEN", +}; + +/* + * State diagram + * Transitions are caused by userspace writes to the freezer.state file. + * The values in parenthesis are state labels. The rest are edge labels. 
+ * + * (RUNNING) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN) + * ^ ^ | | + * | \_______RUNNING_______/ | + * \_____________________________RUNNING___________/ + */ + +struct cgroup_subsys freezer_subsys; + +/* Locks taken and their ordering + * ------------------------------ + * css_set_lock + * cgroup_mutex (AKA cgroup_lock) + * task->alloc_lock (AKA task_lock) + * freezer->lock + * task->sighand->siglock + * + * cgroup code forces css_set_lock to be taken before task->alloc_lock + * + * freezer_create(), freezer_destroy(): + * cgroup_mutex [ by cgroup core ] + * + * can_attach(): + * cgroup_mutex + * + * cgroup_frozen(): + * task->alloc_lock (to get task's cgroup) + * + * freezer_fork() (preserving fork() performance means can't take cgroup_mutex): + * task->alloc_lock (to get task's cgroup) + * freezer->lock + * sighand->siglock (if the cgroup is freezing) + * + * freezer_read(): + * cgroup_mutex + * freezer->lock + * read_lock css_set_lock (cgroup iterator start) + * + * freezer_write() (freeze): + * cgroup_mutex + * freezer->lock + * read_lock css_set_lock (cgroup iterator start) + * sighand->siglock + * + * freezer_write() (unfreeze): + * cgroup_mutex + * freezer->lock + * read_lock css_set_lock (cgroup iterator start) + * task->alloc_lock (to prevent races with freeze_task()) + * sighand->siglock + */ +static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, + struct cgroup *cgroup) +{ + struct freezer *freezer; + + freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL); + if (!freezer) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&freezer->lock); + freezer->state = STATE_RUNNING; + return &freezer->css; +} + +static void freezer_destroy(struct cgroup_subsys *ss, + struct cgroup *cgroup) +{ + kfree(cgroup_freezer(cgroup)); +} + + +static int freezer_can_attach(struct cgroup_subsys *ss, + struct cgroup *new_cgroup, + struct task_struct *task) +{ + struct freezer *freezer; + int retval = 0; + + /* + * The call to cgroup_lock() in the 
freezer.state write method prevents + * a write to that file racing against an attach, and hence the + * can_attach() result will remain valid until the attach completes. + */ + freezer = cgroup_freezer(new_cgroup); + if (freezer->state == STATE_FROZEN) + retval = -EBUSY; + return retval; +} + +static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) +{ + struct freezer *freezer; + + task_lock(task); + freezer = task_freezer(task); + task_unlock(task); + + BUG_ON(freezer->state == STATE_FROZEN); + spin_lock_irq(&freezer->lock); + /* Locking avoids race with FREEZING -> RUNNING transitions. */ + if (freezer->state == STATE_FREEZING) + freeze_task(task, true); + spin_unlock_irq(&freezer->lock); +} + +/* + * caller must hold freezer->lock + */ +static void check_if_frozen(struct cgroup *cgroup, + struct freezer *freezer) +{ + struct cgroup_iter it; + struct task_struct *task; + unsigned int nfrozen = 0, ntotal = 0; + + cgroup_iter_start(cgroup, &it); + while ((task = cgroup_iter_next(cgroup, &it))) { + ntotal++; + /* + * Task is frozen or will freeze immediately when next it gets + * woken + */ + if (frozen(task) || + (task_is_stopped_or_traced(task) && freezing(task))) + nfrozen++; + } + + /* + * Transition to FROZEN when no new tasks can be added ensures + * that we never exist in the FROZEN state while there are unfrozen + * tasks. + */ + if (nfrozen == ntotal) + freezer->state = STATE_FROZEN; + cgroup_iter_end(cgroup, &it); +} + +static int freezer_read(struct cgroup *cgroup, struct cftype *cft, + struct seq_file *m) +{ + struct freezer *freezer; + enum freezer_state state; + + if (!cgroup_lock_live_group(cgroup)) + return -ENODEV; + + freezer = cgroup_freezer(cgroup); + spin_lock_irq(&freezer->lock); + state = freezer->state; + if (state == STATE_FREEZING) { + /* We change from FREEZING to FROZEN lazily if the cgroup was + * only partially frozen when we exitted write. 
*/ + check_if_frozen(cgroup, freezer); + state = freezer->state; + } + spin_unlock_irq(&freezer->lock); + cgroup_unlock(); + + seq_puts(m, freezer_state_strs[state]); + seq_putc(m, '\n'); + return 0; +} + +static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) +{ + struct cgroup_iter it; + struct task_struct *task; + unsigned int num_cant_freeze_now = 0; + + freezer->state = STATE_FREEZING; + cgroup_iter_start(cgroup, &it); + while ((task = cgroup_iter_next(cgroup, &it))) { + if (!freeze_task(task, true)) + continue; + if (task_is_stopped_or_traced(task) && freezing(task)) + /* + * The freeze flag is set so these tasks will + * immediately go into the fridge upon waking. + */ + continue; + if (!freezing(task) && !freezer_should_skip(task)) + num_cant_freeze_now++; + } + cgroup_iter_end(cgroup, &it); + + return num_cant_freeze_now ? -EBUSY : 0; +} + +static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) +{ + struct cgroup_iter it; + struct task_struct *task; + + cgroup_iter_start(cgroup, &it); + while ((task = cgroup_iter_next(cgroup, &it))) { + int do_wake; + + task_lock(task); + do_wake = __thaw_process(task); + task_unlock(task); + if (do_wake) + wake_up_process(task); + } + cgroup_iter_end(cgroup, &it); + freezer->state = STATE_RUNNING; + + return 0; +} + +static int freezer_change_state(struct cgroup *cgroup, + enum freezer_state goal_state) +{ + struct freezer *freezer; + int retval = 0; + + freezer = cgroup_freezer(cgroup); + spin_lock_irq(&freezer->lock); + check_if_frozen(cgroup, freezer); /* may update freezer->state */ + if (goal_state == freezer->state) + goto out; + switch (freezer->state) { + case STATE_RUNNING: + retval = try_to_freeze_cgroup(cgroup, freezer); + break; + case STATE_FREEZING: + if (goal_state == STATE_FROZEN) { + /* Userspace is retrying after + * "/bin/echo FROZEN > freezer.state" returned -EBUSY */ + retval = try_to_freeze_cgroup(cgroup, freezer); + break; + } + /* state == FREEZING and 
goal_state == RUNNING, so unfreeze */ + case STATE_FROZEN: + retval = unfreeze_cgroup(cgroup, freezer); + break; + default: + break; + } +out: + spin_unlock_irq(&freezer->lock); + + return retval; +} + +static int freezer_write(struct cgroup *cgroup, + struct cftype *cft, + const char *buffer) +{ + int retval; + enum freezer_state goal_state; + + if (strcmp(buffer, freezer_state_strs[STATE_RUNNING]) == 0) + goal_state = STATE_RUNNING; + else if (strcmp(buffer, freezer_state_strs[STATE_FROZEN]) == 0) + goal_state = STATE_FROZEN; + else + return -EIO; + + if (!cgroup_lock_live_group(cgroup)) + return -ENODEV; + retval = freezer_change_state(cgroup, goal_state); + cgroup_unlock(); + return retval; +} + +static struct cftype files[] = { + { + .name = "state", + .read_seq_string = freezer_read, + .write_string = freezer_write, + }, +}; + +static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup) +{ + return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files)); +} + +struct cgroup_subsys freezer_subsys = { + .name = "freezer", + .create = freezer_create, + .destroy = freezer_destroy, + .populate = freezer_populate, + .subsys_id = freezer_subsys_id, + .can_attach = freezer_can_attach, + .attach = NULL, + .fork = freezer_fork, + .exit = NULL, +}; diff --git a/kernel/freezer.c b/kernel/freezer.c index cb0931f89306..ba6248b323ef 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -120,3 +120,35 @@ void cancel_freezing(struct task_struct *p) spin_unlock_irqrestore(&p->sighand->siglock, flags); } } + +/* + * Wake up a frozen process + * + * task_lock() is needed to prevent the race with refrigerator() which may + * occur if the freezing of tasks fails. Namely, without the lock, if the + * freezing of tasks failed, thaw_tasks() might have run before a task in + * refrigerator() could call frozen_process(), in which case the task would be + * frozen and no one would thaw it. 
+ */ +int __thaw_process(struct task_struct *p) +{ + if (frozen(p)) { + p->flags &= ~PF_FROZEN; + return 1; + } + clear_freeze_flag(p); + return 0; +} + +int thaw_process(struct task_struct *p) +{ + task_lock(p); + if (__thaw_process(p) == 1) { + task_unlock(p); + wake_up_process(p); + return 1; + } + task_unlock(p); + return 0; +} +EXPORT_SYMBOL(thaw_process); diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index ebdd7f55273d..dcd165f92a88 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -85,9 +85,6 @@ config PM_SLEEP depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE default y -config FREEZER - def_bool PM_SLEEP - config SUSPEND bool "Suspend to RAM and standby" depends on PM && ARCH_SUSPEND_POSSIBLE -- cgit v1.2.3 From 250cf776f74b5932a1977d0489cae9206e2351dd Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 28 Oct 2008 11:10:15 +0100 Subject: [S390] pgtables: Fix race in enable_sie vs. page table ops The current enable_sie code sets the mm->context.pgstes bit to tell dup_mm that the new mm should have extended page tables. This bit is also used by the s390 specific page table primitives to decide about the page table layout - which means context.pgstes has two meanings. This can cause any kind of bugs. For example - e.g. shrink_zone can call ptep_clear_flush_young while enable_sie is running. ptep_clear_flush_young will test for context.pgstes. Since enable_sie changed that value of the old struct mm without changing the page table layout ptep_clear_flush_young will do the wrong thing. 
The solution is to split pgstes into two bits - one for the allocation - one for the current state Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu.h | 3 ++- arch/s390/include/asm/mmu_context.h | 19 ++++++++++++++++--- arch/s390/include/asm/pgtable.h | 8 ++++---- arch/s390/mm/pgtable.c | 16 ++++++++-------- 4 files changed, 30 insertions(+), 16 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 5dd5e7b3476f..d2b4ff831477 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -7,7 +7,8 @@ typedef struct { unsigned long asce_bits; unsigned long asce_limit; int noexec; - int pgstes; + int has_pgste; /* The mmu context has extended page tables */ + int alloc_pgste; /* cloned contexts will have extended page tables */ } mm_context_t; #endif diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 4c2fbf48c9c4..28ec870655af 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -20,12 +20,25 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_64BIT mm->context.asce_bits |= _ASCE_TYPE_REGION3; #endif - if (current->mm->context.pgstes) { + if (current->mm->context.alloc_pgste) { + /* + * alloc_pgste indicates, that any NEW context will be created + * with extended page tables. The old context is unchanged. The + * page table allocation and the page table operations will + * look at has_pgste to distinguish normal and extended page + * tables. The only way to create extended page tables is to + * set alloc_pgste and then create a new context (e.g. dup_mm). + * The page table allocation is called after init_new_context + * and if has_pgste is set, it will create extended page + * tables. 
+ */ mm->context.noexec = 0; - mm->context.pgstes = 1; + mm->context.has_pgste = 1; + mm->context.alloc_pgste = 1; } else { mm->context.noexec = s390_noexec; - mm->context.pgstes = 0; + mm->context.has_pgste = 0; + mm->context.alloc_pgste = 0; } mm->context.asce_limit = STACK_TOP_MAX; crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 1a928f84afd6..7fc76133b3e4 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -679,7 +679,7 @@ static inline void pmd_clear(pmd_t *pmd) static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if (mm->context.pgstes) + if (mm->context.has_pgste) ptep_rcp_copy(ptep); pte_val(*ptep) = _PAGE_TYPE_EMPTY; if (mm->context.noexec) @@ -763,7 +763,7 @@ static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, struct page *page; unsigned int skey; - if (!mm->context.pgstes) + if (!mm->context.has_pgste) return -EINVAL; rcp_lock(ptep); pgste = (unsigned long *) (ptep + PTRS_PER_PTE); @@ -794,7 +794,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, int young; unsigned long *pgste; - if (!vma->vm_mm->context.pgstes) + if (!vma->vm_mm->context.has_pgste) return 0; physpage = pte_val(*ptep) & PAGE_MASK; pgste = (unsigned long *) (ptep + PTRS_PER_PTE); @@ -844,7 +844,7 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep) static inline void ptep_invalidate(struct mm_struct *mm, unsigned long address, pte_t *ptep) { - if (mm->context.pgstes) { + if (mm->context.has_pgste) { rcp_lock(ptep); __ptep_ipte(address, ptep); ptep_rcp_copy(ptep); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 3d98ba82ea67..ef3635b52fc0 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -169,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) unsigned long *table; unsigned long bits; - bits = 
(mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; + bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; spin_lock(&mm->page_table_lock); page = NULL; if (!list_empty(&mm->context.pgtable_list)) { @@ -186,7 +186,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) pgtable_page_ctor(page); page->flags &= ~FRAG_MASK; table = (unsigned long *) page_to_phys(page); - if (mm->context.pgstes) + if (mm->context.has_pgste) clear_table_pgstes(table); else clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); @@ -210,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned long bits; - bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; + bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock(&mm->page_table_lock); @@ -257,7 +257,7 @@ int s390_enable_sie(void) struct mm_struct *mm, *old_mm; /* Do we have pgstes? if yes, we are done */ - if (tsk->mm->context.pgstes) + if (tsk->mm->context.has_pgste) return 0; /* lets check if we are allowed to replace the mm */ @@ -269,14 +269,14 @@ int s390_enable_sie(void) } task_unlock(tsk); - /* we copy the mm with pgstes enabled */ - tsk->mm->context.pgstes = 1; + /* we copy the mm and let dup_mm create the page tables with_pgstes */ + tsk->mm->context.alloc_pgste = 1; mm = dup_mm(tsk); - tsk->mm->context.pgstes = 0; + tsk->mm->context.alloc_pgste = 0; if (!mm) return -ENOMEM; - /* Now lets check again if somebody attached ptrace etc */ + /* Now lets check again if something happened */ task_lock(tsk); if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) { -- cgit v1.2.3 From da5aae7036692fa8d03da1b705c76fd750ed9e38 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 28 Oct 2008 11:10:16 +0100 Subject: [S390] Fix sysdev class file creation. 
Use sysdev_class_create_file() to create sysdev class attributes instead of sysfs_create_file(). Using sysfs_create_file() wasn't a very good idea since the show and store functions have a different number of parameters for sysfs files and sysdev class files. In particular the pointer to the buffer is the last argument and therefore accesses to random memory regions happened. Still worked surprisingly well until we got a kernel panic. Cc: stable@kernel.org Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/smp.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 9e8b1f9b8f4d..b5595688a477 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1119,9 +1119,7 @@ out: return rc; } -static ssize_t __ref rescan_store(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, +static ssize_t __ref rescan_store(struct sysdev_class *class, const char *buf, size_t count) { int rc; @@ -1129,12 +1127,10 @@ static ssize_t __ref rescan_store(struct sys_device *dev, rc = smp_rescan_cpus(); return rc ? rc : count; } -static SYSDEV_ATTR(rescan, 0200, NULL, rescan_store); +static SYSDEV_CLASS_ATTR(rescan, 0200, NULL, rescan_store); #endif /* CONFIG_HOTPLUG_CPU */ -static ssize_t dispatching_show(struct sys_device *dev, - struct sysdev_attribute *attr, - char *buf) +static ssize_t dispatching_show(struct sysdev_class *class, char *buf) { ssize_t count; @@ -1144,9 +1140,8 @@ static ssize_t dispatching_show(struct sys_device *dev, return count; } -static ssize_t dispatching_store(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, size_t count) +static ssize_t dispatching_store(struct sysdev_class *dev, const char *buf, + size_t count) { int val, rc; char delim; @@ -1168,7 +1163,8 @@ out: put_online_cpus(); return rc ?
rc : count; } -static SYSDEV_ATTR(dispatching, 0644, dispatching_show, dispatching_store); +static SYSDEV_CLASS_ATTR(dispatching, 0644, dispatching_show, + dispatching_store); static int __init topology_init(void) { @@ -1178,13 +1174,11 @@ static int __init topology_init(void) register_cpu_notifier(&smp_cpu_nb); #ifdef CONFIG_HOTPLUG_CPU - rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj, - &attr_rescan.attr); + rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_rescan); if (rc) return rc; #endif - rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj, - &attr_dispatching.attr); + rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_dispatching); if (rc) return rc; for_each_present_cpu(cpu) { -- cgit v1.2.3 From 13f8b7c5e6fa13622592042f3b5aa88ba785cec2 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 28 Oct 2008 11:10:18 +0100 Subject: [S390] appldata: unsigned ops->size cannot be negative unsigned ops->size cannot be negative Signed-off-by: Roel Kluin Signed-off-by: Martin Schwidefsky --- arch/s390/appldata/appldata_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index a7f8979fb925..a06a47cdd5e0 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -424,7 +424,7 @@ out: */ int appldata_register_ops(struct appldata_ops *ops) { - if ((ops->size > APPLDATA_MAX_REC_SIZE) || (ops->size < 0)) + if (ops->size > APPLDATA_MAX_REC_SIZE) return -EINVAL; ops->ctl_table = kzalloc(4 * sizeof(struct ctl_table), GFP_KERNEL); -- cgit v1.2.3 From 46e7951f9431b5e6bfbeb3044fdb4b522f58101c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 28 Oct 2008 11:10:20 +0100 Subject: [S390] Change default IPL method to IPL_VM. allyesconfig and allmodconfig built kernels have a tape IPL record. A vmreader record makes much more sense, since hardly anybody will ever IPL a kernel from tape. So change the default.
As a side effect I can test these kernels without fiddling around with the kernel config ;) Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 70b7645ce745..5f5c29269fb1 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -384,7 +384,7 @@ config IPL choice prompt "IPL method generated into head.S" depends on IPL - default IPL_TAPE + default IPL_VM help Select "tape" if you want to IPL the image from a Tape. -- cgit v1.2.3 From 7f5a8ba6b0297ca941f43f8f5cbf0e5c8c4dd916 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 28 Oct 2008 11:10:21 +0100 Subject: [S390] No more 4kb stacks. We got a stack overflow with a small stack configuration on a 32 bit system. It just looks like 4kb isn't enough and is too dangerous. So let's get rid of 4kb stacks on 32 bit. But one thing I completely dislike about the call trace below is that just for debugging or tracing purposes sprintf gets called (cio_start_key): /* process condition code */ sprintf(dbf_txt, "ccode:%d", ccode); CIO_TRACE_EVENT(4, dbf_txt); But maybe it's just me who thinks that this could be done better. <4>Kernel stack overflow.
<4>Modules linked in: dm_multipath sunrpc bonding qeth_l2 dm_mod qeth ccwgroup vmur <4>CPU: 1 Not tainted 2.6.27-30.x.20081015-s390default #1 <4>Process httpd (pid: 3807, task: 20ae2df8, ksp: 1666fb78) <4>Krnl PSW : 040c0000 8027098a (number+0xe/0x348) <4> R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:0 PM:0 <4>Krnl GPRS: 00d43318 0027097c 1666f277 9666f270 <4> 00000000 00000000 0000000a ffffffff <4> 9666f270 1666f228 1666f277 1666f098 <4> 00000002 80270982 80271016 1666f098 <4>Krnl Code: 8027097e: f0340dd0a7f1 srp 3536(4,%r0),2033(%r10),4 <4> 80270984: 0f00 clcl %r0,%r0 <4> 80270986: a7840001 brc 8,80270988 <4> >8027098a: 18ef lr %r14,%r15 <4> 8027098c: a7faff68 ahi %r15,-152 <4> 80270990: 18bf lr %r11,%r15 <4> 80270992: 18a2 lr %r10,%r2 <4> 80270994: 1893 lr %r9,%r3 Modified calltrace with annotated stackframe size of each function: stackframe size | 0 304 vsnprintf+850 [0x271016] 1 72 sprintf+74 [0x271522] 2 56 cio_start_key+262 [0x2d4c16] 3 56 ccw_device_start_key+222 [0x2dfe92] 4 56 ccw_device_start+40 [0x2dff28] 5 48 raw3215_start_io+104 [0x30b0f8] 6 56 raw3215_write+494 [0x30ba0a] 7 40 con3215_write+68 [0x30bafc] 8 40 __call_console_drivers+146 [0x12b0fa] 9 32 _call_console_drivers+102 [0x12b192] 10 64 release_console_sem+268 [0x12b614] 11 168 vprintk+462 [0x12bca6] 12 72 printk+68 [0x12bfd0] 13 256 __print_symbol+50 [0x15a882] 14 56 __show_trace+162 [0x103d06] 15 32 show_trace+224 [0x103e70] 16 48 show_stack+152 [0x103f20] 17 56 dump_stack+126 [0x104612] 18 96 __alloc_pages_internal+592 [0x175004] 19 80 cache_alloc_refill+776 [0x196f3c] 20 40 __kmalloc+258 [0x1972ae] 21 40 __alloc_skb+94 [0x328086] 22 32 pskb_copy+50 [0x328252] 23 32 skb_realloc_headroom+110 [0x328a72] 24 104 qeth_l2_hard_start_xmit+378 [0x7803bfde] 25 56 dev_hard_start_xmit+450 [0x32ef6e] 26 56 __qdisc_run+390 [0x3425d6] 27 48 dev_queue_xmit+410 [0x331e06] 28 40 ip_finish_output+308 [0x354ac8] 29 56 ip_output+218 [0x355b6e] 30 24 ip_local_out+56 [0x354584] 31 120 ip_queue_xmit+300 [0x355cec] 
32 96 tcp_transmit_skb+812 [0x367da8] 33 40 tcp_push_one+158 [0x369fda] 34 112 tcp_sendmsg+852 [0x35d5a0] 35 240 sock_sendmsg+164 [0x32035c] 36 56 kernel_sendmsg+86 [0x32064a] 37 88 sock_no_sendpage+98 [0x322b22] 38 104 tcp_sendpage+70 [0x35cc1e] 39 48 sock_sendpage+74 [0x31eb66] 40 64 pipe_to_sendpage+102 [0x1c4b2e] 41 64 __splice_from_pipe+120 [0x1c5340] 42 72 splice_from_pipe+90 [0x1c57e6] 43 56 generic_splice_sendpage+38 [0x1c5832] 44 48 do_splice_from+104 [0x1c4c38] 45 48 direct_splice_actor+52 [0x1c4c88] 46 80 splice_direct_to_actor+180 [0x1c4f80] 47 72 do_splice_direct+70 [0x1c5112] 48 64 do_sendfile+360 [0x19de18] 49 72 sys_sendfile64+126 [0x19df32] 50 336 sysc_do_restart+18 [0x111a1a] Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 14 ++++++-------- arch/s390/include/asm/thread_info.h | 5 ----- 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5f5c29269fb1..8116a3328a19 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -241,19 +241,17 @@ config PACK_STACK Say Y if you are unsure. config SMALL_STACK - bool "Use 4kb/8kb for kernel stack instead of 8kb/16kb" - depends on PACK_STACK && !LOCKDEP + bool "Use 8kb for kernel stack instead of 16kb" + depends on PACK_STACK && 64BIT && !LOCKDEP help If you say Y here and the compiler supports the -mkernel-backchain - option the kernel will use a smaller kernel stack size. For 31 bit - the reduced size is 4kb instead of 8kb and for 64 bit it is 8kb - instead of 16kb. This allows to run more thread on a system and - reduces the pressure on the memory management for higher order - page allocations. + option the kernel will use a smaller kernel stack size. The reduced + size is 8kb instead of 16kb. This allows to run more threads on a + system and reduces the pressure on the memory management for higher + order page allocations. Say N if you are unsure. 
- config CHECK_STACK bool "Detect kernel stack overflow" help diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index de3fad60c682..c1eaf9604da7 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -15,13 +15,8 @@ * Size of kernel stack for each process */ #ifndef __s390x__ -#ifndef __SMALL_STACK #define THREAD_ORDER 1 #define ASYNC_ORDER 1 -#else -#define THREAD_ORDER 0 -#define ASYNC_ORDER 0 -#endif #else /* __s390x__ */ #ifndef __SMALL_STACK #define THREAD_ORDER 2 -- cgit v1.2.3 From ea4bfdf52a5a84492cce881baadc5fab36adeade Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 28 Oct 2008 11:10:22 +0100 Subject: [S390] s390: Fix build for !CONFIG_S390_GUEST + CONFIG_VIRTIO_CONSOLE The s390 kernel does not compile if virtio console is enabled, but guest support is disabled: LD .tmp_vmlinux1 arch/s390/kernel/built-in.o: In function `setup_arch': /space/linux-2.5/arch/s390/kernel/setup.c:773: undefined reference to `s390_virtio_console_init' The fix is related to commit 99e65c92f2bbf84f43766a8bf701e36817d62822 Author: Christian Borntraeger Date: Fri Jul 25 15:50:04 2008 +0200 KVM: s390: Fix guest kconfig Which changed the build process to build kvm_virtio.c only if CONFIG_S390_GUEST is set. We must ifdef the prototype in the header file accordingly. 
Reported-by: Heiko Carstens Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/kvm_virtio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kvm_virtio.h b/arch/s390/include/asm/kvm_virtio.h index 146100224def..c13568b9351c 100644 --- a/arch/s390/include/asm/kvm_virtio.h +++ b/arch/s390/include/asm/kvm_virtio.h @@ -52,7 +52,7 @@ struct kvm_vqconfig { #ifdef __KERNEL__ /* early virtio console setup */ -#ifdef CONFIG_VIRTIO_CONSOLE +#ifdef CONFIG_S390_GUEST extern void s390_virtio_console_init(void); #else static inline void s390_virtio_console_init(void) -- cgit v1.2.3 From fb2e7c5e33b341699f139b2ed972dca0a463a670 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 14 Nov 2008 18:18:00 +0100 Subject: [S390] Fix range for add_active_range() in setup_memory() add_active_range() expects start_pfn + size as end_pfn value, i.e. not the pfn of the last page frame but the one behind that. We used the pfn of the last page frame so far, which can lead to a BUG_ON in move_freepages(), when the kernelcore parameter is specified (page_zone(start_page) != page_zone(end_page)). 
Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 62122bad1e33..400b040df7fa 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -604,13 +604,13 @@ setup_memory(void) if (memory_chunk[i].type != CHUNK_READ_WRITE) continue; start_chunk = PFN_DOWN(memory_chunk[i].addr); - end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size) - 1; + end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size); end_chunk = min(end_chunk, end_pfn); if (start_chunk >= end_chunk) continue; add_active_range(0, start_chunk, end_chunk); pfn = max(start_chunk, start_pfn); - for (; pfn <= end_chunk; pfn++) + for (; pfn < end_chunk; pfn++) page_set_storage_key(PFN_PHYS(pfn), PAGE_DEFAULT_KEY); } -- cgit v1.2.3 From af4c68740e848019d8d14c52704ed8eacceddac6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 14 Nov 2008 18:18:03 +0100 Subject: [S390] lockdep: fix compile bug arch/s390/kernel/built-in.o: In function `cleanup_io_leave_insn': mem_detect.c:(.text+0x10592): undefined reference to `lockdep_sys_exit' Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index ed500ef799b7..5f0c4fba87c3 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1116,6 +1116,8 @@ cleanup_io_leave_insn: .Ltrace_irq_on: .long trace_hardirqs_on .Ltrace_irq_off: .long trace_hardirqs_off +#endif +#ifdef CONFIG_LOCKDEP .Llockdep_sys_exit: .long lockdep_sys_exit #endif -- cgit v1.2.3 From 632448f65001c4935ed0d3bb362017d773da2eca Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 14 Nov 2008 18:18:04 +0100 Subject: [S390] ftrace: disable tracing on idle psw Disable tracing on idle psw. 
Otherwise it would give us huge preempt off times for idle. Which is rather pointless. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/process.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 3e2c05cb6a87..04f8c67a6101 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -136,9 +136,12 @@ static void default_idle(void) return; } trace_hardirqs_on(); + /* Don't trace preempt off for idle. */ + stop_critical_timings(); /* Wait for external, I/O or machine check interrupt. */ __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT); + start_critical_timings(); } void cpu_idle(void) -- cgit v1.2.3 From 50bec4ce5d36ebf96189dcc54e20c7fce4bf61bf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 14 Nov 2008 18:18:05 +0100 Subject: [S390] ftrace: fix kernel stack backchain walking With CONFIG_IRQSOFF_TRACER the trace_hardirqs_off() function includes a call to __builtin_return_address(1). But we call trace_hardirqs_off() from early entry code. There we have just a single stack frame. So this results in a kernel stack backchain walk that would walk beyond the kernel stack. Following the NULL terminated backchain this results in a lowcore read access. To fix this we simply call trace_hardirqs_off_caller() and pass the current instruction pointer. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 18 +++++++++++------- arch/s390/kernel/entry64.S | 11 +++++++---- 2 files changed, 18 insertions(+), 11 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 5f0c4fba87c3..08844fc24a2e 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -61,22 +61,25 @@ STACK_SIZE = 1 << STACK_SHIFT #ifdef CONFIG_TRACE_IRQFLAGS .macro TRACE_IRQS_ON - l %r1,BASED(.Ltrace_irq_on) + basr %r2,%r0 + l %r1,BASED(.Ltrace_irq_on_caller) basr %r14,%r1 .endm .macro TRACE_IRQS_OFF - l %r1,BASED(.Ltrace_irq_off) + basr %r2,%r0 + l %r1,BASED(.Ltrace_irq_off_caller) basr %r14,%r1 .endm .macro TRACE_IRQS_CHECK + basr %r2,%r0 tm SP_PSW(%r15),0x03 # irqs enabled? jz 0f - l %r1,BASED(.Ltrace_irq_on) + l %r1,BASED(.Ltrace_irq_on_caller) basr %r14,%r1 j 1f -0: l %r1,BASED(.Ltrace_irq_off) +0: l %r1,BASED(.Ltrace_irq_off_caller) basr %r14,%r1 1: .endm @@ -1113,9 +1116,10 @@ cleanup_io_leave_insn: .Lschedtail: .long schedule_tail .Lsysc_table: .long sys_call_table #ifdef CONFIG_TRACE_IRQFLAGS -.Ltrace_irq_on: .long trace_hardirqs_on -.Ltrace_irq_off: - .long trace_hardirqs_off +.Ltrace_irq_on_caller: + .long trace_hardirqs_on_caller +.Ltrace_irq_off_caller: + .long trace_hardirqs_off_caller #endif #ifdef CONFIG_LOCKDEP .Llockdep_sys_exit: diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index d7ce150453f2..41aca06682aa 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -61,19 +61,22 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ #ifdef CONFIG_TRACE_IRQFLAGS .macro TRACE_IRQS_ON - brasl %r14,trace_hardirqs_on + basr %r2,%r0 + brasl %r14,trace_hardirqs_on_caller .endm .macro TRACE_IRQS_OFF - brasl %r14,trace_hardirqs_off + basr %r2,%r0 + brasl %r14,trace_hardirqs_off_caller .endm .macro TRACE_IRQS_CHECK + basr %r2,%r0 tm SP_PSW(%r15),0x03 # irqs enabled? 
jz 0f - brasl %r14,trace_hardirqs_on + brasl %r14,trace_hardirqs_on_caller j 1f -0: brasl %r14,trace_hardirqs_off +0: brasl %r14,trace_hardirqs_off_caller 1: .endm #else -- cgit v1.2.3 From 74af283102b358b0da545460d0d176f473e110f6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 14 Nov 2008 18:18:07 +0100 Subject: [S390] cpu topology: fix locking cpu_coregroup_map used to grab a mutex on s390 since it was only called from process context. Since c7c22e4d5c1fdebfac4dba76de7d0338c2b0d832 "block: add support for IO CPU affinity" this is not true anymore. It now also gets called from softirq context. To prevent possible deadlocks change this in architecture code and use a spinlock instead of a mutex. Cc: stable@kernel.org Cc: Jens Axboe Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/topology.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 632b13e10053..a947899dcba1 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -65,18 +65,21 @@ static int machine_has_topology_irq; static struct timer_list topology_timer; static void set_topology_timer(void); static DECLARE_WORK(topology_work, topology_work_fn); +/* topology_lock protects the core linked list */ +static DEFINE_SPINLOCK(topology_lock); cpumask_t cpu_core_map[NR_CPUS]; cpumask_t cpu_coregroup_map(unsigned int cpu) { struct core_info *core = &core_info; + unsigned long flags; cpumask_t mask; cpus_clear(mask); if (!machine_has_topology) return cpu_present_map; - mutex_lock(&smp_cpu_state_mutex); + spin_lock_irqsave(&topology_lock, flags); while (core) { if (cpu_isset(cpu, core->mask)) { mask = core->mask; @@ -84,7 +87,7 @@ cpumask_t cpu_coregroup_map(unsigned int cpu) } core = core->next; } - mutex_unlock(&smp_cpu_state_mutex); + spin_unlock_irqrestore(&topology_lock, flags); if (cpus_empty(mask)) mask = cpumask_of_cpu(cpu); 
return mask; @@ -133,7 +136,7 @@ static void tl_to_cores(struct tl_info *info) union tl_entry *tle, *end; struct core_info *core = &core_info; - mutex_lock(&smp_cpu_state_mutex); + spin_lock_irq(&topology_lock); clear_cores(); tle = info->tle; end = (union tl_entry *)((unsigned long)info + info->length); @@ -157,7 +160,7 @@ static void tl_to_cores(struct tl_info *info) } tle = next_tle(tle); } - mutex_unlock(&smp_cpu_state_mutex); + spin_unlock_irq(&topology_lock); } static void topology_update_polarization_simple(void) -- cgit v1.2.3 From d2f019fe40e8fecd822f87bc759f74925a5c31d6 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Nov 2008 18:18:09 +0100 Subject: [S390] fix s390x_newuname The uname system call for 64 bit compares current->personality without masking the upper 16 bits. If e.g. READ_IMPLIES_EXEC is set the result of a uname system call will always be s390x even if the process uses the s390 personality. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/sys_s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index 5fdb799062b7..4fe952e557ac 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -198,7 +198,7 @@ asmlinkage long s390x_newuname(struct new_utsname __user *name) { int ret = sys_newuname(name); - if (current->personality == PER_LINUX32 && !ret) { + if (personality(current->personality) == PER_LINUX32 && !ret) { ret = copy_to_user(name->machine, "s390\0\0\0\0", 8); if (ret) ret = -EFAULT; } -- cgit v1.2.3