From 7267fa6819467669f5cc2ba81a615dcc88158b4b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 29 Apr 2009 00:16:21 -0400 Subject: tracing: fix ref count in splice pages The pages allocated for the splice binary buffer did not initialize the ref count correctly. This caused pages not to be freed and causes a drastic memory leak. Thanks to logdev I was able to trace the tracer to find where the leak was. [ Impact: stop memory leak when using splice ] Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1ce5dc6372b8..a884c09006c4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3448,6 +3448,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, if (!ref) break; + ref->ref = 1; ref->buffer = info->tr->buffer; ref->page = ring_buffer_alloc_read_page(ref->buffer); if (!ref->page) { -- cgit v1.2.3 From f5f293a4e3d0a0c52cec31de6762c95050156516 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Apr 2009 14:44:49 +0200 Subject: sched: account system time properly Andrew Gallatin reported that IRQ and SOFTIRQ times were sometime not reported correctly on recent kernels, and even bisected to commit 457533a7d3402d1d91fbc125c8bd1bd16dcd3cd4 ([PATCH] fix scaled & unscaled cputime accounting) as the first bad commit. Further analysis pointed that commit 79741dd35713ff4f6fd0eafd59fa94e8a4ba922d ([PATCH] idle cputime accounting) was the real cause of the problem. account_process_tick() was not taking into account timer IRQ interrupting the idle task servicing a hard or soft irq. On mostly idle cpu, irqs were thus not accounted and top or mpstat could tell user/admin that cpu was 100 % idle, 0.00 % irq, 0.00 % softirq, while it was not. [ Impact: fix occasionally incorrect CPU statistics in top/mpstat ] Reported-by: Andrew Gallatin Re-reported-by: Andrew Morton Signed-off-by: Eric Dumazet Acked-by: Martin Schwidefsky Cc: rick.jones2@hp.com Cc: brice@myri.com Cc: Paul Mackerras Cc: Benjamin Herrenschmidt LKML-Reference: <49F84BC1.7080602@cosmosbay.com> Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index b902e587a3a0..26efa475bdc1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4732,7 +4732,7 @@ void account_process_tick(struct task_struct *p, int user_tick) if (user_tick) account_user_time(p, one_jiffy, one_jiffy_scaled); - else if (p != rq->idle) + else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) account_system_time(p, HARDIRQ_OFFSET, one_jiffy, one_jiffy_scaled); else -- cgit v1.2.3 From 6e85c5ba73c07b990798087e9b858c065db2b234 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Wed, 29 Apr 2009 19:14:32 -0400 Subject: kernel/posix-cpu-timers.c: fix sparse warning Sparse reports the following in kernel/posix-cpu-timers.c: warning: symbol 'firing' shadows an earlier one Signed-off-by: H Hartley Sweeten Cc: Subrata Modak LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/posix-cpu-timers.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index c9dcf98b4463..bece7c0b67b2 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1420,19 +1420,19 @@ void run_posix_cpu_timers(struct task_struct *tsk) * timer call will interfere. */ list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) { - int firing; + int cpu_firing; + spin_lock(&timer->it_lock); list_del_init(&timer->it.cpu.entry); - firing = timer->it.cpu.firing; + cpu_firing = timer->it.cpu.firing; timer->it.cpu.firing = 0; /* * The firing flag is -1 if we collided with a reset * of the timer, which already reported this * almost-firing as an overrun. So don't generate an event. */ - if (likely(firing >= 0)) { + if (likely(cpu_firing >= 0)) cpu_timer_fire(timer); - } spin_unlock(&timer->it_lock); } } -- cgit v1.2.3 From d7226fb6ec5d4f325e4e7fd905894e2ea3eb3ae0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 1 May 2009 15:16:04 +0200 Subject: Revert "genirq: assert that irq handlers are indeed running in hardirq context" This reverts commit 044d408409cc4e1bc75c886e27ca85c270db104c. The commit added a warning when handle_IRQ_event() is called outside of hard interrupt context. This breaks the generic tasklet based interrupt resend mechanism which is used when the hardware has no way to retrigger the interrupt. So we get a warning for a use case which is correct and worked for years. Remove it. Signed-off-by: Thomas Gleixner --- kernel/irq/handle.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index d82142be8dd2..26e08754744f 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -363,8 +363,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) irqreturn_t ret, retval = IRQ_NONE; unsigned int status = 0; - WARN_ONCE(!in_irq(), "BUG: IRQ handler called from non-hardirq context!"); - if (!(action->flags & IRQF_DISABLED)) local_irq_enable_in_hardirq(); -- cgit v1.2.3 From 74a03b69d1b5ce00a568e142ca97e76b7f5239c6 Mon Sep 17 00:00:00 2001 From: john stultz Date: Fri, 1 May 2009 13:10:25 -0700 Subject: clockevents: prevent endless loop in tick_handle_periodic() tick_handle_periodic() can lock up hard when a one shot clock event device is used in combination with jiffies clocksource. Avoid an endless loop issue by requiring that a highres valid clocksource be installed before we call tick_periodic() in a loop when using ONESHOT mode. The result is we will only increment jiffies once per interrupt until a continuous hardware clocksource is available. Without this, we can run into a endless loop, where each cycle through the loop, jiffies is updated which increments time by tick_period or more (due to clock steering), which can cause the event programming to think the next event was before the newly incremented time and fail causing tick_periodic() to be called again and the whole process loops forever. [ Impact: prevent hard lock up ] Signed-off-by: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Cc: stable@kernel.org --- kernel/time/tick-common.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 21a5ca849514..83c4417b6a3c 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -93,7 +93,17 @@ void tick_handle_periodic(struct clock_event_device *dev) for (;;) { if (!clockevents_program_event(dev, next, ktime_get())) return; - tick_periodic(cpu); + /* + * Have to be careful here. If we're in oneshot mode, + * before we call tick_periodic() in a loop, we need + * to be sure we're using a real hardware clocksource. + * Otherwise we could get trapped in an infinite + * loop, as the tick_periodic() increments jiffies, + * when then will increment time, posibly causing + * the loop to trigger again and again. + */ + if (timekeeping_valid_for_hres()) + tick_periodic(cpu); next = ktime_add(next, tick_period); } } -- cgit v1.2.3 From 9e4a5bda89034502fb144331e71a0efdfd5fae97 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 30 Apr 2009 15:08:57 -0700 Subject: mm: prevent divide error for small values of vm_dirty_bytes Avoid setting less than two pages for vm_dirty_bytes: this is necessary to avoid potential division by 0 (like the following) in get_dirty_limits(). [ 49.951610] divide error: 0000 [#1] PREEMPT SMP [ 49.952195] last sysfs file: /sys/devices/pci0000:00/0000:00:01.1/host0/target0:0:0/0:0:0:0/block/sda/uevent [ 49.952195] CPU 1 [ 49.952195] Modules linked in: pcspkr [ 49.952195] Pid: 3064, comm: dd Not tainted 2.6.30-rc3 #1 [ 49.952195] RIP: 0010:[] [] get_dirty_limits+0xe9/0x2c0 [ 49.952195] RSP: 0018:ffff88001de03a98 EFLAGS: 00010202 [ 49.952195] RAX: 00000000000000c0 RBX: ffff88001de03b80 RCX: 28f5c28f5c28f5c3 [ 49.952195] RDX: 0000000000000000 RSI: 00000000000000c0 RDI: 0000000000000000 [ 49.952195] RBP: ffff88001de03ae8 R08: 0000000000000000 R09: 0000000000000000 [ 49.952195] R10: ffff88001ddda9a0 R11: 0000000000000001 R12: 0000000000000001 [ 49.952195] R13: ffff88001fbc8218 R14: ffff88001de03b70 R15: ffff88001de03b78 [ 49.952195] FS: 00007fe9a435b6f0(0000) GS:ffff8800025d9000(0000) knlGS:0000000000000000 [ 49.952195] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 49.952195] CR2: 00007fe9a39ab000 CR3: 000000001de38000 CR4: 00000000000006e0 [ 49.952195] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 49.952195] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 49.952195] Process dd (pid: 3064, threadinfo ffff88001de02000, task ffff88001ddda250) [ 49.952195] Stack: [ 49.952195] ffff88001fa0de00 ffff88001f2dbd70 ffff88001f9fe800 000080b900000000 [ 49.952195] 00000000000000c0 ffff8800027a6100 0000000000000400 ffff88001fbc8218 [ 49.952195] 0000000000000000 0000000000000600 ffff88001de03bb8 ffffffff802d3ed7 [ 49.952195] Call Trace: [ 49.952195] [] balance_dirty_pages_ratelimited_nr+0x1d7/0x3f0 [ 49.952195] [] ? ext3_writeback_write_end+0x9e/0x120 [ 49.952195] [] generic_file_buffered_write+0x12f/0x330 [ 49.952195] [] __generic_file_aio_write_nolock+0x26d/0x460 [ 49.952195] [] ? generic_file_aio_write+0x52/0xd0 [ 49.952195] [] generic_file_aio_write+0x69/0xd0 [ 49.952195] [] ext3_file_write+0x26/0xc0 [ 49.952195] [] do_sync_write+0xf1/0x140 [ 49.952195] [] ? get_lock_stats+0x2a/0x60 [ 49.952195] [] ? autoremove_wake_function+0x0/0x40 [ 49.952195] [] vfs_write+0xcb/0x190 [ 49.952195] [] sys_write+0x50/0x90 [ 49.952195] [] system_call_fastpath+0x16/0x1b [ 49.952195] Code: 00 00 00 2b 05 09 1c 17 01 48 89 c6 49 0f af f4 48 c1 ee 02 48 89 f0 48 f7 e1 48 89 d6 31 d2 48 c1 ee 02 48 0f af 75 d0 48 89 f0 <48> f7 f7 41 8b 95 ac 01 00 00 48 89 c7 49 0f af d4 48 c1 ea 02 [ 49.952195] RIP [] get_dirty_limits+0xe9/0x2c0 [ 49.952195] RSP [ 50.096523] ---[ end trace 008d7aa02f244d7b ]--- Signed-off-by: Andrea Righi Cc: Peter Zijlstra Cc: David Rientjes Cc: Dave Chinner Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/vm.txt | 4 ++++ kernel/sysctl.c | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 97c4b3284329..b716d33912d8 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -90,6 +90,10 @@ will itself start writeback. If dirty_bytes is written, dirty_ratio becomes a function of its value (dirty_bytes / the amount of dirtyable system memory). +Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any +value lower than this limit will be ignored and the old configuration will be +retained. + ============================================================== dirty_expire_centisecs diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e3d2c7dd59b9..ea78fa101ad6 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -103,6 +103,9 @@ static unsigned long one_ul = 1; static int one_hundred = 100; static int one_thousand = 1000; +/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ +static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; + /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; static int minolduid; @@ -1006,7 +1009,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &dirty_bytes_handler, .strategy = &sysctl_intvec, - .extra1 = &one_ul, + .extra1 = &dirty_bytes_min, }, { .procname = "dirty_writeback_centisecs", -- cgit v1.2.3 From 381a80e6df396eaabef2c00f85974a4579ac1c70 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 6 May 2009 16:02:50 -0700 Subject: inotify: use GFP_NOFS in kernel_event() to work around a lockdep false-positive There is what we believe to be a false positive reported by lockdep. inotify_inode_queue_event() => take inotify_mutex => kernel_event() => kmalloc() => SLOB => alloc_pages_node() => page reclaim => slab reclaim => dcache reclaim => inotify_inode_is_dead => take inotify_mutex => deadlock The plan is to fix this via lockdep annotation, but that is proving to be quite involved. The patch flips the allocation over to GFP_NFS to shut the warning up, for the 2.6.30 release. Hopefully we will fix this for real in 2.6.31. I'll queue a patch in -mm to switch it back to GFP_KERNEL so we don't forget. ================================= [ INFO: inconsistent lock state ] 2.6.30-rc2-next-20090417 #203 --------------------------------- inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage. kswapd0/380 [HC0[0]:SC0[0]:HE1:SE1] takes: (&inode->inotify_mutex){+.+.?.}, at: [] inotify_inode_is_dead+0x35/0xb0 {RECLAIM_FS-ON-W} state was registered at: [] mark_held_locks+0x68/0x90 [] lockdep_trace_alloc+0xf5/0x100 [] __kmalloc_node+0x31/0x1e0 [] kernel_event+0xe2/0x190 [] inotify_dev_queue_event+0x126/0x230 [] inotify_inode_queue_event+0xc6/0x110 [] vfs_create+0xcd/0x140 [] do_filp_open+0x88d/0xa20 [] do_sys_open+0x98/0x140 [] sys_open+0x20/0x30 [] system_call_fastpath+0x16/0x1b [] 0xffffffffffffffff irq event stamp: 690455 hardirqs last enabled at (690455): [] _spin_unlock_irqrestore+0x44/0x80 hardirqs last disabled at (690454): [] _spin_lock_irqsave+0x32/0xa0 softirqs last enabled at (690178): [] __do_softirq+0x202/0x220 softirqs last disabled at (690157): [] call_softirq+0x1c/0x50 other info that might help us debug this: 2 locks held by kswapd0/380: #0: (shrinker_rwsem){++++..}, at: [] shrink_slab+0x37/0x180 #1: (&type->s_umount_key#17){++++..}, at: [] shrink_dcache_memory+0x11f/0x1e0 stack backtrace: Pid: 380, comm: kswapd0 Not tainted 2.6.30-rc2-next-20090417 #203 Call Trace: [] print_usage_bug+0x19f/0x200 [] ? save_stack_trace+0x2f/0x50 [] mark_lock+0x4bb/0x6d0 [] ? check_usage_forwards+0x0/0xc0 [] __lock_acquire+0xc62/0x1ae0 [] ? slob_free+0x10c/0x370 [] lock_acquire+0xe1/0x120 [] ? inotify_inode_is_dead+0x35/0xb0 [] mutex_lock_nested+0x63/0x420 [] ? inotify_inode_is_dead+0x35/0xb0 [] ? inotify_inode_is_dead+0x35/0xb0 [] ? sched_clock+0x9/0x10 [] ? lock_release_holdtime+0x35/0x1c0 [] inotify_inode_is_dead+0x35/0xb0 [] dentry_iput+0xbc/0xe0 [] d_kill+0x33/0x60 [] __shrink_dcache_sb+0x2d3/0x350 [] shrink_dcache_memory+0x15a/0x1e0 [] shrink_slab+0x125/0x180 [] kswapd+0x560/0x7a0 [] ? isolate_pages_global+0x0/0x2c0 [] ? autoremove_wake_function+0x0/0x40 [] ? trace_hardirqs_on+0xd/0x10 [] ? kswapd+0x0/0x7a0 [] kthread+0x5b/0xa0 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? kthread+0x0/0xa0 [] ? child_rip+0x0/0x20 [eparis@redhat.com: fix audit too] Cc: Al Viro Cc: Matt Mackall Cc: Christoph Lameter Signed-off-by: Wu Fengguang Signed-off-by: Eric Paris Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/notify/inotify/inotify_user.c | 2 +- kernel/auditfilter.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index bed766e435b5..1634319e2404 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -220,7 +220,7 @@ static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie, rem = 0; } - kevent->name = kmalloc(len + rem, GFP_KERNEL); + kevent->name = kmalloc(len + rem, GFP_NOFS); if (unlikely(!kevent->name)) { kmem_cache_free(event_cachep, kevent); return NULL; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index a6fe71fd5d1b..713098ee5a02 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1028,7 +1028,7 @@ static void audit_update_watch(struct audit_parent *parent, if (audit_enabled) { struct audit_buffer *ab; - ab = audit_log_start(NULL, GFP_KERNEL, + ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE); audit_log_format(ab, "auid=%u ses=%u", audit_get_loginuid(current), @@ -1067,7 +1067,7 @@ static void audit_remove_parent_watches(struct audit_parent *parent) e = container_of(r, struct audit_entry, rule); if (audit_enabled) { struct audit_buffer *ab; - ab = audit_log_start(NULL, GFP_KERNEL, + ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE); audit_log_format(ab, "auid=%u ses=%u", audit_get_loginuid(current), -- cgit v1.2.3 From 57adc4d2dbf968fdbe516359688094eef4d46581 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 6 May 2009 16:02:53 -0700 Subject: Eliminate thousands of warnings with gcc 3.2 build When building with gcc 3.2 I get thousands of warnings such as include/linux/gfp.h: In function `allocflags_to_migratetype': include/linux/gfp.h:105: warning: null format string due to passing a NULL format string to warn_slowpath() in #define __WARN() warn_slowpath(__FILE__, __LINE__, NULL) Split this case out into a separate call. This also shrinks the kernel slightly: text data bss dec hex filename 4802274 707668 712704 6222646 5ef336 vmlinux text data bss dec hex filename 4799027 703572 712704 6215303 5ed687 vmlinux due to removeing one argument from the commonly-called __WARN(). [akpm@linux-foundation.org: reduce scope of `empty'] Acked-by: Jesper Nilsson Acked-by: Johannes Weiner Acked-by: Arjan van de Ven Signed-off-by: Andi Kleen Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/bug.h | 7 ++++--- kernel/panic.c | 13 ++++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index e727fe0d1451..4b6755984d24 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -58,12 +58,13 @@ struct bug_entry { */ #ifndef __WARN #ifndef __ASSEMBLY__ -extern void warn_slowpath(const char *file, const int line, +extern void warn_slowpath_fmt(const char *file, const int line, const char *fmt, ...) __attribute__((format(printf, 3, 4))); +extern void warn_slowpath_null(const char *file, const int line); #define WANT_WARN_ON_SLOWPATH #endif -#define __WARN() warn_slowpath(__FILE__, __LINE__, NULL) -#define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg) +#define __WARN() warn_slowpath_null(__FILE__, __LINE__) +#define __WARN_printf(arg...) warn_slowpath_fmt(__FILE__, __LINE__, arg) #else #define __WARN_printf(arg...) do { printk(arg); __WARN(); } while (0) #endif diff --git a/kernel/panic.c b/kernel/panic.c index 3dcaa1661357..874ecf1307ae 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -340,7 +340,7 @@ void oops_exit(void) } #ifdef WANT_WARN_ON_SLOWPATH -void warn_slowpath(const char *file, int line, const char *fmt, ...) +void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...) { va_list args; char function[KSYM_SYMBOL_LEN]; @@ -356,7 +356,7 @@ void warn_slowpath(const char *file, int line, const char *fmt, ...) if (board) printk(KERN_WARNING "Hardware name: %s\n", board); - if (fmt) { + if (*fmt) { va_start(args, fmt); vprintk(fmt, args); va_end(args); @@ -367,7 +367,14 @@ void warn_slowpath(const char *file, int line, const char *fmt, ...) print_oops_end_marker(); add_taint(TAINT_WARN); } -EXPORT_SYMBOL(warn_slowpath); +EXPORT_SYMBOL(warn_slowpath_fmt); + +void warn_slowpath_null(const char *file, int line) +{ + static const char *empty = ""; + warn_slowpath_fmt(file, line, empty); +} +EXPORT_SYMBOL(warn_slowpath_null); #endif #ifdef CONFIG_CC_STACKPROTECTOR -- cgit v1.2.3 From 201517a7f3ec497fff545a7659c6c876f89f9054 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 7 May 2009 16:31:26 -0400 Subject: kprobes: fix to use text_mutex around arm/disarm kprobe Fix kprobes to lock text_mutex around some arch_arm/disarm_kprobe() which are newly added by commit de5bd88d5a5cce3cacea904d3503e5ebdb3852a2. Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Mathieu Desnoyers Cc: Jim Keniston Cc: Ingo Molnar Signed-off-by: Linus Torvalds --- kernel/kprobes.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index a5e74ddee0e2..c0fa54b276d9 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -319,6 +319,22 @@ struct kprobe __kprobes *get_kprobe(void *addr) return NULL; } +/* Arm a kprobe with text_mutex */ +static void __kprobes arm_kprobe(struct kprobe *kp) +{ + mutex_lock(&text_mutex); + arch_arm_kprobe(kp); + mutex_unlock(&text_mutex); +} + +/* Disarm a kprobe with text_mutex */ +static void __kprobes disarm_kprobe(struct kprobe *kp) +{ + mutex_lock(&text_mutex); + arch_disarm_kprobe(kp); + mutex_unlock(&text_mutex); +} + /* * Aggregate handlers for multiple kprobes support - these handlers * take care of invoking the individual kprobe handlers on p->list @@ -538,7 +554,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) ap->flags &= ~KPROBE_FLAG_DISABLED; if (!kprobes_all_disarmed) /* Arm the breakpoint again. */ - arch_arm_kprobe(ap); + arm_kprobe(ap); } return 0; } @@ -789,11 +805,8 @@ static int __kprobes __unregister_kprobe_top(struct kprobe *p) * enabled and not gone - otherwise, the breakpoint would * already have been removed. We save on flushing icache. */ - if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) { - mutex_lock(&text_mutex); - arch_disarm_kprobe(p); - mutex_unlock(&text_mutex); - } + if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) + disarm_kprobe(p); hlist_del_rcu(&old_p->hlist); } else { if (p->break_handler && !kprobe_gone(p)) @@ -810,7 +823,7 @@ noclean: if (!kprobe_disabled(old_p)) { try_to_disable_aggr_kprobe(old_p); if (!kprobes_all_disarmed && kprobe_disabled(old_p)) - arch_disarm_kprobe(old_p); + disarm_kprobe(old_p); } } return 0; @@ -1364,7 +1377,7 @@ int __kprobes disable_kprobe(struct kprobe *kp) try_to_disable_aggr_kprobe(p); if (!kprobes_all_disarmed && kprobe_disabled(p)) - arch_disarm_kprobe(p); + disarm_kprobe(p); out: mutex_unlock(&kprobe_mutex); return ret; @@ -1393,7 +1406,7 @@ int __kprobes enable_kprobe(struct kprobe *kp) } if (!kprobes_all_disarmed && kprobe_disabled(p)) - arch_arm_kprobe(p); + arm_kprobe(p); p->flags &= ~KPROBE_FLAG_DISABLED; if (p != kp) -- cgit v1.2.3 From 6f5bbff9a1b7d6864a495763448a363bbfa96324 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 May 2009 01:34:22 -0400 Subject: Convert obvious places to deactivate_locked_super() Signed-off-by: Al Viro --- drivers/mtd/mtdsuper.c | 3 +-- fs/9p/vfs_super.c | 5 +---- fs/afs/super.c | 3 +-- fs/btrfs/super.c | 12 ++++-------- fs/cifs/cifsfs.c | 3 +-- fs/devpts/inode.c | 5 ++--- fs/ecryptfs/main.c | 5 ++--- fs/libfs.c | 3 +-- fs/nfs/super.c | 15 +++++---------- fs/proc/root.c | 3 +-- fs/ubifs/super.c | 3 +-- kernel/cgroup.c | 3 +-- 12 files changed, 21 insertions(+), 42 deletions(-) (limited to 'kernel') diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c index 92285d0089c2..af8b42e0a55b 100644 --- a/drivers/mtd/mtdsuper.c +++ b/drivers/mtd/mtdsuper.c @@ -74,8 +74,7 @@ static int get_sb_mtd_aux(struct file_system_type *fs_type, int flags, ret = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (ret < 0) { - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return ret; } diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 7d23214e5599..0d29a57c63e6 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -174,10 +174,7 @@ P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); return 0; release_sb: - if (sb) { - up_write(&sb->s_umount); - deactivate_super(sb); - } + deactivate_locked_super(sb); free_stat: kfree(st); diff --git a/fs/afs/super.c b/fs/afs/super.c index aee239a048cb..2753f16dd315 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -405,8 +405,7 @@ static int afs_get_sb(struct file_system_type *fs_type, sb->s_flags = flags; ret = afs_fill_super(sb, ¶ms); if (ret < 0) { - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); goto error; } sb->s_options = new_opts; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3536bdb2d7cb..6dfae5b28f59 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -502,8 +502,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, if (s->s_root) { if ((flags ^ s->s_flags) & MS_RDONLY) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); error = -EBUSY; goto error_close_devices; } @@ -517,8 +516,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, error = btrfs_fill_super(s, fs_devices, data, flags & MS_SILENT ? 1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); goto error_free_subvol_name; } @@ -535,15 +533,13 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, mutex_unlock(&s->s_root->d_inode->i_mutex); if (IS_ERR(root)) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); error = PTR_ERR(root); goto error_free_subvol_name; } if (!root->d_inode) { dput(root); - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); error = -ENXIO; goto error_free_subvol_name; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 355e0efec0cf..5e6d35804d73 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -602,8 +602,7 @@ cifs_get_sb(struct file_system_type *fs_type, rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0); if (rc) { - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return rc; } sb->s_flags |= MS_ACTIVE; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 63a4a59e4148..21165cf934ff 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -389,11 +389,10 @@ static int devpts_get_sb(struct file_system_type *fs_type, return 0; out_dput: - dput(s->s_root); + dput(s->s_root); /* undo dget() in simple_set_mnt() */ out_undo_sget: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); return error; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index ccabd5faa04d..9f0aa9883c28 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -614,9 +614,8 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, } goto out; out_abort: - dput(sb->s_root); - up_write(&sb->s_umount); - deactivate_super(sb); + dput(sb->s_root); /* aka mnt->mnt_root, as set by get_sb_nodev() */ + deactivate_locked_super(sb); out: return rc; } diff --git a/fs/libfs.c b/fs/libfs.c index cd223190c4e9..80046ddf5063 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -246,8 +246,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, return 0; Enomem: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); return -ENOMEM; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 1679a164c8c9..d2d67781c579 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2111,8 +2111,7 @@ out_err_nosb: error_splat_root: dput(mntroot); error_splat_super: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); goto out; } @@ -2208,8 +2207,7 @@ out_err_noserver: return error; error_splat_super: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error); return error; } @@ -2469,8 +2467,7 @@ out_free: error_splat_root: dput(mntroot); error_splat_super: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); goto out; } @@ -2564,8 +2561,7 @@ out_err_noserver: return error; error_splat_super: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error); return error; } @@ -2649,8 +2645,7 @@ out_err_noserver: return error; error_splat_super: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); return error; } diff --git a/fs/proc/root.c b/fs/proc/root.c index 1e15a2b176e8..b080b791d9e3 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -67,8 +67,7 @@ static int proc_get_sb(struct file_system_type *fs_type, sb->s_flags = flags; err = proc_fill_super(sb); if (err) { - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return err; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index faa44f90608a..e9f7a754c4f7 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2055,8 +2055,7 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags, return 0; out_deact: - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); out_close: ubi_close_volume(ubi); return err; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 382109b5baeb..a7267bfd3765 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1133,8 +1133,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, free_cg_links: free_cg_links(&tmp_cg_links); drop_new_super: - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return ret; } -- cgit v1.2.3 From cd17cbfda004fe5f406c01b318c6378d9895896f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 May 2009 11:32:24 +0200 Subject: Revert "mm: add /proc controls for pdflush threads" This reverts commit fafd688e4c0c34da0f3de909881117d374e4c7af. Work is progressing to switch away from pdflush as the process backing for flushing out dirty data. So it seems pointless to add more knobs to control pdflush threads. The original author of the patch did not have any specific use cases for adding the knobs, so we can easily revert this before 2.6.30 to avoid having to maintain this API forever. Signed-off-by: Jens Axboe --- Documentation/sysctl/vm.txt | 28 ---------------------------- include/linux/writeback.h | 2 -- kernel/sysctl.c | 23 ----------------------- mm/pdflush.c | 31 ++++++++++++------------------- 4 files changed, 12 insertions(+), 72 deletions(-) (limited to 'kernel') diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index b716d33912d8..c302ddf629a0 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -39,8 +39,6 @@ Currently, these files are in /proc/sys/vm: - nr_hugepages - nr_overcommit_hugepages - nr_pdflush_threads -- nr_pdflush_threads_min -- nr_pdflush_threads_max - nr_trim_pages (only if CONFIG_MMU=n) - numa_zonelist_order - oom_dump_tasks @@ -469,32 +467,6 @@ The default value is 0. ============================================================== -nr_pdflush_threads_min - -This value controls the minimum number of pdflush threads. - -At boot time, the kernel will create and maintain 'nr_pdflush_threads_min' -threads for the kernel's lifetime. - -The default value is 2. The minimum value you can specify is 1, and -the maximum value is the current setting of 'nr_pdflush_threads_max'. - -See 'nr_pdflush_threads_max' below for more information. - -============================================================== - -nr_pdflush_threads_max - -This value controls the maximum number of pdflush threads that can be -created. The pdflush algorithm will create a new pdflush thread (up to -this maximum) if no pdflush threads have been available for >= 1 second. - -The default value is 8. The minimum value you can specify is the -current value of 'nr_pdflush_threads_min' and the -maximum is 1000. - -============================================================== - overcommit_memory: This value contains a flag that enables memory overcommitment. diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 9c1ed1fb6ddb..93445477f86a 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -168,8 +168,6 @@ void writeback_set_ratelimit(void); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl read-only. */ -extern int nr_pdflush_threads_max; /* Global so it can be exported to sysctl */ -extern int nr_pdflush_threads_min; /* Global so it can be exported to sysctl */ #endif /* WRITEBACK_H */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ea78fa101ad6..b2970d56fb76 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -101,7 +101,6 @@ static int __maybe_unused one = 1; static int __maybe_unused two = 2; static unsigned long one_ul = 1; static int one_hundred = 100; -static int one_thousand = 1000; /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; @@ -1033,28 +1032,6 @@ static struct ctl_table vm_table[] = { .mode = 0444 /* read-only*/, .proc_handler = &proc_dointvec, }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "nr_pdflush_threads_min", - .data = &nr_pdflush_threads_min, - .maxlen = sizeof nr_pdflush_threads_min, - .mode = 0644 /* read-write */, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &one, - .extra2 = &nr_pdflush_threads_max, - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "nr_pdflush_threads_max", - .data = &nr_pdflush_threads_max, - .maxlen = sizeof nr_pdflush_threads_max, - .mode = 0644 /* read-write */, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &nr_pdflush_threads_min, - .extra2 = &one_thousand, - }, { .ctl_name = VM_SWAPPINESS, .procname = "swappiness", diff --git a/mm/pdflush.c b/mm/pdflush.c index f2caf96993f8..235ac440c44e 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -57,14 +57,6 @@ static DEFINE_SPINLOCK(pdflush_lock); */ int nr_pdflush_threads = 0; -/* - * The max/min number of pdflush threads. R/W by sysctl at - * /proc/sys/vm/nr_pdflush_threads_max/min - */ -int nr_pdflush_threads_max __read_mostly = MAX_PDFLUSH_THREADS; -int nr_pdflush_threads_min __read_mostly = MIN_PDFLUSH_THREADS; - - /* * The time at which the pdflush thread pool last went empty */ @@ -76,7 +68,7 @@ static unsigned long last_empty_jifs; * Thread pool management algorithm: * * - The minimum and maximum number of pdflush instances are bound - * by nr_pdflush_threads_min and nr_pdflush_threads_max. + * by MIN_PDFLUSH_THREADS and MAX_PDFLUSH_THREADS. * * - If there have been no idle pdflush instances for 1 second, create * a new one. @@ -142,13 +134,14 @@ static int __pdflush(struct pdflush_work *my_work) * To throttle creation, we reset last_empty_jifs. */ if (time_after(jiffies, last_empty_jifs + 1 * HZ)) { - if (list_empty(&pdflush_list) && - nr_pdflush_threads < nr_pdflush_threads_max) { - last_empty_jifs = jiffies; - nr_pdflush_threads++; - spin_unlock_irq(&pdflush_lock); - start_one_pdflush_thread(); - spin_lock_irq(&pdflush_lock); + if (list_empty(&pdflush_list)) { + if (nr_pdflush_threads < MAX_PDFLUSH_THREADS) { + last_empty_jifs = jiffies; + nr_pdflush_threads++; + spin_unlock_irq(&pdflush_lock); + start_one_pdflush_thread(); + spin_lock_irq(&pdflush_lock); + } } } @@ -160,7 +153,7 @@ static int __pdflush(struct pdflush_work *my_work) */ if (list_empty(&pdflush_list)) continue; - if (nr_pdflush_threads <= nr_pdflush_threads_min) + if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS) continue; pdf = list_entry(pdflush_list.prev, struct pdflush_work, list); if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) { @@ -266,9 +259,9 @@ static int __init pdflush_init(void) * Pre-set nr_pdflush_threads... If we fail to create, * the count will be decremented. */ - nr_pdflush_threads = nr_pdflush_threads_min; + nr_pdflush_threads = MIN_PDFLUSH_THREADS; - for (i = 0; i < nr_pdflush_threads_min; i++) + for (i = 0; i < MIN_PDFLUSH_THREADS; i++) start_one_pdflush_thread(); return 0; } -- cgit v1.2.3 From 364b5b7b1d793a7f98be55b6b154716dcae78dfc Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Wed, 13 May 2009 21:56:59 -0500 Subject: sysrq, intel_fb: fix sysrq g collision Commit 79e539453b34e35f39299a899d263b0a1f1670bd introduced a regression where you cannot use sysrq 'g' to enter kgdb. The solution is to move the intel fb sysrq over to V for video instead of G for graphics. The SMP VOYAGER code to register for the sysrq-v is not anywhere to be found in the mainline kernel, so the comments in the code were cleaned up as well. This patch also cleans up the sysrq definitions for kgdb to make it generic for the kernel debugger, such that the sysrq 'g' can be used in the future to enter a gdbstub or another kernel debugger. Signed-off-by: Jason Wessel Acked-by: Jesse Barnes Acked-by: Randy Dunlap Signed-off-by: Andrew Morton --- drivers/char/sysrq.c | 4 ++-- drivers/gpu/drm/i915/intel_fb.c | 4 ++-- kernel/kgdb.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index b0a6a3e51924..d6a807f4077d 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -406,7 +406,7 @@ static struct sysrq_key_op *sysrq_key_table[36] = { &sysrq_showlocks_op, /* d */ &sysrq_term_op, /* e */ &sysrq_moom_op, /* f */ - /* g: May be registered by ppc for kgdb */ + /* g: May be registered for the kernel debugger */ NULL, /* g */ NULL, /* h - reserved for help */ &sysrq_kill_op, /* i */ @@ -431,7 +431,7 @@ static struct sysrq_key_op *sysrq_key_table[36] = { &sysrq_sync_op, /* s */ &sysrq_showstate_op, /* t */ &sysrq_mountro_op, /* u */ - /* v: May be registered at init time by SMP VOYAGER */ + /* v: May be registered for frame buffer console restore */ NULL, /* v */ &sysrq_showstate_blocked_op, /* w */ /* x: May be registered on ppc/powerpc for xmon */ diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index 3e094beecb99..e4652dcdd9bb 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -864,7 +864,7 @@ static void intelfb_sysrq(int dummy1, struct tty_struct *dummy3) static struct sysrq_key_op sysrq_intelfb_restore_op = { .handler = intelfb_sysrq, - .help_msg = "force-fb(G)", + .help_msg = "force-fb(V)", .action_msg = "Restore framebuffer console", }; @@ -898,7 +898,7 @@ int intelfb_probe(struct drm_device *dev) ret = intelfb_single_fb_probe(dev); } - register_sysrq_key('g', &sysrq_intelfb_restore_op); + register_sysrq_key('v', &sysrq_intelfb_restore_op); return ret; } diff --git a/kernel/kgdb.c b/kernel/kgdb.c index e4dcfb2272a4..9147a3190c9d 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -1583,8 +1583,8 @@ static void sysrq_handle_gdb(int key, struct tty_struct *tty) static struct sysrq_key_op sysrq_gdb_op = { .handler = sysrq_handle_gdb, - .help_msg = "Gdb", - .action_msg = "GDB", + .help_msg = "debug(G)", + .action_msg = "DEBUG", }; #endif -- cgit v1.2.3