From b842f8faf6c7dc2005c6a70631c1a91bac02f180 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 1 Oct 2010 17:23:41 -0400 Subject: jump label: Fix module __init section race Jump label uses is_module_text_address() to ensure that the module __init sections are valid before updating them. However, between the check for a valid module __init section and the subsequent jump label update, the module's __init section could be freed out from under us. We fix this potential race by adding a notifier callback to the MODULE_STATE_LIVE state. This notifier is called *after* the __init section has been run but before it is going to be freed. In the callback, the jump label code zeros the key value for any __init jump code within the module, and we add a check for a non-zero key value when we update jump labels. In this way we require no additional data structures. Thanks to Mathieu Desnoyers for pointing out this race condition. Reported-by: Mathieu Desnoyers Cc: Masami Hiramatsu Signed-off-by: Jason Baron LKML-Reference: [ Renamed remove_module_init() to remove_jump_label_module_init() as suggested by Masami Hiramatsu. ] Signed-off-by: Steven Rostedt --- kernel/jump_label.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 7be868bf25c6..be9e105345eb 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -168,7 +168,8 @@ void jump_label_update(unsigned long key, enum jump_label_type type) count = e_module->nr_entries; iter = e_module->table; while (count--) { - if (kernel_text_address(iter->code)) + if (iter->key && + kernel_text_address(iter->code)) arch_jump_label_transform(iter, type); iter++; } @@ -366,6 +367,39 @@ static void remove_jump_label_module(struct module *mod) } } +static void remove_jump_label_module_init(struct module *mod) +{ + struct hlist_head *head; + struct hlist_node *node, *node_next, *module_node, *module_node_next; + struct jump_label_entry *e; + struct jump_label_module_entry *e_module; + struct jump_entry *iter; + int i, count; + + /* if the module doesn't have jump label entries, just return */ + if (!mod->num_jump_entries) + return; + + for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { + head = &jump_label_table[i]; + hlist_for_each_entry_safe(e, node, node_next, head, hlist) { + hlist_for_each_entry_safe(e_module, module_node, + module_node_next, + &(e->modules), hlist) { + if (e_module->mod != mod) + continue; + count = e_module->nr_entries; + iter = e_module->table; + while (count--) { + if (within_module_init(iter->code, mod)) + iter->key = 0; + iter++; + } + } + } + } +} + static int jump_label_module_notify(struct notifier_block *self, unsigned long val, void *data) @@ -386,6 +420,11 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val, remove_jump_label_module(mod); mutex_unlock(&jump_label_mutex); break; + case MODULE_STATE_LIVE: + mutex_lock(&jump_label_mutex); + remove_jump_label_module_init(mod); + mutex_unlock(&jump_label_mutex); + break; } return ret; } -- cgit v1.2.3 From 91bad2f8d3057482b9afb599f14421b007136960 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 1 Oct 2010 17:23:48 -0400 Subject: jump label: Fix deadlock b/w jump_label_mutex vs. text_mutex register_kprobe() downs the 'text_mutex' and then calls jump_label_text_reserved(), which downs the 'jump_label_mutex'. However, the jump label code takes those mutexes in the reverse order. 
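To see why this ordering is fatal, consider a stand-alone user-space sketch (illustrative only, not part of the patch; pthread mutexes stand in for text_mutex and jump_label_mutex, and both thread functions are invented names). Each thread can grab its first lock and then block forever on the other's. Build with gcc -pthread:

        #include <pthread.h>
        #include <stdio.h>

        static pthread_mutex_t text_mutex = PTHREAD_MUTEX_INITIALIZER;
        static pthread_mutex_t jump_label_mutex = PTHREAD_MUTEX_INITIALIZER;

        /* Mirrors register_kprobe(): text_mutex first, then jump_label_mutex. */
        static void *kprobe_path(void *unused)
        {
                pthread_mutex_lock(&text_mutex);
                pthread_mutex_lock(&jump_label_mutex);  /* can block forever */
                pthread_mutex_unlock(&jump_label_mutex);
                pthread_mutex_unlock(&text_mutex);
                return NULL;
        }

        /* Mirrors the jump label update path: the same locks, reversed. */
        static void *jump_label_path(void *unused)
        {
                pthread_mutex_lock(&jump_label_mutex);
                pthread_mutex_lock(&text_mutex);        /* can block forever */
                pthread_mutex_unlock(&text_mutex);
                pthread_mutex_unlock(&jump_label_mutex);
                return NULL;
        }

        int main(void)
        {
                pthread_t a, b;

                pthread_create(&a, NULL, kprobe_path, NULL);
                pthread_create(&b, NULL, jump_label_path, NULL);
                pthread_join(a, NULL);  /* hangs when each thread holds one lock */
                pthread_join(b, NULL);
                puts("no deadlock on this run");
                return 0;
        }

The deadlock only fires on an unlucky interleaving, which is why such bugs survive testing; the fix below removes the inversion entirely by making the caller take the jump label lock first.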
Fix by requiring the caller of jump_label_text_reserved() to do the jump label locking via the newly added jump_label_lock() and jump_label_unlock(). Currently, kprobes is the only user of jump_label_text_reserved(). Reported-by: Ingo Molnar Acked-by: Masami Hiramatsu Signed-off-by: Jason Baron LKML-Reference: <759032c48d5e30c27f0bba003d09bffa8e9f28bb.1285965957.git.jbaron@redhat.com> Signed-off-by: Steven Rostedt --- include/linux/jump_label.h | 5 +++++ kernel/jump_label.c | 33 +++++++++++++++++++++------------ kernel/kprobes.c | 6 ++++++ 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index b67cb180e6e9..1947a1212678 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -18,6 +18,8 @@ struct module; extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; +extern void jump_label_lock(void); +extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_text_poke_early(jump_label_t addr); @@ -59,6 +61,9 @@ static inline int jump_label_text_reserved(void *start, void *end) return 0; } +static inline void jump_label_lock(void) {} +static inline void jump_label_unlock(void) {} + #endif #define COND_STMT(key, stmt) \ diff --git a/kernel/jump_label.c b/kernel/jump_label.c index be9e105345eb..12cce78e9568 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -39,6 +39,16 @@ struct jump_label_module_entry { struct module *mod; }; +void jump_label_lock(void) +{ + mutex_lock(&jump_label_mutex); +} + +void jump_label_unlock(void) +{ + mutex_unlock(&jump_label_mutex); +} + static int jump_label_cmp(const void *a, const void *b) { const struct jump_entry *jea = a; @@ -152,7 +162,7 @@ void jump_label_update(unsigned long key, enum jump_label_type type) struct jump_label_module_entry *e_module; int count; - mutex_lock(&jump_label_mutex); + jump_label_lock(); entry = get_jump_label_entry((jump_label_t)key); if (entry) { count = entry->nr_entries; @@ -175,7 +185,7 @@ void jump_label_update(unsigned long key, enum jump_label_type type) } } } - mutex_unlock(&jump_label_mutex); + jump_label_unlock(); } static int addr_conflict(struct jump_entry *entry, void *start, void *end) @@ -232,6 +242,7 @@ out: * overlaps with any of the jump label patch addresses. Code * that wants to modify kernel text should first verify that * it does not overlap with any of the jump label addresses. + * Caller must hold jump_label_mutex.
* * returns 1 if there is an overlap, 0 otherwise */ @@ -242,7 +253,6 @@ int jump_label_text_reserved(void *start, void *end) struct jump_entry *iter_stop = __start___jump_table; int conflict = 0; - mutex_lock(&jump_label_mutex); iter = iter_start; while (iter < iter_stop) { if (addr_conflict(iter, start, end)) { @@ -257,7 +267,6 @@ int jump_label_text_reserved(void *start, void *end) conflict = module_conflict(start, end); #endif out: - mutex_unlock(&jump_label_mutex); return conflict; } @@ -268,7 +277,7 @@ static __init int init_jump_label(void) struct jump_entry *iter_stop = __stop___jump_table; struct jump_entry *iter; - mutex_lock(&jump_label_mutex); + jump_label_lock(); ret = build_jump_label_hashtable(__start___jump_table, __stop___jump_table); iter = iter_start; @@ -276,7 +285,7 @@ static __init int init_jump_label(void) arch_jump_label_text_poke_early(iter->code); iter++; } - mutex_unlock(&jump_label_mutex); + jump_label_unlock(); return ret; } early_initcall(init_jump_label); @@ -409,21 +418,21 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val, switch (val) { case MODULE_STATE_COMING: - mutex_lock(&jump_label_mutex); + jump_label_lock(); ret = add_jump_label_module(mod); if (ret) remove_jump_label_module(mod); - mutex_unlock(&jump_label_mutex); + jump_label_unlock(); break; case MODULE_STATE_GOING: - mutex_lock(&jump_label_mutex); + jump_label_lock(); remove_jump_label_module(mod); - mutex_unlock(&jump_label_mutex); + jump_label_unlock(); break; case MODULE_STATE_LIVE: - mutex_lock(&jump_label_mutex); + jump_label_lock(); remove_jump_label_module_init(mod); - mutex_unlock(&jump_label_mutex); + jump_label_unlock(); break; } return ret; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 99865c33a60d..9437e14f36bd 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1146,13 +1146,16 @@ int __kprobes register_kprobe(struct kprobe *p) return ret; preempt_disable(); + jump_label_lock(); if (!kernel_text_address((unsigned long) p->addr) || in_kprobes_functions((unsigned long) p->addr) || ftrace_text_reserved(p->addr, p->addr) || jump_label_text_reserved(p->addr, p->addr)) { preempt_enable(); + jump_label_unlock(); return -EINVAL; } + jump_label_unlock(); /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ p->flags &= KPROBE_FLAG_DISABLED; @@ -1187,6 +1190,8 @@ int __kprobes register_kprobe(struct kprobe *p) INIT_LIST_HEAD(&p->list); mutex_lock(&kprobe_mutex); + jump_label_lock(); /* needed to call jump_label_text_reserved() */ + get_online_cpus(); /* For avoiding text_mutex deadlock. */ mutex_lock(&text_mutex); @@ -1214,6 +1219,7 @@ out: mutex_unlock(&text_mutex); put_online_cpus(); + jump_label_unlock(); mutex_unlock(&kprobe_mutex); if (probed_mod) -- cgit v1.2.3 From 4ac3dbec800d93485a5c84e37af676278eea657c Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Wed, 27 Oct 2010 11:17:15 -0400 Subject: oprofile: Fix the hang while taking the cpu offline The kernel is built with CONFIG_OPROFILE and CPU_HOTPLUG enabled. Oprofile is initialised using the system timer in the absence of hardware counter support. Oprofile isn't started from userland. In this setup, while taking a CPU offline, the kernel hangs in an infinite loop inside the lock_hrtimer_base() function. This happens because, as part of oprofile_cpu_notify(), it tries to stop an hrtimer which was never started. These per-cpu hrtimers are started only when oprofile is started:
echo 1 > /dev/oprofile/enable
This problem also exists when the cpu is booted with the maxcpus parameter set. When bringing the remaining cpus online, the timers are started even if oprofile is not yet enabled. This patch fixes the issue by adding a state variable so that the hrtimer start/stop is only attempted when oprofile is started. For stable kernels v2.6.35.y and v2.6.36.y. Reported-by: Jan Sebastien Tested-by: sricharan Signed-off-by: Santosh Shilimkar Cc: stable@kernel.org Signed-off-by: Robert Richter --- drivers/oprofile/timer_int.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c index dc0ae4d14dff..010725117dbb 100644 --- a/drivers/oprofile/timer_int.c +++ b/drivers/oprofile/timer_int.c @@ -21,6 +21,7 @@ #include "oprof.h" static DEFINE_PER_CPU(struct hrtimer, oprofile_hrtimer); +static int ctr_running; static enum hrtimer_restart oprofile_hrtimer_notify(struct hrtimer *hrtimer) { @@ -33,6 +34,9 @@ static void __oprofile_hrtimer_start(void *unused) { struct hrtimer *hrtimer = &__get_cpu_var(oprofile_hrtimer); + if (!ctr_running) + return; + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = oprofile_hrtimer_notify; @@ -42,7 +46,10 @@ static void __oprofile_hrtimer_start(void *unused) static int oprofile_hrtimer_start(void) { + get_online_cpus(); + ctr_running = 1; on_each_cpu(__oprofile_hrtimer_start, NULL, 1); + put_online_cpus(); return 0; } @@ -50,6 +57,9 @@ static void __oprofile_hrtimer_stop(int cpu) { struct hrtimer *hrtimer = &per_cpu(oprofile_hrtimer, cpu); + if (!ctr_running) + return; + hrtimer_cancel(hrtimer); } @@ -57,8 +67,11 @@ static void oprofile_hrtimer_stop(void) { int cpu; + get_online_cpus(); for_each_online_cpu(cpu) __oprofile_hrtimer_stop(cpu); + ctr_running = 0; + put_online_cpus(); } static int __cpuinit oprofile_cpu_notify(struct notifier_block *self, -- cgit v1.2.3 From 3d7851b3cdd43a734e5cc4c643fd886ab28ad4d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 15 Oct 2010 09:51:08 -0400 Subject: oprofile: Remove deprecated use of flush_scheduled_work() flush_scheduled_work() is deprecated and scheduled to be removed. sync_stop() currently cancels cpu_buffer works inside buffer_mutex and flushes the system workqueue outside. Instead, split end_cpu_work() into two parts - stopping further work enqueues and flushing works - and do the former inside buffer_mutex and the latter outside. For stable kernels v2.6.35.y and v2.6.36.y.
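The shape of that split, as a hedged kernel-style sketch with invented names (the actual change is in the diff below): clear an enable flag under the mutex so the self-rearming works stop re-enqueueing themselves, then flush each per-cpu work with the mutex dropped.

        #include <linux/workqueue.h>
        #include <linux/mutex.h>
        #include <linux/percpu.h>
        #include <linux/cpu.h>

        /* All names here are invented for illustration only. */
        static DEFINE_MUTEX(buf_mutex);
        static DEFINE_PER_CPU(struct delayed_work, sample_work);
        static int works_enabled;

        static void sample_fn(struct work_struct *work)
        {
                /* ... drain this cpu's sample buffer ... */
                if (works_enabled)      /* re-arm only while enabled */
                        schedule_delayed_work(this_cpu_ptr(&sample_work), HZ);
        }

        static void start_sampling(void)
        {
                int cpu;

                mutex_lock(&buf_mutex);
                works_enabled = 1;
                for_each_online_cpu(cpu) {
                        struct delayed_work *dw = &per_cpu(sample_work, cpu);

                        INIT_DELAYED_WORK(dw, sample_fn);
                        schedule_delayed_work_on(cpu, dw, HZ);
                }
                mutex_unlock(&buf_mutex);
        }

        static void stop_sampling(void)
        {
                int cpu;

                mutex_lock(&buf_mutex);
                works_enabled = 0;      /* step 1: stop further enqueues */
                mutex_unlock(&buf_mutex);

                /* step 2: flush with the mutex dropped; the works cannot
                 * re-arm once works_enabled is clear, so this terminates */
                for_each_online_cpu(cpu)
                        flush_delayed_work(&per_cpu(sample_work, cpu));
        }

Flushing outside the mutex matters because a work still running may itself need the mutex; flushing while holding it would recreate a deadlock of the kind the earlier patches in this series fix.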
Signed-off-by: Tejun Heo Cc: stable@kernel.org Signed-off-by: Robert Richter --- drivers/oprofile/buffer_sync.c | 2 +- drivers/oprofile/cpu_buffer.c | 10 +++++++--- drivers/oprofile/cpu_buffer.h | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index b7e755f4178a..a3984f4ef192 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -190,7 +190,7 @@ void sync_stop(void) profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); task_handoff_unregister(&task_free_nb); mutex_unlock(&buffer_mutex); - flush_scheduled_work(); + flush_cpu_work(); /* make sure we don't leak task structs */ process_task_mortuary(); diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index f179ac2ea801..59f55441e075 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -111,14 +111,18 @@ void start_cpu_work(void) void end_cpu_work(void) { - int i; - work_enabled = 0; +} + +void flush_cpu_work(void) +{ + int i; for_each_online_cpu(i) { struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i); - cancel_delayed_work(&b->work); + /* these works are per-cpu, no need for flush_sync */ + flush_delayed_work(&b->work); } } diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index 68ea16ab645f..e1d097e250ae 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -25,6 +25,7 @@ void free_cpu_buffers(void); void start_cpu_work(void); void end_cpu_work(void); +void flush_cpu_work(void); /* CPU buffer is composed of such entries (which are * also used for context switch notes) -- cgit v1.2.3 From de31c3ca8179d7c21def7ecb56e4fec0c8659d36 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 18 Oct 2010 10:38:58 -0400 Subject: jump label: Fix error with preempt disable holding mutex Kprobes and jump label were having a race between mutexes that was fixed by reordering the jump label locking. But this reordering moved the jump label mutex into a preempt disable location. This patch does a little fiddling to move the grabbing of the jump label mutex out of the preempt disable section while still keeping the order correct between the mutex and the kprobes lock. Reported-by: Ingo Molnar Acked-by: Masami Hiramatsu Cc: Jason Baron Signed-off-by: Steven Rostedt --- kernel/kprobes.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9437e14f36bd..9737a76e106f 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1145,17 +1145,13 @@ int __kprobes register_kprobe(struct kprobe *p) if (ret) return ret; - preempt_disable(); jump_label_lock(); + preempt_disable(); if (!kernel_text_address((unsigned long) p->addr) || in_kprobes_functions((unsigned long) p->addr) || ftrace_text_reserved(p->addr, p->addr) || - jump_label_text_reserved(p->addr, p->addr)) { - preempt_enable(); - jump_label_unlock(); - return -EINVAL; - } - jump_label_unlock(); + jump_label_text_reserved(p->addr, p->addr)) + goto fail_with_jump_label; /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ p->flags &= KPROBE_FLAG_DISABLED; @@ -1169,10 +1165,9 @@ int __kprobes register_kprobe(struct kprobe *p) * We must hold a refcount of the probed module while updating * its code to prohibit unexpected unloading.
*/ - if (unlikely(!try_module_get(probed_mod))) { - preempt_enable(); - return -EINVAL; - } + if (unlikely(!try_module_get(probed_mod))) + goto fail_with_jump_label; + /* * If the module freed .init.text, we couldn't insert * kprobes in there. @@ -1180,11 +1175,11 @@ int __kprobes register_kprobe(struct kprobe *p) if (within_module_init((unsigned long)p->addr, probed_mod) && probed_mod->state != MODULE_STATE_COMING) { module_put(probed_mod); - preempt_enable(); - return -EINVAL; + goto fail_with_jump_label; } } preempt_enable(); + jump_label_unlock(); p->nmissed = 0; INIT_LIST_HEAD(&p->list); @@ -1226,6 +1221,11 @@ out: module_put(probed_mod); return ret; + +fail_with_jump_label: + preempt_enable(); + jump_label_unlock(); + return -EINVAL; } EXPORT_SYMBOL_GPL(register_kprobe); -- cgit v1.2.3 From 95bcd683fb694a3e2d0538bf486430a0dfbb4111 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 29 Oct 2010 11:02:43 -0400 Subject: jump label: Make arch_jump_label_text_poke_early() optional Some archs do not need to do anything special for jump labels on startup (like MIPS). This patch adds a weak function stub for arch_jump_label_text_poke_early(). Cc: Jason Baron Cc: David Miller Cc: David Daney Suggested-by: Thomas Gleixner LKML-Reference: <1286218615-24011-2-git-send-email-ddaney@caviumnetworks.com> LKML-Reference: <20101015201037.703989993@goodmis.org> Signed-off-by: Steven Rostedt --- kernel/jump_label.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 12cce78e9568..3b79bd938330 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -270,6 +270,13 @@ out: return conflict; } +/* + * Not all archs need this. + */ +void __weak arch_jump_label_text_poke_early(jump_label_t addr) +{ +} + static __init int init_jump_label(void) { int ret; -- cgit v1.2.3 From f0daed0242d514033b9ecca9187108d3c4e281a5 Mon Sep 17 00:00:00 2001 From: David Miller Date: Sat, 23 Oct 2010 11:06:24 -0700 Subject: jump_label: Fix unaligned traps on sparc. The vmlinux.lds.h knobs that emit the __jump_table section in the main kernel image take care to align the section, but this doesn't help the __jump_table section that gets emitted into modules. Fix the resulting lack of section alignment by explicitly specifying it in the assembler. Signed-off-by: David S. Miller LKML-Reference: <20101023.110624.226758370.davem@davemloft.net> Signed-off-by: Steven Rostedt --- arch/sparc/include/asm/jump_label.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 62e66d7b2fb6..d95cf44fb40d 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -14,6 +14,7 @@ "nop\n\t" \ "nop\n\t" \ ".pushsection __jump_table, \"a\"\n\t"\ + ".align 4\n\t" \ ".word 1b, %l[" #label "], %c0\n\t" \ ".popsection \n\t" \ : : "i" (key) : : label);\ -- cgit v1.2.3 From 2d1d7126bbde53989f1d7de174816c123bb7ecb0 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 27 Oct 2010 21:09:15 -0700 Subject: x86, ftrace: Use safe noops, drop trap test Always use a safe 5-byte noop sequence. Drop the trap test, since it is known to return false negatives on some virtualization platforms on 32 bits. The resulting code is both simpler and safer. Cc: Daniel Drake Cc: Jason Baron Cc: Mathieu Desnoyers Signed-off-by: H.
Peter Anvin Signed-off-by: Steven Rostedt --- arch/x86/kernel/alternative.c | 69 ++++++++++--------------------------------- 1 file changed, 15 insertions(+), 54 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a36bb90aef53..0b30214282a8 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -644,65 +644,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) -unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; +#ifdef CONFIG_X86_64 +unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 }; +#else +unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 }; +#endif void __init arch_init_ideal_nop5(void) { - extern const unsigned char ftrace_test_p6nop[]; - extern const unsigned char ftrace_test_nop5[]; - extern const unsigned char ftrace_test_jmp[]; - int faulted = 0; - /* - * There is no good nop for all x86 archs. - * We will default to using the P6_NOP5, but first we - * will test to make sure that the nop will actually - * work on this CPU. If it faults, we will then - * go to a lesser efficient 5 byte nop. If that fails - * we then just use a jmp as our nop. This isn't the most - * efficient nop, but we can not use a multi part nop - * since we would then risk being preempted in the middle - * of that nop, and if we enabled tracing then, it might - * cause a system crash. + * There is no good nop for all x86 archs. This selection + * algorithm should be unified with the one in find_nop_table(), + * but this should be good enough for now. * - * TODO: check the cpuid to determine the best nop. + * For cases other than the ones below, use the safe (as in + * always functional) defaults above. */ - asm volatile ( - "ftrace_test_jmp:" - "jmp ftrace_test_p6nop\n" - "nop\n" - "nop\n" - "nop\n" /* 2 byte jmp + 3 bytes */ - "ftrace_test_p6nop:" - P6_NOP5 - "jmp 1f\n" - "ftrace_test_nop5:" - ".byte 0x66,0x66,0x66,0x66,0x90\n" - "1:" - ".section .fixup, \"ax\"\n" - "2: movl $1, %0\n" - " jmp ftrace_test_nop5\n" - "3: movl $2, %0\n" - " jmp 1b\n" - ".previous\n" - _ASM_EXTABLE(ftrace_test_p6nop, 2b) - _ASM_EXTABLE(ftrace_test_nop5, 3b) - : "=r"(faulted) : "0" (faulted)); - - switch (faulted) { - case 0: - pr_info("converting mcount calls to 0f 1f 44 00 00\n"); - memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5); - break; - case 1: - pr_info("converting mcount calls to 66 66 66 66 90\n"); - memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5); - break; - case 2: - pr_info("converting mcount calls to jmp . + 5\n"); - memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5); - break; - } - +#ifdef CONFIG_X86_64 + /* Don't use these on 32 bits due to broken virtualizers */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + memcpy(ideal_nop5, p6_nops[5], 5); +#endif } #endif -- cgit v1.2.3 From 45f81b1c96d9793e47ce925d257ea693ce0b193e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 29 Oct 2010 12:33:43 -0400 Subject: jump label: Add work around to i386 gcc asm goto bug On i386 (not x86_64) early implementations of gcc would have a bug with asm goto causing it to produce code like the following: (This was noticed by Peter Zijlstra)

 56 pushl 0
 67 nopl
    jmp 0x6f
    popl
    jmp 0x8c
 6f mov
    test
    je 0x8c
 8c mov
    call *(%esp)

The jump added in the asm goto skipped over the popl that matched the pushl 0, which led to a quick crash of the system when the jump was enabled.
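For readers unfamiliar with the construct, here is a minimal stand-alone example of asm goto (not the kernel's macro): the label list after the final colon tells gcc that the asm may branch to a C label, so the compiler must keep the stack consistent on both the fall-through and the taken path, which is exactly what the buggy code generation above fails to do. Build with gcc >= 4.5:

        #include <stdio.h>

        static int traced(void)
        {
                /*
                 * The nop is emitted inline at the call site.  A runtime
                 * patcher could later overwrite it with a jump to the
                 * do_trace label; gcc is told about that possible branch
                 * via the label list after the last colon.
                 */
                asm goto("nop" : : : : do_trace);
                return 0;
        do_trace:
                return 1;
        }

        int main(void)
        {
                printf("traced() = %d\n", traced());
                return 0;
        }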
The nopl is defined in the asm goto () statement and when tracepoints are enabled, the nop changes to a jump to the label that was specified by the asm goto. asm goto is supposed to tell gcc that the code in the asm might jump to an external label. Here gcc obviously fails to make that work. The bug report for gcc is here: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226 The bug only appears on x86 when not compiled with -maccumulate-outgoing-args. This option is always set on x86_64 and it is also the work around for a function graph tracer i386 bug. (See commit: 746357d6a526d6da9d89a2ec645b28406e959c2e) This explains why the bug only showed up on i386 when function graph tracer was not enabled. This patch now adds a CONFIG_JUMP_LABEL option that defaults to off instead of using jump labels by default. When jump labels are enabled, the -maccumulate-outgoing-args will be used (causing a slightly larger kernel image on i386). This option will exist until we have a way to detect if the gcc compiler in use is safe to use on all configurations without the work around. Note, there exists such a test, but for now we will keep the enabling of jump label as a manual option. Archs that know the compiler is safe with asm goto may choose to select JUMP_LABEL and enable it by default. Reported-by: Ingo Molnar Cause-discovered-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Jason Baron Cc: H. Peter Anvin Cc: David Daney Cc: Mathieu Desnoyers Cc: Masami Hiramatsu Cc: David Miller Cc: Richard Henderson LKML-Reference: <1288028746.3673.11.camel@laptop> Signed-off-by: Steven Rostedt --- arch/Kconfig | 14 ++++++++++++++ arch/x86/Makefile_32.cpu | 13 ++++++++++++- include/linux/jump_label.h | 2 +- 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 53d7f619a1b9..8bf0fa652eb6 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -42,6 +42,20 @@ config KPROBES for kernel debugging, non-intrusive instrumentation and testing. If in doubt, say "N". +config JUMP_LABEL + bool "Optimize trace point call sites" + depends on HAVE_ARCH_JUMP_LABEL + help + If it is detected that the compiler has support for "asm goto", + the kernel will compile trace point locations with just a + nop instruction. When trace points are enabled, the nop will + be converted to a jump to the trace function. This technique + lowers overhead and stress on the branch prediction of the + processor. + + On i386, options added to the compiler flags may increase + the size of the kernel slightly. + config OPTPROBES def_bool y depends on KPROBES && HAVE_OPTPROBES diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 1255d953c65d..f2ee1abb1df9 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -51,7 +51,18 @@ cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) # prologue (push %ebp, mov %esp, %ebp) which breaks the function graph # tracer assumptions. For i686, generic, core2 this is set by the # compiler anyway -cflags-$(CONFIG_FUNCTION_GRAPH_TRACER) += $(call cc-option,-maccumulate-outgoing-args) +ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y) +ADD_ACCUMULATE_OUTGOING_ARGS := y +endif + +# Work around to a bug with asm goto with first implementations of it +# in gcc causing gcc to mess up the push and pop of the stack in some +# uses of asm goto.
+ifeq ($(CONFIG_JUMP_LABEL), y) +ADD_ACCUMULATE_OUTGOING_ARGS := y +endif + +cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args) # Bug fix for binutils: this option is required in order to keep # binutils from generating NOPL instructions against our will. diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 1947a1212678..7880f18e4b86 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -1,7 +1,7 @@ #ifndef _LINUX_JUMP_LABEL_H #define _LINUX_JUMP_LABEL_H -#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL) +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) # include # define HAVE_JUMP_LABEL #endif -- cgit v1.2.3 From 7b79462a20826a7269322113c68ca78d5f67c0bd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 30 Oct 2010 01:19:29 -0700 Subject: x86: Check irq_remapped instead of remapping_enabled in destroy_irq() Russ Anderson reported: | There is a regression that is causing a NULL pointer dereference | in free_irte when shutting down xpc. git bisect narrowed it down | to git commit d585d06(intr_remap: Simplify the code further), which | changed free_irte(). Reverse applying the patch fixes the problem. We need to use irq_remapped() for each irq instead of checking only intr_remapping_enabled as there might be non remapped irqs even when remapping is enabled. [ tglx: use cfg instead of retrieving it again. Massaged changelog ] Reported-bisected-and-tested-by: Russ Anderson Signed-off-by: Yinghai Lu Cc: Suresh Siddha LKML-Reference: <4CCBD511.40607@kernel.org> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 0929191d83cf..7cc0a721f628 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3109,7 +3109,7 @@ void destroy_irq(unsigned int irq) irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); - if (intr_remapping_enabled) + if (irq_remapped(cfg)) free_irte(irq); raw_spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq, cfg); -- cgit v1.2.3
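The pattern behind this last fix generalizes: when a capability is enabled globally but set up per object, teardown must test the per-object state, not the global flag. A stand-alone sketch with invented names (not the io_apic code):

        #include <stdbool.h>
        #include <stdio.h>
        #include <stdlib.h>

        /* Hypothetical stand-ins, not the real irq structures. */
        struct fake_irq_cfg {
                bool remapped;  /* set only when THIS irq was given an irte */
                int *irte;      /* allocated only when remapped is set */
        };

        static bool remapping_enabled = true;   /* global capability flag */

        static void fake_destroy_irq(struct fake_irq_cfg *cfg)
        {
                /*
                 * The buggy teardown tested the global flag, i.e.
                 * "if (remapping_enabled) free the remap state", which
                 * touches state that was never set up for irqs that were
                 * not remapped.  The fix tests the per-irq state instead:
                 */
                if (cfg->remapped) {
                        free(cfg->irte);
                        cfg->irte = NULL;
                }
        }

        int main(void)
        {
                struct fake_irq_cfg plain = { .remapped = false, .irte = NULL };

                fake_destroy_irq(&plain);  /* safe even with remapping on */
                printf("remapping_enabled=%d, plain.remapped=%d: nothing freed\n",
                       remapping_enabled, plain.remapped);
                return 0;
        }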