Diffstat (limited to 'kernel/trace')
32 files changed, 2116 insertions, 3343 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 13e13d428cd3..c7683fd8a03a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -44,9 +44,6 @@ config HAVE_FTRACE_MCOUNT_RECORD help See Documentation/trace/ftrace-design.txt -config HAVE_HW_BRANCH_TRACER - bool - config HAVE_SYSCALL_TRACEPOINTS bool help @@ -197,15 +194,6 @@ config PREEMPT_TRACER enabled. This option and the irqs-off timing option can be used together or separately.) -config SYSPROF_TRACER - bool "Sysprof Tracer" - depends on X86 - select GENERIC_TRACER - select CONTEXT_SWITCH_TRACER - help - This tracer provides the trace needed by the 'Sysprof' userspace - tool. - config SCHED_TRACER bool "Scheduling Latency Tracer" select GENERIC_TRACER @@ -232,23 +220,6 @@ config FTRACE_SYSCALLS help Basic tracer to catch the syscall entry and exit events. -config BOOT_TRACER - bool "Trace boot initcalls" - select GENERIC_TRACER - select CONTEXT_SWITCH_TRACER - help - This tracer helps developers to optimize boot times: it records - the timings of the initcalls and traces key events and the identity - of tasks that can cause boot delays, such as context-switches. - - Its aim is to be parsed by the scripts/bootgraph.pl tool to - produce pretty graphics about boot inefficiencies, giving a visual - representation of the delays during initcalls - but the raw - /debug/tracing/trace text output is readable too. - - You must pass in initcall_debug and ftrace=initcall to the kernel - command line to enable this on bootup. - config TRACE_BRANCH_PROFILING bool select GENERIC_TRACER @@ -328,28 +299,6 @@ config BRANCH_TRACER Say N if unsure. -config KSYM_TRACER - bool "Trace read and write access on kernel memory locations" - depends on HAVE_HW_BREAKPOINT - select TRACING - help - This tracer helps find read and write operations on any given kernel - symbol i.e. /proc/kallsyms. - -config PROFILE_KSYM_TRACER - bool "Profile all kernel memory accesses on 'watched' variables" - depends on KSYM_TRACER - help - This tracer profiles kernel accesses on variables watched through the - ksym tracer ftrace plugin. Depending upon the hardware, all read - and write operations on kernel variables can be monitored for - accesses. - - The results will be displayed in: - /debugfs/tracing/profile_ksym - - Say N if unsure. - config STACK_TRACER bool "Trace max stack" depends on HAVE_FUNCTION_TRACER @@ -374,34 +323,6 @@ config STACK_TRACER Say N if unsure. -config HW_BRANCH_TRACER - depends on HAVE_HW_BRANCH_TRACER - bool "Trace hw branches" - select GENERIC_TRACER - help - This tracer records all branches on the system in a circular - buffer, giving access to the last N branches for each cpu. - -config KMEMTRACE - bool "Trace SLAB allocations" - select GENERIC_TRACER - help - kmemtrace provides tracing for slab allocator functions, such as - kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected - data is then fed to the userspace application in order to analyse - allocation hotspots, internal fragmentation and so on, making it - possible to see how well an allocator performs, as well as debug - and profile kernel code. - - This requires an userspace application to use. See - Documentation/trace/kmemtrace.txt for more information. - - Saying Y will make the kernel somewhat larger and slower. However, - if you disable kmemtrace at run-time or boot-time, the performance - impact is minimal (depending on the arch the kernel is built for). - - If unsure, say N. 
- config WORKQUEUE_TRACER bool "Trace workqueues" select GENERIC_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 78edc6490038..438e84a56ab3 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -30,7 +30,6 @@ obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_TRACING) += trace_printk.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o -obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o @@ -38,11 +37,8 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o obj-$(CONFIG_NOP_TRACER) += trace_nop.o obj-$(CONFIG_STACK_TRACER) += trace_stack.o obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o -obj-$(CONFIG_BOOT_TRACER) += trace_boot.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o -obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o -obj-$(CONFIG_KMEMTRACE) += kmemtrace.o obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o ifeq ($(CONFIG_BLOCK),y) @@ -56,7 +52,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o -obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o obj-$(CONFIG_EVENT_TRACING) += power-traces.o libftrace-y := ftrace.o diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b3bc91a3f510..638711c17504 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -675,28 +675,33 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, } } -static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_abort(void *ignore, + struct request_queue *q, struct request *rq) { blk_add_trace_rq(q, rq, BLK_TA_ABORT); } -static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_insert(void *ignore, + struct request_queue *q, struct request *rq) { blk_add_trace_rq(q, rq, BLK_TA_INSERT); } -static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_issue(void *ignore, + struct request_queue *q, struct request *rq) { blk_add_trace_rq(q, rq, BLK_TA_ISSUE); } -static void blk_add_trace_rq_requeue(struct request_queue *q, +static void blk_add_trace_rq_requeue(void *ignore, + struct request_queue *q, struct request *rq) { blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); } -static void blk_add_trace_rq_complete(struct request_queue *q, +static void blk_add_trace_rq_complete(void *ignore, + struct request_queue *q, struct request *rq) { blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); @@ -724,34 +729,40 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, !bio_flagged(bio, BIO_UPTODATE), 0, NULL); } -static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio) +static void blk_add_trace_bio_bounce(void *ignore, + struct request_queue *q, struct bio *bio) { blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); } -static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio) +static void blk_add_trace_bio_complete(void *ignore, + struct request_queue *q, struct bio *bio) { blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); } -static void blk_add_trace_bio_backmerge(struct request_queue *q, +static void blk_add_trace_bio_backmerge(void *ignore, + struct 
request_queue *q, struct bio *bio) { blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); } -static void blk_add_trace_bio_frontmerge(struct request_queue *q, +static void blk_add_trace_bio_frontmerge(void *ignore, + struct request_queue *q, struct bio *bio) { blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); } -static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio) +static void blk_add_trace_bio_queue(void *ignore, + struct request_queue *q, struct bio *bio) { blk_add_trace_bio(q, bio, BLK_TA_QUEUE); } -static void blk_add_trace_getrq(struct request_queue *q, +static void blk_add_trace_getrq(void *ignore, + struct request_queue *q, struct bio *bio, int rw) { if (bio) @@ -765,7 +776,8 @@ static void blk_add_trace_getrq(struct request_queue *q, } -static void blk_add_trace_sleeprq(struct request_queue *q, +static void blk_add_trace_sleeprq(void *ignore, + struct request_queue *q, struct bio *bio, int rw) { if (bio) @@ -779,7 +791,7 @@ static void blk_add_trace_sleeprq(struct request_queue *q, } } -static void blk_add_trace_plug(struct request_queue *q) +static void blk_add_trace_plug(void *ignore, struct request_queue *q) { struct blk_trace *bt = q->blk_trace; @@ -787,7 +799,7 @@ static void blk_add_trace_plug(struct request_queue *q) __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); } -static void blk_add_trace_unplug_io(struct request_queue *q) +static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q) { struct blk_trace *bt = q->blk_trace; @@ -800,7 +812,7 @@ static void blk_add_trace_unplug_io(struct request_queue *q) } } -static void blk_add_trace_unplug_timer(struct request_queue *q) +static void blk_add_trace_unplug_timer(void *ignore, struct request_queue *q) { struct blk_trace *bt = q->blk_trace; @@ -813,7 +825,8 @@ static void blk_add_trace_unplug_timer(struct request_queue *q) } } -static void blk_add_trace_split(struct request_queue *q, struct bio *bio, +static void blk_add_trace_split(void *ignore, + struct request_queue *q, struct bio *bio, unsigned int pdu) { struct blk_trace *bt = q->blk_trace; @@ -829,6 +842,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio, /** * blk_add_trace_remap - Add a trace for a remap operation + * @ignore: trace callback data parameter (not used) * @q: queue the io is for * @bio: the source bio * @dev: target device @@ -839,8 +853,9 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio, * it spans a stripe (or similar). Add a trace for that action. * **/ -static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, - dev_t dev, sector_t from) +static void blk_add_trace_remap(void *ignore, + struct request_queue *q, struct bio *bio, + dev_t dev, sector_t from) { struct blk_trace *bt = q->blk_trace; struct blk_io_trace_remap r; @@ -859,6 +874,7 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, /** * blk_add_trace_rq_remap - Add a trace for a request-remap operation + * @ignore: trace callback data parameter (not used) * @q: queue the io is for * @rq: the source request * @dev: target device @@ -869,7 +885,8 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, * Add a trace for that action. 
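Every blktrace probe in the hunks above gains a leading void * argument because tracepoint callbacks are now registered together with a private data pointer, which is handed back to the probe on every hit; blktrace itself passes NULL and calls the parameter "ignore". A minimal sketch of the new convention, using the block_plug tracepoint touched in this file (my_plug_count, my_probe_init/exit and the counter are hypothetical names, header includes elided; this only illustrates how a non-NULL data pointer would flow through, it is not part of the patch):

/* Hypothetical probe: the pointer supplied at registration time arrives
 * as the first argument, ahead of the tracepoint's own arguments. */
static atomic_t nr_plugs = ATOMIC_INIT(0);

static void my_plug_count(void *data, struct request_queue *q)
{
	atomic_t *counter = data;	/* same pointer passed to register below */

	atomic_inc(counter);
}

static int my_probe_init(void)
{
	/* register/unregister now take a (probe, data) pair */
	return register_trace_block_plug(my_plug_count, &nr_plugs);
}

static void my_probe_exit(void)
{
	unregister_trace_block_plug(my_plug_count, &nr_plugs);
	tracepoint_synchronize_unregister();	/* as blk_unregister_tracepoints() does */
}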
* **/ -static void blk_add_trace_rq_remap(struct request_queue *q, +static void blk_add_trace_rq_remap(void *ignore, + struct request_queue *q, struct request *rq, dev_t dev, sector_t from) { @@ -921,64 +938,64 @@ static void blk_register_tracepoints(void) { int ret; - ret = register_trace_block_rq_abort(blk_add_trace_rq_abort); + ret = register_trace_block_rq_abort(blk_add_trace_rq_abort, NULL); WARN_ON(ret); - ret = register_trace_block_rq_insert(blk_add_trace_rq_insert); + ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); WARN_ON(ret); - ret = register_trace_block_rq_issue(blk_add_trace_rq_issue); + ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); WARN_ON(ret); - ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue); + ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); WARN_ON(ret); - ret = register_trace_block_rq_complete(blk_add_trace_rq_complete); + ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); WARN_ON(ret); - ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce); + ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL); WARN_ON(ret); - ret = register_trace_block_bio_complete(blk_add_trace_bio_complete); + ret = register_trace_block_bio_complete(blk_add_trace_bio_complete, NULL); WARN_ON(ret); - ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); + ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL); WARN_ON(ret); - ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); + ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); WARN_ON(ret); - ret = register_trace_block_bio_queue(blk_add_trace_bio_queue); + ret = register_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); WARN_ON(ret); - ret = register_trace_block_getrq(blk_add_trace_getrq); + ret = register_trace_block_getrq(blk_add_trace_getrq, NULL); WARN_ON(ret); - ret = register_trace_block_sleeprq(blk_add_trace_sleeprq); + ret = register_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); WARN_ON(ret); - ret = register_trace_block_plug(blk_add_trace_plug); + ret = register_trace_block_plug(blk_add_trace_plug, NULL); WARN_ON(ret); - ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer); + ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); WARN_ON(ret); - ret = register_trace_block_unplug_io(blk_add_trace_unplug_io); + ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); WARN_ON(ret); - ret = register_trace_block_split(blk_add_trace_split); + ret = register_trace_block_split(blk_add_trace_split, NULL); WARN_ON(ret); - ret = register_trace_block_remap(blk_add_trace_remap); + ret = register_trace_block_remap(blk_add_trace_remap, NULL); WARN_ON(ret); - ret = register_trace_block_rq_remap(blk_add_trace_rq_remap); + ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); WARN_ON(ret); } static void blk_unregister_tracepoints(void) { - unregister_trace_block_rq_remap(blk_add_trace_rq_remap); - unregister_trace_block_remap(blk_add_trace_remap); - unregister_trace_block_split(blk_add_trace_split); - unregister_trace_block_unplug_io(blk_add_trace_unplug_io); - unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer); - unregister_trace_block_plug(blk_add_trace_plug); - unregister_trace_block_sleeprq(blk_add_trace_sleeprq); - unregister_trace_block_getrq(blk_add_trace_getrq); - unregister_trace_block_bio_queue(blk_add_trace_bio_queue); - 
unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); - unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); - unregister_trace_block_bio_complete(blk_add_trace_bio_complete); - unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce); - unregister_trace_block_rq_complete(blk_add_trace_rq_complete); - unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue); - unregister_trace_block_rq_issue(blk_add_trace_rq_issue); - unregister_trace_block_rq_insert(blk_add_trace_rq_insert); - unregister_trace_block_rq_abort(blk_add_trace_rq_abort); + unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); + unregister_trace_block_remap(blk_add_trace_remap, NULL); + unregister_trace_block_split(blk_add_trace_split, NULL); + unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); + unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); + unregister_trace_block_plug(blk_add_trace_plug, NULL); + unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); + unregister_trace_block_getrq(blk_add_trace_getrq, NULL); + unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); + unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); + unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL); + unregister_trace_block_bio_complete(blk_add_trace_bio_complete, NULL); + unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL); + unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); + unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); + unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); + unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); + unregister_trace_block_rq_abort(blk_add_trace_rq_abort, NULL); tracepoint_synchronize_unregister(); } @@ -1321,7 +1338,7 @@ out: } static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, - int flags) + int flags, struct trace_event *event) { return print_one_line(iter, false); } @@ -1343,7 +1360,8 @@ static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) } static enum print_line_t -blk_trace_event_print_binary(struct trace_iterator *iter, int flags) +blk_trace_event_print_binary(struct trace_iterator *iter, int flags, + struct trace_event *event) { return blk_trace_synthesize_old_trace(iter) ? 
TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; @@ -1381,12 +1399,16 @@ static struct tracer blk_tracer __read_mostly = { .set_flag = blk_tracer_set_flag, }; -static struct trace_event trace_blk_event = { - .type = TRACE_BLK, +static struct trace_event_functions trace_blk_event_funcs = { .trace = blk_trace_event_print, .binary = blk_trace_event_print_binary, }; +static struct trace_event trace_blk_event = { + .type = TRACE_BLK, + .funcs = &trace_blk_event_funcs, +}; + static int __init init_blk_tracer(void) { if (!register_ftrace_event(&trace_blk_event)) { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2404b59b3097..0d88ce9b9fb8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -264,6 +264,7 @@ struct ftrace_profile { unsigned long counter; #ifdef CONFIG_FUNCTION_GRAPH_TRACER unsigned long long time; + unsigned long long time_squared; #endif }; @@ -366,9 +367,9 @@ static int function_stat_headers(struct seq_file *m) { #ifdef CONFIG_FUNCTION_GRAPH_TRACER seq_printf(m, " Function " - "Hit Time Avg\n" + "Hit Time Avg s^2\n" " -------- " - "--- ---- ---\n"); + "--- ---- --- ---\n"); #else seq_printf(m, " Function Hit\n" " -------- ---\n"); @@ -384,6 +385,7 @@ static int function_stat_show(struct seq_file *m, void *v) static DEFINE_MUTEX(mutex); static struct trace_seq s; unsigned long long avg; + unsigned long long stddev; #endif kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); @@ -394,11 +396,25 @@ static int function_stat_show(struct seq_file *m, void *v) avg = rec->time; do_div(avg, rec->counter); + /* Sample standard deviation (s^2) */ + if (rec->counter <= 1) + stddev = 0; + else { + stddev = rec->time_squared - rec->counter * avg * avg; + /* + * Divide only 1000 for ns^2 -> us^2 conversion. + * trace_print_graph_duration will divide 1000 again. + */ + do_div(stddev, (rec->counter - 1) * 1000); + } + mutex_lock(&mutex); trace_seq_init(&s); trace_print_graph_duration(rec->time, &s); trace_seq_puts(&s, " "); trace_print_graph_duration(avg, &s); + trace_seq_puts(&s, " "); + trace_print_graph_duration(stddev, &s); trace_print_seq(m, &s); mutex_unlock(&mutex); #endif @@ -650,6 +666,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) if (!stat->hash || !ftrace_profile_enabled) goto out; + /* If the calltime was zero'd ignore it */ + if (!trace->calltime) + goto out; + calltime = trace->rettime - trace->calltime; if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) { @@ -668,8 +688,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) } rec = ftrace_find_profiled_func(stat, trace->func); - if (rec) + if (rec) { rec->time += calltime; + rec->time_squared += calltime * calltime; + } out: local_irq_restore(flags); @@ -1861,7 +1883,6 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip) struct hlist_head *hhd; struct hlist_node *n; unsigned long key; - int resched; key = hash_long(ip, FTRACE_HASH_BITS); @@ -1875,12 +1896,12 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip) * period. This syncs the hash iteration and freeing of items * on the hash. rcu_read_lock is too dangerous here. 
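The new time_squared accumulator lets the function profiler print a spread next to the average: with n = rec->counter, sum = rec->time and sumsq = rec->time_squared, the sample variance is s^2 = (sumsq - n*avg^2) / (n - 1), which is what the stddev variable above actually holds (scaled by 1/1000 so that trace_print_graph_duration(), which divides by 1000 once more, ends up printing us^2). A condensed sketch of the same arithmetic, reusing the field names from struct ftrace_profile above, not a verbatim copy of function_stat_show():

unsigned long      n      = rec->counter;
unsigned long long avg    = rec->time;		/* sum of durations, in ns */
unsigned long long stddev = 0;

do_div(avg, n);					/* avg = sum / n */

if (n > 1) {
	/* sample variance: (sum of squares - n * avg^2) / (n - 1) */
	stddev = rec->time_squared - n * avg * avg;
	/* divide by 1000 only once here; trace_print_graph_duration()
	 * divides by 1000 again when formatting */
	do_div(stddev, (n - 1) * 1000);
}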
*/ - resched = ftrace_preempt_disable(); + preempt_disable_notrace(); hlist_for_each_entry_rcu(entry, n, hhd, node) { if (entry->ip == ip) entry->ops->func(ip, parent_ip, &entry->data); } - ftrace_preempt_enable(resched); + preempt_enable_notrace(); } static struct ftrace_ops trace_probe_ops __read_mostly = @@ -3212,8 +3233,8 @@ free: } static void -ftrace_graph_probe_sched_switch(struct rq *__rq, struct task_struct *prev, - struct task_struct *next) +ftrace_graph_probe_sched_switch(void *ignore, + struct task_struct *prev, struct task_struct *next) { unsigned long long timestamp; int index; @@ -3267,7 +3288,7 @@ static int start_graph_tracing(void) } while (ret == -EAGAIN); if (!ret) { - ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch); + ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); if (ret) pr_info("ftrace_graph: Couldn't activate tracepoint" " probe to kernel_sched_switch\n"); @@ -3339,11 +3360,11 @@ void unregister_ftrace_graph(void) goto out; ftrace_graph_active--; - unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(FTRACE_STOP_FUNC_RET); unregister_pm_notifier(&ftrace_suspend_notifier); + unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); out: mutex_unlock(&ftrace_lock); diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c deleted file mode 100644 index a91da69f153a..000000000000 --- a/kernel/trace/kmemtrace.c +++ /dev/null @@ -1,511 +0,0 @@ -/* - * Memory allocator tracing - * - * Copyright (C) 2008 Eduard - Gabriel Munteanu - * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi> - * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com> - */ - -#include <linux/tracepoint.h> -#include <linux/seq_file.h> -#include <linux/debugfs.h> -#include <linux/dcache.h> -#include <linux/fs.h> - -#include <linux/kmemtrace.h> - -#include "trace_output.h" -#include "trace.h" - -/* Select an alternative, minimalistic output than the original one */ -#define TRACE_KMEM_OPT_MINIMAL 0x1 - -static struct tracer_opt kmem_opts[] = { - /* Default disable the minimalistic output */ - { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) }, - { } -}; - -static struct tracer_flags kmem_tracer_flags = { - .val = 0, - .opts = kmem_opts -}; - -static struct trace_array *kmemtrace_array; - -/* Trace allocations */ -static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node) -{ - struct ftrace_event_call *call = &event_kmem_alloc; - struct trace_array *tr = kmemtrace_array; - struct kmemtrace_alloc_entry *entry; - struct ring_buffer_event *event; - - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); - if (!event) - return; - - entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - - entry->ent.type = TRACE_KMEM_ALLOC; - entry->type_id = type_id; - entry->call_site = call_site; - entry->ptr = ptr; - entry->bytes_req = bytes_req; - entry->bytes_alloc = bytes_alloc; - entry->gfp_flags = gfp_flags; - entry->node = node; - - if (!filter_check_discard(call, entry, tr->buffer, event)) - ring_buffer_unlock_commit(tr->buffer, event); - - trace_wake_up(); -} - -static inline void kmemtrace_free(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr) -{ - struct ftrace_event_call *call = &event_kmem_free; - 
struct trace_array *tr = kmemtrace_array; - struct kmemtrace_free_entry *entry; - struct ring_buffer_event *event; - - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); - if (!event) - return; - entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - - entry->ent.type = TRACE_KMEM_FREE; - entry->type_id = type_id; - entry->call_site = call_site; - entry->ptr = ptr; - - if (!filter_check_discard(call, entry, tr->buffer, event)) - ring_buffer_unlock_commit(tr->buffer, event); - - trace_wake_up(); -} - -static void kmemtrace_kmalloc(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags) -{ - kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr, - bytes_req, bytes_alloc, gfp_flags, -1); -} - -static void kmemtrace_kmem_cache_alloc(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags) -{ - kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr, - bytes_req, bytes_alloc, gfp_flags, -1); -} - -static void kmemtrace_kmalloc_node(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node) -{ - kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr, - bytes_req, bytes_alloc, gfp_flags, node); -} - -static void kmemtrace_kmem_cache_alloc_node(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node) -{ - kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr, - bytes_req, bytes_alloc, gfp_flags, node); -} - -static void kmemtrace_kfree(unsigned long call_site, const void *ptr) -{ - kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr); -} - -static void kmemtrace_kmem_cache_free(unsigned long call_site, const void *ptr) -{ - kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr); -} - -static int kmemtrace_start_probes(void) -{ - int err; - - err = register_trace_kmalloc(kmemtrace_kmalloc); - if (err) - return err; - err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc); - if (err) - return err; - err = register_trace_kmalloc_node(kmemtrace_kmalloc_node); - if (err) - return err; - err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node); - if (err) - return err; - err = register_trace_kfree(kmemtrace_kfree); - if (err) - return err; - err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free); - - return err; -} - -static void kmemtrace_stop_probes(void) -{ - unregister_trace_kmalloc(kmemtrace_kmalloc); - unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc); - unregister_trace_kmalloc_node(kmemtrace_kmalloc_node); - unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node); - unregister_trace_kfree(kmemtrace_kfree); - unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free); -} - -static int kmem_trace_init(struct trace_array *tr) -{ - kmemtrace_array = tr; - - tracing_reset_online_cpus(tr); - - kmemtrace_start_probes(); - - return 0; -} - -static void kmem_trace_reset(struct trace_array *tr) -{ - kmemtrace_stop_probes(); -} - -static void kmemtrace_headers(struct seq_file *s) -{ - /* Don't need headers for the original kmemtrace output */ - if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)) - return; - - seq_printf(s, "#\n"); - seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS " - " POINTER NODE CALLER\n"); - seq_printf(s, "# FREE | | | | " - " | | | |\n"); - seq_printf(s, "# |\n\n"); -} - -/* - * The following functions give the original output from kmemtrace, - * plus the origin 
CPU, since reordering occurs in-kernel now. - */ - -#define KMEMTRACE_USER_ALLOC 0 -#define KMEMTRACE_USER_FREE 1 - -struct kmemtrace_user_event { - u8 event_id; - u8 type_id; - u16 event_size; - u32 cpu; - u64 timestamp; - unsigned long call_site; - unsigned long ptr; -}; - -struct kmemtrace_user_event_alloc { - size_t bytes_req; - size_t bytes_alloc; - unsigned gfp_flags; - int node; -}; - -static enum print_line_t -kmemtrace_print_alloc(struct trace_iterator *iter, int flags) -{ - struct trace_seq *s = &iter->seq; - struct kmemtrace_alloc_entry *entry; - int ret; - - trace_assign_type(entry, iter->ent); - - ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu " - "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n", - entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr, - (unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc, - (unsigned long)entry->gfp_flags, entry->node); - - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - return TRACE_TYPE_HANDLED; -} - -static enum print_line_t -kmemtrace_print_free(struct trace_iterator *iter, int flags) -{ - struct trace_seq *s = &iter->seq; - struct kmemtrace_free_entry *entry; - int ret; - - trace_assign_type(entry, iter->ent); - - ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n", - entry->type_id, (void *)entry->call_site, - (unsigned long)entry->ptr); - - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - return TRACE_TYPE_HANDLED; -} - -static enum print_line_t -kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags) -{ - struct trace_seq *s = &iter->seq; - struct kmemtrace_alloc_entry *entry; - struct kmemtrace_user_event *ev; - struct kmemtrace_user_event_alloc *ev_alloc; - - trace_assign_type(entry, iter->ent); - - ev = trace_seq_reserve(s, sizeof(*ev)); - if (!ev) - return TRACE_TYPE_PARTIAL_LINE; - - ev->event_id = KMEMTRACE_USER_ALLOC; - ev->type_id = entry->type_id; - ev->event_size = sizeof(*ev) + sizeof(*ev_alloc); - ev->cpu = iter->cpu; - ev->timestamp = iter->ts; - ev->call_site = entry->call_site; - ev->ptr = (unsigned long)entry->ptr; - - ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc)); - if (!ev_alloc) - return TRACE_TYPE_PARTIAL_LINE; - - ev_alloc->bytes_req = entry->bytes_req; - ev_alloc->bytes_alloc = entry->bytes_alloc; - ev_alloc->gfp_flags = entry->gfp_flags; - ev_alloc->node = entry->node; - - return TRACE_TYPE_HANDLED; -} - -static enum print_line_t -kmemtrace_print_free_user(struct trace_iterator *iter, int flags) -{ - struct trace_seq *s = &iter->seq; - struct kmemtrace_free_entry *entry; - struct kmemtrace_user_event *ev; - - trace_assign_type(entry, iter->ent); - - ev = trace_seq_reserve(s, sizeof(*ev)); - if (!ev) - return TRACE_TYPE_PARTIAL_LINE; - - ev->event_id = KMEMTRACE_USER_FREE; - ev->type_id = entry->type_id; - ev->event_size = sizeof(*ev); - ev->cpu = iter->cpu; - ev->timestamp = iter->ts; - ev->call_site = entry->call_site; - ev->ptr = (unsigned long)entry->ptr; - - return TRACE_TYPE_HANDLED; -} - -/* The two other following provide a more minimalistic output */ -static enum print_line_t -kmemtrace_print_alloc_compress(struct trace_iterator *iter) -{ - struct kmemtrace_alloc_entry *entry; - struct trace_seq *s = &iter->seq; - int ret; - - trace_assign_type(entry, iter->ent); - - /* Alloc entry */ - ret = trace_seq_printf(s, " + "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - /* Type */ - switch (entry->type_id) { - case KMEMTRACE_TYPE_KMALLOC: - ret = trace_seq_printf(s, "K "); - break; - case KMEMTRACE_TYPE_CACHE: - ret = 
trace_seq_printf(s, "C "); - break; - case KMEMTRACE_TYPE_PAGES: - ret = trace_seq_printf(s, "P "); - break; - default: - ret = trace_seq_printf(s, "? "); - } - - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - /* Requested */ - ret = trace_seq_printf(s, "%4zu ", entry->bytes_req); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - /* Allocated */ - ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - /* Flags - * TODO: would be better to see the name of the GFP flag names - */ - ret = trace_seq_printf(s, "%08x ", entry->gfp_flags); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - /* Pointer to allocated */ - ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - /* Node and call site*/ - ret = trace_seq_printf(s, "%4d %pf\n", entry->node, - (void *)entry->call_site); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; -} - -static enum print_line_t -kmemtrace_print_free_compress(struct trace_iterator *iter) -{ - struct kmemtrace_free_entry *entry; - struct trace_seq *s = &iter->seq; - int ret; - - trace_assign_type(entry, iter->ent); - - /* Free entry */ - ret = trace_seq_printf(s, " - "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - /* Type */ - switch (entry->type_id) { - case KMEMTRACE_TYPE_KMALLOC: - ret = trace_seq_printf(s, "K "); - break; - case KMEMTRACE_TYPE_CACHE: - ret = trace_seq_printf(s, "C "); - break; - case KMEMTRACE_TYPE_PAGES: - ret = trace_seq_printf(s, "P "); - break; - default: - ret = trace_seq_printf(s, "? "); - } - - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - /* Skip requested/allocated/flags */ - ret = trace_seq_printf(s, " "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - /* Pointer to allocated */ - ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - /* Skip node and print call site*/ - ret = trace_seq_printf(s, " %pf\n", (void *)entry->call_site); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; -} - -static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter) -{ - struct trace_entry *entry = iter->ent; - - if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)) - return TRACE_TYPE_UNHANDLED; - - switch (entry->type) { - case TRACE_KMEM_ALLOC: - return kmemtrace_print_alloc_compress(iter); - case TRACE_KMEM_FREE: - return kmemtrace_print_free_compress(iter); - default: - return TRACE_TYPE_UNHANDLED; - } -} - -static struct trace_event kmem_trace_alloc = { - .type = TRACE_KMEM_ALLOC, - .trace = kmemtrace_print_alloc, - .binary = kmemtrace_print_alloc_user, -}; - -static struct trace_event kmem_trace_free = { - .type = TRACE_KMEM_FREE, - .trace = kmemtrace_print_free, - .binary = kmemtrace_print_free_user, -}; - -static struct tracer kmem_tracer __read_mostly = { - .name = "kmemtrace", - .init = kmem_trace_init, - .reset = kmem_trace_reset, - .print_line = kmemtrace_print_line, - .print_header = kmemtrace_headers, - .flags = &kmem_tracer_flags -}; - -void kmemtrace_init(void) -{ - /* earliest opportunity to start kmem tracing */ -} - -static int __init init_kmem_tracer(void) -{ - if (!register_ftrace_event(&kmem_trace_alloc)) { - pr_warning("Warning: could not register kmem events\n"); - return 1; - } - - if (!register_ftrace_event(&kmem_trace_free)) { - pr_warning("Warning: could not register kmem events\n"); - return 1; - } - - if (register_tracer(&kmem_tracer) != 0) { - pr_warning("Warning: could not register the 
kmem tracer\n"); - return 1; - } - - return 0; -} -device_initcall(init_kmem_tracer); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 41ca394feb22..3632ce87674f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -319,6 +319,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); #define TS_MASK ((1ULL << TS_SHIFT) - 1) #define TS_DELTA_TEST (~TS_MASK) +/* Flag when events were overwritten */ +#define RB_MISSED_EVENTS (1 << 31) +/* Missed count stored at end */ +#define RB_MISSED_STORED (1 << 30) + struct buffer_data_page { u64 time_stamp; /* page time stamp */ local_t commit; /* write committed index */ @@ -338,6 +343,7 @@ struct buffer_page { local_t write; /* index for next write */ unsigned read; /* index for next read */ local_t entries; /* entries on this page */ + unsigned long real_end; /* real end of data */ struct buffer_data_page *page; /* Actual data page */ }; @@ -417,6 +423,12 @@ int ring_buffer_print_page_header(struct trace_seq *s) (unsigned int)sizeof(field.commit), (unsigned int)is_signed_type(long)); + ret = trace_seq_printf(s, "\tfield: int overwrite;\t" + "offset:%u;\tsize:%u;\tsigned:%u;\n", + (unsigned int)offsetof(typeof(field), commit), + 1, + (unsigned int)is_signed_type(long)); + ret = trace_seq_printf(s, "\tfield: char data;\t" "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), data), @@ -431,6 +443,7 @@ int ring_buffer_print_page_header(struct trace_seq *s) */ struct ring_buffer_per_cpu { int cpu; + atomic_t record_disabled; struct ring_buffer *buffer; spinlock_t reader_lock; /* serialize readers */ arch_spinlock_t lock; @@ -440,6 +453,8 @@ struct ring_buffer_per_cpu { struct buffer_page *tail_page; /* write to tail */ struct buffer_page *commit_page; /* committed pages */ struct buffer_page *reader_page; + unsigned long lost_events; + unsigned long last_overrun; local_t commit_overrun; local_t overrun; local_t entries; @@ -448,7 +463,6 @@ struct ring_buffer_per_cpu { unsigned long read; u64 write_stamp; u64 read_stamp; - atomic_t record_disabled; }; struct ring_buffer { @@ -1754,6 +1768,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, * must fill the old tail_page with padding. */ if (tail >= BUF_PAGE_SIZE) { + /* + * If the page was filled, then we still need + * to update the real_end. Reset it to zero + * and the reader will ignore it. + */ + if (tail == BUF_PAGE_SIZE) + tail_page->real_end = 0; + local_sub(length, &tail_page->write); return; } @@ -1762,6 +1784,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, kmemcheck_annotate_bitfield(event, bitfield); /* + * Save the original length to the meta data. + * This will be used by the reader to add lost event + * counter. + */ + tail_page->real_end = tail; + + /* * If this event is bigger than the minimum size, then * we need to be careful that we don't subtract the * write counter enough to allow another writer to slip @@ -1979,17 +2008,13 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, u64 *delta) { struct ring_buffer_event *event; - static int once; int ret; - if (unlikely(*delta > (1ULL << 59) && !once++)) { - printk(KERN_WARNING "Delta way too big! %llu" - " ts=%llu write stamp = %llu\n", - (unsigned long long)*delta, - (unsigned long long)*ts, - (unsigned long long)cpu_buffer->write_stamp); - WARN_ON(1); - } + WARN_ONCE(*delta > (1ULL << 59), + KERN_WARNING "Delta way too big! 
%llu ts=%llu write stamp = %llu\n", + (unsigned long long)*delta, + (unsigned long long)*ts, + (unsigned long long)cpu_buffer->write_stamp); /* * The delta is too big, we to add a @@ -2217,8 +2242,6 @@ static void trace_recursive_unlock(void) #endif -static DEFINE_PER_CPU(int, rb_need_resched); - /** * ring_buffer_lock_reserve - reserve a part of the buffer * @buffer: the ring buffer to reserve from @@ -2239,13 +2262,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; - int cpu, resched; + int cpu; if (ring_buffer_flags != RB_BUFFERS_ON) return NULL; /* If we are tracing schedule, we don't want to recurse */ - resched = ftrace_preempt_disable(); + preempt_disable_notrace(); if (atomic_read(&buffer->record_disabled)) goto out_nocheck; @@ -2270,21 +2293,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) if (!event) goto out; - /* - * Need to store resched state on this cpu. - * Only the first needs to. - */ - - if (preempt_count() == 1) - per_cpu(rb_need_resched, cpu) = resched; - return event; out: trace_recursive_unlock(); out_nocheck: - ftrace_preempt_enable(resched); + preempt_enable_notrace(); return NULL; } EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); @@ -2330,13 +2345,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, trace_recursive_unlock(); - /* - * Only the last preempt count needs to restore preemption. - */ - if (preempt_count() == 1) - ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); - else - preempt_enable_no_resched_notrace(); + preempt_enable_notrace(); return 0; } @@ -2444,13 +2453,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, trace_recursive_unlock(); - /* - * Only the last preempt count needs to restore preemption. - */ - if (preempt_count() == 1) - ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); - else - preempt_enable_no_resched_notrace(); + preempt_enable_notrace(); } EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); @@ -2476,12 +2479,12 @@ int ring_buffer_write(struct ring_buffer *buffer, struct ring_buffer_event *event; void *body; int ret = -EBUSY; - int cpu, resched; + int cpu; if (ring_buffer_flags != RB_BUFFERS_ON) return -EBUSY; - resched = ftrace_preempt_disable(); + preempt_disable_notrace(); if (atomic_read(&buffer->record_disabled)) goto out; @@ -2511,7 +2514,7 @@ int ring_buffer_write(struct ring_buffer *buffer, ret = 0; out: - ftrace_preempt_enable(resched); + preempt_enable_notrace(); return ret; } @@ -2838,6 +2841,7 @@ static struct buffer_page * rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) { struct buffer_page *reader = NULL; + unsigned long overwrite; unsigned long flags; int nr_loops = 0; int ret; @@ -2879,6 +2883,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) local_set(&cpu_buffer->reader_page->write, 0); local_set(&cpu_buffer->reader_page->entries, 0); local_set(&cpu_buffer->reader_page->page->commit, 0); + cpu_buffer->reader_page->real_end = 0; spin: /* @@ -2899,6 +2904,18 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); /* + * We want to make sure we read the overruns after we set up our + * pointers to the next object. The writer side does a + * cmpxchg to cross pages which acts as the mb on the writer + * side. 
Note, the reader will constantly fail the swap + * while the writer is updating the pointers, so this + * guarantees that the overwrite recorded here is the one we + * want to compare with the last_overrun. + */ + smp_mb(); + overwrite = local_read(&(cpu_buffer->overrun)); + + /* * Here's the tricky part. * * We need to move the pointer past the header page. @@ -2929,6 +2946,11 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page = reader; rb_reset_reader_page(cpu_buffer); + if (overwrite != cpu_buffer->last_overrun) { + cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun; + cpu_buffer->last_overrun = overwrite; + } + goto again; out: @@ -3005,8 +3027,14 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) rb_advance_iter(iter); } +static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer) +{ + return cpu_buffer->lost_events; +} + static struct ring_buffer_event * -rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) +rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, + unsigned long *lost_events) { struct ring_buffer_event *event; struct buffer_page *reader; @@ -3058,6 +3086,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) ring_buffer_normalize_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu, ts); } + if (lost_events) + *lost_events = rb_lost_events(cpu_buffer); return event; default: @@ -3168,12 +3198,14 @@ static inline int rb_ok_to_lock(void) * @buffer: The ring buffer to read * @cpu: The cpu to peak at * @ts: The timestamp counter of this event. + * @lost_events: a variable to store if events were lost (may be NULL) * * This will return the event that will be read next, but does * not consume the data. */ struct ring_buffer_event * -ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) +ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, + unsigned long *lost_events) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct ring_buffer_event *event; @@ -3188,7 +3220,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) local_irq_save(flags); if (dolock) spin_lock(&cpu_buffer->reader_lock); - event = rb_buffer_peek(cpu_buffer, ts); + event = rb_buffer_peek(cpu_buffer, ts, lost_events); if (event && event->type_len == RINGBUF_TYPE_PADDING) rb_advance_reader(cpu_buffer); if (dolock) @@ -3230,13 +3262,17 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) /** * ring_buffer_consume - return an event and consume it * @buffer: The ring buffer to get the next event from + * @cpu: the cpu to read the buffer from + * @ts: a variable to store the timestamp (may be NULL) + * @lost_events: a variable to store if events were lost (may be NULL) * * Returns the next event in the ring buffer, and that event is consumed. * Meaning, that sequential reads will keep returning a different event, * and eventually empty the ring buffer if the producer is slower. 
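ring_buffer_peek() and ring_buffer_consume() now take an extra lost_events out-parameter so a reader can learn how many events were overwritten since its previous read; passing NULL keeps the old behaviour (the benchmark hunk later in this patch does exactly that). A minimal consumer sketch, assuming a drain_one() helper that does not exist in the patch, mirroring how the trace.c hunks feed the count into iter->lost_events and report it:

static void drain_one(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	unsigned long lost_events;
	u64 ts;

	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
	if (!event)
		return;			/* nothing readable on this cpu */

	if (lost_events)		/* overwritten since the last read */
		pr_info("CPU:%d [LOST %lu EVENTS]\n", cpu, lost_events);

	/* ... decode ring_buffer_event_data(event) ... */
}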
*/ struct ring_buffer_event * -ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) +ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, + unsigned long *lost_events) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event = NULL; @@ -3257,9 +3293,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) if (dolock) spin_lock(&cpu_buffer->reader_lock); - event = rb_buffer_peek(cpu_buffer, ts); - if (event) + event = rb_buffer_peek(cpu_buffer, ts, lost_events); + if (event) { + cpu_buffer->lost_events = 0; rb_advance_reader(cpu_buffer); + } if (dolock) spin_unlock(&cpu_buffer->reader_lock); @@ -3276,23 +3314,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) EXPORT_SYMBOL_GPL(ring_buffer_consume); /** - * ring_buffer_read_start - start a non consuming read of the buffer + * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer * @buffer: The ring buffer to read from * @cpu: The cpu buffer to iterate over * - * This starts up an iteration through the buffer. It also disables - * the recording to the buffer until the reading is finished. - * This prevents the reading from being corrupted. This is not - * a consuming read, so a producer is not expected. + * This performs the initial preparations necessary to iterate + * through the buffer. Memory is allocated, buffer recording + * is disabled, and the iterator pointer is returned to the caller. * - * Must be paired with ring_buffer_finish. + * Disabling buffer recordng prevents the reading from being + * corrupted. This is not a consuming read, so a producer is not + * expected. + * + * After a sequence of ring_buffer_read_prepare calls, the user is + * expected to make at least one call to ring_buffer_prepare_sync. + * Afterwards, ring_buffer_read_start is invoked to get things going + * for real. + * + * This overall must be paired with ring_buffer_finish. */ struct ring_buffer_iter * -ring_buffer_read_start(struct ring_buffer *buffer, int cpu) +ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_iter *iter; - unsigned long flags; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return NULL; @@ -3306,15 +3351,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu) iter->cpu_buffer = cpu_buffer; atomic_inc(&cpu_buffer->record_disabled); + + return iter; +} +EXPORT_SYMBOL_GPL(ring_buffer_read_prepare); + +/** + * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls + * + * All previously invoked ring_buffer_read_prepare calls to prepare + * iterators will be synchronized. Afterwards, read_buffer_read_start + * calls on those iterators are allowed. + */ +void +ring_buffer_read_prepare_sync(void) +{ synchronize_sched(); +} +EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); + +/** + * ring_buffer_read_start - start a non consuming read of the buffer + * @iter: The iterator returned by ring_buffer_read_prepare + * + * This finalizes the startup of an iteration through the buffer. + * The iterator comes from a call to ring_buffer_read_prepare and + * an intervening ring_buffer_read_prepare_sync must have been + * performed. + * + * Must be paired with ring_buffer_finish. 
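Splitting the old ring_buffer_read_start() into prepare / prepare_sync / start means a caller that opens iterators on every CPU pays for a single synchronize_sched() instead of one grace period per CPU. The sequence __tracing_open() switches to further down in this patch looks roughly like this condensed fragment (iters[] stands in for iter->buffer_iter[]):

/* Phase 1: allocate iterators and disable recording on each cpu buffer */
for_each_tracing_cpu(cpu)
	iters[cpu] = ring_buffer_read_prepare(buffer, cpu);

/* Phase 2: one RCU-sched grace period covers all of the above */
ring_buffer_read_prepare_sync();

/* Phase 3: take the per-cpu locks and reset each iterator for reading */
for_each_tracing_cpu(cpu)
	ring_buffer_read_start(iters[cpu]);

/* ... iterate, then pair the whole sequence with the finish call
 * noted in the comment above ... */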
+ */ +void +ring_buffer_read_start(struct ring_buffer_iter *iter) +{ + struct ring_buffer_per_cpu *cpu_buffer; + unsigned long flags; + + if (!iter) + return; + + cpu_buffer = iter->cpu_buffer; spin_lock_irqsave(&cpu_buffer->reader_lock, flags); arch_spin_lock(&cpu_buffer->lock); rb_iter_reset(iter); arch_spin_unlock(&cpu_buffer->lock); spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - - return iter; } EXPORT_SYMBOL_GPL(ring_buffer_read_start); @@ -3408,6 +3490,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->write_stamp = 0; cpu_buffer->read_stamp = 0; + cpu_buffer->lost_events = 0; + cpu_buffer->last_overrun = 0; + rb_head_page_activate(cpu_buffer); } @@ -3683,6 +3768,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, struct ring_buffer_event *event; struct buffer_data_page *bpage; struct buffer_page *reader; + unsigned long missed_events; unsigned long flags; unsigned int commit; unsigned int read; @@ -3719,6 +3805,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer, read = reader->read; commit = rb_page_commit(reader); + /* Check if any events were dropped */ + missed_events = cpu_buffer->lost_events; + /* * If this page has been partially read or * if len is not big enough to read the rest of the page or @@ -3779,9 +3868,42 @@ int ring_buffer_read_page(struct ring_buffer *buffer, local_set(&reader->entries, 0); reader->read = 0; *data_page = bpage; + + /* + * Use the real_end for the data size, + * This gives us a chance to store the lost events + * on the page. + */ + if (reader->real_end) + local_set(&bpage->commit, reader->real_end); } ret = read; + cpu_buffer->lost_events = 0; + + commit = local_read(&bpage->commit); + /* + * Set a flag in the commit field if we lost events + */ + if (missed_events) { + /* If there is room at the end of the page to save the + * missed events, then record it there. + */ + if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) { + memcpy(&bpage->data[commit], &missed_events, + sizeof(missed_events)); + local_add(RB_MISSED_STORED, &bpage->commit); + commit += sizeof(missed_events); + } + local_add(RB_MISSED_EVENTS, &bpage->commit); + } + + /* + * This page may be off to user land. Zero it out here. 
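On the consuming side of ring_buffer_read_page(), the two flag bits have to be masked off before the commit value can be used as a data length, and when RB_MISSED_STORED is set the overwritten-event count sits in the page immediately after the data. A hedged reader-side sketch (decode_read_page() is hypothetical; the flag macros are the ones defined near the top of this file, and the struct is the buffer_data_page shown above):

static void decode_read_page(struct buffer_data_page *bpage)
{
	unsigned long commit = local_read(&bpage->commit);
	unsigned long missed = 0;
	/* low bits: bytes of event data; bits 30/31: the RB_MISSED_* flags */
	unsigned long len = commit & ~(RB_MISSED_EVENTS | RB_MISSED_STORED);

	if (commit & RB_MISSED_EVENTS) {
		if (commit & RB_MISSED_STORED)
			/* the count was appended right after the data */
			memcpy(&missed, &bpage->data[len], sizeof(missed));
		pr_info("page dropped %lu events (0 = count didn't fit)\n",
			missed);
	}

	/* parse the first 'len' bytes of bpage->data as usual */
}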
+ */ + if (commit < BUF_PAGE_SIZE) + memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit); + out_unlock: spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index df74c7982255..302f8a614635 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -81,7 +81,7 @@ static enum event_status read_event(int cpu) int *entry; u64 ts; - event = ring_buffer_consume(buffer, cpu, &ts); + event = ring_buffer_consume(buffer, cpu, &ts, NULL); if (!event) return EVENT_DROPPED; @@ -113,7 +113,8 @@ static enum event_status read_page(int cpu) ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1); if (ret >= 0) { rpage = bpage; - commit = local_read(&rpage->commit); + /* The commit may have missed event flags set, clear them */ + commit = local_read(&rpage->commit) & 0xfffff; for (i = 0; i < commit && !kill_test; i += inc) { if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 44f916a04065..4b1122d0df37 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -117,9 +117,12 @@ static cpumask_var_t __read_mostly tracing_buffer_mask; * * It is default off, but you can enable it with either specifying * "ftrace_dump_on_oops" in the kernel command line, or setting - * /proc/sys/kernel/ftrace_dump_on_oops to true. + * /proc/sys/kernel/ftrace_dump_on_oops + * Set 1 if you want to dump buffers of all CPUs + * Set 2 if you want to dump the buffer of the CPU that triggered oops */ -int ftrace_dump_on_oops; + +enum ftrace_dump_mode ftrace_dump_on_oops; static int tracing_set_tracer(const char *buf); @@ -139,8 +142,17 @@ __setup("ftrace=", set_cmdline_ftrace); static int __init set_ftrace_dump_on_oops(char *str) { - ftrace_dump_on_oops = 1; - return 1; + if (*str++ != '=' || !*str) { + ftrace_dump_on_oops = DUMP_ALL; + return 1; + } + + if (!strcmp("orig_cpu", str)) { + ftrace_dump_on_oops = DUMP_ORIG; + return 1; + } + + return 0; } __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); @@ -332,7 +344,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | - TRACE_ITER_GRAPH_TIME; + TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD; static int trace_stop_count; static DEFINE_SPINLOCK(tracing_start_lock); @@ -416,6 +428,7 @@ static const char *trace_options[] = { "latency-format", "sleep-time", "graph-time", + "record-cmd", NULL }; @@ -647,6 +660,10 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) return; WARN_ON_ONCE(!irqs_disabled()); + if (!current_trace->use_max_tr) { + WARN_ON_ONCE(1); + return; + } arch_spin_lock(&ftrace_max_lock); tr->buffer = max_tr.buffer; @@ -673,6 +690,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) return; WARN_ON_ONCE(!irqs_disabled()); + if (!current_trace->use_max_tr) { + WARN_ON_ONCE(1); + return; + } + arch_spin_lock(&ftrace_max_lock); ftrace_disable_cpu(); @@ -717,7 +739,7 @@ __acquires(kernel_lock) return -1; } - if (strlen(type->name) > MAX_TRACER_SIZE) { + if (strlen(type->name) >= MAX_TRACER_SIZE) { pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); return -1; } @@ -1319,61 +1341,6 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags) #endif /* CONFIG_STACKTRACE */ -static void 
-ftrace_trace_special(void *__tr, - unsigned long arg1, unsigned long arg2, unsigned long arg3, - int pc) -{ - struct ftrace_event_call *call = &event_special; - struct ring_buffer_event *event; - struct trace_array *tr = __tr; - struct ring_buffer *buffer = tr->buffer; - struct special_entry *entry; - - event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL, - sizeof(*entry), 0, pc); - if (!event) - return; - entry = ring_buffer_event_data(event); - entry->arg1 = arg1; - entry->arg2 = arg2; - entry->arg3 = arg3; - - if (!filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(buffer, event, 0, pc); -} - -void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ - ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count()); -} - -void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ - struct trace_array *tr = &global_trace; - struct trace_array_cpu *data; - unsigned long flags; - int cpu; - int pc; - - if (tracing_disabled) - return; - - pc = preempt_count(); - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - - if (likely(atomic_inc_return(&data->disabled) == 1)) - ftrace_trace_special(tr, arg1, arg2, arg3, pc); - - atomic_dec(&data->disabled); - local_irq_restore(flags); -} - /** * trace_vbprintk - write binary msg to tracing buffer * @@ -1392,7 +1359,6 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) struct bprint_entry *entry; unsigned long flags; int disable; - int resched; int cpu, len = 0, size, pc; if (unlikely(tracing_selftest_running || tracing_disabled)) @@ -1402,7 +1368,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) pause_graph_tracing(); pc = preempt_count(); - resched = ftrace_preempt_disable(); + preempt_disable_notrace(); cpu = raw_smp_processor_id(); data = tr->data[cpu]; @@ -1440,7 +1406,7 @@ out_unlock: out: atomic_dec_return(&data->disabled); - ftrace_preempt_enable(resched); + preempt_enable_notrace(); unpause_graph_tracing(); return len; @@ -1545,7 +1511,8 @@ static void trace_iterator_increment(struct trace_iterator *iter) } static struct trace_entry * -peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts) +peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, + unsigned long *lost_events) { struct ring_buffer_event *event; struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu]; @@ -1556,7 +1523,8 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts) if (buf_iter) event = ring_buffer_iter_peek(buf_iter, ts); else - event = ring_buffer_peek(iter->tr->buffer, cpu, ts); + event = ring_buffer_peek(iter->tr->buffer, cpu, ts, + lost_events); ftrace_enable_cpu(); @@ -1564,10 +1532,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts) } static struct trace_entry * -__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) +__find_next_entry(struct trace_iterator *iter, int *ent_cpu, + unsigned long *missing_events, u64 *ent_ts) { struct ring_buffer *buffer = iter->tr->buffer; struct trace_entry *ent, *next = NULL; + unsigned long lost_events = 0, next_lost = 0; int cpu_file = iter->cpu_file; u64 next_ts = 0, ts; int next_cpu = -1; @@ -1580,7 +1550,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) if (cpu_file > TRACE_PIPE_ALL_CPU) { if (ring_buffer_empty_cpu(buffer, cpu_file)) return NULL; - ent = peek_next_entry(iter, cpu_file, ent_ts); + ent = peek_next_entry(iter, cpu_file, ent_ts, 
missing_events); if (ent_cpu) *ent_cpu = cpu_file; @@ -1592,7 +1562,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) if (ring_buffer_empty_cpu(buffer, cpu)) continue; - ent = peek_next_entry(iter, cpu, &ts); + ent = peek_next_entry(iter, cpu, &ts, &lost_events); /* * Pick the entry with the smallest timestamp: @@ -1601,6 +1571,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) next = ent; next_cpu = cpu; next_ts = ts; + next_lost = lost_events; } } @@ -1610,6 +1581,9 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) if (ent_ts) *ent_ts = next_ts; + if (missing_events) + *missing_events = next_lost; + return next; } @@ -1617,13 +1591,14 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) { - return __find_next_entry(iter, ent_cpu, ent_ts); + return __find_next_entry(iter, ent_cpu, NULL, ent_ts); } /* Find the next real entry, and increment the iterator to the next entry */ static void *find_next_entry_inc(struct trace_iterator *iter) { - iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts); + iter->ent = __find_next_entry(iter, &iter->cpu, + &iter->lost_events, &iter->ts); if (iter->ent) trace_iterator_increment(iter); @@ -1635,7 +1610,8 @@ static void trace_consume(struct trace_iterator *iter) { /* Don't allow ftrace to trace into the ring buffers */ ftrace_disable_cpu(); - ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts); + ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts, + &iter->lost_events); ftrace_enable_cpu(); } @@ -1786,7 +1762,7 @@ static void print_func_help_header(struct seq_file *m) } -static void +void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); @@ -1914,7 +1890,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) } if (event) - return event->trace(iter, sym_flags); + return event->funcs->trace(iter, sym_flags, event); if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) goto partial; @@ -1940,7 +1916,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) event = ftrace_find_event(entry->type); if (event) - return event->raw(iter, 0); + return event->funcs->raw(iter, 0, event); if (!trace_seq_printf(s, "%d ?\n", entry->type)) goto partial; @@ -1967,7 +1943,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) event = ftrace_find_event(entry->type); if (event) { - enum print_line_t ret = event->hex(iter, 0); + enum print_line_t ret = event->funcs->hex(iter, 0, event); if (ret != TRACE_TYPE_HANDLED) return ret; } @@ -1992,10 +1968,11 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) } event = ftrace_find_event(entry->type); - return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED; + return event ? 
event->funcs->binary(iter, 0, event) : + TRACE_TYPE_HANDLED; } -static int trace_empty(struct trace_iterator *iter) +int trace_empty(struct trace_iterator *iter) { int cpu; @@ -2030,6 +2007,10 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) { enum print_line_t ret; + if (iter->lost_events) + trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", + iter->cpu, iter->lost_events); + if (iter->trace && iter->trace->print_line) { ret = iter->trace->print_line(iter); if (ret != TRACE_TYPE_UNHANDLED) @@ -2058,6 +2039,23 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) return print_trace_fmt(iter); } +void trace_default_header(struct seq_file *m) +{ + struct trace_iterator *iter = m->private; + + if (iter->iter_flags & TRACE_FILE_LAT_FMT) { + /* print nothing if the buffers are empty */ + if (trace_empty(iter)) + return; + print_trace_header(m, iter); + if (!(trace_flags & TRACE_ITER_VERBOSE)) + print_lat_help_header(m); + } else { + if (!(trace_flags & TRACE_ITER_VERBOSE)) + print_func_help_header(m); + } +} + static int s_show(struct seq_file *m, void *v) { struct trace_iterator *iter = v; @@ -2070,17 +2068,9 @@ static int s_show(struct seq_file *m, void *v) } if (iter->trace && iter->trace->print_header) iter->trace->print_header(m); - else if (iter->iter_flags & TRACE_FILE_LAT_FMT) { - /* print nothing if the buffers are empty */ - if (trace_empty(iter)) - return 0; - print_trace_header(m, iter); - if (!(trace_flags & TRACE_ITER_VERBOSE)) - print_lat_help_header(m); - } else { - if (!(trace_flags & TRACE_ITER_VERBOSE)) - print_func_help_header(m); - } + else + trace_default_header(m); + } else if (iter->leftover) { /* * If we filled the seq_file buffer earlier, we @@ -2166,15 +2156,20 @@ __tracing_open(struct inode *inode, struct file *file) if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { for_each_tracing_cpu(cpu) { - iter->buffer_iter[cpu] = - ring_buffer_read_start(iter->tr->buffer, cpu); + ring_buffer_read_prepare(iter->tr->buffer, cpu); + } + ring_buffer_read_prepare_sync(); + for_each_tracing_cpu(cpu) { + ring_buffer_read_start(iter->buffer_iter[cpu]); tracing_iter_reset(iter, cpu); } } else { cpu = iter->cpu_file; iter->buffer_iter[cpu] = - ring_buffer_read_start(iter->tr->buffer, cpu); + ring_buffer_read_prepare(iter->tr->buffer, cpu); + ring_buffer_read_prepare_sync(); + ring_buffer_read_start(iter->buffer_iter[cpu]); tracing_iter_reset(iter, cpu); } @@ -2353,6 +2348,7 @@ static const struct file_operations show_traces_fops = { .open = show_traces_open, .read = seq_read, .release = seq_release, + .llseek = seq_lseek, }; /* @@ -2446,6 +2442,7 @@ static const struct file_operations tracing_cpumask_fops = { .open = tracing_open_generic, .read = tracing_cpumask_read, .write = tracing_cpumask_write, + .llseek = generic_file_llseek, }; static int tracing_trace_options_show(struct seq_file *m, void *v) @@ -2521,6 +2518,9 @@ static void set_tracer_flags(unsigned int mask, int enabled) trace_flags |= mask; else trace_flags &= ~mask; + + if (mask == TRACE_ITER_RECORD_CMD) + trace_event_enable_cmd_record(enabled); } static ssize_t @@ -2612,6 +2612,7 @@ tracing_readme_read(struct file *filp, char __user *ubuf, static const struct file_operations tracing_readme_fops = { .open = tracing_open_generic, .read = tracing_readme_read, + .llseek = generic_file_llseek, }; static ssize_t @@ -2662,6 +2663,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf, static const struct file_operations tracing_saved_cmdlines_fops = { .open = 
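/*
 * Illustrative sketch (editor's note, not part of this patch): __tracing_open()
 * now splits iterator setup into prepare / sync / start phases so every
 * per-cpu iterator is prepared before a single synchronization point.
 * Mirroring the hunk above, with error handling omitted:
 *
 *	for_each_tracing_cpu(cpu)
 *		iter->buffer_iter[cpu] =
 *			ring_buffer_read_prepare(iter->tr->buffer, cpu);
 *	ring_buffer_read_prepare_sync();
 *	for_each_tracing_cpu(cpu) {
 *		ring_buffer_read_start(iter->buffer_iter[cpu]);
 *		tracing_iter_reset(iter, cpu);
 *	}
 */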
tracing_open_generic, .read = tracing_saved_cmdlines_read, + .llseek = generic_file_llseek, }; static ssize_t @@ -2757,6 +2759,9 @@ static int tracing_resize_ring_buffer(unsigned long size) if (ret < 0) return ret; + if (!current_trace->use_max_tr) + goto out; + ret = ring_buffer_resize(max_tr.buffer, size); if (ret < 0) { int r; @@ -2784,11 +2789,14 @@ static int tracing_resize_ring_buffer(unsigned long size) return ret; } + max_tr.entries = size; + out: global_trace.entries = size; return ret; } + /** * tracing_update_buffers - used by tracing facility to expand ring buffers * @@ -2849,12 +2857,26 @@ static int tracing_set_tracer(const char *buf) trace_branch_disable(); if (current_trace && current_trace->reset) current_trace->reset(tr); - + if (current_trace && current_trace->use_max_tr) { + /* + * We don't free the ring buffer. instead, resize it because + * The max_tr ring buffer has some state (e.g. ring->clock) and + * we want preserve it. + */ + ring_buffer_resize(max_tr.buffer, 1); + max_tr.entries = 1; + } destroy_trace_option_files(topts); current_trace = t; topts = create_trace_option_files(current_trace); + if (current_trace->use_max_tr) { + ret = ring_buffer_resize(max_tr.buffer, global_trace.entries); + if (ret < 0) + goto out; + max_tr.entries = global_trace.entries; + } if (t->init) { ret = tracer_init(t, tr); @@ -2991,6 +3013,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (iter->trace->pipe_open) iter->trace->pipe_open(iter); + nonseekable_open(inode, filp); out: mutex_unlock(&trace_types_lock); return ret; @@ -3269,12 +3292,12 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, size_t len, unsigned int flags) { - struct page *pages[PIPE_BUFFERS]; - struct partial_page partial[PIPE_BUFFERS]; + struct page *pages_def[PIPE_DEF_BUFFERS]; + struct partial_page partial_def[PIPE_DEF_BUFFERS]; struct trace_iterator *iter = filp->private_data; struct splice_pipe_desc spd = { - .pages = pages, - .partial = partial, + .pages = pages_def, + .partial = partial_def, .nr_pages = 0, /* This gets updated below. */ .flags = flags, .ops = &tracing_pipe_buf_ops, @@ -3285,6 +3308,9 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, size_t rem; unsigned int i; + if (splice_grow_spd(pipe, &spd)) + return -ENOMEM; + /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); if (unlikely(old_tracer != current_trace && current_trace)) { @@ -3315,23 +3341,23 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, trace_access_lock(iter->cpu_file); /* Fill as many pages as possible. */ - for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { - pages[i] = alloc_page(GFP_KERNEL); - if (!pages[i]) + for (i = 0, rem = len; i < pipe->buffers && rem; i++) { + spd.pages[i] = alloc_page(GFP_KERNEL); + if (!spd.pages[i]) break; rem = tracing_fill_pipe_page(rem, iter); /* Copy the data into the page, so we can start over. 
*/ ret = trace_seq_to_buffer(&iter->seq, - page_address(pages[i]), + page_address(spd.pages[i]), iter->seq.len); if (ret < 0) { - __free_page(pages[i]); + __free_page(spd.pages[i]); break; } - partial[i].offset = 0; - partial[i].len = iter->seq.len; + spd.partial[i].offset = 0; + spd.partial[i].len = iter->seq.len; trace_seq_init(&iter->seq); } @@ -3342,12 +3368,14 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, spd.nr_pages = i; - return splice_to_pipe(pipe, &spd); + ret = splice_to_pipe(pipe, &spd); +out: + splice_shrink_spd(pipe, &spd); + return ret; out_err: mutex_unlock(&iter->mutex); - - return ret; + goto out; } static ssize_t @@ -3431,7 +3459,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, } tracing_start(); - max_tr.entries = global_trace.entries; mutex_unlock(&trace_types_lock); return cnt; @@ -3544,18 +3571,21 @@ static const struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, .write = tracing_max_lat_write, + .llseek = generic_file_llseek, }; static const struct file_operations tracing_ctrl_fops = { .open = tracing_open_generic, .read = tracing_ctrl_read, .write = tracing_ctrl_write, + .llseek = generic_file_llseek, }; static const struct file_operations set_tracer_fops = { .open = tracing_open_generic, .read = tracing_set_trace_read, .write = tracing_set_trace_write, + .llseek = generic_file_llseek, }; static const struct file_operations tracing_pipe_fops = { @@ -3564,17 +3594,20 @@ static const struct file_operations tracing_pipe_fops = { .read = tracing_read_pipe, .splice_read = tracing_splice_read_pipe, .release = tracing_release_pipe, + .llseek = no_llseek, }; static const struct file_operations tracing_entries_fops = { .open = tracing_open_generic, .read = tracing_entries_read, .write = tracing_entries_write, + .llseek = generic_file_llseek, }; static const struct file_operations tracing_mark_fops = { .open = tracing_open_generic, .write = tracing_mark_write, + .llseek = generic_file_llseek, }; static const struct file_operations trace_clock_fops = { @@ -3620,7 +3653,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { struct ftrace_buffer_info *info = filp->private_data; - unsigned int pos; ssize_t ret; size_t size; @@ -3647,11 +3679,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, if (ret < 0) return 0; - pos = ring_buffer_page_len(info->spare); - - if (pos < PAGE_SIZE) - memset(info->spare + pos, 0, PAGE_SIZE - pos); - read: size = PAGE_SIZE - info->read; if (size > count) @@ -3746,11 +3773,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, unsigned int flags) { struct ftrace_buffer_info *info = file->private_data; - struct partial_page partial[PIPE_BUFFERS]; - struct page *pages[PIPE_BUFFERS]; + struct partial_page partial_def[PIPE_DEF_BUFFERS]; + struct page *pages_def[PIPE_DEF_BUFFERS]; struct splice_pipe_desc spd = { - .pages = pages, - .partial = partial, + .pages = pages_def, + .partial = partial_def, .flags = flags, .ops = &buffer_pipe_buf_ops, .spd_release = buffer_spd_release, @@ -3759,22 +3786,28 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, int entries, size, i; size_t ret; + if (splice_grow_spd(pipe, &spd)) + return -ENOMEM; + if (*ppos & (PAGE_SIZE - 1)) { WARN_ONCE(1, "Ftrace: previous read must page-align\n"); - return -EINVAL; + ret = -EINVAL; + goto out; } if (len & (PAGE_SIZE - 1)) { WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); - if (len < PAGE_SIZE) - return 
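/*
 * Illustrative sketch (editor's note, not part of this patch): both splice
 * paths above now use stack arrays sized PIPE_DEF_BUFFERS, grow them to the
 * pipe's actual size with splice_grow_spd() and release them with
 * splice_shrink_spd(). Schematically (fields taken from the
 * tracing_buffers_splice_read() hunk):
 *
 *	struct page *pages_def[PIPE_DEF_BUFFERS];
 *	struct partial_page partial_def[PIPE_DEF_BUFFERS];
 *	struct splice_pipe_desc spd = {
 *		.pages		= pages_def,
 *		.partial	= partial_def,
 *		.flags		= flags,
 *		.ops		= &buffer_pipe_buf_ops,
 *		.spd_release	= buffer_spd_release,
 *	};
 *	ssize_t ret;
 *
 *	if (splice_grow_spd(pipe, &spd))
 *		return -ENOMEM;
 *	// ... fill up to pipe->buffers entries in spd.pages/spd.partial ...
 *	ret = splice_to_pipe(pipe, &spd);
 *	splice_shrink_spd(pipe, &spd);
 *	return ret;
 */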
-EINVAL; + if (len < PAGE_SIZE) { + ret = -EINVAL; + goto out; + } len &= PAGE_MASK; } trace_access_lock(info->cpu); entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); - for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { + for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) { struct page *page; int r; @@ -3829,11 +3862,12 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, else ret = 0; /* TODO: block */ - return ret; + goto out; } ret = splice_to_pipe(pipe, &spd); - + splice_shrink_spd(pipe, &spd); +out: return ret; } @@ -3879,6 +3913,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, static const struct file_operations tracing_stats_fops = { .open = tracing_open_generic, .read = tracing_stats_read, + .llseek = generic_file_llseek, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -3915,6 +3950,7 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf, static const struct file_operations tracing_dyn_info_fops = { .open = tracing_open_generic, .read = tracing_read_dyn_info, + .llseek = generic_file_llseek, }; #endif @@ -4068,6 +4104,7 @@ static const struct file_operations trace_options_fops = { .open = tracing_open_generic, .read = trace_options_read, .write = trace_options_write, + .llseek = generic_file_llseek, }; static ssize_t @@ -4119,6 +4156,7 @@ static const struct file_operations trace_options_core_fops = { .open = tracing_open_generic, .read = trace_options_core_read, .write = trace_options_core_write, + .llseek = generic_file_llseek, }; struct dentry *trace_create_file(const char *name, @@ -4308,9 +4346,6 @@ static __init int tracer_init_debugfs(void) trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, &tracing_dyn_info_fops); #endif -#ifdef CONFIG_SYSPROF_TRACER - init_tracer_sysprof_debugfs(d_tracer); -#endif create_trace_options_dir(); @@ -4324,7 +4359,7 @@ static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { if (ftrace_dump_on_oops) - ftrace_dump(); + ftrace_dump(ftrace_dump_on_oops); return NOTIFY_OK; } @@ -4341,7 +4376,7 @@ static int trace_die_handler(struct notifier_block *self, switch (val) { case DIE_OOPS: if (ftrace_dump_on_oops) - ftrace_dump(); + ftrace_dump(ftrace_dump_on_oops); break; default: break; @@ -4382,7 +4417,8 @@ trace_printk_seq(struct trace_seq *s) trace_seq_init(s); } -static void __ftrace_dump(bool disable_tracing) +static void +__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) { static arch_spinlock_t ftrace_dump_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; @@ -4415,12 +4451,25 @@ static void __ftrace_dump(bool disable_tracing) /* don't look at user memory in panic mode */ trace_flags &= ~TRACE_ITER_SYM_USEROBJ; - printk(KERN_TRACE "Dumping ftrace buffer:\n"); - /* Simulate the iterator */ iter.tr = &global_trace; iter.trace = current_trace; - iter.cpu_file = TRACE_PIPE_ALL_CPU; + + switch (oops_dump_mode) { + case DUMP_ALL: + iter.cpu_file = TRACE_PIPE_ALL_CPU; + break; + case DUMP_ORIG: + iter.cpu_file = raw_smp_processor_id(); + break; + case DUMP_NONE: + goto out_enable; + default: + printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n"); + iter.cpu_file = TRACE_PIPE_ALL_CPU; + } + + printk(KERN_TRACE "Dumping ftrace buffer:\n"); /* * We need to stop all tracing on all CPUS to read the @@ -4459,6 +4508,7 @@ static void __ftrace_dump(bool disable_tracing) else printk(KERN_TRACE "---------------------------------\n"); + out_enable: /* Re-enable tracing if requested 
*/ if (!disable_tracing) { trace_flags |= old_userobj; @@ -4475,9 +4525,9 @@ static void __ftrace_dump(bool disable_tracing) } /* By default: disable tracing after the dump */ -void ftrace_dump(void) +void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { - __ftrace_dump(true); + __ftrace_dump(true, oops_dump_mode); } __init static int tracer_alloc_buffers(void) @@ -4513,16 +4563,14 @@ __init static int tracer_alloc_buffers(void) #ifdef CONFIG_TRACER_MAX_TRACE - max_tr.buffer = ring_buffer_alloc(ring_buf_size, - TRACE_BUFFER_FLAGS); + max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS); if (!max_tr.buffer) { printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); WARN_ON(1); ring_buffer_free(global_trace.buffer); goto out_free_cpumask; } - max_tr.entries = ring_buffer_size(max_tr.buffer); - WARN_ON(max_tr.entries != global_trace.entries); + max_tr.entries = 1; #endif /* Allocate the first page for all buffers */ @@ -4535,9 +4583,6 @@ __init static int tracer_alloc_buffers(void) register_tracer(&nop_trace); current_trace = &nop_trace; -#ifdef CONFIG_BOOT_TRACER - register_tracer(&boot_tracer); -#endif /* All seems OK, enable tracing */ tracing_disabled = 0; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2825ef2c0b15..d05c873dd4b2 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -9,10 +9,7 @@ #include <linux/mmiotrace.h> #include <linux/tracepoint.h> #include <linux/ftrace.h> -#include <trace/boot.h> -#include <linux/kmemtrace.h> #include <linux/hw_breakpoint.h> - #include <linux/trace_seq.h> #include <linux/ftrace_event.h> @@ -25,31 +22,17 @@ enum trace_type { TRACE_STACK, TRACE_PRINT, TRACE_BPRINT, - TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, TRACE_BRANCH, - TRACE_BOOT_CALL, - TRACE_BOOT_RET, TRACE_GRAPH_RET, TRACE_GRAPH_ENT, TRACE_USER_STACK, - TRACE_HW_BRANCHES, - TRACE_KMEM_ALLOC, - TRACE_KMEM_FREE, TRACE_BLK, - TRACE_KSYM, __TRACE_LAST_TYPE, }; -enum kmemtrace_type_id { - KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ - KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ - KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ -}; - -extern struct tracer boot_tracer; #undef __field #define __field(type, item) type item; @@ -103,29 +86,17 @@ struct syscall_trace_exit { long ret; }; -struct kprobe_trace_entry { +struct kprobe_trace_entry_head { struct trace_entry ent; unsigned long ip; - int nargs; - unsigned long args[]; }; -#define SIZEOF_KPROBE_TRACE_ENTRY(n) \ - (offsetof(struct kprobe_trace_entry, args) + \ - (sizeof(unsigned long) * (n))) - -struct kretprobe_trace_entry { +struct kretprobe_trace_entry_head { struct trace_entry ent; unsigned long func; unsigned long ret_ip; - int nargs; - unsigned long args[]; }; -#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \ - (offsetof(struct kretprobe_trace_entry, args) + \ - (sizeof(unsigned long) * (n))) - /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. 
These are: @@ -217,24 +188,15 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ - IF_ASSIGN(var, ent, struct special_entry, 0); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ TRACE_MMIO_MAP); \ - IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\ - IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\ IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ TRACE_GRAPH_ENT); \ IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ TRACE_GRAPH_RET); \ - IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ - IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ - TRACE_KMEM_ALLOC); \ - IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ - TRACE_KMEM_FREE); \ - IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\ __ftrace_bad_type(); \ } while (0) @@ -312,6 +274,7 @@ struct tracer { struct tracer *next; int print_max; struct tracer_flags *flags; + int use_max_tr; }; @@ -332,7 +295,6 @@ struct dentry *trace_create_file(const char *name, const struct file_operations *fops); struct dentry *tracing_init_dentry(void); -void init_tracer_sysprof_debugfs(struct dentry *d_tracer); struct ring_buffer_event; @@ -369,15 +331,13 @@ void tracing_sched_wakeup_trace(struct trace_array *tr, struct task_struct *wakee, struct task_struct *cur, unsigned long flags, int pc); -void trace_special(struct trace_array *tr, - struct trace_array_cpu *data, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, int pc); void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); +void trace_default_header(struct seq_file *m); +void print_trace_header(struct seq_file *m, struct trace_iterator *iter); +int trace_empty(struct trace_iterator *iter); void trace_graph_return(struct ftrace_graph_ret *trace); int trace_graph_entry(struct ftrace_graph_ent *trace); @@ -392,8 +352,6 @@ int register_tracer(struct tracer *type); void unregister_tracer(struct tracer *type); int is_tracing_stopped(void); -extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr); - extern unsigned long nsecs_to_usecs(unsigned long nsecs); extern unsigned long tracing_thresh; @@ -416,12 +374,12 @@ void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc); #else -static inline void ftrace_trace_stack(struct trace_array *tr, +static inline void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, int skip, int pc) { } -static inline void ftrace_trace_userstack(struct trace_array *tr, +static inline void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) { } @@ -463,14 +421,8 @@ extern int trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr); -extern int trace_selftest_startup_sysprof(struct tracer *trace, - struct trace_array *tr); extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); -extern int trace_selftest_startup_hw_branches(struct tracer *trace, - struct trace_array *tr); -extern int trace_selftest_startup_ksym(struct tracer *trace, - struct trace_array 
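/*
 * Illustrative sketch (editor's note, not part of this patch): the new
 * tracer->use_max_tr flag added above lets the max_tr snapshot buffer idle at
 * a single entry; tracing_set_tracer() only grows it when a tracer that needs
 * the snapshot is selected, as in the earlier trace.c hunk:
 *
 *	if (current_trace->use_max_tr) {
 *		ret = ring_buffer_resize(max_tr.buffer, global_trace.entries);
 *		if (ret < 0)
 *			goto out;
 *		max_tr.entries = global_trace.entries;
 *	}
 */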
*tr); #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); @@ -491,9 +443,29 @@ extern int trace_clock_id; /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER -extern enum print_line_t print_graph_function(struct trace_iterator *iter); + +/* Flag options */ +#define TRACE_GRAPH_PRINT_OVERRUN 0x1 +#define TRACE_GRAPH_PRINT_CPU 0x2 +#define TRACE_GRAPH_PRINT_OVERHEAD 0x4 +#define TRACE_GRAPH_PRINT_PROC 0x8 +#define TRACE_GRAPH_PRINT_DURATION 0x10 +#define TRACE_GRAPH_PRINT_ABS_TIME 0x20 + +extern enum print_line_t +print_graph_function_flags(struct trace_iterator *iter, u32 flags); +extern void print_graph_headers_flags(struct seq_file *s, u32 flags); extern enum print_line_t trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); +extern void graph_trace_open(struct trace_iterator *iter); +extern void graph_trace_close(struct trace_iterator *iter); +extern int __trace_graph_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned long flags, int pc); +extern void __trace_graph_return(struct trace_array *tr, + struct ftrace_graph_ret *trace, + unsigned long flags, int pc); + #ifdef CONFIG_DYNAMIC_FTRACE /* TODO: make this variable */ @@ -524,7 +496,7 @@ static inline int ftrace_graph_addr(unsigned long addr) #endif /* CONFIG_DYNAMIC_FTRACE */ #else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t -print_graph_function(struct trace_iterator *iter) +print_graph_function_flags(struct trace_iterator *iter, u32 flags) { return TRACE_TYPE_UNHANDLED; } @@ -610,6 +582,7 @@ enum trace_iterator_flags { TRACE_ITER_LATENCY_FMT = 0x20000, TRACE_ITER_SLEEP_TIME = 0x40000, TRACE_ITER_GRAPH_TIME = 0x80000, + TRACE_ITER_RECORD_CMD = 0x100000, }; /* @@ -621,54 +594,6 @@ enum trace_iterator_flags { extern struct tracer nop_trace; -/** - * ftrace_preempt_disable - disable preemption scheduler safe - * - * When tracing can happen inside the scheduler, there exists - * cases that the tracing might happen before the need_resched - * flag is checked. If this happens and the tracer calls - * preempt_enable (after a disable), a schedule might take place - * causing an infinite recursion. - * - * To prevent this, we read the need_resched flag before - * disabling preemption. When we want to enable preemption we - * check the flag, if it is set, then we call preempt_enable_no_resched. - * Otherwise, we call preempt_enable. - * - * The rational for doing the above is that if need_resched is set - * and we have yet to reschedule, we are either in an atomic location - * (where we do not need to check for scheduling) or we are inside - * the scheduler and do not want to resched. - */ -static inline int ftrace_preempt_disable(void) -{ - int resched; - - resched = need_resched(); - preempt_disable_notrace(); - - return resched; -} - -/** - * ftrace_preempt_enable - enable preemption scheduler safe - * @resched: the return value from ftrace_preempt_disable - * - * This is a scheduler safe way to enable preemption and not miss - * any preemption checks. The disabled saved the state of preemption. - * If resched is set, then we are either inside an atomic or - * are inside the scheduler (we would have already scheduled - * otherwise). In this case, we do not want to call normal - * preempt_enable, but preempt_enable_no_resched instead. 
- */ -static inline void ftrace_preempt_enable(int resched) -{ - if (resched) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); -} - #ifdef CONFIG_BRANCH_TRACER extern int enable_branch_tracing(struct trace_array *tr); extern void disable_branch_tracing(void); @@ -759,6 +684,8 @@ struct filter_pred { int pop_n; }; +extern struct list_head ftrace_common_fields; + extern enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not); extern void print_event_filter(struct ftrace_event_call *call, @@ -771,12 +698,15 @@ extern void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s); extern int filter_assign_type(const char *type); +struct list_head * +trace_get_fields(struct ftrace_event_call *event_call); + static inline int filter_check_discard(struct ftrace_event_call *call, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event) { - if (unlikely(call->filter_active) && + if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) && !filter_match_preds(call->filter, rec)) { ring_buffer_discard_commit(buffer, event); return 1; @@ -785,6 +715,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, return 0; } +extern void trace_event_enable_cmd_record(bool enable); + extern struct mutex event_mutex; extern struct list_head ftrace_events; diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c deleted file mode 100644 index c21d5f3956ad..000000000000 --- a/kernel/trace/trace_boot.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * ring buffer based initcalls tracer - * - * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com> - * - */ - -#include <linux/init.h> -#include <linux/debugfs.h> -#include <linux/ftrace.h> -#include <linux/kallsyms.h> -#include <linux/time.h> - -#include "trace.h" -#include "trace_output.h" - -static struct trace_array *boot_trace; -static bool pre_initcalls_finished; - -/* Tells the boot tracer that the pre_smp_initcalls are finished. - * So we are ready . - * It doesn't enable sched events tracing however. - * You have to call enable_boot_trace to do so. 
- */ -void start_boot_trace(void) -{ - pre_initcalls_finished = true; -} - -void enable_boot_trace(void) -{ - if (boot_trace && pre_initcalls_finished) - tracing_start_sched_switch_record(); -} - -void disable_boot_trace(void) -{ - if (boot_trace && pre_initcalls_finished) - tracing_stop_sched_switch_record(); -} - -static int boot_trace_init(struct trace_array *tr) -{ - boot_trace = tr; - - if (!tr) - return 0; - - tracing_reset_online_cpus(tr); - - tracing_sched_switch_assign_trace(tr); - return 0; -} - -static enum print_line_t -initcall_call_print_line(struct trace_iterator *iter) -{ - struct trace_entry *entry = iter->ent; - struct trace_seq *s = &iter->seq; - struct trace_boot_call *field; - struct boot_trace_call *call; - u64 ts; - unsigned long nsec_rem; - int ret; - - trace_assign_type(field, entry); - call = &field->boot_call; - ts = iter->ts; - nsec_rem = do_div(ts, NSEC_PER_SEC); - - ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", - (unsigned long)ts, nsec_rem, call->func, call->caller); - - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - else - return TRACE_TYPE_HANDLED; -} - -static enum print_line_t -initcall_ret_print_line(struct trace_iterator *iter) -{ - struct trace_entry *entry = iter->ent; - struct trace_seq *s = &iter->seq; - struct trace_boot_ret *field; - struct boot_trace_ret *init_ret; - u64 ts; - unsigned long nsec_rem; - int ret; - - trace_assign_type(field, entry); - init_ret = &field->boot_ret; - ts = iter->ts; - nsec_rem = do_div(ts, NSEC_PER_SEC); - - ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " - "returned %d after %llu msecs\n", - (unsigned long) ts, - nsec_rem, - init_ret->func, init_ret->result, init_ret->duration); - - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - else - return TRACE_TYPE_HANDLED; -} - -static enum print_line_t initcall_print_line(struct trace_iterator *iter) -{ - struct trace_entry *entry = iter->ent; - - switch (entry->type) { - case TRACE_BOOT_CALL: - return initcall_call_print_line(iter); - case TRACE_BOOT_RET: - return initcall_ret_print_line(iter); - default: - return TRACE_TYPE_UNHANDLED; - } -} - -struct tracer boot_tracer __read_mostly = -{ - .name = "initcall", - .init = boot_trace_init, - .reset = tracing_reset_online_cpus, - .print_line = initcall_print_line, -}; - -void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) -{ - struct ftrace_event_call *call = &event_boot_call; - struct ring_buffer_event *event; - struct ring_buffer *buffer; - struct trace_boot_call *entry; - struct trace_array *tr = boot_trace; - - if (!tr || !pre_initcalls_finished) - return; - - /* Get its name now since this function could - * disappear because it is in the .init section. 
- */ - sprint_symbol(bt->func, (unsigned long)fn); - preempt_disable(); - - buffer = tr->buffer; - event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL, - sizeof(*entry), 0, 0); - if (!event) - goto out; - entry = ring_buffer_event_data(event); - entry->boot_call = *bt; - if (!filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(buffer, event, 0, 0); - out: - preempt_enable(); -} - -void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) -{ - struct ftrace_event_call *call = &event_boot_ret; - struct ring_buffer_event *event; - struct ring_buffer *buffer; - struct trace_boot_ret *entry; - struct trace_array *tr = boot_trace; - - if (!tr || !pre_initcalls_finished) - return; - - sprint_symbol(bt->func, (unsigned long)fn); - preempt_disable(); - - buffer = tr->buffer; - event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET, - sizeof(*entry), 0, 0); - if (!event) - goto out; - entry = ring_buffer_event_data(event); - entry->boot_ret = *bt; - if (!filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(buffer, event, 0, 0); - out: - preempt_enable(); -} diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index b9bc4d470177..8d3538b4ea5f 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -143,7 +143,7 @@ static void branch_trace_reset(struct trace_array *tr) } static enum print_line_t trace_branch_print(struct trace_iterator *iter, - int flags) + int flags, struct trace_event *event) { struct trace_branch *field; @@ -167,9 +167,13 @@ static void branch_print_header(struct seq_file *s) " |\n"); } +static struct trace_event_functions trace_branch_funcs = { + .trace = trace_branch_print, +}; + static struct trace_event trace_branch_event = { .type = TRACE_BRANCH, - .trace = trace_branch_print, + .funcs = &trace_branch_funcs, }; static struct tracer branch_trace __read_mostly = diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 9d589d8dcd1a..52fda6c04ac3 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -32,16 +32,15 @@ u64 notrace trace_clock_local(void) { u64 clock; - int resched; /* * sched_clock() is an architecture implemented, fast, scalable, * lockless clock. It is not guaranteed to be coherent across * CPUs, nor across CPU idle events. 
*/ - resched = ftrace_preempt_disable(); + preempt_disable_notrace(); clock = sched_clock(); - ftrace_preempt_enable(resched); + preempt_enable_notrace(); return clock; } diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index c16a08f399df..e3dfecaf13e6 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -151,23 +151,6 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry, ); /* - * Special (free-form) trace entry: - */ -FTRACE_ENTRY(special, special_entry, - - TRACE_SPECIAL, - - F_STRUCT( - __field( unsigned long, arg1 ) - __field( unsigned long, arg2 ) - __field( unsigned long, arg3 ) - ), - - F_printk("(%08lx) (%08lx) (%08lx)", - __entry->arg1, __entry->arg2, __entry->arg3) -); - -/* * Stack-trace entry: */ @@ -271,33 +254,6 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, __entry->map_id, __entry->opcode) ); -FTRACE_ENTRY(boot_call, trace_boot_call, - - TRACE_BOOT_CALL, - - F_STRUCT( - __field_struct( struct boot_trace_call, boot_call ) - __field_desc( pid_t, boot_call, caller ) - __array_desc( char, boot_call, func, KSYM_SYMBOL_LEN) - ), - - F_printk("%d %s", __entry->caller, __entry->func) -); - -FTRACE_ENTRY(boot_ret, trace_boot_ret, - - TRACE_BOOT_RET, - - F_STRUCT( - __field_struct( struct boot_trace_ret, boot_ret ) - __array_desc( char, boot_ret, func, KSYM_SYMBOL_LEN) - __field_desc( int, boot_ret, result ) - __field_desc( unsigned long, boot_ret, duration ) - ), - - F_printk("%s %d %lx", - __entry->func, __entry->result, __entry->duration) -); #define TRACE_FUNC_SIZE 30 #define TRACE_FILE_SIZE 20 @@ -318,65 +274,3 @@ FTRACE_ENTRY(branch, trace_branch, __entry->func, __entry->file, __entry->correct) ); -FTRACE_ENTRY(hw_branch, hw_branch_entry, - - TRACE_HW_BRANCHES, - - F_STRUCT( - __field( u64, from ) - __field( u64, to ) - ), - - F_printk("from: %llx to: %llx", __entry->from, __entry->to) -); - -FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry, - - TRACE_KMEM_ALLOC, - - F_STRUCT( - __field( enum kmemtrace_type_id, type_id ) - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi" - " flags:%x node:%d", - __entry->type_id, __entry->call_site, __entry->ptr, - __entry->bytes_req, __entry->bytes_alloc, - __entry->gfp_flags, __entry->node) -); - -FTRACE_ENTRY(kmem_free, kmemtrace_free_entry, - - TRACE_KMEM_FREE, - - F_STRUCT( - __field( enum kmemtrace_type_id, type_id ) - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - F_printk("type:%u call_site:%lx ptr:%p", - __entry->type_id, __entry->call_site, __entry->ptr) -); - -FTRACE_ENTRY(ksym_trace, ksym_trace_entry, - - TRACE_KSYM, - - F_STRUCT( - __field( unsigned long, ip ) - __field( unsigned char, type ) - __array( char , cmd, TASK_COMM_LEN ) - __field( unsigned long, addr ) - ), - - F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s", - (void *)__entry->ip, (unsigned int)__entry->type, - (void *)__entry->addr, __entry->cmd) -); diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 0565bb42566f..000e6e85b445 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -9,13 +9,7 @@ #include <linux/kprobes.h> #include "trace.h" -DEFINE_PER_CPU(struct pt_regs, perf_trace_regs); -EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs); - -EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs); - -static char *perf_trace_buf; -static char 
*perf_trace_buf_nmi; +static char *perf_trace_buf[4]; /* * Force it to be aligned to unsigned long to avoid misaligned accesses @@ -27,57 +21,76 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) /* Count the events in use (per event id, not per instance) */ static int total_ref_count; -static int perf_trace_event_enable(struct ftrace_event_call *event) +static int perf_trace_event_init(struct ftrace_event_call *tp_event, + struct perf_event *p_event) { - char *buf; + struct hlist_head *list; int ret = -ENOMEM; + int cpu; - if (event->perf_refcount++ > 0) + p_event->tp_event = tp_event; + if (tp_event->perf_refcount++ > 0) return 0; - if (!total_ref_count) { - buf = (char *)alloc_percpu(perf_trace_t); - if (!buf) - goto fail_buf; + list = alloc_percpu(struct hlist_head); + if (!list) + goto fail; - rcu_assign_pointer(perf_trace_buf, buf); + for_each_possible_cpu(cpu) + INIT_HLIST_HEAD(per_cpu_ptr(list, cpu)); - buf = (char *)alloc_percpu(perf_trace_t); - if (!buf) - goto fail_buf_nmi; + tp_event->perf_events = list; - rcu_assign_pointer(perf_trace_buf_nmi, buf); - } + if (!total_ref_count) { + char *buf; + int i; - ret = event->perf_event_enable(event); - if (!ret) { - total_ref_count++; - return 0; + for (i = 0; i < 4; i++) { + buf = (char *)alloc_percpu(perf_trace_t); + if (!buf) + goto fail; + + perf_trace_buf[i] = buf; + } } -fail_buf_nmi: + ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER); + if (ret) + goto fail; + + total_ref_count++; + return 0; + +fail: if (!total_ref_count) { - free_percpu(perf_trace_buf_nmi); - free_percpu(perf_trace_buf); - perf_trace_buf_nmi = NULL; - perf_trace_buf = NULL; + int i; + + for (i = 0; i < 4; i++) { + free_percpu(perf_trace_buf[i]); + perf_trace_buf[i] = NULL; + } + } + + if (!--tp_event->perf_refcount) { + free_percpu(tp_event->perf_events); + tp_event->perf_events = NULL; } -fail_buf: - event->perf_refcount--; return ret; } -int perf_trace_enable(int event_id) +int perf_trace_init(struct perf_event *p_event) { - struct ftrace_event_call *event; + struct ftrace_event_call *tp_event; + int event_id = p_event->attr.config; int ret = -EINVAL; mutex_lock(&event_mutex); - list_for_each_entry(event, &ftrace_events, list) { - if (event->id == event_id && event->perf_event_enable && - try_module_get(event->mod)) { - ret = perf_trace_event_enable(event); + list_for_each_entry(tp_event, &ftrace_events, list) { + if (tp_event->event.type == event_id && + tp_event->class && tp_event->class->reg && + try_module_get(tp_event->mod)) { + ret = perf_trace_event_init(tp_event, p_event); break; } } @@ -86,90 +99,82 @@ int perf_trace_enable(int event_id) return ret; } -static void perf_trace_event_disable(struct ftrace_event_call *event) +int perf_trace_enable(struct perf_event *p_event) { - char *buf, *nmi_buf; - - if (--event->perf_refcount > 0) - return; + struct ftrace_event_call *tp_event = p_event->tp_event; + struct hlist_head *list; - event->perf_event_disable(event); + list = tp_event->perf_events; + if (WARN_ON_ONCE(!list)) + return -EINVAL; - if (!--total_ref_count) { - buf = perf_trace_buf; - rcu_assign_pointer(perf_trace_buf, NULL); - - nmi_buf = perf_trace_buf_nmi; - rcu_assign_pointer(perf_trace_buf_nmi, NULL); + list = this_cpu_ptr(list); + hlist_add_head_rcu(&p_event->hlist_entry, list); - /* - * Ensure every events in profiling have finished before - * releasing the buffers - */ - synchronize_sched(); + return 0; +} - free_percpu(buf); - free_percpu(nmi_buf); - } +void perf_trace_disable(struct perf_event 
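/*
 * Illustrative sketch (editor's note, not part of this patch): with the
 * reworked perf glue above, registration happens once per tracepoint through
 * class->reg(TRACE_REG_PERF_REGISTER) in perf_trace_event_init(), while
 * enabling an individual perf event only links it into the tracepoint's
 * per-cpu hlist, as perf_trace_enable() does:
 *
 *	struct hlist_head *list = this_cpu_ptr(tp_event->perf_events);
 *
 *	hlist_add_head_rcu(&p_event->hlist_entry, list);
 */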
*p_event) +{ + hlist_del_rcu(&p_event->hlist_entry); } -void perf_trace_disable(int event_id) +void perf_trace_destroy(struct perf_event *p_event) { - struct ftrace_event_call *event; + struct ftrace_event_call *tp_event = p_event->tp_event; + int i; mutex_lock(&event_mutex); - list_for_each_entry(event, &ftrace_events, list) { - if (event->id == event_id) { - perf_trace_event_disable(event); - module_put(event->mod); - break; + if (--tp_event->perf_refcount > 0) + goto out; + + tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER); + + /* + * Ensure our callback won't be called anymore. The buffers + * will be freed after that. + */ + tracepoint_synchronize_unregister(); + + free_percpu(tp_event->perf_events); + tp_event->perf_events = NULL; + + if (!--total_ref_count) { + for (i = 0; i < 4; i++) { + free_percpu(perf_trace_buf[i]); + perf_trace_buf[i] = NULL; } } +out: mutex_unlock(&event_mutex); } __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, - int *rctxp, unsigned long *irq_flags) + struct pt_regs *regs, int *rctxp) { struct trace_entry *entry; - char *trace_buf, *raw_data; - int pc, cpu; + unsigned long flags; + char *raw_data; + int pc; BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); pc = preempt_count(); - /* Protect the per cpu buffer, begin the rcu read side */ - local_irq_save(*irq_flags); - *rctxp = perf_swevent_get_recursion_context(); if (*rctxp < 0) - goto err_recursion; - - cpu = smp_processor_id(); - - if (in_nmi()) - trace_buf = rcu_dereference_sched(perf_trace_buf_nmi); - else - trace_buf = rcu_dereference_sched(perf_trace_buf); - - if (!trace_buf) - goto err; + return NULL; - raw_data = per_cpu_ptr(trace_buf, cpu); + raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]); /* zero the dead bytes from align to not leak stack to user */ memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); entry = (struct trace_entry *)raw_data; - tracing_generic_entry_update(entry, *irq_flags, pc); + local_save_flags(flags); + tracing_generic_entry_update(entry, flags, pc); entry->type = type; return raw_data; -err: - perf_swevent_put_recursion_context(*rctxp); -err_recursion: - local_irq_restore(*irq_flags); - return NULL; } EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index c697c7043349..09b4fa6e4d3b 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -28,10 +28,19 @@ DEFINE_MUTEX(event_mutex); LIST_HEAD(ftrace_events); +LIST_HEAD(ftrace_common_fields); -int trace_define_field(struct ftrace_event_call *call, const char *type, - const char *name, int offset, int size, int is_signed, - int filter_type) +struct list_head * +trace_get_fields(struct ftrace_event_call *event_call) +{ + if (!event_call->class->get_fields) + return &event_call->class->fields; + return event_call->class->get_fields(event_call); +} + +static int __trace_define_field(struct list_head *head, const char *type, + const char *name, int offset, int size, + int is_signed, int filter_type) { struct ftrace_event_field *field; @@ -56,7 +65,7 @@ int trace_define_field(struct ftrace_event_call *call, const char *type, field->size = size; field->is_signed = is_signed; - list_add(&field->link, &call->fields); + list_add(&field->link, head); return 0; @@ -67,17 +76,32 @@ err: return -ENOMEM; } + +int trace_define_field(struct ftrace_event_call *call, const char *type, + const char *name, int offset, int size, int is_signed, + int filter_type) +{ + struct list_head *head; + + if (WARN_ON(!call->class)) + 
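/*
 * Illustrative sketch (editor's note, not part of this patch): with common
 * fields moved onto the shared ftrace_common_fields list, a field lookup now
 * checks that list first and then the event's own list via trace_get_fields(),
 * which is what the trace_events_filter.c hunk further down does:
 *
 *	field = __find_event_field(&ftrace_common_fields, name);
 *	if (!field)
 *		field = __find_event_field(trace_get_fields(call), name);
 */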
return 0; + + head = trace_get_fields(call); + return __trace_define_field(head, type, name, offset, size, + is_signed, filter_type); +} EXPORT_SYMBOL_GPL(trace_define_field); #define __common_field(type, item) \ - ret = trace_define_field(call, #type, "common_" #item, \ - offsetof(typeof(ent), item), \ - sizeof(ent.item), \ - is_signed_type(type), FILTER_OTHER); \ + ret = __trace_define_field(&ftrace_common_fields, #type, \ + "common_" #item, \ + offsetof(typeof(ent), item), \ + sizeof(ent.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; -static int trace_define_common_fields(struct ftrace_event_call *call) +static int trace_define_common_fields(void) { int ret; struct trace_entry ent; @@ -94,8 +118,10 @@ static int trace_define_common_fields(struct ftrace_event_call *call) void trace_destroy_fields(struct ftrace_event_call *call) { struct ftrace_event_field *field, *next; + struct list_head *head; - list_for_each_entry_safe(field, next, &call->fields, link) { + head = trace_get_fields(call); + list_for_each_entry_safe(field, next, head, link) { list_del(&field->link); kfree(field->type); kfree(field->name); @@ -107,16 +133,63 @@ int trace_event_raw_init(struct ftrace_event_call *call) { int id; - id = register_ftrace_event(call->event); + id = register_ftrace_event(&call->event); if (!id) return -ENODEV; - call->id = id; - INIT_LIST_HEAD(&call->fields); return 0; } EXPORT_SYMBOL_GPL(trace_event_raw_init); +int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type) +{ + switch (type) { + case TRACE_REG_REGISTER: + return tracepoint_probe_register(call->name, + call->class->probe, + call); + case TRACE_REG_UNREGISTER: + tracepoint_probe_unregister(call->name, + call->class->probe, + call); + return 0; + +#ifdef CONFIG_PERF_EVENTS + case TRACE_REG_PERF_REGISTER: + return tracepoint_probe_register(call->name, + call->class->perf_probe, + call); + case TRACE_REG_PERF_UNREGISTER: + tracepoint_probe_unregister(call->name, + call->class->perf_probe, + call); + return 0; +#endif + } + return 0; +} +EXPORT_SYMBOL_GPL(ftrace_event_reg); + +void trace_event_enable_cmd_record(bool enable) +{ + struct ftrace_event_call *call; + + mutex_lock(&event_mutex); + list_for_each_entry(call, &ftrace_events, list) { + if (!(call->flags & TRACE_EVENT_FL_ENABLED)) + continue; + + if (enable) { + tracing_start_cmdline_record(); + call->flags |= TRACE_EVENT_FL_RECORDED_CMD; + } else { + tracing_stop_cmdline_record(); + call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; + } + } + mutex_unlock(&event_mutex); +} + static int ftrace_event_enable_disable(struct ftrace_event_call *call, int enable) { @@ -124,23 +197,29 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call, switch (enable) { case 0: - if (call->enabled) { - call->enabled = 0; - tracing_stop_cmdline_record(); - call->unregfunc(call); + if (call->flags & TRACE_EVENT_FL_ENABLED) { + call->flags &= ~TRACE_EVENT_FL_ENABLED; + if (call->flags & TRACE_EVENT_FL_RECORDED_CMD) { + tracing_stop_cmdline_record(); + call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; + } + call->class->reg(call, TRACE_REG_UNREGISTER); } break; case 1: - if (!call->enabled) { - tracing_start_cmdline_record(); - ret = call->regfunc(call); + if (!(call->flags & TRACE_EVENT_FL_ENABLED)) { + if (trace_flags & TRACE_ITER_RECORD_CMD) { + tracing_start_cmdline_record(); + call->flags |= TRACE_EVENT_FL_RECORDED_CMD; + } + ret = call->class->reg(call, TRACE_REG_REGISTER); if (ret) { tracing_stop_cmdline_record(); pr_info("event trace: Could not 
enable event " "%s\n", call->name); break; } - call->enabled = 1; + call->flags |= TRACE_EVENT_FL_ENABLED; } break; } @@ -171,15 +250,15 @@ static int __ftrace_set_clr_event(const char *match, const char *sub, mutex_lock(&event_mutex); list_for_each_entry(call, &ftrace_events, list) { - if (!call->name || !call->regfunc) + if (!call->name || !call->class || !call->class->reg) continue; if (match && strcmp(match, call->name) != 0 && - strcmp(match, call->system) != 0) + strcmp(match, call->class->system) != 0) continue; - if (sub && strcmp(sub, call->system) != 0) + if (sub && strcmp(sub, call->class->system) != 0) continue; if (event && strcmp(event, call->name) != 0) @@ -297,7 +376,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos) * The ftrace subsystem is for showing formats only. * They can not be enabled or disabled via the event files. */ - if (call->regfunc) + if (call->class && call->class->reg) return call; } @@ -328,7 +407,7 @@ s_next(struct seq_file *m, void *v, loff_t *pos) (*pos)++; list_for_each_entry_continue(call, &ftrace_events, list) { - if (call->enabled) + if (call->flags & TRACE_EVENT_FL_ENABLED) return call; } @@ -355,8 +434,8 @@ static int t_show(struct seq_file *m, void *v) { struct ftrace_event_call *call = v; - if (strcmp(call->system, TRACE_SYSTEM) != 0) - seq_printf(m, "%s:", call->system); + if (strcmp(call->class->system, TRACE_SYSTEM) != 0) + seq_printf(m, "%s:", call->class->system); seq_printf(m, "%s\n", call->name); return 0; @@ -387,7 +466,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, struct ftrace_event_call *call = filp->private_data; char *buf; - if (call->enabled) + if (call->flags & TRACE_EVENT_FL_ENABLED) buf = "1\n"; else buf = "0\n"; @@ -450,10 +529,10 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, mutex_lock(&event_mutex); list_for_each_entry(call, &ftrace_events, list) { - if (!call->name || !call->regfunc) + if (!call->name || !call->class || !call->class->reg) continue; - if (system && strcmp(call->system, system) != 0) + if (system && strcmp(call->class->system, system) != 0) continue; /* @@ -461,7 +540,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, * or if all events or cleared, or if we have * a mixture. */ - set |= (1 << !!call->enabled); + set |= (1 << !!(call->flags & TRACE_EVENT_FL_ENABLED)); /* * If we have a mixture, no need to look further. @@ -519,31 +598,11 @@ out: return ret; } -static ssize_t -event_format_read(struct file *filp, char __user *ubuf, size_t cnt, - loff_t *ppos) +static void print_event_fields(struct trace_seq *s, struct list_head *head) { - struct ftrace_event_call *call = filp->private_data; struct ftrace_event_field *field; - struct trace_seq *s; - int common_field_count = 5; - char *buf; - int r = 0; - - if (*ppos) - return 0; - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return -ENOMEM; - - trace_seq_init(s); - - trace_seq_printf(s, "name: %s\n", call->name); - trace_seq_printf(s, "ID: %d\n", call->id); - trace_seq_printf(s, "format:\n"); - - list_for_each_entry_reverse(field, &call->fields, link) { + list_for_each_entry_reverse(field, head, link) { /* * Smartly shows the array type(except dynamic array). 
* Normal: @@ -557,29 +616,54 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt, array_descriptor = NULL; if (!array_descriptor) { - r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;" + trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;" "\tsize:%u;\tsigned:%d;\n", field->type, field->name, field->offset, field->size, !!field->is_signed); } else { - r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;" + trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;" "\tsize:%u;\tsigned:%d;\n", (int)(array_descriptor - field->type), field->type, field->name, array_descriptor, field->offset, field->size, !!field->is_signed); } + } +} - if (--common_field_count == 0) - r = trace_seq_printf(s, "\n"); +static ssize_t +event_format_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + struct list_head *head; + struct trace_seq *s; + char *buf; + int r; - if (!r) - break; - } + if (*ppos) + return 0; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; - if (r) - r = trace_seq_printf(s, "\nprint fmt: %s\n", - call->print_fmt); + trace_seq_init(s); + + trace_seq_printf(s, "name: %s\n", call->name); + trace_seq_printf(s, "ID: %d\n", call->event.type); + trace_seq_printf(s, "format:\n"); + + /* print common fields */ + print_event_fields(s, &ftrace_common_fields); + + trace_seq_putc(s, '\n'); + + /* print event specific fields */ + head = trace_get_fields(call); + print_event_fields(s, head); + + r = trace_seq_printf(s, "\nprint fmt: %s\n", call->print_fmt); if (!r) { /* @@ -613,7 +697,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) return -ENOMEM; trace_seq_init(s); - trace_seq_printf(s, "%d\n", call->id); + trace_seq_printf(s, "%d\n", call->event.type); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); @@ -919,14 +1003,15 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, const struct file_operations *filter, const struct file_operations *format) { + struct list_head *head; int ret; /* * If the trace point header did not define TRACE_SYSTEM * then the system would be called "TRACE_SYSTEM". */ - if (strcmp(call->system, TRACE_SYSTEM) != 0) - d_events = event_subsystem_dir(call->system, d_events); + if (strcmp(call->class->system, TRACE_SYSTEM) != 0) + d_events = event_subsystem_dir(call->class->system, d_events); call->dir = debugfs_create_dir(call->name, d_events); if (!call->dir) { @@ -935,26 +1020,31 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, return -1; } - if (call->regfunc) + if (call->class->reg) trace_create_file("enable", 0644, call->dir, call, enable); - if (call->id && call->perf_event_enable) +#ifdef CONFIG_PERF_EVENTS + if (call->event.type && call->class->reg) trace_create_file("id", 0444, call->dir, call, id); +#endif - if (call->define_fields) { - ret = trace_define_common_fields(call); - if (!ret) - ret = call->define_fields(call); + /* + * Other events may have the same class. Only update + * the fields if they are not already defined. 
+ */ + head = trace_get_fields(call); + if (list_empty(head)) { + ret = call->class->define_fields(call); if (ret < 0) { pr_warning("Could not initialize trace point" " events/%s\n", call->name); return ret; } - trace_create_file("filter", 0644, call->dir, call, - filter); } + trace_create_file("filter", 0644, call->dir, call, + filter); trace_create_file("format", 0444, call->dir, call, format); @@ -962,20 +1052,26 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, return 0; } -static int __trace_add_event_call(struct ftrace_event_call *call) +static int +__trace_add_event_call(struct ftrace_event_call *call, struct module *mod, + const struct file_operations *id, + const struct file_operations *enable, + const struct file_operations *filter, + const struct file_operations *format) { struct dentry *d_events; int ret; + /* The linker may leave blanks */ if (!call->name) return -EINVAL; - if (call->raw_init) { - ret = call->raw_init(call); + if (call->class->raw_init) { + ret = call->class->raw_init(call); if (ret < 0) { if (ret != -ENOSYS) - pr_warning("Could not initialize trace " - "events/%s\n", call->name); + pr_warning("Could not initialize trace events/%s\n", + call->name); return ret; } } @@ -984,11 +1080,10 @@ static int __trace_add_event_call(struct ftrace_event_call *call) if (!d_events) return -ENOENT; - ret = event_create_dir(call, d_events, &ftrace_event_id_fops, - &ftrace_enable_fops, &ftrace_event_filter_fops, - &ftrace_event_format_fops); + ret = event_create_dir(call, d_events, id, enable, filter, format); if (!ret) list_add(&call->list, &ftrace_events); + call->mod = mod; return ret; } @@ -998,7 +1093,10 @@ int trace_add_event_call(struct ftrace_event_call *call) { int ret; mutex_lock(&event_mutex); - ret = __trace_add_event_call(call); + ret = __trace_add_event_call(call, NULL, &ftrace_event_id_fops, + &ftrace_enable_fops, + &ftrace_event_filter_fops, + &ftrace_event_format_fops); mutex_unlock(&event_mutex); return ret; } @@ -1035,13 +1133,13 @@ static void remove_subsystem_dir(const char *name) static void __trace_remove_event_call(struct ftrace_event_call *call) { ftrace_event_enable_disable(call, 0); - if (call->event) - __unregister_ftrace_event(call->event); + if (call->event.funcs) + __unregister_ftrace_event(&call->event); debugfs_remove_recursive(call->dir); list_del(&call->list); trace_destroy_fields(call); destroy_preds(call); - remove_subsystem_dir(call->system); + remove_subsystem_dir(call->class->system); } /* Remove an event_call */ @@ -1115,8 +1213,6 @@ static void trace_module_add_events(struct module *mod) { struct ftrace_module_file_ops *file_ops = NULL; struct ftrace_event_call *call, *start, *end; - struct dentry *d_events; - int ret; start = mod->trace_events; end = mod->trace_events + mod->num_trace_events; @@ -1124,38 +1220,14 @@ static void trace_module_add_events(struct module *mod) if (start == end) return; - d_events = event_trace_events_dir(); - if (!d_events) + file_ops = trace_create_file_ops(mod); + if (!file_ops) return; for_each_event(call, start, end) { - /* The linker may leave blanks */ - if (!call->name) - continue; - if (call->raw_init) { - ret = call->raw_init(call); - if (ret < 0) { - if (ret != -ENOSYS) - pr_warning("Could not initialize trace " - "point events/%s\n", call->name); - continue; - } - } - /* - * This module has events, create file ops for this module - * if not already done. 
- */ - if (!file_ops) { - file_ops = trace_create_file_ops(mod); - if (!file_ops) - return; - } - call->mod = mod; - ret = event_create_dir(call, d_events, + __trace_add_event_call(call, mod, &file_ops->id, &file_ops->enable, &file_ops->filter, &file_ops->format); - if (!ret) - list_add(&call->list, &ftrace_events); } } @@ -1282,25 +1354,14 @@ static __init int event_trace_init(void) trace_create_file("enable", 0644, d_events, NULL, &ftrace_system_enable_fops); + if (trace_define_common_fields()) + pr_warning("tracing: Failed to allocate common fields"); + for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { - /* The linker may leave blanks */ - if (!call->name) - continue; - if (call->raw_init) { - ret = call->raw_init(call); - if (ret < 0) { - if (ret != -ENOSYS) - pr_warning("Could not initialize trace " - "point events/%s\n", call->name); - continue; - } - } - ret = event_create_dir(call, d_events, &ftrace_event_id_fops, + __trace_add_event_call(call, NULL, &ftrace_event_id_fops, &ftrace_enable_fops, &ftrace_event_filter_fops, &ftrace_event_format_fops); - if (!ret) - list_add(&call->list, &ftrace_events); } while (true) { @@ -1388,8 +1449,8 @@ static __init void event_trace_self_tests(void) list_for_each_entry(call, &ftrace_events, list) { - /* Only test those that have a regfunc */ - if (!call->regfunc) + /* Only test those that have a probe */ + if (!call->class || !call->class->probe) continue; /* @@ -1399,8 +1460,8 @@ static __init void event_trace_self_tests(void) * syscalls as we test. */ #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS - if (call->system && - strcmp(call->system, "syscalls") == 0) + if (call->class->system && + strcmp(call->class->system, "syscalls") == 0) continue; #endif @@ -1410,7 +1471,7 @@ static __init void event_trace_self_tests(void) * If an event is already enabled, someone is using * it and the self test should not be on. 
*/ - if (call->enabled) { + if (call->flags & TRACE_EVENT_FL_ENABLED) { pr_warning("Enabled event during self test!\n"); WARN_ON_ONCE(1); continue; @@ -1487,12 +1548,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip) struct ftrace_entry *entry; unsigned long flags; long disabled; - int resched; int cpu; int pc; pc = preempt_count(); - resched = ftrace_preempt_disable(); + preempt_disable_notrace(); cpu = raw_smp_processor_id(); disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); @@ -1514,7 +1574,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip) out: atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); - ftrace_preempt_enable(resched); + preempt_enable_notrace(); } static struct ftrace_ops trace_ops __initdata = diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 88c0b6dbd7fe..36d40104b17f 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -497,11 +497,11 @@ void print_subsystem_event_filter(struct event_subsystem *system, } static struct ftrace_event_field * -find_event_field(struct ftrace_event_call *call, char *name) +__find_event_field(struct list_head *head, char *name) { struct ftrace_event_field *field; - list_for_each_entry(field, &call->fields, link) { + list_for_each_entry(field, head, link) { if (!strcmp(field->name, name)) return field; } @@ -509,6 +509,20 @@ find_event_field(struct ftrace_event_call *call, char *name) return NULL; } +static struct ftrace_event_field * +find_event_field(struct ftrace_event_call *call, char *name) +{ + struct ftrace_event_field *field; + struct list_head *head; + + field = __find_event_field(&ftrace_common_fields, name); + if (field) + return field; + + head = trace_get_fields(call); + return __find_event_field(head, name); +} + static void filter_free_pred(struct filter_pred *pred) { if (!pred) @@ -545,7 +559,7 @@ static void filter_disable_preds(struct ftrace_event_call *call) struct event_filter *filter = call->filter; int i; - call->filter_active = 0; + call->flags &= ~TRACE_EVENT_FL_FILTERED; filter->n_preds = 0; for (i = 0; i < MAX_FILTER_PRED; i++) @@ -572,7 +586,7 @@ void destroy_preds(struct ftrace_event_call *call) { __free_preds(call->filter); call->filter = NULL; - call->filter_active = 0; + call->flags &= ~TRACE_EVENT_FL_FILTERED; } static struct event_filter *__alloc_preds(void) @@ -611,7 +625,7 @@ static int init_preds(struct ftrace_event_call *call) if (call->filter) return 0; - call->filter_active = 0; + call->flags &= ~TRACE_EVENT_FL_FILTERED; call->filter = __alloc_preds(); if (IS_ERR(call->filter)) return PTR_ERR(call->filter); @@ -625,10 +639,7 @@ static int init_subsystem_preds(struct event_subsystem *system) int err; list_for_each_entry(call, &ftrace_events, list) { - if (!call->define_fields) - continue; - - if (strcmp(call->system, system->name) != 0) + if (strcmp(call->class->system, system->name) != 0) continue; err = init_preds(call); @@ -644,10 +655,7 @@ static void filter_free_subsystem_preds(struct event_subsystem *system) struct ftrace_event_call *call; list_for_each_entry(call, &ftrace_events, list) { - if (!call->define_fields) - continue; - - if (strcmp(call->system, system->name) != 0) + if (strcmp(call->class->system, system->name) != 0) continue; filter_disable_preds(call); @@ -1249,10 +1257,7 @@ static int replace_system_preds(struct event_subsystem *system, list_for_each_entry(call, &ftrace_events, list) { struct event_filter *filter = call->filter; - if 
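/*
 * Illustrative sketch (editor's note, not part of this patch): the separate
 * 'enabled' and 'filter_active' ints are folded into the call->flags bitmask,
 * so the enable path in ftrace_event_enable_disable() now reads roughly:
 *
 *	if (!(call->flags & TRACE_EVENT_FL_ENABLED)) {	// was: if (!call->enabled)
 *		if (trace_flags & TRACE_ITER_RECORD_CMD) {
 *			tracing_start_cmdline_record();
 *			call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
 *		}
 *		call->class->reg(call, TRACE_REG_REGISTER);	// was: call->regfunc(call)
 *		call->flags |= TRACE_EVENT_FL_ENABLED;
 *	}
 */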
(!call->define_fields) - continue; - - if (strcmp(call->system, system->name) != 0) + if (strcmp(call->class->system, system->name) != 0) continue; /* try to see if the filter can be applied */ @@ -1266,7 +1271,7 @@ static int replace_system_preds(struct event_subsystem *system, if (err) filter_disable_preds(call); else { - call->filter_active = 1; + call->flags |= TRACE_EVENT_FL_FILTERED; replace_filter_string(filter, filter_string); } fail = false; @@ -1315,7 +1320,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) if (err) append_filter_err(ps, call->filter); else - call->filter_active = 1; + call->flags |= TRACE_EVENT_FL_FILTERED; out: filter_opstack_clear(ps); postfix_clear(ps); @@ -1393,12 +1398,12 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, mutex_lock(&event_mutex); list_for_each_entry(call, &ftrace_events, list) { - if (call->id == event_id) + if (call->event.type == event_id) break; } err = -EINVAL; - if (!call) + if (&call->list == &ftrace_events) goto out_unlock; err = -EEXIST; diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index e091f64ba6ce..4ba44deaac25 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -125,12 +125,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ #include "trace_entries.h" -static int ftrace_raw_init_event(struct ftrace_event_call *call) -{ - INIT_LIST_HEAD(&call->fields); - return 0; -} - #undef __entry #define __entry REC @@ -153,17 +147,21 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call) #define F_printk(fmt, args...) #fmt ", " __stringify(args) #undef FTRACE_ENTRY -#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ +#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \ + \ +struct ftrace_event_class event_class_ftrace_##call = { \ + .system = __stringify(TRACE_SYSTEM), \ + .define_fields = ftrace_define_fields_##call, \ + .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ +}; \ \ struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ - .id = type, \ - .system = __stringify(TRACE_SYSTEM), \ - .raw_init = ftrace_raw_init_event, \ + .event.type = etype, \ + .class = &event_class_ftrace_##call, \ .print_fmt = print, \ - .define_fields = ftrace_define_fields_##call, \ }; \ #include "trace_entries.h" diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index b3f3776b0cd6..16aee4d44e8f 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -54,14 +54,14 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) struct trace_array_cpu *data; unsigned long flags; long disabled; - int cpu, resched; + int cpu; int pc; if (unlikely(!ftrace_function_enabled)) return; pc = preempt_count(); - resched = ftrace_preempt_disable(); + preempt_disable_notrace(); local_save_flags(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; @@ -71,7 +71,7 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) trace_function(tr, ip, parent_ip, flags, pc); atomic_dec(&data->disabled); - ftrace_preempt_enable(resched); + preempt_enable_notrace(); } static void diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 9aed1a5cf553..6bff23625781 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -40,7 +40,7 @@ struct fgraph_data { 
#define TRACE_GRAPH_PRINT_OVERHEAD 0x4 #define TRACE_GRAPH_PRINT_PROC 0x8 #define TRACE_GRAPH_PRINT_DURATION 0x10 -#define TRACE_GRAPH_PRINT_ABS_TIME 0X20 +#define TRACE_GRAPH_PRINT_ABS_TIME 0x20 static struct tracer_opt trace_opts[] = { /* Display overruns? (for self-debug purpose) */ @@ -179,7 +179,7 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) return ret; } -static int __trace_graph_entry(struct trace_array *tr, +int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, unsigned long flags, int pc) @@ -246,7 +246,7 @@ int trace_graph_thresh_entry(struct ftrace_graph_ent *trace) return trace_graph_entry(trace); } -static void __trace_graph_return(struct trace_array *tr, +void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, unsigned long flags, int pc) @@ -490,9 +490,10 @@ get_return_for_leaf(struct trace_iterator *iter, * We need to consume the current entry to see * the next one. */ - ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); + ring_buffer_consume(iter->tr->buffer, iter->cpu, + NULL, NULL); event = ring_buffer_peek(iter->tr->buffer, iter->cpu, - NULL); + NULL, NULL); } if (!event) @@ -526,17 +527,18 @@ get_return_for_leaf(struct trace_iterator *iter, /* Signal a overhead of time execution to the output */ static int -print_graph_overhead(unsigned long long duration, struct trace_seq *s) +print_graph_overhead(unsigned long long duration, struct trace_seq *s, + u32 flags) { /* If duration disappear, we don't need anything */ - if (!(tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)) + if (!(flags & TRACE_GRAPH_PRINT_DURATION)) return 1; /* Non nested entry or return */ if (duration == -1) return trace_seq_printf(s, " "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { + if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { /* Duration exceeded 100 msecs */ if (duration > 100000ULL) return trace_seq_printf(s, "! 
"); @@ -562,7 +564,7 @@ static int print_graph_abs_time(u64 t, struct trace_seq *s) static enum print_line_t print_graph_irq(struct trace_iterator *iter, unsigned long addr, - enum trace_type type, int cpu, pid_t pid) + enum trace_type type, int cpu, pid_t pid, u32 flags) { int ret; struct trace_seq *s = &iter->seq; @@ -572,21 +574,21 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, return TRACE_TYPE_UNHANDLED; /* Absolute time */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { ret = print_graph_abs_time(iter->ts, s); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } /* Cpu */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { + if (flags & TRACE_GRAPH_PRINT_CPU) { ret = print_graph_cpu(s, cpu); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; } /* Proc */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { + if (flags & TRACE_GRAPH_PRINT_PROC) { ret = print_graph_proc(s, pid); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; @@ -596,7 +598,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, } /* No overhead */ - ret = print_graph_overhead(-1, s); + ret = print_graph_overhead(-1, s, flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -609,7 +611,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, return TRACE_TYPE_PARTIAL_LINE; /* Don't close the duration column if haven't one */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) + if (flags & TRACE_GRAPH_PRINT_DURATION) trace_seq_printf(s, " |"); ret = trace_seq_printf(s, "\n"); @@ -639,7 +641,8 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) /* Print nsecs (we don't want to exceed 7 numbers) */ if (len < 7) { - snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem); + snprintf(nsecs_str, min(sizeof(nsecs_str), 8UL - len), "%03lu", + nsecs_rem); ret = trace_seq_printf(s, ".%s", nsecs_str); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -679,7 +682,8 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s) static enum print_line_t print_graph_entry_leaf(struct trace_iterator *iter, struct ftrace_graph_ent_entry *entry, - struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s) + struct ftrace_graph_ret_entry *ret_entry, + struct trace_seq *s, u32 flags) { struct fgraph_data *data = iter->private; struct ftrace_graph_ret *graph_ret; @@ -711,12 +715,12 @@ print_graph_entry_leaf(struct trace_iterator *iter, } /* Overhead */ - ret = print_graph_overhead(duration, s); + ret = print_graph_overhead(duration, s, flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; /* Duration */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { + if (flags & TRACE_GRAPH_PRINT_DURATION) { ret = print_graph_duration(duration, s); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; @@ -739,7 +743,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, static enum print_line_t print_graph_entry_nested(struct trace_iterator *iter, struct ftrace_graph_ent_entry *entry, - struct trace_seq *s, int cpu) + struct trace_seq *s, int cpu, u32 flags) { struct ftrace_graph_ent *call = &entry->graph_ent; struct fgraph_data *data = iter->private; @@ -759,12 +763,12 @@ print_graph_entry_nested(struct trace_iterator *iter, } /* No overhead */ - ret = print_graph_overhead(-1, s); + ret = print_graph_overhead(-1, s, flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; /* No time */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { + if (flags & TRACE_GRAPH_PRINT_DURATION) { ret = 
trace_seq_printf(s, " | "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -790,7 +794,7 @@ print_graph_entry_nested(struct trace_iterator *iter, static enum print_line_t print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, - int type, unsigned long addr) + int type, unsigned long addr, u32 flags) { struct fgraph_data *data = iter->private; struct trace_entry *ent = iter->ent; @@ -803,27 +807,27 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, if (type) { /* Interrupt */ - ret = print_graph_irq(iter, addr, type, cpu, ent->pid); + ret = print_graph_irq(iter, addr, type, cpu, ent->pid, flags); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; } /* Absolute time */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { ret = print_graph_abs_time(iter->ts, s); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } /* Cpu */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { + if (flags & TRACE_GRAPH_PRINT_CPU) { ret = print_graph_cpu(s, cpu); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; } /* Proc */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { + if (flags & TRACE_GRAPH_PRINT_PROC) { ret = print_graph_proc(s, ent->pid); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; @@ -845,7 +849,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, static enum print_line_t print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, - struct trace_iterator *iter) + struct trace_iterator *iter, u32 flags) { struct fgraph_data *data = iter->private; struct ftrace_graph_ent *call = &field->graph_ent; @@ -853,14 +857,14 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, static enum print_line_t ret; int cpu = iter->cpu; - if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) + if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) return TRACE_TYPE_PARTIAL_LINE; leaf_ret = get_return_for_leaf(iter, field); if (leaf_ret) - ret = print_graph_entry_leaf(iter, field, leaf_ret, s); + ret = print_graph_entry_leaf(iter, field, leaf_ret, s, flags); else - ret = print_graph_entry_nested(iter, field, s, cpu); + ret = print_graph_entry_nested(iter, field, s, cpu, flags); if (data) { /* @@ -879,7 +883,8 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, static enum print_line_t print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, - struct trace_entry *ent, struct trace_iterator *iter) + struct trace_entry *ent, struct trace_iterator *iter, + u32 flags) { unsigned long long duration = trace->rettime - trace->calltime; struct fgraph_data *data = iter->private; @@ -909,16 +914,16 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, } } - if (print_graph_prologue(iter, s, 0, 0)) + if (print_graph_prologue(iter, s, 0, 0, flags)) return TRACE_TYPE_PARTIAL_LINE; /* Overhead */ - ret = print_graph_overhead(duration, s); + ret = print_graph_overhead(duration, s, flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; /* Duration */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { + if (flags & TRACE_GRAPH_PRINT_DURATION) { ret = print_graph_duration(duration, s); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; @@ -948,14 +953,15 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, } /* Overrun */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { + if (flags & TRACE_GRAPH_PRINT_OVERRUN) { 
ret = trace_seq_printf(s, " (Overruns: %lu)\n", trace->overrun); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } - ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, cpu, pid); + ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, + cpu, pid, flags); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; @@ -963,8 +969,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, } static enum print_line_t -print_graph_comment(struct trace_seq *s, struct trace_entry *ent, - struct trace_iterator *iter) +print_graph_comment(struct trace_seq *s, struct trace_entry *ent, + struct trace_iterator *iter, u32 flags) { unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); struct fgraph_data *data = iter->private; @@ -976,16 +982,16 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, if (data) depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth; - if (print_graph_prologue(iter, s, 0, 0)) + if (print_graph_prologue(iter, s, 0, 0, flags)) return TRACE_TYPE_PARTIAL_LINE; /* No overhead */ - ret = print_graph_overhead(-1, s); + ret = print_graph_overhead(-1, s, flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; /* No time */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { + if (flags & TRACE_GRAPH_PRINT_DURATION) { ret = trace_seq_printf(s, " | "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -1020,7 +1026,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, if (!event) return TRACE_TYPE_UNHANDLED; - ret = event->trace(iter, sym_flags); + ret = event->funcs->trace(iter, sym_flags, event); if (ret != TRACE_TYPE_HANDLED) return ret; } @@ -1040,7 +1046,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, enum print_line_t -print_graph_function(struct trace_iterator *iter) +print_graph_function_flags(struct trace_iterator *iter, u32 flags) { struct ftrace_graph_ent_entry *field; struct fgraph_data *data = iter->private; @@ -1061,7 +1067,7 @@ print_graph_function(struct trace_iterator *iter) if (data && data->failed) { field = &data->ent; iter->cpu = data->cpu; - ret = print_graph_entry(field, s, iter); + ret = print_graph_entry(field, s, iter, flags); if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) { per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1; ret = TRACE_TYPE_NO_CONSUME; @@ -1081,32 +1087,50 @@ print_graph_function(struct trace_iterator *iter) struct ftrace_graph_ent_entry saved; trace_assign_type(field, entry); saved = *field; - return print_graph_entry(&saved, s, iter); + return print_graph_entry(&saved, s, iter, flags); } case TRACE_GRAPH_RET: { struct ftrace_graph_ret_entry *field; trace_assign_type(field, entry); - return print_graph_return(&field->ret, s, entry, iter); + return print_graph_return(&field->ret, s, entry, iter, flags); } + case TRACE_STACK: + case TRACE_FN: + /* dont trace stack and functions as comments */ + return TRACE_TYPE_UNHANDLED; + default: - return print_graph_comment(s, entry, iter); + return print_graph_comment(s, entry, iter, flags); } return TRACE_TYPE_HANDLED; } -static void print_lat_header(struct seq_file *s) +static enum print_line_t +print_graph_function(struct trace_iterator *iter) +{ + return print_graph_function_flags(iter, tracer_flags.val); +} + +static enum print_line_t +print_graph_function_event(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + return print_graph_function(iter); +} + +static void print_lat_header(struct seq_file *s, u32 flags) { static const char spaces[] = " " /* 16 spaces */ " " /* 4 spaces */ " "; /* 
17 spaces */ int size = 0; - if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) size += 16; - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) + if (flags & TRACE_GRAPH_PRINT_CPU) size += 4; - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) + if (flags & TRACE_GRAPH_PRINT_PROC) size += 17; seq_printf(s, "#%.*s _-----=> irqs-off \n", size, spaces); @@ -1117,43 +1141,48 @@ static void print_lat_header(struct seq_file *s) seq_printf(s, "#%.*s|||| / \n", size, spaces); } -static void print_graph_headers(struct seq_file *s) +void print_graph_headers_flags(struct seq_file *s, u32 flags) { int lat = trace_flags & TRACE_ITER_LATENCY_FMT; if (lat) - print_lat_header(s); + print_lat_header(s, flags); /* 1st line */ seq_printf(s, "#"); - if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) seq_printf(s, " TIME "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) + if (flags & TRACE_GRAPH_PRINT_CPU) seq_printf(s, " CPU"); - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) + if (flags & TRACE_GRAPH_PRINT_PROC) seq_printf(s, " TASK/PID "); if (lat) seq_printf(s, "|||||"); - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) + if (flags & TRACE_GRAPH_PRINT_DURATION) seq_printf(s, " DURATION "); seq_printf(s, " FUNCTION CALLS\n"); /* 2nd line */ seq_printf(s, "#"); - if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) seq_printf(s, " | "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) + if (flags & TRACE_GRAPH_PRINT_CPU) seq_printf(s, " | "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) + if (flags & TRACE_GRAPH_PRINT_PROC) seq_printf(s, " | | "); if (lat) seq_printf(s, "|||||"); - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) + if (flags & TRACE_GRAPH_PRINT_DURATION) seq_printf(s, " | | "); seq_printf(s, " | | | |\n"); } -static void graph_trace_open(struct trace_iterator *iter) +void print_graph_headers(struct seq_file *s) +{ + print_graph_headers_flags(s, tracer_flags.val); +} + +void graph_trace_open(struct trace_iterator *iter) { /* pid and depth on the last trace processed */ struct fgraph_data *data; @@ -1188,7 +1217,7 @@ static void graph_trace_open(struct trace_iterator *iter) pr_warning("function graph tracer: not enough memory\n"); } -static void graph_trace_close(struct trace_iterator *iter) +void graph_trace_close(struct trace_iterator *iter) { struct fgraph_data *data = iter->private; @@ -1198,6 +1227,20 @@ static void graph_trace_close(struct trace_iterator *iter) } } +static struct trace_event_functions graph_functions = { + .trace = print_graph_function_event, +}; + +static struct trace_event graph_trace_entry_event = { + .type = TRACE_GRAPH_ENT, + .funcs = &graph_functions, +}; + +static struct trace_event graph_trace_ret_event = { + .type = TRACE_GRAPH_RET, + .funcs = &graph_functions +}; + static struct tracer graph_trace __read_mostly = { .name = "function_graph", .open = graph_trace_open, @@ -1219,6 +1262,16 @@ static __init int init_graph_trace(void) { max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); + if (!register_ftrace_event(&graph_trace_entry_event)) { + pr_warning("Warning: could not register graph trace events\n"); + return 1; + } + + if (!register_ftrace_event(&graph_trace_ret_event)) { + pr_warning("Warning: could not register graph trace events\n"); + return 1; + } + return register_tracer(&graph_trace); } diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c deleted file mode 100644 index 
7b97000745f5..000000000000 --- a/kernel/trace/trace_hw_branches.c +++ /dev/null @@ -1,312 +0,0 @@ -/* - * h/w branch tracer for x86 based on BTS - * - * Copyright (C) 2008-2009 Intel Corporation. - * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009 - */ -#include <linux/kallsyms.h> -#include <linux/debugfs.h> -#include <linux/ftrace.h> -#include <linux/module.h> -#include <linux/cpu.h> -#include <linux/smp.h> -#include <linux/fs.h> - -#include <asm/ds.h> - -#include "trace_output.h" -#include "trace.h" - - -#define BTS_BUFFER_SIZE (1 << 13) - -static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer); -static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer); - -#define this_tracer per_cpu(hwb_tracer, smp_processor_id()) - -static int trace_hw_branches_enabled __read_mostly; -static int trace_hw_branches_suspended __read_mostly; -static struct trace_array *hw_branch_trace __read_mostly; - - -static void bts_trace_init_cpu(int cpu) -{ - per_cpu(hwb_tracer, cpu) = - ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu), - BTS_BUFFER_SIZE, NULL, (size_t)-1, - BTS_KERNEL); - - if (IS_ERR(per_cpu(hwb_tracer, cpu))) - per_cpu(hwb_tracer, cpu) = NULL; -} - -static int bts_trace_init(struct trace_array *tr) -{ - int cpu; - - hw_branch_trace = tr; - trace_hw_branches_enabled = 0; - - get_online_cpus(); - for_each_online_cpu(cpu) { - bts_trace_init_cpu(cpu); - - if (likely(per_cpu(hwb_tracer, cpu))) - trace_hw_branches_enabled = 1; - } - trace_hw_branches_suspended = 0; - put_online_cpus(); - - /* If we could not enable tracing on a single cpu, we fail. */ - return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP; -} - -static void bts_trace_reset(struct trace_array *tr) -{ - int cpu; - - get_online_cpus(); - for_each_online_cpu(cpu) { - if (likely(per_cpu(hwb_tracer, cpu))) { - ds_release_bts(per_cpu(hwb_tracer, cpu)); - per_cpu(hwb_tracer, cpu) = NULL; - } - } - trace_hw_branches_enabled = 0; - trace_hw_branches_suspended = 0; - put_online_cpus(); -} - -static void bts_trace_start(struct trace_array *tr) -{ - int cpu; - - get_online_cpus(); - for_each_online_cpu(cpu) - if (likely(per_cpu(hwb_tracer, cpu))) - ds_resume_bts(per_cpu(hwb_tracer, cpu)); - trace_hw_branches_suspended = 0; - put_online_cpus(); -} - -static void bts_trace_stop(struct trace_array *tr) -{ - int cpu; - - get_online_cpus(); - for_each_online_cpu(cpu) - if (likely(per_cpu(hwb_tracer, cpu))) - ds_suspend_bts(per_cpu(hwb_tracer, cpu)); - trace_hw_branches_suspended = 1; - put_online_cpus(); -} - -static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - int cpu = (long)hcpu; - - switch (action) { - case CPU_ONLINE: - case CPU_DOWN_FAILED: - /* The notification is sent with interrupts enabled. */ - if (trace_hw_branches_enabled) { - bts_trace_init_cpu(cpu); - - if (trace_hw_branches_suspended && - likely(per_cpu(hwb_tracer, cpu))) - ds_suspend_bts(per_cpu(hwb_tracer, cpu)); - } - break; - - case CPU_DOWN_PREPARE: - /* The notification is sent with interrupts enabled. 
*/ - if (likely(per_cpu(hwb_tracer, cpu))) { - ds_release_bts(per_cpu(hwb_tracer, cpu)); - per_cpu(hwb_tracer, cpu) = NULL; - } - } - - return NOTIFY_DONE; -} - -static struct notifier_block bts_hotcpu_notifier __cpuinitdata = { - .notifier_call = bts_hotcpu_handler -}; - -static void bts_trace_print_header(struct seq_file *m) -{ - seq_puts(m, "# CPU# TO <- FROM\n"); -} - -static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) -{ - unsigned long symflags = TRACE_ITER_SYM_OFFSET; - struct trace_entry *entry = iter->ent; - struct trace_seq *seq = &iter->seq; - struct hw_branch_entry *it; - - trace_assign_type(it, entry); - - if (entry->type == TRACE_HW_BRANCHES) { - if (trace_seq_printf(seq, "%4d ", iter->cpu) && - seq_print_ip_sym(seq, it->to, symflags) && - trace_seq_printf(seq, "\t <- ") && - seq_print_ip_sym(seq, it->from, symflags) && - trace_seq_printf(seq, "\n")) - return TRACE_TYPE_HANDLED; - return TRACE_TYPE_PARTIAL_LINE; - } - return TRACE_TYPE_UNHANDLED; -} - -void trace_hw_branch(u64 from, u64 to) -{ - struct ftrace_event_call *call = &event_hw_branch; - struct trace_array *tr = hw_branch_trace; - struct ring_buffer_event *event; - struct ring_buffer *buf; - struct hw_branch_entry *entry; - unsigned long irq1; - int cpu; - - if (unlikely(!tr)) - return; - - if (unlikely(!trace_hw_branches_enabled)) - return; - - local_irq_save(irq1); - cpu = raw_smp_processor_id(); - if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) - goto out; - - buf = tr->buffer; - event = trace_buffer_lock_reserve(buf, TRACE_HW_BRANCHES, - sizeof(*entry), 0, 0); - if (!event) - goto out; - entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, from); - entry->ent.type = TRACE_HW_BRANCHES; - entry->from = from; - entry->to = to; - if (!filter_check_discard(call, entry, buf, event)) - trace_buffer_unlock_commit(buf, event, 0, 0); - - out: - atomic_dec(&tr->data[cpu]->disabled); - local_irq_restore(irq1); -} - -static void trace_bts_at(const struct bts_trace *trace, void *at) -{ - struct bts_struct bts; - int err = 0; - - WARN_ON_ONCE(!trace->read); - if (!trace->read) - return; - - err = trace->read(this_tracer, at, &bts); - if (err < 0) - return; - - switch (bts.qualifier) { - case BTS_BRANCH: - trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to); - break; - } -} - -/* - * Collect the trace on the current cpu and write it into the ftrace buffer. - * - * pre: tracing must be suspended on the current cpu - */ -static void trace_bts_cpu(void *arg) -{ - struct trace_array *tr = (struct trace_array *)arg; - const struct bts_trace *trace; - unsigned char *at; - - if (unlikely(!tr)) - return; - - if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled))) - return; - - if (unlikely(!this_tracer)) - return; - - trace = ds_read_bts(this_tracer); - if (!trace) - return; - - for (at = trace->ds.top; (void *)at < trace->ds.end; - at += trace->ds.size) - trace_bts_at(trace, at); - - for (at = trace->ds.begin; (void *)at < trace->ds.top; - at += trace->ds.size) - trace_bts_at(trace, at); -} - -static void trace_bts_prepare(struct trace_iterator *iter) -{ - int cpu; - - get_online_cpus(); - for_each_online_cpu(cpu) - if (likely(per_cpu(hwb_tracer, cpu))) - ds_suspend_bts(per_cpu(hwb_tracer, cpu)); - /* - * We need to collect the trace on the respective cpu since ftrace - * implicitly adds the record for the current cpu. - * Once that is more flexible, we could collect the data from any cpu. 
- */ - on_each_cpu(trace_bts_cpu, iter->tr, 1); - - for_each_online_cpu(cpu) - if (likely(per_cpu(hwb_tracer, cpu))) - ds_resume_bts(per_cpu(hwb_tracer, cpu)); - put_online_cpus(); -} - -static void trace_bts_close(struct trace_iterator *iter) -{ - tracing_reset_online_cpus(iter->tr); -} - -void trace_hw_branch_oops(void) -{ - if (this_tracer) { - ds_suspend_bts_noirq(this_tracer); - trace_bts_cpu(hw_branch_trace); - ds_resume_bts_noirq(this_tracer); - } -} - -struct tracer bts_tracer __read_mostly = -{ - .name = "hw-branch-tracer", - .init = bts_trace_init, - .reset = bts_trace_reset, - .print_header = bts_trace_print_header, - .print_line = bts_trace_print_line, - .start = bts_trace_start, - .stop = bts_trace_stop, - .open = trace_bts_prepare, - .close = trace_bts_close, -#ifdef CONFIG_FTRACE_SELFTEST - .selftest = trace_selftest_startup_hw_branches, -#endif /* CONFIG_FTRACE_SELFTEST */ -}; - -__init static int init_bts_trace(void) -{ - register_hotcpu_notifier(&bts_hotcpu_notifier); - return register_tracer(&bts_tracer); -} -device_initcall(init_bts_trace); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 2974bc7538c7..73a6b0601f2e 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -34,6 +34,9 @@ static int trace_type __read_mostly; static int save_lat_flag; +static void stop_irqsoff_tracer(struct trace_array *tr, int graph); +static int start_irqsoff_tracer(struct trace_array *tr, int graph); + #ifdef CONFIG_PREEMPT_TRACER static inline int preempt_trace(void) @@ -55,6 +58,23 @@ irq_trace(void) # define irq_trace() (0) #endif +#define TRACE_DISPLAY_GRAPH 1 + +static struct tracer_opt trace_opts[] = { +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* display latency trace as call graph */ + { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) }, +#endif + { } /* Empty entry */ +}; + +static struct tracer_flags tracer_flags = { + .val = 0, + .opts = trace_opts, +}; + +#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH) + /* * Sequence count - we record it when starting a measurement and * skip the latency if the sequence has changed - some other section @@ -108,6 +128,202 @@ static struct ftrace_ops trace_ops __read_mostly = }; #endif /* CONFIG_FUNCTION_TRACER */ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) +{ + int cpu; + + if (!(bit & TRACE_DISPLAY_GRAPH)) + return -EINVAL; + + if (!(is_graph() ^ set)) + return 0; + + stop_irqsoff_tracer(irqsoff_trace, !set); + + for_each_possible_cpu(cpu) + per_cpu(tracing_cpu, cpu) = 0; + + tracing_max_latency = 0; + tracing_reset_online_cpus(irqsoff_trace); + + return start_irqsoff_tracer(irqsoff_trace, set); +} + +static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) +{ + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int ret; + int cpu; + int pc; + + cpu = raw_smp_processor_id(); + if (likely(!per_cpu(tracing_cpu, cpu))) + return 0; + + local_save_flags(flags); + /* slight chance to get a false positive on tracing_cpu */ + if (!irqs_disabled_flags(flags)) + return 0; + + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) { + pc = preempt_count(); + ret = __trace_graph_entry(tr, trace, flags, pc); + } else + ret = 0; + + atomic_dec(&data->disabled); + return ret; +} + +static void irqsoff_graph_return(struct ftrace_graph_ret *trace) +{ + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + 
unsigned long flags; + long disabled; + int cpu; + int pc; + + cpu = raw_smp_processor_id(); + if (likely(!per_cpu(tracing_cpu, cpu))) + return; + + local_save_flags(flags); + /* slight chance to get a false positive on tracing_cpu */ + if (!irqs_disabled_flags(flags)) + return; + + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) { + pc = preempt_count(); + __trace_graph_return(tr, trace, flags, pc); + } + + atomic_dec(&data->disabled); +} + +static void irqsoff_trace_open(struct trace_iterator *iter) +{ + if (is_graph()) + graph_trace_open(iter); + +} + +static void irqsoff_trace_close(struct trace_iterator *iter) +{ + if (iter->private) + graph_trace_close(iter); +} + +#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \ + TRACE_GRAPH_PRINT_PROC) + +static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) +{ + u32 flags = GRAPH_TRACER_FLAGS; + + if (trace_flags & TRACE_ITER_LATENCY_FMT) + flags |= TRACE_GRAPH_PRINT_DURATION; + else + flags |= TRACE_GRAPH_PRINT_ABS_TIME; + + /* + * In graph mode call the graph tracer output function, + * otherwise go with the TRACE_FN event handler + */ + if (is_graph()) + return print_graph_function_flags(iter, flags); + + return TRACE_TYPE_UNHANDLED; +} + +static void irqsoff_print_header(struct seq_file *s) +{ + if (is_graph()) { + struct trace_iterator *iter = s->private; + u32 flags = GRAPH_TRACER_FLAGS; + + if (trace_flags & TRACE_ITER_LATENCY_FMT) { + /* print nothing if the buffers are empty */ + if (trace_empty(iter)) + return; + + print_trace_header(s, iter); + flags |= TRACE_GRAPH_PRINT_DURATION; + } else + flags |= TRACE_GRAPH_PRINT_ABS_TIME; + + print_graph_headers_flags(s, flags); + } else + trace_default_header(s); +} + +static void +trace_graph_function(struct trace_array *tr, + unsigned long ip, unsigned long flags, int pc) +{ + u64 time = trace_clock_local(); + struct ftrace_graph_ent ent = { + .func = ip, + .depth = 0, + }; + struct ftrace_graph_ret ret = { + .func = ip, + .depth = 0, + .calltime = time, + .rettime = time, + }; + + __trace_graph_entry(tr, &ent, flags, pc); + __trace_graph_return(tr, &ret, flags, pc); +} + +static void +__trace_function(struct trace_array *tr, + unsigned long ip, unsigned long parent_ip, + unsigned long flags, int pc) +{ + if (!is_graph()) + trace_function(tr, ip, parent_ip, flags, pc); + else { + trace_graph_function(tr, parent_ip, flags, pc); + trace_graph_function(tr, ip, flags, pc); + } +} + +#else +#define __trace_function trace_function + +static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) +{ + return -EINVAL; +} + +static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) +{ + return -1; +} + +static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) +{ + return TRACE_TYPE_UNHANDLED; +} + +static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { } +static void irqsoff_print_header(struct seq_file *s) { } +static void irqsoff_trace_open(struct trace_iterator *iter) { } +static void irqsoff_trace_close(struct trace_iterator *iter) { } +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + /* * Should this new latency be reported/recorded? 
*/ @@ -150,7 +366,7 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(delta)) goto out_unlock; - trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); /* Skip 5 functions to get to the irq/preempt enable function */ __trace_stack(tr, flags, 5, pc); @@ -172,7 +388,7 @@ out_unlock: out: data->critical_sequence = max_sequence; data->preempt_timestamp = ftrace_now(cpu); - trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); } static inline void @@ -204,7 +420,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) local_save_flags(flags); - trace_function(tr, ip, parent_ip, flags, preempt_count()); + __trace_function(tr, ip, parent_ip, flags, preempt_count()); per_cpu(tracing_cpu, cpu) = 1; @@ -238,7 +454,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) atomic_inc(&data->disabled); local_save_flags(flags); - trace_function(tr, ip, parent_ip, flags, preempt_count()); + __trace_function(tr, ip, parent_ip, flags, preempt_count()); check_critical_timing(tr, data, parent_ip ? : ip, cpu); data->critical_start = 0; atomic_dec(&data->disabled); @@ -347,19 +563,32 @@ void trace_preempt_off(unsigned long a0, unsigned long a1) } #endif /* CONFIG_PREEMPT_TRACER */ -static void start_irqsoff_tracer(struct trace_array *tr) +static int start_irqsoff_tracer(struct trace_array *tr, int graph) { - register_ftrace_function(&trace_ops); - if (tracing_is_enabled()) + int ret = 0; + + if (!graph) + ret = register_ftrace_function(&trace_ops); + else + ret = register_ftrace_graph(&irqsoff_graph_return, + &irqsoff_graph_entry); + + if (!ret && tracing_is_enabled()) tracer_enabled = 1; else tracer_enabled = 0; + + return ret; } -static void stop_irqsoff_tracer(struct trace_array *tr) +static void stop_irqsoff_tracer(struct trace_array *tr, int graph) { tracer_enabled = 0; - unregister_ftrace_function(&trace_ops); + + if (!graph) + unregister_ftrace_function(&trace_ops); + else + unregister_ftrace_graph(); } static void __irqsoff_tracer_init(struct trace_array *tr) @@ -372,12 +601,14 @@ static void __irqsoff_tracer_init(struct trace_array *tr) /* make sure that the tracer is visible */ smp_wmb(); tracing_reset_online_cpus(tr); - start_irqsoff_tracer(tr); + + if (start_irqsoff_tracer(tr, is_graph())) + printk(KERN_ERR "failed to start irqsoff tracer\n"); } static void irqsoff_tracer_reset(struct trace_array *tr) { - stop_irqsoff_tracer(tr); + stop_irqsoff_tracer(tr, is_graph()); if (!save_lat_flag) trace_flags &= ~TRACE_ITER_LATENCY_FMT; @@ -409,9 +640,16 @@ static struct tracer irqsoff_tracer __read_mostly = .start = irqsoff_tracer_start, .stop = irqsoff_tracer_stop, .print_max = 1, + .print_header = irqsoff_print_header, + .print_line = irqsoff_print_line, + .flags = &tracer_flags, + .set_flag = irqsoff_set_flag, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_irqsoff, #endif + .open = irqsoff_trace_open, + .close = irqsoff_trace_close, + .use_max_tr = 1, }; # define register_irqsoff(trace) register_tracer(&trace) #else @@ -435,9 +673,16 @@ static struct tracer preemptoff_tracer __read_mostly = .start = irqsoff_tracer_start, .stop = irqsoff_tracer_stop, .print_max = 1, + .print_header = irqsoff_print_header, + .print_line = irqsoff_print_line, + .flags = &tracer_flags, + .set_flag = irqsoff_set_flag, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptoff, #endif + .open = irqsoff_trace_open, + .close = 
irqsoff_trace_close, + .use_max_tr = 1, }; # define register_preemptoff(trace) register_tracer(&trace) #else @@ -463,9 +708,16 @@ static struct tracer preemptirqsoff_tracer __read_mostly = .start = irqsoff_tracer_start, .stop = irqsoff_tracer_stop, .print_max = 1, + .print_header = irqsoff_print_header, + .print_line = irqsoff_print_line, + .flags = &tracer_flags, + .set_flag = irqsoff_set_flag, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptirqsoff, #endif + .open = irqsoff_trace_open, + .close = irqsoff_trace_close, + .use_max_tr = 1, }; # define register_preemptirqsoff(trace) register_tracer(&trace) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 1251e367bae9..1b79d1c15726 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -29,6 +29,10 @@ #include <linux/ctype.h> #include <linux/ptrace.h> #include <linux/perf_event.h> +#include <linux/stringify.h> +#include <linux/limits.h> +#include <linux/uaccess.h> +#include <asm/bitsperlong.h> #include "trace.h" #include "trace_output.h" @@ -36,11 +40,11 @@ #define MAX_TRACE_ARGS 128 #define MAX_ARGSTR_LEN 63 #define MAX_EVENT_NAME_LEN 64 +#define MAX_STRING_SIZE PATH_MAX #define KPROBE_EVENT_SYSTEM "kprobes" /* Reserved field names */ #define FIELD_STRING_IP "__probe_ip" -#define FIELD_STRING_NARGS "__probe_nargs" #define FIELD_STRING_RETIP "__probe_ret_ip" #define FIELD_STRING_FUNC "__probe_func" @@ -52,55 +56,214 @@ const char *reserved_field_names[] = { "common_tgid", "common_lock_depth", FIELD_STRING_IP, - FIELD_STRING_NARGS, FIELD_STRING_RETIP, FIELD_STRING_FUNC, }; -struct fetch_func { - unsigned long (*func)(struct pt_regs *, void *); - void *data; -}; - -static __kprobes unsigned long call_fetch(struct fetch_func *f, - struct pt_regs *regs) +/* Printing function type */ +typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *, + void *); +#define PRINT_TYPE_FUNC_NAME(type) print_type_##type +#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type + +/* Printing in basic type function template */ +#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \ +static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \ + const char *name, \ + void *data, void *ent)\ +{ \ + return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\ +} \ +static const char PRINT_TYPE_FMT_NAME(type)[] = fmt; + +DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int) +DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int) +DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long) +DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long) +DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int) +DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int) +DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long) +DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long) + +/* data_rloc: data relative location, compatible with u32 */ +#define make_data_rloc(len, roffs) \ + (((u32)(len) << 16) | ((u32)(roffs) & 0xffff)) +#define get_rloc_len(dl) ((u32)(dl) >> 16) +#define get_rloc_offs(dl) ((u32)(dl) & 0xffff) + +static inline void *get_rloc_data(u32 *dl) { - return f->func(regs, f->data); + return (u8 *)dl + get_rloc_offs(*dl); } -/* fetch handlers */ -static __kprobes unsigned long fetch_register(struct pt_regs *regs, - void *offset) +/* For data_loc conversion */ +static inline void *get_loc_data(u32 *dl, void *ent) { - return regs_get_register(regs, (unsigned int)((unsigned long)offset)); + return (u8 *)ent + get_rloc_offs(*dl); } -static __kprobes unsigned long fetch_stack(struct pt_regs 
*regs, - void *num) -{ - return regs_get_kernel_stack_nth(regs, - (unsigned int)((unsigned long)num)); -} +/* + * Convert data_rloc to data_loc: + * data_rloc stores the offset from data_rloc itself, but data_loc + * stores the offset from event entry. + */ +#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs)) -static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr) +/* For defining macros, define string/string_size types */ +typedef u32 string; +typedef u32 string_size; + +/* Print type function for string type */ +static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, + const char *name, + void *data, void *ent) { - unsigned long retval; + int len = *(u32 *)data >> 16; - if (probe_kernel_address(addr, retval)) - return 0; - return retval; + if (!len) + return trace_seq_printf(s, " %s=(fault)", name); + else + return trace_seq_printf(s, " %s=\"%s\"", name, + (const char *)get_loc_data(data, ent)); } +static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; + +/* Data fetch function type */ +typedef void (*fetch_func_t)(struct pt_regs *, void *, void *); + +struct fetch_param { + fetch_func_t fn; + void *data; +}; -static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, - void *dummy) +static __kprobes void call_fetch(struct fetch_param *fprm, + struct pt_regs *regs, void *dest) { - return regs_return_value(regs); + return fprm->fn(regs, fprm->data, dest); } -static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs, - void *dummy) +#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type +/* + * Define macro for basic types - we don't need to define s* types, because + * we have to care only about bitwidth at recording time. + */ +#define DEFINE_BASIC_FETCH_FUNCS(method) \ +DEFINE_FETCH_##method(u8) \ +DEFINE_FETCH_##method(u16) \ +DEFINE_FETCH_##method(u32) \ +DEFINE_FETCH_##method(u64) + +#define CHECK_FETCH_FUNCS(method, fn) \ + (((FETCH_FUNC_NAME(method, u8) == fn) || \ + (FETCH_FUNC_NAME(method, u16) == fn) || \ + (FETCH_FUNC_NAME(method, u32) == fn) || \ + (FETCH_FUNC_NAME(method, u64) == fn) || \ + (FETCH_FUNC_NAME(method, string) == fn) || \ + (FETCH_FUNC_NAME(method, string_size) == fn)) \ + && (fn != NULL)) + +/* Data fetch function templates */ +#define DEFINE_FETCH_reg(type) \ +static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \ + void *offset, void *dest) \ +{ \ + *(type *)dest = (type)regs_get_register(regs, \ + (unsigned int)((unsigned long)offset)); \ +} +DEFINE_BASIC_FETCH_FUNCS(reg) +/* No string on the register */ +#define fetch_reg_string NULL +#define fetch_reg_string_size NULL + +#define DEFINE_FETCH_stack(type) \ +static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ + void *offset, void *dest) \ +{ \ + *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \ + (unsigned int)((unsigned long)offset)); \ +} +DEFINE_BASIC_FETCH_FUNCS(stack) +/* No string on the stack entry */ +#define fetch_stack_string NULL +#define fetch_stack_string_size NULL + +#define DEFINE_FETCH_retval(type) \ +static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\ + void *dummy, void *dest) \ +{ \ + *(type *)dest = (type)regs_return_value(regs); \ +} +DEFINE_BASIC_FETCH_FUNCS(retval) +/* No string on the retval */ +#define fetch_retval_string NULL +#define fetch_retval_string_size NULL + +#define DEFINE_FETCH_memory(type) \ +static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ + void *addr, void *dest) \ +{ \ + type retval; \ + if 
(probe_kernel_address(addr, retval)) \ + *(type *)dest = 0; \ + else \ + *(type *)dest = retval; \ +} +DEFINE_BASIC_FETCH_FUNCS(memory) +/* + * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max + * length and relative data location. + */ +static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, + void *addr, void *dest) { - return kernel_stack_pointer(regs); + long ret; + int maxlen = get_rloc_len(*(u32 *)dest); + u8 *dst = get_rloc_data(dest); + u8 *src = addr; + mm_segment_t old_fs = get_fs(); + if (!maxlen) + return; + /* + * Try to get string again, since the string can be changed while + * probing. + */ + set_fs(KERNEL_DS); + pagefault_disable(); + do + ret = __copy_from_user_inatomic(dst++, src++, 1); + while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen); + dst[-1] = '\0'; + pagefault_enable(); + set_fs(old_fs); + + if (ret < 0) { /* Failed to fetch string */ + ((u8 *)get_rloc_data(dest))[0] = '\0'; + *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); + } else + *(u32 *)dest = make_data_rloc(src - (u8 *)addr, + get_rloc_offs(*(u32 *)dest)); +} +/* Return the length of string -- including null terminal byte */ +static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, + void *addr, void *dest) +{ + int ret, len = 0; + u8 c; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + pagefault_disable(); + do { + ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); + len++; + } while (c && ret == 0 && len < MAX_STRING_SIZE); + pagefault_enable(); + set_fs(old_fs); + + if (ret < 0) /* Failed to check the length */ + *(u32 *)dest = 0; + else + *(u32 *)dest = len; } /* Memory fetching by symbol */ @@ -145,51 +308,168 @@ static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) return sc; } -static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data) -{ - struct symbol_cache *sc = data; - - if (sc->addr) - return fetch_memory(regs, (void *)sc->addr); - else - return 0; +#define DEFINE_FETCH_symbol(type) \ +static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\ + void *data, void *dest) \ +{ \ + struct symbol_cache *sc = data; \ + if (sc->addr) \ + fetch_memory_##type(regs, (void *)sc->addr, dest); \ + else \ + *(type *)dest = 0; \ } +DEFINE_BASIC_FETCH_FUNCS(symbol) +DEFINE_FETCH_symbol(string) +DEFINE_FETCH_symbol(string_size) -/* Special indirect memory access interface */ -struct indirect_fetch_data { - struct fetch_func orig; +/* Dereference memory access function */ +struct deref_fetch_param { + struct fetch_param orig; long offset; }; -static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data) -{ - struct indirect_fetch_data *ind = data; - unsigned long addr; - - addr = call_fetch(&ind->orig, regs); - if (addr) { - addr += ind->offset; - return fetch_memory(regs, (void *)addr); - } else - return 0; +#define DEFINE_FETCH_deref(type) \ +static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\ + void *data, void *dest) \ +{ \ + struct deref_fetch_param *dprm = data; \ + unsigned long addr; \ + call_fetch(&dprm->orig, regs, &addr); \ + if (addr) { \ + addr += dprm->offset; \ + fetch_memory_##type(regs, (void *)addr, dest); \ + } else \ + *(type *)dest = 0; \ } +DEFINE_BASIC_FETCH_FUNCS(deref) +DEFINE_FETCH_deref(string) +DEFINE_FETCH_deref(string_size) -static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data) +static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) { - 
if (data->orig.func == fetch_indirect) - free_indirect_fetch_data(data->orig.data); - else if (data->orig.func == fetch_symbol) + if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) + free_deref_fetch_param(data->orig.data); + else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) free_symbol_cache(data->orig.data); kfree(data); } +/* Default (unsigned long) fetch type */ +#define __DEFAULT_FETCH_TYPE(t) u##t +#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) +#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG) +#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) + +/* Fetch types */ +enum { + FETCH_MTD_reg = 0, + FETCH_MTD_stack, + FETCH_MTD_retval, + FETCH_MTD_memory, + FETCH_MTD_symbol, + FETCH_MTD_deref, + FETCH_MTD_END, +}; + +#define ASSIGN_FETCH_FUNC(method, type) \ + [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type) + +#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \ + {.name = _name, \ + .size = _size, \ + .is_signed = sign, \ + .print = PRINT_TYPE_FUNC_NAME(ptype), \ + .fmt = PRINT_TYPE_FMT_NAME(ptype), \ + .fmttype = _fmttype, \ + .fetch = { \ +ASSIGN_FETCH_FUNC(reg, ftype), \ +ASSIGN_FETCH_FUNC(stack, ftype), \ +ASSIGN_FETCH_FUNC(retval, ftype), \ +ASSIGN_FETCH_FUNC(memory, ftype), \ +ASSIGN_FETCH_FUNC(symbol, ftype), \ +ASSIGN_FETCH_FUNC(deref, ftype), \ + } \ + } + +#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \ + __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype) + +#define FETCH_TYPE_STRING 0 +#define FETCH_TYPE_STRSIZE 1 + +/* Fetch type information table */ +static const struct fetch_type { + const char *name; /* Name of type */ + size_t size; /* Byte size of type */ + int is_signed; /* Signed flag */ + print_type_func_t print; /* Print functions */ + const char *fmt; /* Fromat string */ + const char *fmttype; /* Name in format file */ + /* Fetch functions */ + fetch_func_t fetch[FETCH_MTD_END]; +} fetch_type_table[] = { + /* Special types */ + [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, + sizeof(u32), 1, "__data_loc char[]"), + [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32, + string_size, sizeof(u32), 0, "u32"), + /* Basic types */ + ASSIGN_FETCH_TYPE(u8, u8, 0), + ASSIGN_FETCH_TYPE(u16, u16, 0), + ASSIGN_FETCH_TYPE(u32, u32, 0), + ASSIGN_FETCH_TYPE(u64, u64, 0), + ASSIGN_FETCH_TYPE(s8, u8, 1), + ASSIGN_FETCH_TYPE(s16, u16, 1), + ASSIGN_FETCH_TYPE(s32, u32, 1), + ASSIGN_FETCH_TYPE(s64, u64, 1), +}; + +static const struct fetch_type *find_fetch_type(const char *type) +{ + int i; + + if (!type) + type = DEFAULT_FETCH_TYPE_STR; + + for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++) + if (strcmp(type, fetch_type_table[i].name) == 0) + return &fetch_type_table[i]; + return NULL; +} + +/* Special function : only accept unsigned long */ +static __kprobes void fetch_stack_address(struct pt_regs *regs, + void *dummy, void *dest) +{ + *(unsigned long *)dest = kernel_stack_pointer(regs); +} + +static fetch_func_t get_fetch_size_function(const struct fetch_type *type, + fetch_func_t orig_fn) +{ + int i; + + if (type != &fetch_type_table[FETCH_TYPE_STRING]) + return NULL; /* Only string type needs size function */ + for (i = 0; i < FETCH_MTD_END; i++) + if (type->fetch[i] == orig_fn) + return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i]; + + WARN_ON(1); /* This should not happen */ + return NULL; +} + /** * Kprobe event core functions */ struct probe_arg { - struct fetch_func fetch; - const char *name; + struct fetch_param fetch; + struct fetch_param fetch_size; + unsigned int 
offset; /* Offset from argument entry */ + const char *name; /* Name of this argument */ + const char *comm; /* Command of this argument */ + const struct fetch_type *type; /* Type of this argument */ }; /* Flags for trace_probe */ @@ -202,8 +482,9 @@ struct trace_probe { unsigned long nhit; unsigned int flags; /* For TP_FLAG_* */ const char *symbol; /* symbol name */ + struct ftrace_event_class class; struct ftrace_event_call call; - struct trace_event event; + ssize_t size; /* trace entry size */ unsigned int nr_args; struct probe_arg args[]; }; @@ -212,6 +493,7 @@ struct trace_probe { (offsetof(struct trace_probe, args) + \ (sizeof(struct probe_arg) * (n))) + static __kprobes int probe_is_return(struct trace_probe *tp) { return tp->rp.handler != NULL; @@ -222,49 +504,6 @@ static __kprobes const char *probe_symbol(struct trace_probe *tp) return tp->symbol ? tp->symbol : "unknown"; } -static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff) -{ - int ret = -EINVAL; - - if (ff->func == fetch_register) { - const char *name; - name = regs_query_register_name((unsigned int)((long)ff->data)); - ret = snprintf(buf, n, "%%%s", name); - } else if (ff->func == fetch_stack) - ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data); - else if (ff->func == fetch_memory) - ret = snprintf(buf, n, "@0x%p", ff->data); - else if (ff->func == fetch_symbol) { - struct symbol_cache *sc = ff->data; - if (sc->offset) - ret = snprintf(buf, n, "@%s%+ld", sc->symbol, - sc->offset); - else - ret = snprintf(buf, n, "@%s", sc->symbol); - } else if (ff->func == fetch_retvalue) - ret = snprintf(buf, n, "$retval"); - else if (ff->func == fetch_stack_address) - ret = snprintf(buf, n, "$stack"); - else if (ff->func == fetch_indirect) { - struct indirect_fetch_data *id = ff->data; - size_t l = 0; - ret = snprintf(buf, n, "%+ld(", id->offset); - if (ret >= n) - goto end; - l += ret; - ret = probe_arg_string(buf + l, n - l, &id->orig); - if (ret < 0) - goto end; - l += ret; - ret = snprintf(buf + l, n - l, ")"); - ret += l; - } -end: - if (ret >= n) - return -ENOSPC; - return ret; -} - static int register_probe_event(struct trace_probe *tp); static void unregister_probe_event(struct trace_probe *tp); @@ -323,6 +562,7 @@ static struct trace_probe *alloc_trace_probe(const char *group, goto error; } + tp->call.class = &tp->class; tp->call.name = kstrdup(event, GFP_KERNEL); if (!tp->call.name) goto error; @@ -332,8 +572,8 @@ static struct trace_probe *alloc_trace_probe(const char *group, goto error; } - tp->call.system = kstrdup(group, GFP_KERNEL); - if (!tp->call.system) + tp->class.system = kstrdup(group, GFP_KERNEL); + if (!tp->class.system) goto error; INIT_LIST_HEAD(&tp->list); @@ -347,11 +587,12 @@ error: static void free_probe_arg(struct probe_arg *arg) { - if (arg->fetch.func == fetch_symbol) + if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) + free_deref_fetch_param(arg->fetch.data); + else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) free_symbol_cache(arg->fetch.data); - else if (arg->fetch.func == fetch_indirect) - free_indirect_fetch_data(arg->fetch.data); kfree(arg->name); + kfree(arg->comm); } static void free_trace_probe(struct trace_probe *tp) @@ -361,7 +602,7 @@ static void free_trace_probe(struct trace_probe *tp) for (i = 0; i < tp->nr_args; i++) free_probe_arg(&tp->args[i]); - kfree(tp->call.system); + kfree(tp->call.class->system); kfree(tp->call.name); kfree(tp->symbol); kfree(tp); @@ -374,7 +615,7 @@ static struct trace_probe *find_probe_event(const char *event, list_for_each_entry(tp, 
&probe_list, list)
if (strcmp(tp->call.name, event) == 0 &&
- strcmp(tp->call.system, group) == 0)
+ strcmp(tp->call.class->system, group) == 0)
return tp;
return NULL;
}
@@ -399,7 +640,7 @@ static int register_trace_probe(struct trace_probe *tp)
mutex_lock(&probe_lock);
/* register as an event */
- old_tp = find_probe_event(tp->call.name, tp->call.system);
+ old_tp = find_probe_event(tp->call.name, tp->call.class->system);
if (old_tp) {
/* delete old event */
unregister_trace_probe(old_tp);
@@ -457,28 +698,30 @@ static int split_symbol_offset(char *symbol, unsigned long *offset)
#define PARAM_MAX_ARGS 16
#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
-static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
+static int parse_probe_vars(char *arg, const struct fetch_type *t,
+ struct fetch_param *f, int is_return)
{
int ret = 0;
unsigned long param;
if (strcmp(arg, "retval") == 0) {
- if (is_return) {
- ff->func = fetch_retvalue;
- ff->data = NULL;
- } else
+ if (is_return)
+ f->fn = t->fetch[FETCH_MTD_retval];
+ else
ret = -EINVAL;
} else if (strncmp(arg, "stack", 5) == 0) {
if (arg[5] == '\0') {
- ff->func = fetch_stack_address;
- ff->data = NULL;
+ if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
+ f->fn = fetch_stack_address;
+ else
+ ret = -EINVAL;
} else if (isdigit(arg[5])) {
ret = strict_strtoul(arg + 5, 10, &param);
if (ret || param > PARAM_MAX_STACK)
ret = -EINVAL;
else {
- ff->func = fetch_stack;
- ff->data = (void *)param;
+ f->fn = t->fetch[FETCH_MTD_stack];
+ f->data = (void *)param;
}
} else
ret = -EINVAL;
@@ -488,7 +731,8 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
}
/* Recursive argument parser */
-static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
+static int __parse_probe_arg(char *arg, const struct fetch_type *t,
+ struct fetch_param *f, int is_return)
{
int ret = 0;
unsigned long param;
@@ -497,13 +741,13 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
switch (arg[0]) {
case '$':
- ret = parse_probe_vars(arg + 1, ff, is_return);
+ ret = parse_probe_vars(arg + 1, t, f, is_return);
break;
case '%': /* named register */
ret = regs_query_register_offset(arg + 1);
if (ret >= 0) {
- ff->func = fetch_register;
- ff->data = (void *)(unsigned long)ret;
+ f->fn = t->fetch[FETCH_MTD_reg];
+ f->data = (void *)(unsigned long)ret;
ret = 0;
}
break;
@@ -512,26 +756,22 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
ret = strict_strtoul(arg + 1, 0, &param);
if (ret)
break;
- ff->func = fetch_memory;
- ff->data = (void *)param;
+ f->fn = t->fetch[FETCH_MTD_memory];
+ f->data = (void *)param;
} else {
ret = split_symbol_offset(arg + 1, &offset);
if (ret)
break;
- ff->data = alloc_symbol_cache(arg + 1, offset);
- if (ff->data)
- ff->func = fetch_symbol;
- else
- ret = -EINVAL;
+ f->data = alloc_symbol_cache(arg + 1, offset);
+ if (f->data)
+ f->fn = t->fetch[FETCH_MTD_symbol];
}
break;
- case '+': /* indirect memory */
+ case '+': /* deref memory */
case '-':
tmp = strchr(arg, '(');
- if (!tmp) {
- ret = -EINVAL;
+ if (!tmp)
break;
- }
*tmp = '\0';
ret = strict_strtol(arg + 1, 0, &offset);
if (ret)
@@ -541,38 +781,68 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
arg = tmp + 1;
tmp = strrchr(arg, ')');
if (tmp) {
- struct indirect_fetch_data *id;
+ struct deref_fetch_param *dprm;
+ const struct fetch_type *t2 = find_fetch_type(NULL);
*tmp = '\0';
- id = kzalloc(sizeof(struct indirect_fetch_data),
- 
GFP_KERNEL); - if (!id) + dprm = kzalloc(sizeof(struct deref_fetch_param), + GFP_KERNEL); + if (!dprm) return -ENOMEM; - id->offset = offset; - ret = __parse_probe_arg(arg, &id->orig, is_return); + dprm->offset = offset; + ret = __parse_probe_arg(arg, t2, &dprm->orig, + is_return); if (ret) - kfree(id); + kfree(dprm); else { - ff->func = fetch_indirect; - ff->data = (void *)id; + f->fn = t->fetch[FETCH_MTD_deref]; + f->data = (void *)dprm; } - } else - ret = -EINVAL; + } break; - default: - /* TODO: support custom handler */ + } + if (!ret && !f->fn) { /* Parsed, but do not find fetch method */ + pr_info("%s type has no corresponding fetch method.\n", + t->name); ret = -EINVAL; } return ret; } /* String length checking wrapper */ -static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) +static int parse_probe_arg(char *arg, struct trace_probe *tp, + struct probe_arg *parg, int is_return) { + const char *t; + int ret; + if (strlen(arg) > MAX_ARGSTR_LEN) { pr_info("Argument is too long.: %s\n", arg); return -ENOSPC; } - return __parse_probe_arg(arg, ff, is_return); + parg->comm = kstrdup(arg, GFP_KERNEL); + if (!parg->comm) { + pr_info("Failed to allocate memory for command '%s'.\n", arg); + return -ENOMEM; + } + t = strchr(parg->comm, ':'); + if (t) { + arg[t - parg->comm] = '\0'; + t++; + } + parg->type = find_fetch_type(t); + if (!parg->type) { + pr_info("Unsupported type: %s\n", t); + return -EINVAL; + } + parg->offset = tp->size; + tp->size += parg->type->size; + ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return); + if (ret >= 0) { + parg->fetch_size.fn = get_fetch_size_function(parg->type, + parg->fetch.fn); + parg->fetch_size.data = parg->fetch.data; + } + return ret; } /* Return 1 if name is reserved or already used by another argument */ @@ -602,15 +872,18 @@ static int create_trace_probe(int argc, char **argv) * @ADDR : fetch memory at ADDR (ADDR should be in kernel) * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol) * %REG : fetch register REG - * Indirect memory fetch: + * Dereferencing memory fetch: * +|-offs(ARG) : fetch memory at ARG +|- offs address. * Alias name of args: * NAME=FETCHARG : set NAME as alias of FETCHARG. + * Type of args: + * FETCHARG:TYPE : use TYPE instead of unsigned long. */ struct trace_probe *tp; int i, ret = 0; int is_return = 0, is_delete = 0; - char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; + char *symbol = NULL, *event = NULL, *group = NULL; + char *arg, *tmp; unsigned long offset = 0; void *addr = NULL; char buf[MAX_EVENT_NAME_LEN]; @@ -723,13 +996,6 @@ static int create_trace_probe(int argc, char **argv) else arg = argv[i]; - if (conflict_field_name(argv[i], tp->args, i)) { - pr_info("Argument%d name '%s' conflicts with " - "another field.\n", i, argv[i]); - ret = -EINVAL; - goto error; - } - tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); if (!tp->args[i].name) { pr_info("Failed to allocate argument%d name '%s'.\n", @@ -737,9 +1003,19 @@ static int create_trace_probe(int argc, char **argv) ret = -ENOMEM; goto error; } + tmp = strchr(tp->args[i].name, ':'); + if (tmp) + *tmp = '_'; /* convert : to _ */ + + if (conflict_field_name(tp->args[i].name, tp->args, i)) { + pr_info("Argument%d name '%s' conflicts with " + "another field.\n", i, argv[i]); + ret = -EINVAL; + goto error; + } /* Parse fetch argument */ - ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return); + ret = parse_probe_arg(arg, tp, &tp->args[i], is_return); if (ret) { pr_info("Parse error at argument%d. 
(%d)\n", i, ret); kfree(tp->args[i].name); @@ -794,11 +1070,10 @@ static void probes_seq_stop(struct seq_file *m, void *v) static int probes_seq_show(struct seq_file *m, void *v) { struct trace_probe *tp = v; - int i, ret; - char buf[MAX_ARGSTR_LEN + 1]; + int i; seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); - seq_printf(m, ":%s/%s", tp->call.system, tp->call.name); + seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name); if (!tp->symbol) seq_printf(m, " 0x%p", tp->rp.kp.addr); @@ -807,15 +1082,10 @@ static int probes_seq_show(struct seq_file *m, void *v) else seq_printf(m, " %s", probe_symbol(tp)); - for (i = 0; i < tp->nr_args; i++) { - ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch); - if (ret < 0) { - pr_warning("Argument%d decoding error(%d).\n", i, ret); - return ret; - } - seq_printf(m, " %s=%s", tp->args[i].name, buf); - } + for (i = 0; i < tp->nr_args; i++) + seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm); seq_printf(m, "\n"); + return 0; } @@ -941,14 +1211,62 @@ static const struct file_operations kprobe_profile_ops = { .release = seq_release, }; +/* Sum up total data length for dynamic arraies (strings) */ +static __kprobes int __get_data_size(struct trace_probe *tp, + struct pt_regs *regs) +{ + int i, ret = 0; + u32 len; + + for (i = 0; i < tp->nr_args; i++) + if (unlikely(tp->args[i].fetch_size.fn)) { + call_fetch(&tp->args[i].fetch_size, regs, &len); + ret += len; + } + + return ret; +} + +/* Store the value of each argument */ +static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp, + struct pt_regs *regs, + u8 *data, int maxlen) +{ + int i; + u32 end = tp->size; + u32 *dl; /* Data (relative) location */ + + for (i = 0; i < tp->nr_args; i++) { + if (unlikely(tp->args[i].fetch_size.fn)) { + /* + * First, we set the relative location and + * maximum data length to *dl + */ + dl = (u32 *)(data + tp->args[i].offset); + *dl = make_data_rloc(maxlen, end - tp->args[i].offset); + /* Then try to fetch string or dynamic array data */ + call_fetch(&tp->args[i].fetch, regs, dl); + /* Reduce maximum length */ + end += get_rloc_len(*dl); + maxlen -= get_rloc_len(*dl); + /* Trick here, convert data_rloc to data_loc */ + *dl = convert_rloc_to_loc(*dl, + ent_size + tp->args[i].offset); + } else + /* Just fetching data normally */ + call_fetch(&tp->args[i].fetch, regs, + data + tp->args[i].offset); + } +} + /* Kprobe handler */ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) { struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); - struct kprobe_trace_entry *entry; + struct kprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; - int size, i, pc; + int size, dsize, pc; unsigned long irq_flags; struct ftrace_event_call *call = &tp->call; @@ -957,18 +1275,17 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) local_save_flags(irq_flags); pc = preempt_count(); - size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); + dsize = __get_data_size(tp, regs); + size = sizeof(*entry) + tp->size + dsize; - event = trace_current_buffer_lock_reserve(&buffer, call->id, size, - irq_flags, pc); + event = trace_current_buffer_lock_reserve(&buffer, call->event.type, + size, irq_flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - entry->nargs = tp->nr_args; entry->ip = (unsigned long)kp->addr; - for (i = 0; i < tp->nr_args; i++) - entry->args[i] = call_fetch(&tp->args[i].fetch, regs); + 
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); if (!filter_current_check_discard(buffer, call, entry, event)) trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); @@ -979,29 +1296,28 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, struct pt_regs *regs) { struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); - struct kretprobe_trace_entry *entry; + struct kretprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; - int size, i, pc; + int size, pc, dsize; unsigned long irq_flags; struct ftrace_event_call *call = &tp->call; local_save_flags(irq_flags); pc = preempt_count(); - size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); + dsize = __get_data_size(tp, regs); + size = sizeof(*entry) + tp->size + dsize; - event = trace_current_buffer_lock_reserve(&buffer, call->id, size, - irq_flags, pc); + event = trace_current_buffer_lock_reserve(&buffer, call->event.type, + size, irq_flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - entry->nargs = tp->nr_args; entry->func = (unsigned long)tp->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; - for (i = 0; i < tp->nr_args; i++) - entry->args[i] = call_fetch(&tp->args[i].fetch, regs); + store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); if (!filter_current_check_discard(buffer, call, entry, event)) trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); @@ -1009,17 +1325,17 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, /* Event entry printers */ enum print_line_t -print_kprobe_event(struct trace_iterator *iter, int flags) +print_kprobe_event(struct trace_iterator *iter, int flags, + struct trace_event *event) { - struct kprobe_trace_entry *field; + struct kprobe_trace_entry_head *field; struct trace_seq *s = &iter->seq; - struct trace_event *event; struct trace_probe *tp; + u8 *data; int i; - field = (struct kprobe_trace_entry *)iter->ent; - event = ftrace_find_event(field->ent.type); - tp = container_of(event, struct trace_probe, event); + field = (struct kprobe_trace_entry_head *)iter->ent; + tp = container_of(event, struct trace_probe, call.event); if (!trace_seq_printf(s, "%s: (", tp->call.name)) goto partial; @@ -1030,9 +1346,10 @@ print_kprobe_event(struct trace_iterator *iter, int flags) if (!trace_seq_puts(s, ")")) goto partial; - for (i = 0; i < field->nargs; i++) - if (!trace_seq_printf(s, " %s=%lx", - tp->args[i].name, field->args[i])) + data = (u8 *)&field[1]; + for (i = 0; i < tp->nr_args; i++) + if (!tp->args[i].type->print(s, tp->args[i].name, + data + tp->args[i].offset, field)) goto partial; if (!trace_seq_puts(s, "\n")) @@ -1044,17 +1361,17 @@ partial: } enum print_line_t -print_kretprobe_event(struct trace_iterator *iter, int flags) +print_kretprobe_event(struct trace_iterator *iter, int flags, + struct trace_event *event) { - struct kretprobe_trace_entry *field; + struct kretprobe_trace_entry_head *field; struct trace_seq *s = &iter->seq; - struct trace_event *event; struct trace_probe *tp; + u8 *data; int i; - field = (struct kretprobe_trace_entry *)iter->ent; - event = ftrace_find_event(field->ent.type); - tp = container_of(event, struct trace_probe, event); + field = (struct kretprobe_trace_entry_head *)iter->ent; + tp = container_of(event, struct trace_probe, call.event); if (!trace_seq_printf(s, "%s: (", tp->call.name)) goto partial; @@ -1071,9 +1388,10 @@ print_kretprobe_event(struct trace_iterator *iter, int flags) if 
(!trace_seq_puts(s, ")")) goto partial; - for (i = 0; i < field->nargs; i++) - if (!trace_seq_printf(s, " %s=%lx", - tp->args[i].name, field->args[i])) + data = (u8 *)&field[1]; + for (i = 0; i < tp->nr_args; i++) + if (!tp->args[i].type->print(s, tp->args[i].name, + data + tp->args[i].offset, field)) goto partial; if (!trace_seq_puts(s, "\n")) @@ -1108,13 +1426,6 @@ static void probe_event_disable(struct ftrace_event_call *call) } } -static int probe_event_raw_init(struct ftrace_event_call *event_call) -{ - INIT_LIST_HEAD(&event_call->fields); - - return 0; -} - #undef DEFINE_FIELD #define DEFINE_FIELD(type, item, name, is_signed) \ do { \ @@ -1129,29 +1440,43 @@ static int probe_event_raw_init(struct ftrace_event_call *event_call) static int kprobe_event_define_fields(struct ftrace_event_call *event_call) { int ret, i; - struct kprobe_trace_entry field; + struct kprobe_trace_entry_head field; struct trace_probe *tp = (struct trace_probe *)event_call->data; DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); - DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); /* Set argument names as fields */ - for (i = 0; i < tp->nr_args; i++) - DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); + for (i = 0; i < tp->nr_args; i++) { + ret = trace_define_field(event_call, tp->args[i].type->fmttype, + tp->args[i].name, + sizeof(field) + tp->args[i].offset, + tp->args[i].type->size, + tp->args[i].type->is_signed, + FILTER_OTHER); + if (ret) + return ret; + } return 0; } static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) { int ret, i; - struct kretprobe_trace_entry field; + struct kretprobe_trace_entry_head field; struct trace_probe *tp = (struct trace_probe *)event_call->data; DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); - DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); /* Set argument names as fields */ - for (i = 0; i < tp->nr_args; i++) - DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); + for (i = 0; i < tp->nr_args; i++) { + ret = trace_define_field(event_call, tp->args[i].type->fmttype, + tp->args[i].name, + sizeof(field) + tp->args[i].offset, + tp->args[i].type->size, + tp->args[i].type->is_signed, + FILTER_OTHER); + if (ret) + return ret; + } return 0; } @@ -1176,15 +1501,20 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len) pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); for (i = 0; i < tp->nr_args; i++) { - pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx", - tp->args[i].name); + pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", + tp->args[i].name, tp->args[i].type->fmt); } pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); for (i = 0; i < tp->nr_args; i++) { - pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", - tp->args[i].name); + if (strcmp(tp->args[i].type->name, "string") == 0) + pos += snprintf(buf + pos, LEN_OR_ZERO, + ", __get_str(%s)", + tp->args[i].name); + else + pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", + tp->args[i].name); } #undef LEN_OR_ZERO @@ -1219,28 +1549,29 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, { struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; - struct kprobe_trace_entry *entry; - int size, __size, i; - unsigned long irq_flags; + struct kprobe_trace_entry_head *entry; + struct hlist_head *head; + int size, __size, dsize; int rctx; - __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); + dsize = __get_data_size(tp, regs); + __size 
= sizeof(*entry) + tp->size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) return; - entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags); + entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); if (!entry) return; - entry->nargs = tp->nr_args; entry->ip = (unsigned long)kp->addr; - for (i = 0; i < tp->nr_args; i++) - entry->args[i] = call_fetch(&tp->args[i].fetch, regs); + memset(&entry[1], 0, dsize); + store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); - perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs); + head = this_cpu_ptr(call->perf_events); + perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); } /* Kretprobe profile handler */ @@ -1249,30 +1580,29 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, { struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; - struct kretprobe_trace_entry *entry; - int size, __size, i; - unsigned long irq_flags; + struct kretprobe_trace_entry_head *entry; + struct hlist_head *head; + int size, __size, dsize; int rctx; - __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); + dsize = __get_data_size(tp, regs); + __size = sizeof(*entry) + tp->size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) return; - entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags); + entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); if (!entry) return; - entry->nargs = tp->nr_args; entry->func = (unsigned long)tp->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; - for (i = 0; i < tp->nr_args; i++) - entry->args[i] = call_fetch(&tp->args[i].fetch, regs); + store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); - perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, - irq_flags, regs); + head = this_cpu_ptr(call->perf_events); + perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); } static int probe_perf_enable(struct ftrace_event_call *call) @@ -1302,6 +1632,26 @@ static void probe_perf_disable(struct ftrace_event_call *call) } #endif /* CONFIG_PERF_EVENTS */ +static __kprobes +int kprobe_register(struct ftrace_event_call *event, enum trace_reg type) +{ + switch (type) { + case TRACE_REG_REGISTER: + return probe_event_enable(event); + case TRACE_REG_UNREGISTER: + probe_event_disable(event); + return 0; + +#ifdef CONFIG_PERF_EVENTS + case TRACE_REG_PERF_REGISTER: + return probe_perf_enable(event); + case TRACE_REG_PERF_UNREGISTER: + probe_perf_disable(event); + return 0; +#endif + } + return 0; +} static __kprobes int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) @@ -1331,43 +1681,43 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) return 0; /* We don't tweek kernel, so just return 0 */ } +static struct trace_event_functions kretprobe_funcs = { + .trace = print_kretprobe_event +}; + +static struct trace_event_functions kprobe_funcs = { + .trace = print_kprobe_event +}; + static int register_probe_event(struct trace_probe *tp) { struct ftrace_event_call *call = &tp->call; int ret; /* Initialize ftrace_event_call */ + INIT_LIST_HEAD(&call->class->fields); if (probe_is_return(tp)) { - tp->event.trace = print_kretprobe_event; - call->raw_init = probe_event_raw_init; - call->define_fields = 
kretprobe_event_define_fields; + call->event.funcs = &kretprobe_funcs; + call->class->define_fields = kretprobe_event_define_fields; } else { - tp->event.trace = print_kprobe_event; - call->raw_init = probe_event_raw_init; - call->define_fields = kprobe_event_define_fields; + call->event.funcs = &kprobe_funcs; + call->class->define_fields = kprobe_event_define_fields; } if (set_print_fmt(tp) < 0) return -ENOMEM; - call->event = &tp->event; - call->id = register_ftrace_event(&tp->event); - if (!call->id) { + ret = register_ftrace_event(&call->event); + if (!ret) { kfree(call->print_fmt); return -ENODEV; } - call->enabled = 0; - call->regfunc = probe_event_enable; - call->unregfunc = probe_event_disable; - -#ifdef CONFIG_PERF_EVENTS - call->perf_event_enable = probe_perf_enable; - call->perf_event_disable = probe_perf_disable; -#endif + call->flags = 0; + call->class->reg = kprobe_register; call->data = tp; ret = trace_add_event_call(call); if (ret) { pr_info("Failed to register kprobe event: %s\n", call->name); kfree(call->print_fmt); - unregister_ftrace_event(&tp->event); + unregister_ftrace_event(&call->event); } return ret; } diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c deleted file mode 100644 index d59cd6879477..000000000000 --- a/kernel/trace/trace_ksym.c +++ /dev/null @@ -1,520 +0,0 @@ -/* - * trace_ksym.c - Kernel Symbol Tracer - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2009 - */ - -#include <linux/kallsyms.h> -#include <linux/uaccess.h> -#include <linux/debugfs.h> -#include <linux/ftrace.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/fs.h> - -#include "trace_output.h" -#include "trace.h" - -#include <linux/hw_breakpoint.h> -#include <asm/hw_breakpoint.h> - -#include <asm/atomic.h> - -/* - * For now, let us restrict the no. of symbols traced simultaneously to number - * of available hardware breakpoint registers. 
- */ -#define KSYM_TRACER_MAX HBP_NUM - -#define KSYM_TRACER_OP_LEN 3 /* rw- */ - -struct trace_ksym { - struct perf_event **ksym_hbp; - struct perf_event_attr attr; -#ifdef CONFIG_PROFILE_KSYM_TRACER - atomic64_t counter; -#endif - struct hlist_node ksym_hlist; -}; - -static struct trace_array *ksym_trace_array; - -static unsigned int ksym_filter_entry_count; -static unsigned int ksym_tracing_enabled; - -static HLIST_HEAD(ksym_filter_head); - -static DEFINE_MUTEX(ksym_tracer_mutex); - -#ifdef CONFIG_PROFILE_KSYM_TRACER - -#define MAX_UL_INT 0xffffffff - -void ksym_collect_stats(unsigned long hbp_hit_addr) -{ - struct hlist_node *node; - struct trace_ksym *entry; - - rcu_read_lock(); - hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { - if (entry->attr.bp_addr == hbp_hit_addr) { - atomic64_inc(&entry->counter); - break; - } - } - rcu_read_unlock(); -} -#endif /* CONFIG_PROFILE_KSYM_TRACER */ - -void ksym_hbp_handler(struct perf_event *hbp, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - struct ring_buffer_event *event; - struct ksym_trace_entry *entry; - struct ring_buffer *buffer; - int pc; - - if (!ksym_tracing_enabled) - return; - - buffer = ksym_trace_array->buffer; - - pc = preempt_count(); - - event = trace_buffer_lock_reserve(buffer, TRACE_KSYM, - sizeof(*entry), 0, pc); - if (!event) - return; - - entry = ring_buffer_event_data(event); - entry->ip = instruction_pointer(regs); - entry->type = hw_breakpoint_type(hbp); - entry->addr = hw_breakpoint_addr(hbp); - strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); - -#ifdef CONFIG_PROFILE_KSYM_TRACER - ksym_collect_stats(hw_breakpoint_addr(hbp)); -#endif /* CONFIG_PROFILE_KSYM_TRACER */ - - trace_buffer_unlock_commit(buffer, event, 0, pc); -} - -/* Valid access types are represented as - * - * rw- : Set Read/Write Access Breakpoint - * -w- : Set Write Access Breakpoint - * --- : Clear Breakpoints - * --x : Set Execution Break points (Not available yet) - * - */ -static int ksym_trace_get_access_type(char *str) -{ - int access = 0; - - if (str[0] == 'r') - access |= HW_BREAKPOINT_R; - - if (str[1] == 'w') - access |= HW_BREAKPOINT_W; - - if (str[2] == 'x') - access |= HW_BREAKPOINT_X; - - switch (access) { - case HW_BREAKPOINT_R: - case HW_BREAKPOINT_W: - case HW_BREAKPOINT_W | HW_BREAKPOINT_R: - return access; - default: - return -EINVAL; - } -} - -/* - * There can be several possible malformed requests and we attempt to capture - * all of them. We enumerate some of the rules - * 1. We will not allow kernel symbols with ':' since it is used as a delimiter. - * i.e. multiple ':' symbols disallowed. Possible uses are of the form - * <module>:<ksym_name>:<op>. - * 2. No delimiter symbol ':' in the input string - * 3. Spurious operator symbols or symbols not in their respective positions - * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file - * 5. Kernel symbol not a part of /proc/kallsyms - * 6. 
Duplicate requests - */ -static int parse_ksym_trace_str(char *input_string, char **ksymname, - unsigned long *addr) -{ - int ret; - - *ksymname = strsep(&input_string, ":"); - *addr = kallsyms_lookup_name(*ksymname); - - /* Check for malformed request: (2), (1) and (5) */ - if ((!input_string) || - (strlen(input_string) != KSYM_TRACER_OP_LEN) || - (*addr == 0)) - return -EINVAL;; - - ret = ksym_trace_get_access_type(input_string); - - return ret; -} - -int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) -{ - struct trace_ksym *entry; - int ret = -ENOMEM; - - if (ksym_filter_entry_count >= KSYM_TRACER_MAX) { - printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No" - " new requests for tracing can be accepted now.\n", - KSYM_TRACER_MAX); - return -ENOSPC; - } - - entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL); - if (!entry) - return -ENOMEM; - - hw_breakpoint_init(&entry->attr); - - entry->attr.bp_type = op; - entry->attr.bp_addr = addr; - entry->attr.bp_len = HW_BREAKPOINT_LEN_4; - - entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr, - ksym_hbp_handler); - - if (IS_ERR(entry->ksym_hbp)) { - ret = PTR_ERR(entry->ksym_hbp); - printk(KERN_INFO "ksym_tracer request failed. Try again" - " later!!\n"); - goto err; - } - - hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); - ksym_filter_entry_count++; - - return 0; - -err: - kfree(entry); - - return ret; -} - -static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, - size_t count, loff_t *ppos) -{ - struct trace_ksym *entry; - struct hlist_node *node; - struct trace_seq *s; - ssize_t cnt = 0; - int ret; - - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return -ENOMEM; - trace_seq_init(s); - - mutex_lock(&ksym_tracer_mutex); - - hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { - ret = trace_seq_printf(s, "%pS:", - (void *)(unsigned long)entry->attr.bp_addr); - if (entry->attr.bp_type == HW_BREAKPOINT_R) - ret = trace_seq_puts(s, "r--\n"); - else if (entry->attr.bp_type == HW_BREAKPOINT_W) - ret = trace_seq_puts(s, "-w-\n"); - else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) - ret = trace_seq_puts(s, "rw-\n"); - WARN_ON_ONCE(!ret); - } - - cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); - - mutex_unlock(&ksym_tracer_mutex); - - kfree(s); - - return cnt; -} - -static void __ksym_trace_reset(void) -{ - struct trace_ksym *entry; - struct hlist_node *node, *node1; - - mutex_lock(&ksym_tracer_mutex); - hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, - ksym_hlist) { - unregister_wide_hw_breakpoint(entry->ksym_hbp); - ksym_filter_entry_count--; - hlist_del_rcu(&(entry->ksym_hlist)); - synchronize_rcu(); - kfree(entry); - } - mutex_unlock(&ksym_tracer_mutex); -} - -static ssize_t ksym_trace_filter_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *ppos) -{ - struct trace_ksym *entry; - struct hlist_node *node; - char *buf, *input_string, *ksymname = NULL; - unsigned long ksym_addr = 0; - int ret, op, changed = 0; - - buf = kzalloc(count + 1, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - ret = -EFAULT; - if (copy_from_user(buf, buffer, count)) - goto out; - - buf[count] = '\0'; - input_string = strstrip(buf); - - /* - * Clear all breakpoints if: - * 1: echo > ksym_trace_filter - * 2: echo 0 > ksym_trace_filter - * 3: echo "*:---" > ksym_trace_filter - */ - if (!input_string[0] || !strcmp(input_string, "0") || - !strcmp(input_string, "*:---")) { - __ksym_trace_reset(); - ret = 0; - 
goto out; - } - - ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); - if (ret < 0) - goto out; - - mutex_lock(&ksym_tracer_mutex); - - ret = -EINVAL; - hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { - if (entry->attr.bp_addr == ksym_addr) { - /* Check for malformed request: (6) */ - if (entry->attr.bp_type != op) - changed = 1; - else - goto out_unlock; - break; - } - } - if (changed) { - unregister_wide_hw_breakpoint(entry->ksym_hbp); - entry->attr.bp_type = op; - ret = 0; - if (op > 0) { - entry->ksym_hbp = - register_wide_hw_breakpoint(&entry->attr, - ksym_hbp_handler); - if (IS_ERR(entry->ksym_hbp)) - ret = PTR_ERR(entry->ksym_hbp); - else - goto out_unlock; - } - /* Error or "symbol:---" case: drop it */ - ksym_filter_entry_count--; - hlist_del_rcu(&(entry->ksym_hlist)); - synchronize_rcu(); - kfree(entry); - goto out_unlock; - } else { - /* Check for malformed request: (4) */ - if (op) - ret = process_new_ksym_entry(ksymname, op, ksym_addr); - } -out_unlock: - mutex_unlock(&ksym_tracer_mutex); -out: - kfree(buf); - return !ret ? count : ret; -} - -static const struct file_operations ksym_tracing_fops = { - .open = tracing_open_generic, - .read = ksym_trace_filter_read, - .write = ksym_trace_filter_write, -}; - -static void ksym_trace_reset(struct trace_array *tr) -{ - ksym_tracing_enabled = 0; - __ksym_trace_reset(); -} - -static int ksym_trace_init(struct trace_array *tr) -{ - int cpu, ret = 0; - - for_each_online_cpu(cpu) - tracing_reset(tr, cpu); - ksym_tracing_enabled = 1; - ksym_trace_array = tr; - - return ret; -} - -static void ksym_trace_print_header(struct seq_file *m) -{ - seq_puts(m, - "# TASK-PID CPU# Symbol " - "Type Function\n"); - seq_puts(m, - "# | | | " - " | |\n"); -} - -static enum print_line_t ksym_trace_output(struct trace_iterator *iter) -{ - struct trace_entry *entry = iter->ent; - struct trace_seq *s = &iter->seq; - struct ksym_trace_entry *field; - char str[KSYM_SYMBOL_LEN]; - int ret; - - if (entry->type != TRACE_KSYM) - return TRACE_TYPE_UNHANDLED; - - trace_assign_type(field, entry); - - ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd, - entry->pid, iter->cpu, (char *)field->addr); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - switch (field->type) { - case HW_BREAKPOINT_R: - ret = trace_seq_printf(s, " R "); - break; - case HW_BREAKPOINT_W: - ret = trace_seq_printf(s, " W "); - break; - case HW_BREAKPOINT_R | HW_BREAKPOINT_W: - ret = trace_seq_printf(s, " RW "); - break; - default: - return TRACE_TYPE_PARTIAL_LINE; - } - - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - sprint_symbol(str, field->ip); - ret = trace_seq_printf(s, "%s\n", str); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; -} - -struct tracer ksym_tracer __read_mostly = -{ - .name = "ksym_tracer", - .init = ksym_trace_init, - .reset = ksym_trace_reset, -#ifdef CONFIG_FTRACE_SELFTEST - .selftest = trace_selftest_startup_ksym, -#endif - .print_header = ksym_trace_print_header, - .print_line = ksym_trace_output -}; - -#ifdef CONFIG_PROFILE_KSYM_TRACER -static int ksym_profile_show(struct seq_file *m, void *v) -{ - struct hlist_node *node; - struct trace_ksym *entry; - int access_type = 0; - char fn_name[KSYM_NAME_LEN]; - - seq_puts(m, " Access Type "); - seq_puts(m, " Symbol Counter\n"); - seq_puts(m, " ----------- "); - seq_puts(m, " ------ -------\n"); - - rcu_read_lock(); - hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { - - access_type = entry->attr.bp_type; - - switch (access_type) 
{ - case HW_BREAKPOINT_R: - seq_puts(m, " R "); - break; - case HW_BREAKPOINT_W: - seq_puts(m, " W "); - break; - case HW_BREAKPOINT_R | HW_BREAKPOINT_W: - seq_puts(m, " RW "); - break; - default: - seq_puts(m, " NA "); - } - - if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0) - seq_printf(m, " %-36s", fn_name); - else - seq_printf(m, " %-36s", "<NA>"); - seq_printf(m, " %15llu\n", - (unsigned long long)atomic64_read(&entry->counter)); - } - rcu_read_unlock(); - - return 0; -} - -static int ksym_profile_open(struct inode *node, struct file *file) -{ - return single_open(file, ksym_profile_show, NULL); -} - -static const struct file_operations ksym_profile_fops = { - .open = ksym_profile_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif /* CONFIG_PROFILE_KSYM_TRACER */ - -__init static int init_ksym_trace(void) -{ - struct dentry *d_tracer; - - d_tracer = tracing_init_dentry(); - - trace_create_file("ksym_trace_filter", 0644, d_tracer, - NULL, &ksym_tracing_fops); - -#ifdef CONFIG_PROFILE_KSYM_TRACER - trace_create_file("ksym_profile", 0444, d_tracer, - NULL, &ksym_profile_fops); -#endif - - return register_tracer(&ksym_tracer); -} -device_initcall(init_ksym_trace); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 8e46b3323cdc..02272baa2206 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -16,9 +16,6 @@ DECLARE_RWSEM(trace_event_mutex); -DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq); -EXPORT_PER_CPU_SYMBOL(ftrace_event_seq); - static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; static int next_event_type = __TRACE_LAST_TYPE + 1; @@ -209,6 +206,7 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c) return 1; } +EXPORT_SYMBOL(trace_seq_putc); int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) { @@ -253,7 +251,7 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len) void *ret; if (s->full) - return 0; + return NULL; if (len > ((PAGE_SIZE - 1) - s->len)) { s->full = 1; @@ -355,6 +353,21 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, } EXPORT_SYMBOL(ftrace_print_symbols_seq); +const char * +ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) +{ + int i; + const char *ret = p->buffer + p->len; + + for (i = 0; i < buf_len; i++) + trace_seq_printf(p, "%s%2.2x", i == 0 ? 
"" : " ", buf[i]); + + trace_seq_putc(p, 0); + + return ret; +} +EXPORT_SYMBOL(ftrace_print_hex_seq); + #ifdef CONFIG_KRETPROBES static inline const char *kretprobed(const char *name) { @@ -726,6 +739,9 @@ int register_ftrace_event(struct trace_event *event) if (WARN_ON(!event)) goto out; + if (WARN_ON(!event->funcs)) + goto out; + INIT_LIST_HEAD(&event->list); if (!event->type) { @@ -758,14 +774,14 @@ int register_ftrace_event(struct trace_event *event) goto out; } - if (event->trace == NULL) - event->trace = trace_nop_print; - if (event->raw == NULL) - event->raw = trace_nop_print; - if (event->hex == NULL) - event->hex = trace_nop_print; - if (event->binary == NULL) - event->binary = trace_nop_print; + if (event->funcs->trace == NULL) + event->funcs->trace = trace_nop_print; + if (event->funcs->raw == NULL) + event->funcs->raw = trace_nop_print; + if (event->funcs->hex == NULL) + event->funcs->hex = trace_nop_print; + if (event->funcs->binary == NULL) + event->funcs->binary = trace_nop_print; key = event->type & (EVENT_HASHSIZE - 1); @@ -807,13 +823,15 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_event); * Standard events */ -enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags) +enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags, + struct trace_event *event) { return TRACE_TYPE_HANDLED; } /* TRACE_FN */ -static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags) +static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags, + struct trace_event *event) { struct ftrace_entry *field; struct trace_seq *s = &iter->seq; @@ -840,7 +858,8 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags) return TRACE_TYPE_PARTIAL_LINE; } -static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags) +static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags, + struct trace_event *event) { struct ftrace_entry *field; @@ -854,7 +873,8 @@ static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags) return TRACE_TYPE_HANDLED; } -static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags) +static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags, + struct trace_event *event) { struct ftrace_entry *field; struct trace_seq *s = &iter->seq; @@ -867,7 +887,8 @@ static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags) return TRACE_TYPE_HANDLED; } -static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags) +static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags, + struct trace_event *event) { struct ftrace_entry *field; struct trace_seq *s = &iter->seq; @@ -880,14 +901,18 @@ static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags) return TRACE_TYPE_HANDLED; } -static struct trace_event trace_fn_event = { - .type = TRACE_FN, +static struct trace_event_functions trace_fn_funcs = { .trace = trace_fn_trace, .raw = trace_fn_raw, .hex = trace_fn_hex, .binary = trace_fn_bin, }; +static struct trace_event trace_fn_event = { + .type = TRACE_FN, + .funcs = &trace_fn_funcs, +}; + /* TRACE_CTX an TRACE_WAKE */ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, char *delim) @@ -916,13 +941,14 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, return TRACE_TYPE_HANDLED; } -static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags) +static enum print_line_t trace_ctx_print(struct 
trace_iterator *iter, int flags, + struct trace_event *event) { return trace_ctxwake_print(iter, "==>"); } static enum print_line_t trace_wake_print(struct trace_iterator *iter, - int flags) + int flags, struct trace_event *event) { return trace_ctxwake_print(iter, " +"); } @@ -950,12 +976,14 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S) return TRACE_TYPE_HANDLED; } -static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags) +static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags, + struct trace_event *event) { return trace_ctxwake_raw(iter, 0); } -static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags) +static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags, + struct trace_event *event) { return trace_ctxwake_raw(iter, '+'); } @@ -984,18 +1012,20 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S) return TRACE_TYPE_HANDLED; } -static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags) +static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags, + struct trace_event *event) { return trace_ctxwake_hex(iter, 0); } -static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags) +static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags, + struct trace_event *event) { return trace_ctxwake_hex(iter, '+'); } static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, - int flags) + int flags, struct trace_event *event) { struct ctx_switch_entry *field; struct trace_seq *s = &iter->seq; @@ -1012,81 +1042,34 @@ static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, return TRACE_TYPE_HANDLED; } -static struct trace_event trace_ctx_event = { - .type = TRACE_CTX, +static struct trace_event_functions trace_ctx_funcs = { .trace = trace_ctx_print, .raw = trace_ctx_raw, .hex = trace_ctx_hex, .binary = trace_ctxwake_bin, }; -static struct trace_event trace_wake_event = { - .type = TRACE_WAKE, +static struct trace_event trace_ctx_event = { + .type = TRACE_CTX, + .funcs = &trace_ctx_funcs, +}; + +static struct trace_event_functions trace_wake_funcs = { .trace = trace_wake_print, .raw = trace_wake_raw, .hex = trace_wake_hex, .binary = trace_ctxwake_bin, }; -/* TRACE_SPECIAL */ -static enum print_line_t trace_special_print(struct trace_iterator *iter, - int flags) -{ - struct special_entry *field; - - trace_assign_type(field, iter->ent); - - if (!trace_seq_printf(&iter->seq, "# %ld %ld %ld\n", - field->arg1, - field->arg2, - field->arg3)) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; -} - -static enum print_line_t trace_special_hex(struct trace_iterator *iter, - int flags) -{ - struct special_entry *field; - struct trace_seq *s = &iter->seq; - - trace_assign_type(field, iter->ent); - - SEQ_PUT_HEX_FIELD_RET(s, field->arg1); - SEQ_PUT_HEX_FIELD_RET(s, field->arg2); - SEQ_PUT_HEX_FIELD_RET(s, field->arg3); - - return TRACE_TYPE_HANDLED; -} - -static enum print_line_t trace_special_bin(struct trace_iterator *iter, - int flags) -{ - struct special_entry *field; - struct trace_seq *s = &iter->seq; - - trace_assign_type(field, iter->ent); - - SEQ_PUT_FIELD_RET(s, field->arg1); - SEQ_PUT_FIELD_RET(s, field->arg2); - SEQ_PUT_FIELD_RET(s, field->arg3); - - return TRACE_TYPE_HANDLED; -} - -static struct trace_event trace_special_event = { - .type = TRACE_SPECIAL, - .trace = trace_special_print, - .raw = trace_special_print, - .hex = trace_special_hex, - .binary = 
trace_special_bin, +static struct trace_event trace_wake_event = { + .type = TRACE_WAKE, + .funcs = &trace_wake_funcs, }; /* TRACE_STACK */ static enum print_line_t trace_stack_print(struct trace_iterator *iter, - int flags) + int flags, struct trace_event *event) { struct stack_entry *field; struct trace_seq *s = &iter->seq; @@ -1114,17 +1097,18 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, return TRACE_TYPE_PARTIAL_LINE; } +static struct trace_event_functions trace_stack_funcs = { + .trace = trace_stack_print, +}; + static struct trace_event trace_stack_event = { .type = TRACE_STACK, - .trace = trace_stack_print, - .raw = trace_special_print, - .hex = trace_special_hex, - .binary = trace_special_bin, + .funcs = &trace_stack_funcs, }; /* TRACE_USER_STACK */ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, - int flags) + int flags, struct trace_event *event) { struct userstack_entry *field; struct trace_seq *s = &iter->seq; @@ -1143,17 +1127,19 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, return TRACE_TYPE_PARTIAL_LINE; } +static struct trace_event_functions trace_user_stack_funcs = { + .trace = trace_user_stack_print, +}; + static struct trace_event trace_user_stack_event = { .type = TRACE_USER_STACK, - .trace = trace_user_stack_print, - .raw = trace_special_print, - .hex = trace_special_hex, - .binary = trace_special_bin, + .funcs = &trace_user_stack_funcs, }; /* TRACE_BPRINT */ static enum print_line_t -trace_bprint_print(struct trace_iterator *iter, int flags) +trace_bprint_print(struct trace_iterator *iter, int flags, + struct trace_event *event) { struct trace_entry *entry = iter->ent; struct trace_seq *s = &iter->seq; @@ -1178,7 +1164,8 @@ trace_bprint_print(struct trace_iterator *iter, int flags) static enum print_line_t -trace_bprint_raw(struct trace_iterator *iter, int flags) +trace_bprint_raw(struct trace_iterator *iter, int flags, + struct trace_event *event) { struct bprint_entry *field; struct trace_seq *s = &iter->seq; @@ -1197,16 +1184,19 @@ trace_bprint_raw(struct trace_iterator *iter, int flags) return TRACE_TYPE_PARTIAL_LINE; } +static struct trace_event_functions trace_bprint_funcs = { + .trace = trace_bprint_print, + .raw = trace_bprint_raw, +}; static struct trace_event trace_bprint_event = { .type = TRACE_BPRINT, - .trace = trace_bprint_print, - .raw = trace_bprint_raw, + .funcs = &trace_bprint_funcs, }; /* TRACE_PRINT */ static enum print_line_t trace_print_print(struct trace_iterator *iter, - int flags) + int flags, struct trace_event *event) { struct print_entry *field; struct trace_seq *s = &iter->seq; @@ -1225,7 +1215,8 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter, return TRACE_TYPE_PARTIAL_LINE; } -static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) +static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags, + struct trace_event *event) { struct print_entry *field; @@ -1240,18 +1231,21 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) return TRACE_TYPE_PARTIAL_LINE; } -static struct trace_event trace_print_event = { - .type = TRACE_PRINT, +static struct trace_event_functions trace_print_funcs = { .trace = trace_print_print, .raw = trace_print_raw, }; +static struct trace_event trace_print_event = { + .type = TRACE_PRINT, + .funcs = &trace_print_funcs, +}; + static struct trace_event *events[] __initdata = { &trace_fn_event, &trace_ctx_event, 
&trace_wake_event, - &trace_special_event, &trace_stack_event, &trace_user_stack_event, &trace_bprint_event, diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 9d91c72ba38b..c038eba0492b 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -25,7 +25,7 @@ extern void trace_event_read_unlock(void); extern struct trace_event *ftrace_find_event(int type); extern enum print_line_t trace_nop_print(struct trace_iterator *iter, - int flags); + int flags, struct trace_event *event); extern int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 5fca0f51fde4..8f758d070c43 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -50,8 +50,7 @@ tracing_sched_switch_trace(struct trace_array *tr, } static void -probe_sched_switch(struct rq *__rq, struct task_struct *prev, - struct task_struct *next) +probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next) { struct trace_array_cpu *data; unsigned long flags; @@ -109,7 +108,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, } static void -probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) +probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success) { struct trace_array_cpu *data; unsigned long flags; @@ -139,21 +138,21 @@ static int tracing_sched_register(void) { int ret; - ret = register_trace_sched_wakeup(probe_sched_wakeup); + ret = register_trace_sched_wakeup(probe_sched_wakeup, NULL); if (ret) { pr_info("wakeup trace: Couldn't activate tracepoint" " probe to kernel_sched_wakeup\n"); return ret; } - ret = register_trace_sched_wakeup_new(probe_sched_wakeup); + ret = register_trace_sched_wakeup_new(probe_sched_wakeup, NULL); if (ret) { pr_info("wakeup trace: Couldn't activate tracepoint" " probe to kernel_sched_wakeup_new\n"); goto fail_deprobe; } - ret = register_trace_sched_switch(probe_sched_switch); + ret = register_trace_sched_switch(probe_sched_switch, NULL); if (ret) { pr_info("sched trace: Couldn't activate tracepoint" " probe to kernel_sched_switch\n"); @@ -162,17 +161,17 @@ static int tracing_sched_register(void) return ret; fail_deprobe_wake_new: - unregister_trace_sched_wakeup_new(probe_sched_wakeup); + unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL); fail_deprobe: - unregister_trace_sched_wakeup(probe_sched_wakeup); + unregister_trace_sched_wakeup(probe_sched_wakeup, NULL); return ret; } static void tracing_sched_unregister(void) { - unregister_trace_sched_switch(probe_sched_switch); - unregister_trace_sched_wakeup_new(probe_sched_wakeup); - unregister_trace_sched_wakeup(probe_sched_wakeup); + unregister_trace_sched_switch(probe_sched_switch, NULL); + unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL); + unregister_trace_sched_wakeup(probe_sched_wakeup, NULL); } static void tracing_start_sched_switch(void) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 0271742abb8d..4086eae6e81b 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -46,7 +46,6 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) struct trace_array_cpu *data; unsigned long flags; long disabled; - int resched; int cpu; int pc; @@ -54,7 +53,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) return; pc = preempt_count(); - resched = ftrace_preempt_disable(); + preempt_disable_notrace(); cpu = 
raw_smp_processor_id(); if (cpu != wakeup_current_cpu) @@ -74,7 +73,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) out: atomic_dec(&data->disabled); out_enable: - ftrace_preempt_enable(resched); + preempt_enable_notrace(); } static struct ftrace_ops trace_ops __read_mostly = @@ -98,7 +97,8 @@ static int report_latency(cycle_t delta) return 1; } -static void probe_wakeup_migrate_task(struct task_struct *task, int cpu) +static void +probe_wakeup_migrate_task(void *ignore, struct task_struct *task, int cpu) { if (task != wakeup_task) return; @@ -107,8 +107,8 @@ static void probe_wakeup_migrate_task(struct task_struct *task, int cpu) } static void notrace -probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, - struct task_struct *next) +probe_wakeup_sched_switch(void *ignore, + struct task_struct *prev, struct task_struct *next) { struct trace_array_cpu *data; cycle_t T0, T1, delta; @@ -200,7 +200,7 @@ static void wakeup_reset(struct trace_array *tr) } static void -probe_wakeup(struct rq *rq, struct task_struct *p, int success) +probe_wakeup(void *ignore, struct task_struct *p, int success) { struct trace_array_cpu *data; int cpu = smp_processor_id(); @@ -264,28 +264,28 @@ static void start_wakeup_tracer(struct trace_array *tr) { int ret; - ret = register_trace_sched_wakeup(probe_wakeup); + ret = register_trace_sched_wakeup(probe_wakeup, NULL); if (ret) { pr_info("wakeup trace: Couldn't activate tracepoint" " probe to kernel_sched_wakeup\n"); return; } - ret = register_trace_sched_wakeup_new(probe_wakeup); + ret = register_trace_sched_wakeup_new(probe_wakeup, NULL); if (ret) { pr_info("wakeup trace: Couldn't activate tracepoint" " probe to kernel_sched_wakeup_new\n"); goto fail_deprobe; } - ret = register_trace_sched_switch(probe_wakeup_sched_switch); + ret = register_trace_sched_switch(probe_wakeup_sched_switch, NULL); if (ret) { pr_info("sched trace: Couldn't activate tracepoint" " probe to kernel_sched_switch\n"); goto fail_deprobe_wake_new; } - ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task); + ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL); if (ret) { pr_info("wakeup trace: Couldn't activate tracepoint" " probe to kernel_sched_migrate_task\n"); @@ -312,19 +312,19 @@ static void start_wakeup_tracer(struct trace_array *tr) return; fail_deprobe_wake_new: - unregister_trace_sched_wakeup_new(probe_wakeup); + unregister_trace_sched_wakeup_new(probe_wakeup, NULL); fail_deprobe: - unregister_trace_sched_wakeup(probe_wakeup); + unregister_trace_sched_wakeup(probe_wakeup, NULL); } static void stop_wakeup_tracer(struct trace_array *tr) { tracer_enabled = 0; unregister_ftrace_function(&trace_ops); - unregister_trace_sched_switch(probe_wakeup_sched_switch); - unregister_trace_sched_wakeup_new(probe_wakeup); - unregister_trace_sched_wakeup(probe_wakeup); - unregister_trace_sched_migrate_task(probe_wakeup_migrate_task); + unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); + unregister_trace_sched_wakeup_new(probe_wakeup, NULL); + unregister_trace_sched_wakeup(probe_wakeup, NULL); + unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL); } static int __wakeup_tracer_init(struct trace_array *tr) @@ -382,6 +382,7 @@ static struct tracer wakeup_tracer __read_mostly = #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_wakeup, #endif + .use_max_tr = 1, }; static struct tracer wakeup_rt_tracer __read_mostly = @@ -396,6 +397,7 @@ static struct tracer wakeup_rt_tracer __read_mostly = #ifdef 
CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_wakeup, #endif + .use_max_tr = 1, }; __init static int init_wakeup_tracer(void) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 81003b4d617f..155a415b3209 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -13,12 +13,9 @@ static inline int trace_valid_entry(struct trace_entry *entry) case TRACE_WAKE: case TRACE_STACK: case TRACE_PRINT: - case TRACE_SPECIAL: case TRACE_BRANCH: case TRACE_GRAPH_ENT: case TRACE_GRAPH_RET: - case TRACE_HW_BRANCHES: - case TRACE_KSYM: return 1; } return 0; @@ -30,7 +27,7 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu) struct trace_entry *entry; unsigned int loops = 0; - while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) { + while ((event = ring_buffer_consume(tr->buffer, cpu, NULL, NULL))) { entry = ring_buffer_event_data(event); /* @@ -256,7 +253,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) /* Maximum number of functions to trace before diagnosing a hang */ #define GRAPH_MAX_FUNC_TEST 100000000 -static void __ftrace_dump(bool disable_tracing); +static void +__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode); static unsigned int graph_hang_thresh; /* Wrap the real function entry probe to avoid possible hanging */ @@ -267,7 +265,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace) ftrace_graph_stop(); printk(KERN_WARNING "BUG: Function graph tracer hang!\n"); if (ftrace_dump_on_oops) - __ftrace_dump(false); + __ftrace_dump(false, DUMP_ALL); return 0; } @@ -691,38 +689,6 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr } #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ -#ifdef CONFIG_SYSPROF_TRACER -int -trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr) -{ - unsigned long count; - int ret; - - /* start the tracing */ - ret = tracer_init(trace, tr); - if (ret) { - warn_failed_init_tracer(trace, ret); - return ret; - } - - /* Sleep for a 1/10 of a second */ - msleep(100); - /* stop the tracing. */ - tracing_stop(); - /* check the trace buffer */ - ret = trace_test_buffer(tr, &count); - trace->reset(tr); - tracing_start(); - - if (!ret && !count) { - printk(KERN_CONT ".. no entries found .."); - ret = -1; - } - - return ret; -} -#endif /* CONFIG_SYSPROF_TRACER */ - #ifdef CONFIG_BRANCH_TRACER int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) @@ -755,112 +721,3 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) } #endif /* CONFIG_BRANCH_TRACER */ -#ifdef CONFIG_HW_BRANCH_TRACER -int -trace_selftest_startup_hw_branches(struct tracer *trace, - struct trace_array *tr) -{ - struct trace_iterator *iter; - struct tracer tracer; - unsigned long count; - int ret; - - if (!trace->open) { - printk(KERN_CONT "missing open function..."); - return -1; - } - - ret = tracer_init(trace, tr); - if (ret) { - warn_failed_init_tracer(trace, ret); - return ret; - } - - /* - * The hw-branch tracer needs to collect the trace from the various - * cpu trace buffers - before tracing is stopped. 
- */ - iter = kzalloc(sizeof(*iter), GFP_KERNEL); - if (!iter) - return -ENOMEM; - - memcpy(&tracer, trace, sizeof(tracer)); - - iter->trace = &tracer; - iter->tr = tr; - iter->pos = -1; - mutex_init(&iter->mutex); - - trace->open(iter); - - mutex_destroy(&iter->mutex); - kfree(iter); - - tracing_stop(); - - ret = trace_test_buffer(tr, &count); - trace->reset(tr); - tracing_start(); - - if (!ret && !count) { - printk(KERN_CONT "no entries found.."); - ret = -1; - } - - return ret; -} -#endif /* CONFIG_HW_BRANCH_TRACER */ - -#ifdef CONFIG_KSYM_TRACER -static int ksym_selftest_dummy; - -int -trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) -{ - unsigned long count; - int ret; - - /* start the tracing */ - ret = tracer_init(trace, tr); - if (ret) { - warn_failed_init_tracer(trace, ret); - return ret; - } - - ksym_selftest_dummy = 0; - /* Register the read-write tracing request */ - - ret = process_new_ksym_entry("ksym_selftest_dummy", - HW_BREAKPOINT_R | HW_BREAKPOINT_W, - (unsigned long)(&ksym_selftest_dummy)); - - if (ret < 0) { - printk(KERN_CONT "ksym_trace read-write startup test failed\n"); - goto ret_path; - } - /* Perform a read and a write operation over the dummy variable to - * trigger the tracer - */ - if (ksym_selftest_dummy == 0) - ksym_selftest_dummy++; - - /* stop the tracing. */ - tracing_stop(); - /* check the trace buffer */ - ret = trace_test_buffer(tr, &count); - trace->reset(tr); - tracing_start(); - - /* read & write operations - one each is performed on the dummy variable - * triggering two entries in the trace buffer - */ - if (!ret && count != 2) { - printk(KERN_CONT "Ksym tracer startup test failed"); - ret = -1; - } - -ret_path: - return ret; -} -#endif /* CONFIG_KSYM_TRACER */ - diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index f4bc9b27de5f..056468eae7cf 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -110,12 +110,12 @@ static inline void check_stack(void) static void stack_trace_call(unsigned long ip, unsigned long parent_ip) { - int cpu, resched; + int cpu; if (unlikely(!ftrace_enabled || stack_trace_disabled)) return; - resched = ftrace_preempt_disable(); + preempt_disable_notrace(); cpu = raw_smp_processor_id(); /* no atomic needed, we only modify this variable by this cpu */ @@ -127,7 +127,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) out: per_cpu(trace_active, cpu)--; /* prevent recursion in schedule */ - ftrace_preempt_enable(resched); + preempt_enable_notrace(); } static struct ftrace_ops trace_ops __read_mostly = diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 4d6d711717f2..bac752f0cfb5 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -15,6 +15,55 @@ static int sys_refcount_exit; static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); +static int syscall_enter_register(struct ftrace_event_call *event, + enum trace_reg type); +static int syscall_exit_register(struct ftrace_event_call *event, + enum trace_reg type); + +static int syscall_enter_define_fields(struct ftrace_event_call *call); +static int syscall_exit_define_fields(struct ftrace_event_call *call); + +/* All syscall exit events have the same fields */ +static LIST_HEAD(syscall_exit_fields); + +static struct list_head * +syscall_get_enter_fields(struct ftrace_event_call *call) +{ + struct syscall_metadata *entry = call->data; + + return &entry->enter_fields; +} + +static 
struct list_head * +syscall_get_exit_fields(struct ftrace_event_call *call) +{ + return &syscall_exit_fields; +} + +struct trace_event_functions enter_syscall_print_funcs = { + .trace = print_syscall_enter, +}; + +struct trace_event_functions exit_syscall_print_funcs = { + .trace = print_syscall_exit, +}; + +struct ftrace_event_class event_class_syscall_enter = { + .system = "syscalls", + .reg = syscall_enter_register, + .define_fields = syscall_enter_define_fields, + .get_fields = syscall_get_enter_fields, + .raw_init = init_syscall_trace, +}; + +struct ftrace_event_class event_class_syscall_exit = { + .system = "syscalls", + .reg = syscall_exit_register, + .define_fields = syscall_exit_define_fields, + .get_fields = syscall_get_exit_fields, + .raw_init = init_syscall_trace, +}; + extern unsigned long __start_syscalls_metadata[]; extern unsigned long __stop_syscalls_metadata[]; @@ -53,7 +102,8 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr) } enum print_line_t -print_syscall_enter(struct trace_iterator *iter, int flags) +print_syscall_enter(struct trace_iterator *iter, int flags, + struct trace_event *event) { struct trace_seq *s = &iter->seq; struct trace_entry *ent = iter->ent; @@ -68,7 +118,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags) if (!entry) goto end; - if (entry->enter_event->id != ent->type) { + if (entry->enter_event->event.type != ent->type) { WARN_ON_ONCE(1); goto end; } @@ -105,7 +155,8 @@ end: } enum print_line_t -print_syscall_exit(struct trace_iterator *iter, int flags) +print_syscall_exit(struct trace_iterator *iter, int flags, + struct trace_event *event) { struct trace_seq *s = &iter->seq; struct trace_entry *ent = iter->ent; @@ -123,7 +174,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags) return TRACE_TYPE_HANDLED; } - if (entry->exit_event->id != ent->type) { + if (entry->exit_event->event.type != ent->type) { WARN_ON_ONCE(1); return TRACE_TYPE_UNHANDLED; } @@ -205,7 +256,7 @@ static void free_syscall_print_fmt(struct ftrace_event_call *call) kfree(call->print_fmt); } -int syscall_enter_define_fields(struct ftrace_event_call *call) +static int syscall_enter_define_fields(struct ftrace_event_call *call) { struct syscall_trace_enter trace; struct syscall_metadata *meta = call->data; @@ -228,7 +279,7 @@ int syscall_enter_define_fields(struct ftrace_event_call *call) return ret; } -int syscall_exit_define_fields(struct ftrace_event_call *call) +static int syscall_exit_define_fields(struct ftrace_event_call *call) { struct syscall_trace_exit trace; int ret; @@ -243,7 +294,7 @@ int syscall_exit_define_fields(struct ftrace_event_call *call) return ret; } -void ftrace_syscall_enter(struct pt_regs *regs, long id) +void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) { struct syscall_trace_enter *entry; struct syscall_metadata *sys_data; @@ -265,7 +316,7 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; event = trace_current_buffer_lock_reserve(&buffer, - sys_data->enter_event->id, size, 0, 0); + sys_data->enter_event->event.type, size, 0, 0); if (!event) return; @@ -278,7 +329,7 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) trace_current_buffer_unlock_commit(buffer, event, 0, 0); } -void ftrace_syscall_exit(struct pt_regs *regs, long ret) +void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) { struct syscall_trace_exit *entry; struct syscall_metadata *sys_data; @@ -297,7 +348,7 @@ void 
ftrace_syscall_exit(struct pt_regs *regs, long ret) return; event = trace_current_buffer_lock_reserve(&buffer, - sys_data->exit_event->id, sizeof(*entry), 0, 0); + sys_data->exit_event->event.type, sizeof(*entry), 0, 0); if (!event) return; @@ -320,7 +371,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) return -ENOSYS; mutex_lock(&syscall_trace_lock); if (!sys_refcount_enter) - ret = register_trace_sys_enter(ftrace_syscall_enter); + ret = register_trace_sys_enter(ftrace_syscall_enter, NULL); if (!ret) { set_bit(num, enabled_enter_syscalls); sys_refcount_enter++; @@ -340,7 +391,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call) sys_refcount_enter--; clear_bit(num, enabled_enter_syscalls); if (!sys_refcount_enter) - unregister_trace_sys_enter(ftrace_syscall_enter); + unregister_trace_sys_enter(ftrace_syscall_enter, NULL); mutex_unlock(&syscall_trace_lock); } @@ -354,7 +405,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) return -ENOSYS; mutex_lock(&syscall_trace_lock); if (!sys_refcount_exit) - ret = register_trace_sys_exit(ftrace_syscall_exit); + ret = register_trace_sys_exit(ftrace_syscall_exit, NULL); if (!ret) { set_bit(num, enabled_exit_syscalls); sys_refcount_exit++; @@ -374,7 +425,7 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call) sys_refcount_exit--; clear_bit(num, enabled_exit_syscalls); if (!sys_refcount_exit) - unregister_trace_sys_exit(ftrace_syscall_exit); + unregister_trace_sys_exit(ftrace_syscall_exit, NULL); mutex_unlock(&syscall_trace_lock); } @@ -434,11 +485,11 @@ static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls); static int sys_perf_refcount_enter; static int sys_perf_refcount_exit; -static void perf_syscall_enter(struct pt_regs *regs, long id) +static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; - unsigned long flags; + struct hlist_head *head; int syscall_nr; int rctx; int size; @@ -461,14 +512,16 @@ static void perf_syscall_enter(struct pt_regs *regs, long id) return; rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, - sys_data->enter_event->id, &rctx, &flags); + sys_data->enter_event->event.type, regs, &rctx); if (!rec) return; rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs); + + head = this_cpu_ptr(sys_data->enter_event->perf_events); + perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); } int perf_sysenter_enable(struct ftrace_event_call *call) @@ -480,7 +533,7 @@ int perf_sysenter_enable(struct ftrace_event_call *call) mutex_lock(&syscall_trace_lock); if (!sys_perf_refcount_enter) - ret = register_trace_sys_enter(perf_syscall_enter); + ret = register_trace_sys_enter(perf_syscall_enter, NULL); if (ret) { pr_info("event trace: Could not activate" "syscall entry trace point"); @@ -502,15 +555,15 @@ void perf_sysenter_disable(struct ftrace_event_call *call) sys_perf_refcount_enter--; clear_bit(num, enabled_perf_enter_syscalls); if (!sys_perf_refcount_enter) - unregister_trace_sys_enter(perf_syscall_enter); + unregister_trace_sys_enter(perf_syscall_enter, NULL); mutex_unlock(&syscall_trace_lock); } -static void perf_syscall_exit(struct pt_regs *regs, long ret) +static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; - unsigned long flags; + struct hlist_head *head; int 
syscall_nr; int rctx; int size; @@ -536,14 +589,15 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret) return; rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, - sys_data->exit_event->id, &rctx, &flags); + sys_data->exit_event->event.type, regs, &rctx); if (!rec) return; rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs); + head = this_cpu_ptr(sys_data->exit_event->perf_events); + perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); } int perf_sysexit_enable(struct ftrace_event_call *call) @@ -555,7 +609,7 @@ int perf_sysexit_enable(struct ftrace_event_call *call) mutex_lock(&syscall_trace_lock); if (!sys_perf_refcount_exit) - ret = register_trace_sys_exit(perf_syscall_exit); + ret = register_trace_sys_exit(perf_syscall_exit, NULL); if (ret) { pr_info("event trace: Could not activate" "syscall exit trace point"); @@ -577,9 +631,50 @@ void perf_sysexit_disable(struct ftrace_event_call *call) sys_perf_refcount_exit--; clear_bit(num, enabled_perf_exit_syscalls); if (!sys_perf_refcount_exit) - unregister_trace_sys_exit(perf_syscall_exit); + unregister_trace_sys_exit(perf_syscall_exit, NULL); mutex_unlock(&syscall_trace_lock); } #endif /* CONFIG_PERF_EVENTS */ +static int syscall_enter_register(struct ftrace_event_call *event, + enum trace_reg type) +{ + switch (type) { + case TRACE_REG_REGISTER: + return reg_event_syscall_enter(event); + case TRACE_REG_UNREGISTER: + unreg_event_syscall_enter(event); + return 0; + +#ifdef CONFIG_PERF_EVENTS + case TRACE_REG_PERF_REGISTER: + return perf_sysenter_enable(event); + case TRACE_REG_PERF_UNREGISTER: + perf_sysenter_disable(event); + return 0; +#endif + } + return 0; +} + +static int syscall_exit_register(struct ftrace_event_call *event, + enum trace_reg type) +{ + switch (type) { + case TRACE_REG_REGISTER: + return reg_event_syscall_exit(event); + case TRACE_REG_UNREGISTER: + unreg_event_syscall_exit(event); + return 0; + +#ifdef CONFIG_PERF_EVENTS + case TRACE_REG_PERF_REGISTER: + return perf_sysexit_enable(event); + case TRACE_REG_PERF_UNREGISTER: + perf_sysexit_disable(event); + return 0; +#endif + } + return 0; +} diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c deleted file mode 100644 index a7974a552ca9..000000000000 --- a/kernel/trace/trace_sysprof.c +++ /dev/null @@ -1,329 +0,0 @@ -/* - * trace stack traces - * - * Copyright (C) 2004-2008, Soeren Sandmann - * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com> - * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> - */ -#include <linux/kallsyms.h> -#include <linux/debugfs.h> -#include <linux/hrtimer.h> -#include <linux/uaccess.h> -#include <linux/ftrace.h> -#include <linux/module.h> -#include <linux/irq.h> -#include <linux/fs.h> - -#include <asm/stacktrace.h> - -#include "trace.h" - -static struct trace_array *sysprof_trace; -static int __read_mostly tracer_enabled; - -/* - * 1 msec sample interval by default: - */ -static unsigned long sample_period = 1000000; -static const unsigned int sample_max_depth = 512; - -static DEFINE_MUTEX(sample_timer_lock); -/* - * Per CPU hrtimers that do the profiling: - */ -static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer); - -struct stack_frame { - const void __user *next_fp; - unsigned long return_address; -}; - -static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) -{ - int ret; - - if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) - return 0; - - ret = 1; - pagefault_disable(); - if 
(__copy_from_user_inatomic(frame, fp, sizeof(*frame))) - ret = 0; - pagefault_enable(); - - return ret; -} - -struct backtrace_info { - struct trace_array_cpu *data; - struct trace_array *tr; - int pos; -}; - -static void -backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) -{ - /* Ignore warnings */ -} - -static void backtrace_warning(void *data, char *msg) -{ - /* Ignore warnings */ -} - -static int backtrace_stack(void *data, char *name) -{ - /* Don't bother with IRQ stacks for now */ - return -1; -} - -static void backtrace_address(void *data, unsigned long addr, int reliable) -{ - struct backtrace_info *info = data; - - if (info->pos < sample_max_depth && reliable) { - __trace_special(info->tr, info->data, 1, addr, 0); - - info->pos++; - } -} - -static const struct stacktrace_ops backtrace_ops = { - .warning = backtrace_warning, - .warning_symbol = backtrace_warning_symbol, - .stack = backtrace_stack, - .address = backtrace_address, - .walk_stack = print_context_stack, -}; - -static int -trace_kernel(struct pt_regs *regs, struct trace_array *tr, - struct trace_array_cpu *data) -{ - struct backtrace_info info; - unsigned long bp; - char *stack; - - info.tr = tr; - info.data = data; - info.pos = 1; - - __trace_special(info.tr, info.data, 1, regs->ip, 0); - - stack = ((char *)regs + sizeof(struct pt_regs)); -#ifdef CONFIG_FRAME_POINTER - bp = regs->bp; -#else - bp = 0; -#endif - - dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info); - - return info.pos; -} - -static void timer_notify(struct pt_regs *regs, int cpu) -{ - struct trace_array_cpu *data; - struct stack_frame frame; - struct trace_array *tr; - const void __user *fp; - int is_user; - int i; - - if (!regs) - return; - - tr = sysprof_trace; - data = tr->data[cpu]; - is_user = user_mode(regs); - - if (!current || current->pid == 0) - return; - - if (is_user && current->state != TASK_RUNNING) - return; - - __trace_special(tr, data, 0, 0, current->pid); - - if (!is_user) - i = trace_kernel(regs, tr, data); - else - i = 0; - - /* - * Trace user stack if we are not a kernel thread - */ - if (current->mm && i < sample_max_depth) { - regs = (struct pt_regs *)current->thread.sp0 - 1; - - fp = (void __user *)regs->bp; - - __trace_special(tr, data, 2, regs->ip, 0); - - while (i < sample_max_depth) { - frame.next_fp = NULL; - frame.return_address = 0; - if (!copy_stack_frame(fp, &frame)) - break; - if ((unsigned long)fp < regs->sp) - break; - - __trace_special(tr, data, 2, frame.return_address, - (unsigned long)fp); - fp = frame.next_fp; - - i++; - } - - } - - /* - * Special trace entry if we overflow the max depth: - */ - if (i == sample_max_depth) - __trace_special(tr, data, -1, -1, -1); - - __trace_special(tr, data, 3, current->pid, i); -} - -static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer) -{ - /* trace here */ - timer_notify(get_irq_regs(), smp_processor_id()); - - hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); - - return HRTIMER_RESTART; -} - -static void start_stack_timer(void *unused) -{ - struct hrtimer *hrtimer = &__get_cpu_var(stack_trace_hrtimer); - - hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hrtimer->function = stack_trace_timer_fn; - - hrtimer_start(hrtimer, ns_to_ktime(sample_period), - HRTIMER_MODE_REL_PINNED); -} - -static void start_stack_timers(void) -{ - on_each_cpu(start_stack_timer, NULL, 1); -} - -static void stop_stack_timer(int cpu) -{ - struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); - - hrtimer_cancel(hrtimer); -} - 
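(Stepping back to the trace_syscalls.c hunks above.) Syscall events are now described by a struct ftrace_event_class whose .reg() callback multiplexes ftrace and perf enable/disable requests through a single entry point. The sketch below shows the shape such a class takes; the class name, system string and the enable/disable helpers are made up for illustration, only the structure mirrors event_class_syscall_enter and syscall_enter_register in the diff.

#include <linux/ftrace_event.h>

/* Hypothetical enable/disable helpers standing in for the real hooks. */
static int my_event_enable(struct ftrace_event_call *call) { return 0; }
static void my_event_disable(struct ftrace_event_call *call) { }

static int my_event_register(struct ftrace_event_call *event,
			     enum trace_reg type)
{
	switch (type) {
	case TRACE_REG_REGISTER:	/* enabled via tracing/events */
		return my_event_enable(event);
	case TRACE_REG_UNREGISTER:
		my_event_disable(event);
		return 0;
#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:	/* enabled on behalf of perf */
		return my_event_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		my_event_disable(event);
		return 0;
#endif
	}
	return 0;
}

static struct ftrace_event_class my_event_class = {
	.system		= "my_subsys",
	.reg		= my_event_register,
	/* .define_fields, .get_fields and .raw_init wired up as above */
};

Funnelling registration through one callback is what lets syscall_enter_register() and syscall_exit_register() replace the separate ftrace and perf registration hooks each event call previously carried.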
-static void stop_stack_timers(void) -{ - int cpu; - - for_each_online_cpu(cpu) - stop_stack_timer(cpu); -} - -static void stop_stack_trace(struct trace_array *tr) -{ - mutex_lock(&sample_timer_lock); - stop_stack_timers(); - tracer_enabled = 0; - mutex_unlock(&sample_timer_lock); -} - -static int stack_trace_init(struct trace_array *tr) -{ - sysprof_trace = tr; - - tracing_start_cmdline_record(); - - mutex_lock(&sample_timer_lock); - start_stack_timers(); - tracer_enabled = 1; - mutex_unlock(&sample_timer_lock); - return 0; -} - -static void stack_trace_reset(struct trace_array *tr) -{ - tracing_stop_cmdline_record(); - stop_stack_trace(tr); -} - -static struct tracer stack_trace __read_mostly = -{ - .name = "sysprof", - .init = stack_trace_init, - .reset = stack_trace_reset, -#ifdef CONFIG_FTRACE_SELFTEST - .selftest = trace_selftest_startup_sysprof, -#endif -}; - -__init static int init_stack_trace(void) -{ - return register_tracer(&stack_trace); -} -device_initcall(init_stack_trace); - -#define MAX_LONG_DIGITS 22 - -static ssize_t -sysprof_sample_read(struct file *filp, char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - char buf[MAX_LONG_DIGITS]; - int r; - - r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period)); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - -static ssize_t -sysprof_sample_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - char buf[MAX_LONG_DIGITS]; - unsigned long val; - - if (cnt > MAX_LONG_DIGITS-1) - cnt = MAX_LONG_DIGITS-1; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - - val = simple_strtoul(buf, NULL, 10); - /* - * Enforce a minimum sample period of 100 usecs: - */ - if (val < 100) - val = 100; - - mutex_lock(&sample_timer_lock); - stop_stack_timers(); - sample_period = val * 1000; - start_stack_timers(); - mutex_unlock(&sample_timer_lock); - - return cnt; -} - -static const struct file_operations sysprof_sample_fops = { - .read = sysprof_sample_read, - .write = sysprof_sample_write, -}; - -void init_tracer_sysprof_debugfs(struct dentry *d_tracer) -{ - - trace_create_file("sysprof_sample_period", 0644, - d_tracer, NULL, &sysprof_sample_fops); -} diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index cc2d2faa7d9e..a7cc3793baf6 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c @@ -49,7 +49,8 @@ static void cpu_workqueue_stat_free(struct kref *kref) /* Insertion of a work */ static void -probe_workqueue_insertion(struct task_struct *wq_thread, +probe_workqueue_insertion(void *ignore, + struct task_struct *wq_thread, struct work_struct *work) { int cpu = cpumask_first(&wq_thread->cpus_allowed); @@ -70,7 +71,8 @@ found: /* Execution of a work */ static void -probe_workqueue_execution(struct task_struct *wq_thread, +probe_workqueue_execution(void *ignore, + struct task_struct *wq_thread, struct work_struct *work) { int cpu = cpumask_first(&wq_thread->cpus_allowed); @@ -90,7 +92,8 @@ found: } /* Creation of a cpu workqueue thread */ -static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu) +static void probe_workqueue_creation(void *ignore, + struct task_struct *wq_thread, int cpu) { struct cpu_workqueue_stats *cws; unsigned long flags; @@ -114,7 +117,8 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu) } /* Destruction of a cpu workqueue thread */ -static void probe_workqueue_destruction(struct task_struct *wq_thread) +static void +probe_workqueue_destruction(void *ignore, 
struct task_struct *wq_thread) { /* Workqueue only execute on one cpu */ int cpu = cpumask_first(&wq_thread->cpus_allowed); @@ -259,19 +263,19 @@ int __init trace_workqueue_early_init(void) { int ret, cpu; - ret = register_trace_workqueue_insertion(probe_workqueue_insertion); + ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL); if (ret) goto out; - ret = register_trace_workqueue_execution(probe_workqueue_execution); + ret = register_trace_workqueue_execution(probe_workqueue_execution, NULL); if (ret) goto no_insertion; - ret = register_trace_workqueue_creation(probe_workqueue_creation); + ret = register_trace_workqueue_creation(probe_workqueue_creation, NULL); if (ret) goto no_execution; - ret = register_trace_workqueue_destruction(probe_workqueue_destruction); + ret = register_trace_workqueue_destruction(probe_workqueue_destruction, NULL); if (ret) goto no_creation; @@ -283,11 +287,11 @@ int __init trace_workqueue_early_init(void) return 0; no_creation: - unregister_trace_workqueue_creation(probe_workqueue_creation); + unregister_trace_workqueue_creation(probe_workqueue_creation, NULL); no_execution: - unregister_trace_workqueue_execution(probe_workqueue_execution); + unregister_trace_workqueue_execution(probe_workqueue_execution, NULL); no_insertion: - unregister_trace_workqueue_insertion(probe_workqueue_insertion); + unregister_trace_workqueue_insertion(probe_workqueue_insertion, NULL); out: pr_warning("trace_workqueue: unable to trace workqueues\n");
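The workqueue probes above show the other half of the tracepoint conversion in this series: every probe now receives, as its first argument, the data pointer supplied at registration time, and register/unregister take that pointer as a second argument (NULL everywhere in-tree here). A minimal sketch of a probe that actually uses the argument, assuming the workqueue_insertion tracepoint of this era; the module, counter and probe names are hypothetical.

#include <linux/module.h>
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <trace/events/workqueue.h>

static atomic_t insertions_seen = ATOMIC_INIT(0);

/* First argument is whatever was passed to register_trace_...() below. */
static void my_insertion_probe(void *data, struct task_struct *wq_thread,
			       struct work_struct *work)
{
	atomic_inc((atomic_t *)data);		/* data == &insertions_seen */
}

static int __init my_probe_init(void)
{
	return register_trace_workqueue_insertion(my_insertion_probe,
						  &insertions_seen);
}
module_init(my_probe_init);

static void __exit my_probe_exit(void)
{
	/* Unregistration must pass the same (probe, data) pair. */
	unregister_trace_workqueue_insertion(my_insertion_probe,
					     &insertions_seen);
}
module_exit(my_probe_exit);

MODULE_LICENSE("GPL");

The same pattern applies to register_trace_sys_enter()/register_trace_sys_exit() in trace_syscalls.c above, which is why each probe there gained an unused void *ignore parameter.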