From 77ae365eca895061c8bf2b2e3ae1d9ea62869739 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 Mar 2009 11:00:29 -0400 Subject: ring-buffer: make lockless This patch converts the ring buffers into a completely lockless buffer recording system. The read side still takes locks since we still serialize readers. But the writers are the ones that must be lockless (those can happen in NMIs). The main change is to the "head_page" pointer. We write to the tail, and read from the head. The "head_page" pointer in the cpu buffer is now just a reference to where to look. The real head page is now kept in the head_page->list->prev->next pointer. That is, in the list head of the previous page we set flags. The list pages are allocated to be aligned such that the lowest significant bits are always zero pointing to the list. This gives us play to put in flags to their pointers. bit 0: set when the page is a head page bit 1: set when the writer is moving the page (for overwrite mode) cmpxchg is used to update the pointer. When the writer wraps the buffer and the tail meets the head, in overwrite mode, the writer must move the head page forward. It first uses cmpxchg to change the pointer flag from 1 to 2. Once this is done, the reader on another CPU will not take the page from the buffer. The writers need to protect against interrupts (we don't bother with disabling interrupts because NMIs are allowed to write too). After the writer sets the pointer flag to 2, it takes care to manage interrupts coming in. This is discribed in detail within the comments of the code. Changes in version 2: - Let reader reset entries value of header page. - Fix tail page passing commit page on reader page test. - Always increment entries and write counter in rb_tail_page_update - Add safety check in rb_set_commit_to_write to break out of infinite loop - add mask in rb_is_reader_page [ Impact: lock free writing to the ring buffer ] Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 29f8599e6bea..7fca71693ae7 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -170,7 +170,6 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); -unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, -- cgit v1.2.3 From 1f9963cbb0280e0cd554161e00f1a0eeddbf1ae1 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 20 Jul 2009 10:20:53 +0800 Subject: tracing/filters: improve subsystem filter Currently a subsystem filter should be applicable to all events under the subsystem, and if it failed, all the event filters will be cleared. Those behaviors make subsys filter much less useful: # echo 'vec == 1' > irq/softirq_entry/filter # echo 'irq == 5' > irq/filter bash: echo: write error: Invalid argument # cat irq/softirq_entry/filter none I'd expect it set the filter for irq_handler_entry/exit, and not touch softirq_entry/exit. The basic idea is, try to see if the filter can be applied to which events, and then just apply to the those events: # echo 'vec == 1' > softirq_entry/filter # echo 'irq == 5' > filter # cat irq_handler_entry/filter irq == 5 # cat softirq_entry/filter vec == 1 Changelog for v2: - do some cleanups to address Frederic's comments. Inspired-by: Steven Rostedt Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker LKML-Reference: <4A63D485.7030703@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 4 +- kernel/trace/trace.h | 3 +- kernel/trace/trace_events_filter.c | 124 ++++++++++++++++++++++++------------- 3 files changed, 87 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 5c093ffc655b..26d3673d5143 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -101,6 +101,8 @@ void trace_current_buffer_discard_commit(struct ring_buffer_event *event); void tracing_record_cmdline(struct task_struct *tsk); +struct event_filter; + struct ftrace_event_call { struct list_head list; char *name; @@ -116,7 +118,7 @@ struct ftrace_event_call { int (*define_fields)(void); struct list_head fields; int filter_active; - void *filter; + struct event_filter *filter; void *mod; #ifdef CONFIG_EVENT_PROFILE diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 94305c7bc11c..758b0dbed552 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -750,13 +750,14 @@ struct event_filter { int n_preds; struct filter_pred **preds; char *filter_string; + bool no_reset; }; struct event_subsystem { struct list_head list; const char *name; struct dentry *entry; - void *filter; + struct event_filter *filter; int nr_events; }; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 1c80ef702b83..27c2dbea3710 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -420,7 +420,14 @@ oom: } EXPORT_SYMBOL_GPL(init_preds); -static void filter_free_subsystem_preds(struct event_subsystem *system) +enum { + FILTER_DISABLE_ALL, + FILTER_INIT_NO_RESET, + FILTER_SKIP_NO_RESET, +}; + +static void filter_free_subsystem_preds(struct event_subsystem *system, + int flag) { struct ftrace_event_call *call; @@ -428,6 +435,14 @@ static void filter_free_subsystem_preds(struct event_subsystem *system) if (!call->define_fields) continue; + if (flag == FILTER_INIT_NO_RESET) { + call->filter->no_reset = false; + continue; + } + + if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset) + continue; + if (!strcmp(call->system, system->name)) { filter_disable_preds(call); remove_filter_string(call->filter); @@ -529,7 +544,8 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size, static int filter_add_pred(struct filter_parse_state *ps, struct ftrace_event_call *call, - struct filter_pred *pred) + struct filter_pred *pred, + bool dry_run) { struct ftrace_event_field *field; filter_pred_fn_t fn; @@ -541,10 +557,12 @@ static int filter_add_pred(struct filter_parse_state *ps, if (pred->op == OP_AND) { pred->pop_n = 2; - return filter_add_pred_fn(ps, call, pred, filter_pred_and); + fn = filter_pred_and; + goto add_pred_fn; } else if (pred->op == OP_OR) { pred->pop_n = 2; - return filter_add_pred_fn(ps, call, pred, filter_pred_or); + fn = filter_pred_or; + goto add_pred_fn; } field = find_event_field(call, pred->field_name); @@ -567,9 +585,6 @@ static int filter_add_pred(struct filter_parse_state *ps, else fn = filter_pred_strloc; pred->str_len = field->size; - if (pred->op == OP_NE) - pred->not = 1; - return filter_add_pred_fn(ps, call, pred, fn); } else { if (field->is_signed) ret = strict_strtoll(pred->str_val, 0, &val); @@ -580,27 +595,33 @@ static int filter_add_pred(struct filter_parse_state *ps, return -EINVAL; } pred->val = val; - } - fn = select_comparison_fn(pred->op, field->size, field->is_signed); - if (!fn) { - parse_error(ps, FILT_ERR_INVALID_OP, 0); - return -EINVAL; + fn = select_comparison_fn(pred->op, field->size, + field->is_signed); + if (!fn) { + parse_error(ps, FILT_ERR_INVALID_OP, 0); + return -EINVAL; + } } if (pred->op == OP_NE) pred->not = 1; - return filter_add_pred_fn(ps, call, pred, fn); +add_pred_fn: + if (!dry_run) + return filter_add_pred_fn(ps, call, pred, fn); + return 0; } static int filter_add_subsystem_pred(struct filter_parse_state *ps, struct event_subsystem *system, struct filter_pred *pred, - char *filter_string) + char *filter_string, + bool dry_run) { struct ftrace_event_call *call; int err = 0; + bool fail = true; list_for_each_entry(call, &ftrace_events, list) { @@ -610,16 +631,24 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps, if (strcmp(call->system, system->name)) continue; - err = filter_add_pred(ps, call, pred); - if (err) { - filter_free_subsystem_preds(system); - parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); - goto out; - } - replace_filter_string(call->filter, filter_string); + if (call->filter->no_reset) + continue; + + err = filter_add_pred(ps, call, pred, dry_run); + if (err) + call->filter->no_reset = true; + else + fail = false; + + if (!dry_run) + replace_filter_string(call->filter, filter_string); } -out: - return err; + + if (fail) { + parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); + return err; + } + return 0; } static void parse_init(struct filter_parse_state *ps, @@ -978,12 +1007,14 @@ static int check_preds(struct filter_parse_state *ps) static int replace_preds(struct event_subsystem *system, struct ftrace_event_call *call, struct filter_parse_state *ps, - char *filter_string) + char *filter_string, + bool dry_run) { char *operand1 = NULL, *operand2 = NULL; struct filter_pred *pred; struct postfix_elt *elt; int err; + int n_preds = 0; err = check_preds(ps); if (err) @@ -1002,19 +1033,14 @@ static int replace_preds(struct event_subsystem *system, continue; } + if (n_preds++ == MAX_FILTER_PRED) { + parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); + return -ENOSPC; + } + if (elt->op == OP_AND || elt->op == OP_OR) { pred = create_logical_pred(elt->op); - if (call) - err = filter_add_pred(ps, call, pred); - else - err = filter_add_subsystem_pred(ps, system, - pred, filter_string); - filter_free_pred(pred); - if (err) - return err; - - operand1 = operand2 = NULL; - continue; + goto add_pred; } if (!operand1 || !operand2) { @@ -1023,11 +1049,12 @@ static int replace_preds(struct event_subsystem *system, } pred = create_pred(elt->op, operand1, operand2); +add_pred: if (call) - err = filter_add_pred(ps, call, pred); + err = filter_add_pred(ps, call, pred, false); else err = filter_add_subsystem_pred(ps, system, pred, - filter_string); + filter_string, dry_run); filter_free_pred(pred); if (err) return err; @@ -1068,7 +1095,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) goto out; } - err = replace_preds(NULL, call, ps, filter_string); + err = replace_preds(NULL, call, ps, filter_string, false); if (err) append_filter_err(ps, call->filter); @@ -1092,7 +1119,7 @@ int apply_subsystem_event_filter(struct event_subsystem *system, mutex_lock(&event_mutex); if (!strcmp(strstrip(filter_string), "0")) { - filter_free_subsystem_preds(system); + filter_free_subsystem_preds(system, FILTER_DISABLE_ALL); remove_filter_string(system->filter); mutex_unlock(&event_mutex); return 0; @@ -1103,7 +1130,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system, if (!ps) goto out_unlock; - filter_free_subsystem_preds(system); replace_filter_string(system->filter, filter_string); parse_init(ps, filter_ops, filter_string); @@ -1113,9 +1139,23 @@ int apply_subsystem_event_filter(struct event_subsystem *system, goto out; } - err = replace_preds(system, NULL, ps, filter_string); - if (err) + filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET); + + /* try to see the filter can be applied to which events */ + err = replace_preds(system, NULL, ps, filter_string, true); + if (err) { + append_filter_err(ps, system->filter); + goto out; + } + + filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET); + + /* really apply the filter to the events */ + err = replace_preds(system, NULL, ps, filter_string, false); + if (err) { append_filter_err(ps, system->filter); + filter_free_subsystem_preds(system, 2); + } out: filter_opstack_clear(ps); -- cgit v1.2.3 From 63fbdab3157b72467013fe4dcf88c85e45280ef7 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:27 -0400 Subject: tracing: Add DECLARE_TRACE_WITH_CALLBACK() macro Introduce a new 'DECLARE_TRACE_WITH_CALLBACK()' macro, so that tracepoints can associate an external register/unregister function. This prepares for the syscalls tracer conversion to trace events. We will need to perform arch level operations once a syscall event is turned on/off, such as TIF flags setting, hence the need of such specific callbacks. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- include/linux/tracepoint.h | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index b9dc4ca0246f..5984ed04c03b 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -60,8 +60,10 @@ struct tracepoint { * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. + * An optional set of (un)registration functions can be passed to perform any + * additional (un)registration work. */ -#define DECLARE_TRACE(name, proto, args) \ +#define DECLARE_TRACE_WITH_CALLBACK(name, proto, args, reg, unreg) \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ @@ -71,13 +73,30 @@ struct tracepoint { } \ static inline int register_trace_##name(void (*probe)(proto)) \ { \ - return tracepoint_probe_register(#name, (void *)probe); \ + int ret; \ + void (*func)(void) = reg; \ + \ + ret = tracepoint_probe_register(#name, (void *)probe); \ + if (func && !ret) \ + func(); \ + return ret; \ } \ static inline int unregister_trace_##name(void (*probe)(proto)) \ { \ - return tracepoint_probe_unregister(#name, (void *)probe);\ + int ret; \ + void (*func)(void) = unreg; \ + \ + ret = tracepoint_probe_unregister(#name, (void *)probe);\ + if (func && !ret) \ + func(); \ + return ret; \ } + +#define DECLARE_TRACE(name, proto, args) \ + DECLARE_TRACE_WITH_CALLBACK(name, TP_PROTO(proto), TP_ARGS(args),\ + NULL, NULL); + #define DEFINE_TRACE(name) \ static const char __tpstrtab_##name[] \ __attribute__((section("__tracepoints_strings"))) = #name; \ @@ -94,7 +113,7 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end); #else /* !CONFIG_TRACEPOINTS */ -#define DECLARE_TRACE(name, proto, args) \ +#define DECLARE_TRACE_WITH_CALLBACK(name, proto, args, reg, unreg) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ { } \ static inline void trace_##name(proto) \ @@ -108,6 +127,10 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, return -ENOSYS; \ } +#define DECLARE_TRACE(name, proto, args) \ + DECLARE_TRACE_WITH_CALLBACK(name, TP_PROTO(proto), TP_ARGS(args),\ + NULL, NULL); + #define DEFINE_TRACE(name) #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) -- cgit v1.2.3 From 69fd4f0eb2ececbf8ade55e31a933e174965745e Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:44 -0400 Subject: tracing: Add ftrace_event_call void * 'data' field add an optional void * pointer to 'ftrace_event_call' that is passed in for regfunc and unregfunc. This prepares for syscall tracepoints creation by passing the name of the syscall we want to trace and then retrieve its number through our arch syscall table. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 5 +++-- include/trace/ftrace.h | 4 ++-- kernel/trace/trace_events.c | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index ac8c6f8cf242..8544f121d9f1 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -112,8 +112,8 @@ struct ftrace_event_call { struct dentry *dir; struct trace_event *event; int enabled; - int (*regfunc)(void); - void (*unregfunc)(void); + int (*regfunc)(void *); + void (*unregfunc)(void *); int id; int (*raw_init)(void); int (*show_format)(struct trace_seq *s); @@ -122,6 +122,7 @@ struct ftrace_event_call { int filter_active; struct event_filter *filter; void *mod; + void *data; atomic_t profile_count; int (*profile_enable)(struct ftrace_event_call *); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 25d3b02a06f8..46d81b5e8610 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -568,7 +568,7 @@ static void ftrace_raw_event_##call(proto) \ trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ } \ \ -static int ftrace_raw_reg_event_##call(void) \ +static int ftrace_raw_reg_event_##call(void *ptr) \ { \ int ret; \ \ @@ -579,7 +579,7 @@ static int ftrace_raw_reg_event_##call(void) \ return ret; \ } \ \ -static void ftrace_raw_unreg_event_##call(void) \ +static void ftrace_raw_unreg_event_##call(void *ptr) \ { \ unregister_trace_##call(ftrace_raw_event_##call); \ } \ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f95f8470dd38..1d289e2d6693 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -86,14 +86,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, if (call->enabled) { call->enabled = 0; tracing_stop_cmdline_record(); - call->unregfunc(); + call->unregfunc(call->data); } break; case 1: if (!call->enabled) { call->enabled = 1; tracing_start_cmdline_record(); - call->regfunc(); + call->regfunc(call->data); } break; } -- cgit v1.2.3 From fb34a08c3469b2be9eae626ccb96476b4687b810 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:47 -0400 Subject: tracing: Add trace events for each syscall entry/exit Layer Frederic's syscall tracer on tracepoints. We create trace events via hooking into the SYSCALL_DEFINE macros. This allows us to individually toggle syscall entry and exit points on/off. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- include/linux/syscalls.h | 61 +++++++++++++- include/trace/syscall.h | 18 ++--- kernel/trace/trace_syscalls.c | 183 +++++++++++++++++++++--------------------- 3 files changed, 159 insertions(+), 103 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 80de7003d8c2..5e5b4d33a31c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -64,6 +64,7 @@ struct perf_counter_attr; #include #include #include +#include #include #include #include @@ -112,6 +113,59 @@ struct perf_counter_attr; #define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) #define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) + +#define SYSCALL_TRACE_ENTER_EVENT(sname) \ + static struct ftrace_event_call event_enter_##sname; \ + static int init_enter_##sname(void) \ + { \ + int num; \ + num = syscall_name_to_nr("sys"#sname); \ + if (num < 0) \ + return -ENOSYS; \ + register_ftrace_event(&event_syscall_enter); \ + INIT_LIST_HEAD(&event_enter_##sname.fields); \ + init_preds(&event_enter_##sname); \ + return 0; \ + } \ + static struct ftrace_event_call __used \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_events"))) \ + event_enter_##sname = { \ + .name = "sys_enter"#sname, \ + .system = "syscalls", \ + .event = &event_syscall_enter, \ + .raw_init = init_enter_##sname, \ + .regfunc = reg_event_syscall_enter, \ + .unregfunc = unreg_event_syscall_enter, \ + .data = "sys"#sname, \ + } + +#define SYSCALL_TRACE_EXIT_EVENT(sname) \ + static struct ftrace_event_call event_exit_##sname; \ + static int init_exit_##sname(void) \ + { \ + int num; \ + num = syscall_name_to_nr("sys"#sname); \ + if (num < 0) \ + return -ENOSYS; \ + register_ftrace_event(&event_syscall_exit); \ + INIT_LIST_HEAD(&event_exit_##sname.fields); \ + init_preds(&event_exit_##sname); \ + return 0; \ + } \ + static struct ftrace_event_call __used \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_events"))) \ + event_exit_##sname = { \ + .name = "sys_exit"#sname, \ + .system = "syscalls", \ + .event = &event_syscall_exit, \ + .raw_init = init_exit_##sname, \ + .regfunc = reg_event_syscall_exit, \ + .unregfunc = unreg_event_syscall_exit, \ + .data = "sys"#sname, \ + } + #define SYSCALL_METADATA(sname, nb) \ static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ @@ -121,7 +175,9 @@ struct perf_counter_attr; .nb_args = nb, \ .types = types_##sname, \ .args = args_##sname, \ - } + }; \ + SYSCALL_TRACE_ENTER_EVENT(sname); \ + SYSCALL_TRACE_EXIT_EVENT(sname); #define SYSCALL_DEFINE0(sname) \ static const struct syscall_metadata __used \ @@ -131,8 +187,9 @@ struct perf_counter_attr; .name = "sys_"#sname, \ .nb_args = 0, \ }; \ + SYSCALL_TRACE_ENTER_EVENT(_##sname); \ + SYSCALL_TRACE_EXIT_EVENT(_##sname); \ asmlinkage long sys_##sname(void) - #else #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) #endif diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 3951d774de18..73fb8b4a9955 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -2,6 +2,8 @@ #define _TRACE_SYSCALL_H #include +#include +#include #include @@ -40,15 +42,13 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern struct syscall_metadata *syscall_nr_to_meta(int nr); -extern void start_ftrace_syscalls(void); -extern void stop_ftrace_syscalls(void); -extern void ftrace_syscall_enter(struct pt_regs *regs); -extern void ftrace_syscall_exit(struct pt_regs *regs); -#else -static inline void start_ftrace_syscalls(void) { } -static inline void stop_ftrace_syscalls(void) { } -static inline void ftrace_syscall_enter(struct pt_regs *regs) { } -static inline void ftrace_syscall_exit(struct pt_regs *regs) { } +extern int syscall_name_to_nr(char *name); +extern struct trace_event event_syscall_enter; +extern struct trace_event event_syscall_exit; +extern int reg_event_syscall_enter(void *ptr); +extern void unreg_event_syscall_enter(void *ptr); +extern int reg_event_syscall_exit(void *ptr); +extern void unreg_event_syscall_exit(void *ptr); #endif #endif /* _TRACE_SYSCALL_H */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 08aed439feaf..c7ae25ee95d8 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -1,15 +1,16 @@ #include #include +#include #include #include "trace_output.h" #include "trace.h" -/* Keep a counter of the syscall tracing users */ -static int refcount; - -/* Prevent from races on thread flags toggling */ static DEFINE_MUTEX(syscall_trace_lock); +static int sys_refcount_enter; +static int sys_refcount_exit; +static DECLARE_BITMAP(enabled_enter_syscalls, FTRACE_SYSCALL_MAX); +static DECLARE_BITMAP(enabled_exit_syscalls, FTRACE_SYSCALL_MAX); /* Option to display the parameters types */ enum { @@ -95,53 +96,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags) return TRACE_TYPE_HANDLED; } -void start_ftrace_syscalls(void) -{ - unsigned long flags; - struct task_struct *g, *t; - - mutex_lock(&syscall_trace_lock); - - /* Don't enable the flag on the tasks twice */ - if (++refcount != 1) - goto unlock; - - read_lock_irqsave(&tasklist_lock, flags); - - do_each_thread(g, t) { - set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); - } while_each_thread(g, t); - - read_unlock_irqrestore(&tasklist_lock, flags); - -unlock: - mutex_unlock(&syscall_trace_lock); -} - -void stop_ftrace_syscalls(void) -{ - unsigned long flags; - struct task_struct *g, *t; - - mutex_lock(&syscall_trace_lock); - - /* There are perhaps still some users */ - if (--refcount) - goto unlock; - - read_lock_irqsave(&tasklist_lock, flags); - - do_each_thread(g, t) { - clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); - } while_each_thread(g, t); - - read_unlock_irqrestore(&tasklist_lock, flags); - -unlock: - mutex_unlock(&syscall_trace_lock); -} - -void ftrace_syscall_enter(struct pt_regs *regs) +void ftrace_syscall_enter(struct pt_regs *regs, long id) { struct syscall_trace_enter *entry; struct syscall_metadata *sys_data; @@ -150,6 +105,8 @@ void ftrace_syscall_enter(struct pt_regs *regs) int syscall_nr; syscall_nr = syscall_get_nr(current, regs); + if (!test_bit(syscall_nr, enabled_enter_syscalls)) + return; sys_data = syscall_nr_to_meta(syscall_nr); if (!sys_data) @@ -170,7 +127,7 @@ void ftrace_syscall_enter(struct pt_regs *regs) trace_wake_up(); } -void ftrace_syscall_exit(struct pt_regs *regs) +void ftrace_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_trace_exit *entry; struct syscall_metadata *sys_data; @@ -178,6 +135,8 @@ void ftrace_syscall_exit(struct pt_regs *regs) int syscall_nr; syscall_nr = syscall_get_nr(current, regs); + if (!test_bit(syscall_nr, enabled_exit_syscalls)) + return; sys_data = syscall_nr_to_meta(syscall_nr); if (!sys_data) @@ -196,54 +155,94 @@ void ftrace_syscall_exit(struct pt_regs *regs) trace_wake_up(); } -static int init_syscall_tracer(struct trace_array *tr) +int reg_event_syscall_enter(void *ptr) { - start_ftrace_syscalls(); - - return 0; + int ret = 0; + int num; + char *name; + + name = (char *)ptr; + num = syscall_name_to_nr(name); + if (num < 0 || num >= FTRACE_SYSCALL_MAX) + return -ENOSYS; + mutex_lock(&syscall_trace_lock); + if (!sys_refcount_enter) + ret = register_trace_syscall_enter(ftrace_syscall_enter); + if (ret) { + pr_info("event trace: Could not activate" + "syscall entry trace point"); + } else { + set_bit(num, enabled_enter_syscalls); + sys_refcount_enter++; + } + mutex_unlock(&syscall_trace_lock); + return ret; } -static void reset_syscall_tracer(struct trace_array *tr) +void unreg_event_syscall_enter(void *ptr) { - stop_ftrace_syscalls(); - tracing_reset_online_cpus(tr); -} - -static struct trace_event syscall_enter_event = { - .type = TRACE_SYSCALL_ENTER, - .trace = print_syscall_enter, -}; - -static struct trace_event syscall_exit_event = { - .type = TRACE_SYSCALL_EXIT, - .trace = print_syscall_exit, -}; + int num; + char *name; -static struct tracer syscall_tracer __read_mostly = { - .name = "syscall", - .init = init_syscall_tracer, - .reset = reset_syscall_tracer, - .flags = &syscalls_flags, -}; + name = (char *)ptr; + num = syscall_name_to_nr(name); + if (num < 0 || num >= FTRACE_SYSCALL_MAX) + return; + mutex_lock(&syscall_trace_lock); + sys_refcount_enter--; + clear_bit(num, enabled_enter_syscalls); + if (!sys_refcount_enter) + unregister_trace_syscall_enter(ftrace_syscall_enter); + mutex_unlock(&syscall_trace_lock); +} -__init int register_ftrace_syscalls(void) +int reg_event_syscall_exit(void *ptr) { - int ret; - - ret = register_ftrace_event(&syscall_enter_event); - if (!ret) { - printk(KERN_WARNING "event %d failed to register\n", - syscall_enter_event.type); - WARN_ON_ONCE(1); + int ret = 0; + int num; + char *name; + + name = (char *)ptr; + num = syscall_name_to_nr(name); + if (num < 0 || num >= FTRACE_SYSCALL_MAX) + return -ENOSYS; + mutex_lock(&syscall_trace_lock); + if (!sys_refcount_exit) + ret = register_trace_syscall_exit(ftrace_syscall_exit); + if (ret) { + pr_info("event trace: Could not activate" + "syscall exit trace point"); + } else { + set_bit(num, enabled_exit_syscalls); + sys_refcount_exit++; } + mutex_unlock(&syscall_trace_lock); + return ret; +} - ret = register_ftrace_event(&syscall_exit_event); - if (!ret) { - printk(KERN_WARNING "event %d failed to register\n", - syscall_exit_event.type); - WARN_ON_ONCE(1); - } +void unreg_event_syscall_exit(void *ptr) +{ + int num; + char *name; - return register_tracer(&syscall_tracer); + name = (char *)ptr; + num = syscall_name_to_nr(name); + if (num < 0 || num >= FTRACE_SYSCALL_MAX) + return; + mutex_lock(&syscall_trace_lock); + sys_refcount_exit--; + clear_bit(num, enabled_exit_syscalls); + if (!sys_refcount_exit) + unregister_trace_syscall_exit(ftrace_syscall_exit); + mutex_unlock(&syscall_trace_lock); } -device_initcall(register_ftrace_syscalls); + +struct trace_event event_syscall_enter = { + .trace = print_syscall_enter, + .type = TRACE_SYSCALL_ENTER +}; + +struct trace_event event_syscall_exit = { + .trace = print_syscall_exit, + .type = TRACE_SYSCALL_EXIT +}; -- cgit v1.2.3 From 64c12e0444fcc6b75eb49144ba46d43dbdc6bc8f Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:53 -0400 Subject: tracing: Add individual syscalls tracepoint id support The current state of syscalls tracepoints generates only one event id for every syscall events. This patch associates an id with each syscall trace event, so that we can identify each syscall trace event using the 'perf' tool. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/ftrace.c | 10 ++++++++++ include/linux/syscalls.h | 22 ++++++++++++++++++---- include/trace/syscall.h | 8 ++++++++ kernel/trace/trace.h | 6 ------ kernel/trace/trace_syscalls.c | 26 ++++++++++++++++---------- 5 files changed, 52 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0d93d409b8d2..3cff1214e176 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -516,6 +516,16 @@ int syscall_name_to_nr(char *name) return -1; } +void set_syscall_enter_id(int num, int id) +{ + syscalls_metadata[num]->enter_id = id; +} + +void set_syscall_exit_id(int num, int id) +{ + syscalls_metadata[num]->exit_id = id; +} + static int __init arch_init_ftrace_syscalls(void) { int i; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 5e5b4d33a31c..ce4b01c658eb 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -116,13 +116,20 @@ struct perf_counter_attr; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static struct ftrace_event_call event_enter_##sname; \ + struct trace_event enter_syscall_print_##sname = { \ + .trace = print_syscall_enter, \ + }; \ static int init_enter_##sname(void) \ { \ - int num; \ + int num, id; \ num = syscall_name_to_nr("sys"#sname); \ if (num < 0) \ return -ENOSYS; \ - register_ftrace_event(&event_syscall_enter); \ + id = register_ftrace_event(&enter_syscall_print_##sname);\ + if (!id) \ + return -ENODEV; \ + event_enter_##sname.id = id; \ + set_syscall_enter_id(num, id); \ INIT_LIST_HEAD(&event_enter_##sname.fields); \ init_preds(&event_enter_##sname); \ return 0; \ @@ -142,13 +149,20 @@ struct perf_counter_attr; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct ftrace_event_call event_exit_##sname; \ + struct trace_event exit_syscall_print_##sname = { \ + .trace = print_syscall_exit, \ + }; \ static int init_exit_##sname(void) \ { \ - int num; \ + int num, id; \ num = syscall_name_to_nr("sys"#sname); \ if (num < 0) \ return -ENOSYS; \ - register_ftrace_event(&event_syscall_exit); \ + id = register_ftrace_event(&exit_syscall_print_##sname);\ + if (!id) \ + return -ENODEV; \ + event_exit_##sname.id = id; \ + set_syscall_exit_id(num, id); \ INIT_LIST_HEAD(&event_exit_##sname.fields); \ init_preds(&event_exit_##sname); \ return 0; \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 73fb8b4a9955..df628404241a 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -32,23 +32,31 @@ DECLARE_TRACE_WITH_CALLBACK(syscall_exit, * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) + * @enter_id: associated ftrace enter event id + * @exit_id: associated ftrace exit event id */ struct syscall_metadata { const char *name; int nb_args; const char **types; const char **args; + int enter_id; + int exit_id; }; #ifdef CONFIG_FTRACE_SYSCALLS extern struct syscall_metadata *syscall_nr_to_meta(int nr); extern int syscall_name_to_nr(char *name); +void set_syscall_enter_id(int num, int id); +void set_syscall_exit_id(int num, int id); extern struct trace_event event_syscall_enter; extern struct trace_event event_syscall_exit; extern int reg_event_syscall_enter(void *ptr); extern void unreg_event_syscall_enter(void *ptr); extern int reg_event_syscall_exit(void *ptr); extern void unreg_event_syscall_exit(void *ptr); +enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); +enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif #endif /* _TRACE_SYSCALL_H */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d682357e4b1f..300ef788c976 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -34,8 +34,6 @@ enum trace_type { TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_HW_BRANCHES, - TRACE_SYSCALL_ENTER, - TRACE_SYSCALL_EXIT, TRACE_KMEM_ALLOC, TRACE_KMEM_FREE, TRACE_POWER, @@ -319,10 +317,6 @@ extern void __ftrace_bad_type(void); TRACE_KMEM_ALLOC); \ IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ TRACE_KMEM_FREE); \ - IF_ASSIGN(var, ent, struct syscall_trace_enter, \ - TRACE_SYSCALL_ENTER); \ - IF_ASSIGN(var, ent, struct syscall_trace_exit, \ - TRACE_SYSCALL_EXIT); \ __ftrace_bad_type(); \ } while (0) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index c7ae25ee95d8..e58a9c11ba85 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -36,14 +36,18 @@ print_syscall_enter(struct trace_iterator *iter, int flags) struct syscall_metadata *entry; int i, ret, syscall; - trace_assign_type(trace, ent); - + trace = (typeof(trace))ent; syscall = trace->nr; - entry = syscall_nr_to_meta(syscall); + if (!entry) goto end; + if (entry->enter_id != ent->type) { + WARN_ON_ONCE(1); + goto end; + } + ret = trace_seq_printf(s, "%s(", entry->name); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -78,16 +82,20 @@ print_syscall_exit(struct trace_iterator *iter, int flags) struct syscall_metadata *entry; int ret; - trace_assign_type(trace, ent); - + trace = (typeof(trace))ent; syscall = trace->nr; - entry = syscall_nr_to_meta(syscall); + if (!entry) { trace_seq_printf(s, "\n"); return TRACE_TYPE_HANDLED; } + if (entry->exit_id != ent->type) { + WARN_ON_ONCE(1); + return TRACE_TYPE_UNHANDLED; + } + ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, trace->ret); if (!ret) @@ -114,7 +122,7 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; - event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size, + event = trace_current_buffer_lock_reserve(sys_data->enter_id, size, 0, 0); if (!event) return; @@ -142,7 +150,7 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) if (!sys_data) return; - event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT, + event = trace_current_buffer_lock_reserve(sys_data->exit_id, sizeof(*entry), 0, 0); if (!event) return; @@ -239,10 +247,8 @@ void unreg_event_syscall_exit(void *ptr) struct trace_event event_syscall_enter = { .trace = print_syscall_enter, - .type = TRACE_SYSCALL_ENTER }; struct trace_event event_syscall_exit = { .trace = print_syscall_exit, - .type = TRACE_SYSCALL_EXIT }; -- cgit v1.2.3 From f4b5ffccc83c82947f5d9f15d6f1b6edb1b71cd7 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:53:02 -0400 Subject: tracing: Add perf counter support for syscalls tracing The perf counter support is automated for usual trace events. But we have to define specific callbacks for this to handle syscalls trace events Make 'perf stat -e syscalls:sys_enter_blah' work with syscall style tracepoints. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- include/linux/perf_counter.h | 2 + include/linux/syscalls.h | 52 +++++++++++++++++- include/trace/syscall.h | 7 +++ kernel/trace/trace_syscalls.c | 121 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 181 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a9d823a93fe8..8e6460fb4c02 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -734,6 +734,8 @@ extern int sysctl_perf_counter_mlock; extern int sysctl_perf_counter_sample_rate; extern void perf_counter_init(void); +extern void perf_tpcounter_event(int event_id, u64 addr, u64 count, + void *record, int entry_size); #ifndef perf_misc_flags #define perf_misc_flags(regs) (user_mode(regs) ? PERF_EVENT_MISC_USER : \ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index ce4b01c658eb..5541e75e140a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -98,6 +98,53 @@ struct perf_counter_attr; #define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) +#ifdef CONFIG_EVENT_PROFILE +#define TRACE_SYS_ENTER_PROFILE(sname) \ +static int prof_sysenter_enable_##sname(struct ftrace_event_call *event_call) \ +{ \ + int ret = 0; \ + if (!atomic_inc_return(&event_enter_##sname.profile_count)) \ + ret = reg_prof_syscall_enter("sys"#sname); \ + return ret; \ +} \ + \ +static void prof_sysenter_disable_##sname(struct ftrace_event_call *event_call)\ +{ \ + if (atomic_add_negative(-1, &event_enter_##sname.profile_count)) \ + unreg_prof_syscall_enter("sys"#sname); \ +} + +#define TRACE_SYS_EXIT_PROFILE(sname) \ +static int prof_sysexit_enable_##sname(struct ftrace_event_call *event_call) \ +{ \ + int ret = 0; \ + if (!atomic_inc_return(&event_exit_##sname.profile_count)) \ + ret = reg_prof_syscall_exit("sys"#sname); \ + return ret; \ +} \ + \ +static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ +{ \ + if (atomic_add_negative(-1, &event_exit_##sname.profile_count)) \ + unreg_prof_syscall_exit("sys"#sname); \ +} + +#define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ + .profile_count = ATOMIC_INIT(-1), \ + .profile_enable = prof_sysenter_enable_##sname, \ + .profile_disable = prof_sysenter_disable_##sname, + +#define TRACE_SYS_EXIT_PROFILE_INIT(sname) \ + .profile_count = ATOMIC_INIT(-1), \ + .profile_enable = prof_sysexit_enable_##sname, \ + .profile_disable = prof_sysexit_disable_##sname, +#else +#define TRACE_SYS_ENTER_PROFILE(sname) +#define TRACE_SYS_ENTER_PROFILE_INIT(sname) +#define TRACE_SYS_EXIT_PROFILE(sname) +#define TRACE_SYS_EXIT_PROFILE_INIT(sname) +#endif + #ifdef CONFIG_FTRACE_SYSCALLS #define __SC_STR_ADECL1(t, a) #a #define __SC_STR_ADECL2(t, a, ...) #a, __SC_STR_ADECL1(__VA_ARGS__) @@ -113,7 +160,6 @@ struct perf_counter_attr; #define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) #define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) - #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static struct ftrace_event_call event_enter_##sname; \ struct trace_event enter_syscall_print_##sname = { \ @@ -134,6 +180,7 @@ struct perf_counter_attr; init_preds(&event_enter_##sname); \ return 0; \ } \ + TRACE_SYS_ENTER_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -145,6 +192,7 @@ struct perf_counter_attr; .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ .data = "sys"#sname, \ + TRACE_SYS_ENTER_PROFILE_INIT(sname) \ } #define SYSCALL_TRACE_EXIT_EVENT(sname) \ @@ -167,6 +215,7 @@ struct perf_counter_attr; init_preds(&event_exit_##sname); \ return 0; \ } \ + TRACE_SYS_EXIT_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -178,6 +227,7 @@ struct perf_counter_attr; .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ .data = "sys"#sname, \ + TRACE_SYS_EXIT_PROFILE_INIT(sname) \ } #define SYSCALL_METADATA(sname, nb) \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index df628404241a..3ab6dd18fa3a 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -57,6 +57,13 @@ extern int reg_event_syscall_exit(void *ptr); extern void unreg_event_syscall_exit(void *ptr); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); +#endif +#ifdef CONFIG_EVENT_PROFILE +int reg_prof_syscall_enter(char *name); +void unreg_prof_syscall_enter(char *name); +int reg_prof_syscall_exit(char *name); +void unreg_prof_syscall_exit(char *name); + #endif #endif /* _TRACE_SYSCALL_H */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index e58a9c11ba85..f4eaec3d559a 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include "trace_output.h" @@ -252,3 +253,123 @@ struct trace_event event_syscall_enter = { struct trace_event event_syscall_exit = { .trace = print_syscall_exit, }; + +#ifdef CONFIG_EVENT_PROFILE +static DECLARE_BITMAP(enabled_prof_enter_syscalls, FTRACE_SYSCALL_MAX); +static DECLARE_BITMAP(enabled_prof_exit_syscalls, FTRACE_SYSCALL_MAX); +static int sys_prof_refcount_enter; +static int sys_prof_refcount_exit; + +static void prof_syscall_enter(struct pt_regs *regs, long id) +{ + struct syscall_metadata *sys_data; + int syscall_nr; + + syscall_nr = syscall_get_nr(current, regs); + if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) + return; + + sys_data = syscall_nr_to_meta(syscall_nr); + if (!sys_data) + return; + + perf_tpcounter_event(sys_data->enter_id, 0, 1, NULL, 0); +} + +int reg_prof_syscall_enter(char *name) +{ + int ret = 0; + int num; + + num = syscall_name_to_nr(name); + if (num < 0 || num >= FTRACE_SYSCALL_MAX) + return -ENOSYS; + + mutex_lock(&syscall_trace_lock); + if (!sys_prof_refcount_enter) + ret = register_trace_syscall_enter(prof_syscall_enter); + if (ret) { + pr_info("event trace: Could not activate" + "syscall entry trace point"); + } else { + set_bit(num, enabled_prof_enter_syscalls); + sys_prof_refcount_enter++; + } + mutex_unlock(&syscall_trace_lock); + return ret; +} + +void unreg_prof_syscall_enter(char *name) +{ + int num; + + num = syscall_name_to_nr(name); + if (num < 0 || num >= FTRACE_SYSCALL_MAX) + return; + + mutex_lock(&syscall_trace_lock); + sys_prof_refcount_enter--; + clear_bit(num, enabled_prof_enter_syscalls); + if (!sys_prof_refcount_enter) + unregister_trace_syscall_enter(prof_syscall_enter); + mutex_unlock(&syscall_trace_lock); +} + +static void prof_syscall_exit(struct pt_regs *regs, long ret) +{ + struct syscall_metadata *sys_data; + int syscall_nr; + + syscall_nr = syscall_get_nr(current, regs); + if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) + return; + + sys_data = syscall_nr_to_meta(syscall_nr); + if (!sys_data) + return; + + perf_tpcounter_event(sys_data->exit_id, 0, 1, NULL, 0); +} + +int reg_prof_syscall_exit(char *name) +{ + int ret = 0; + int num; + + num = syscall_name_to_nr(name); + if (num < 0 || num >= FTRACE_SYSCALL_MAX) + return -ENOSYS; + + mutex_lock(&syscall_trace_lock); + if (!sys_prof_refcount_exit) + ret = register_trace_syscall_exit(prof_syscall_exit); + if (ret) { + pr_info("event trace: Could not activate" + "syscall entry trace point"); + } else { + set_bit(num, enabled_prof_exit_syscalls); + sys_prof_refcount_exit++; + } + mutex_unlock(&syscall_trace_lock); + return ret; +} + +void unreg_prof_syscall_exit(char *name) +{ + int num; + + num = syscall_name_to_nr(name); + if (num < 0 || num >= FTRACE_SYSCALL_MAX) + return; + + mutex_lock(&syscall_trace_lock); + sys_prof_refcount_exit--; + clear_bit(num, enabled_prof_exit_syscalls); + if (!sys_prof_refcount_exit) + unregister_trace_syscall_exit(prof_syscall_exit); + mutex_unlock(&syscall_trace_lock); +} + +#endif + + -- cgit v1.2.3 From e8f9f4d79a677f55c8ec3acbe87b33a87e2df0de Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Aug 2009 17:42:52 +0200 Subject: tracing: Add ftrace event call parameter to its field descriptor handler Add the struct ftrace_event_call as a parameter of its show_format() callback. This way we can use it from the syscall trace events to retrieve the syscall name from the ftrace event call parameter and describe its fields using the syscalls metadata. Signed-off-by: Frederic Weisbecker Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Cc: Jason Baron --- include/linux/ftrace_event.h | 3 ++- include/trace/ftrace.h | 3 ++- kernel/trace/trace_events.c | 2 +- kernel/trace/trace_export.c | 6 ++++-- 4 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 8544f121d9f1..189806b6e69e 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -116,7 +116,8 @@ struct ftrace_event_call { void (*unregfunc)(void *); int id; int (*raw_init)(void); - int (*show_format)(struct trace_seq *s); + int (*show_format)(struct ftrace_event_call *call, + struct trace_seq *s); int (*define_fields)(void); struct list_head fields; int filter_active; diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 46d81b5e8610..b250b0616571 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -151,7 +151,8 @@ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ static int \ -ftrace_format_##call(struct trace_seq *s) \ +ftrace_format_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ { \ struct ftrace_raw_##call field __attribute__((unused)); \ int ret = 0; \ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 1d289e2d6693..b568ade8f453 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -576,7 +576,7 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_printf(s, "format:\n"); trace_write_header(s); - r = call->show_format(s); + r = call->show_format(call, s); if (!r) { /* * ug! The format output is bigger than a PAGE!! diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index d06cf898dc86..956d4bc675e5 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -60,7 +60,8 @@ extern void __bad_type_size(void); #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ static int \ -ftrace_format_##call(struct trace_seq *s) \ +ftrace_format_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ { \ struct args field; \ int ret; \ @@ -76,7 +77,8 @@ ftrace_format_##call(struct trace_seq *s) \ #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ tpfmt) \ static int \ -ftrace_format_##call(struct trace_seq *s) \ +ftrace_format_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ { \ struct args field; \ int ret; \ -- cgit v1.2.3 From dc4ddb4c0b7348f1c9759ae8a9e7d734dc1cda82 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Aug 2009 19:03:54 +0200 Subject: tracing: Add fields format definition for syscall events Define the format of the syscall trace fields to parse the binary values from a raw trace using the syscall events "format" file. This is defined dynamically using the syscalls metadata. It prepares the export of syscall event raw records to perf counters. Example: $ cat /debug/tracing/events/syscalls/sys_enter_sched_getparam/format name: sys_enter_sched_getparam ID: 39 format: field:unsigned short common_type; offset:0; size:2; field:unsigned char common_flags; offset:2; size:1; field:unsigned char common_preempt_count; offset:3; size:1; field:int common_pid; offset:4; size:4; field:int common_tgid; offset:8; size:4; field:pid_t pid; offset:12; size:8; field:struct sched_param * param; offset:20; size:8; print fmt: "pid: 0x%08lx, param: 0x%08lx", ((unsigned long)(REC->pid)), ((unsigned long)(REC->param)) Signed-off-by: Frederic Weisbecker Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Cc: Jason Baron --- include/linux/syscalls.h | 1 + include/trace/syscall.h | 2 ++ kernel/trace/trace_syscalls.c | 46 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 5541e75e140a..87d06c173ddc 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -189,6 +189,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .system = "syscalls", \ .event = &event_syscall_enter, \ .raw_init = init_enter_##sname, \ + .show_format = ftrace_format_syscall, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ .data = "sys"#sname, \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 3ab6dd18fa3a..0cb03625edd9 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -55,6 +55,8 @@ extern int reg_event_syscall_enter(void *ptr); extern void unreg_event_syscall_enter(void *ptr); extern int reg_event_syscall_exit(void *ptr); extern void unreg_event_syscall_exit(void *ptr); +extern int +ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index f4eaec3d559a..9ee6386cf842 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -105,6 +105,52 @@ print_syscall_exit(struct trace_iterator *iter, int flags) return TRACE_TYPE_HANDLED; } +int ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s) +{ + int i; + int nr; + int ret = 0; + struct syscall_metadata *entry; + int offset = sizeof(struct trace_entry); + + nr = syscall_name_to_nr((char *)call->data); + entry = syscall_nr_to_meta(nr); + + if (!entry) + return ret; + + for (i = 0; i < entry->nb_args; i++) { + ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i], + entry->args[i]); + if (!ret) + return 0; + ret = trace_seq_printf(s, "\toffset:%d;\tsize:%lu;\n", offset, + sizeof(unsigned long)); + if (!ret) + return 0; + offset += sizeof(unsigned long); + } + + trace_seq_printf(s, "\nprint fmt: \""); + for (i = 0; i < entry->nb_args; i++) { + ret = trace_seq_printf(s, "%s: 0x%%0%lulx%s", entry->args[i], + sizeof(unsigned long), + i == entry->nb_args - 1 ? "\", " : ", "); + if (!ret) + return 0; + } + + for (i = 0; i < entry->nb_args; i++) { + ret = trace_seq_printf(s, "((unsigned long)(REC->%s))%s", + entry->args[i], + i == entry->nb_args - 1 ? "\n" : ", "); + if (!ret) + return 0; + } + + return ret; +} + void ftrace_syscall_enter(struct pt_regs *regs, long id) { struct syscall_trace_enter *entry; -- cgit v1.2.3 From 7ead8b8313d92b3a69a1a61b0dcbc4cd66c960dc Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 17 Aug 2009 16:56:28 +0800 Subject: tracing/events: Add module tracepoints Add trace points to trace module_load, module_free, module_get, module_put and module_request, and use trace_event facility to get the trace output. Here's the sample output: TASK-PID CPU# TIMESTAMP FUNCTION | | | | | <...>-42 [000] 1.758380: module_request: fb0 wait=1 call_site=fb_open ... <...>-60 [000] 3.269403: module_load: scsi_wait_scan <...>-60 [000] 3.269432: module_put: scsi_wait_scan call_site=sys_init_module refcnt=0 <...>-61 [001] 3.273168: module_free: scsi_wait_scan ... <...>-1021 [000] 13.836081: module_load: sunrpc <...>-1021 [000] 13.840589: module_put: sunrpc call_site=sys_init_module refcnt=-1 <...>-1027 [000] 13.848098: module_get: sunrpc call_site=try_module_get refcnt=0 <...>-1027 [000] 13.848308: module_get: sunrpc call_site=get_filesystem refcnt=1 <...>-1027 [000] 13.848692: module_put: sunrpc call_site=put_filesystem refcnt=0 ... modprobe-2587 [001] 1088.437213: module_load: trace_events_sample F modprobe-2587 [001] 1088.437786: module_put: trace_events_sample call_site=sys_init_module refcnt=0 Note: - the taints flag can be 'F', 'C' and/or 'P' if mod->taints != 0 - the module refcnt is percpu, so it can be negative in a specific cpu Signed-off-by: Li Zefan Acked-by: Rusty Russell Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Rusty Russell LKML-Reference: <4A891B3C.5030608@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/module.h | 14 ++++- include/trace/events/module.h | 126 ++++++++++++++++++++++++++++++++++++++++++ kernel/kmod.c | 4 ++ kernel/module.c | 11 ++++ 4 files changed, 152 insertions(+), 3 deletions(-) create mode 100644 include/trace/events/module.h (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 098bdb7bfacf..f8f92d015efe 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,10 +17,12 @@ #include #include #include -#include +#include #include +#include + /* Not Yet Implemented */ #define MODULE_SUPPORTED_DEVICE(name) @@ -462,7 +464,10 @@ static inline local_t *__module_ref_addr(struct module *mod, int cpu) static inline void __module_get(struct module *module) { if (module) { - local_inc(__module_ref_addr(module, get_cpu())); + unsigned int cpu = get_cpu(); + local_inc(__module_ref_addr(module, cpu)); + trace_module_get(module, _THIS_IP_, + local_read(__module_ref_addr(module, cpu))); put_cpu(); } } @@ -473,8 +478,11 @@ static inline int try_module_get(struct module *module) if (module) { unsigned int cpu = get_cpu(); - if (likely(module_is_live(module))) + if (likely(module_is_live(module))) { local_inc(__module_ref_addr(module, cpu)); + trace_module_get(module, _THIS_IP_, + local_read(__module_ref_addr(module, cpu))); + } else ret = 0; put_cpu(); diff --git a/include/trace/events/module.h b/include/trace/events/module.h new file mode 100644 index 000000000000..84160fb18478 --- /dev/null +++ b/include/trace/events/module.h @@ -0,0 +1,126 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM module + +#if !defined(_TRACE_MODULE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MODULE_H + +#include + +#ifdef CONFIG_MODULES + +struct module; + +#define show_module_flags(flags) __print_flags(flags, "", \ + { (1UL << TAINT_PROPRIETARY_MODULE), "P" }, \ + { (1UL << TAINT_FORCED_MODULE), "F" }, \ + { (1UL << TAINT_CRAP), "C" }) + +TRACE_EVENT(module_load, + + TP_PROTO(struct module *mod), + + TP_ARGS(mod), + + TP_STRUCT__entry( + __field( unsigned int, taints ) + __string( name, mod->name ) + ), + + TP_fast_assign( + __entry->taints = mod->taints; + __assign_str(name, mod->name); + ), + + TP_printk("%s %s", __get_str(name), show_module_flags(__entry->taints)) +); + +TRACE_EVENT(module_free, + + TP_PROTO(struct module *mod), + + TP_ARGS(mod), + + TP_STRUCT__entry( + __string( name, mod->name ) + ), + + TP_fast_assign( + __assign_str(name, mod->name); + ), + + TP_printk("%s", __get_str(name)) +); + +TRACE_EVENT(module_get, + + TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + + TP_ARGS(mod, ip, refcnt), + + TP_STRUCT__entry( + __field( unsigned long, ip ) + __field( int, refcnt ) + __string( name, mod->name ) + ), + + TP_fast_assign( + __entry->ip = ip; + __entry->refcnt = refcnt; + __assign_str(name, mod->name); + ), + + TP_printk("%s call_site=%pf refcnt=%d", + __get_str(name), (void *)__entry->ip, __entry->refcnt) +); + +TRACE_EVENT(module_put, + + TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + + TP_ARGS(mod, ip, refcnt), + + TP_STRUCT__entry( + __field( unsigned long, ip ) + __field( int, refcnt ) + __string( name, mod->name ) + ), + + TP_fast_assign( + __entry->ip = ip; + __entry->refcnt = refcnt; + __assign_str(name, mod->name); + ), + + TP_printk("%s call_site=%pf refcnt=%d", + __get_str(name), (void *)__entry->ip, __entry->refcnt) +); + +TRACE_EVENT(module_request, + + TP_PROTO(char *name, bool wait, unsigned long ip), + + TP_ARGS(name, wait, ip), + + TP_STRUCT__entry( + __field( bool, wait ) + __field( unsigned long, ip ) + __string( name, name ) + ), + + TP_fast_assign( + __entry->wait = wait; + __entry->ip = ip; + __assign_str(name, name); + ), + + TP_printk("%s wait=%d call_site=%pf", + __get_str(name), (int)__entry->wait, (void *)__entry->ip) +); + +#endif /* CONFIG_MODULES */ + +#endif /* _TRACE_MODULE_H */ + +/* This part must be outside protection */ +#include + diff --git a/kernel/kmod.c b/kernel/kmod.c index 385c31a1bdbf..a92280870e30 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -37,6 +37,8 @@ #include #include +#include + extern int max_threads; static struct workqueue_struct *khelper_wq; @@ -108,6 +110,8 @@ int __request_module(bool wait, const char *fmt, ...) return -ENOMEM; } + trace_module_request(module_name, wait, _RET_IP_); + ret = call_usermodehelper(modprobe_path, argv, envp, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC); atomic_dec(&kmod_concurrent); diff --git a/kernel/module.c b/kernel/module.c index fd1411403558..b1821438694e 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -55,6 +55,11 @@ #include #include +#define CREATE_TRACE_POINTS +#include + +EXPORT_TRACEPOINT_SYMBOL(module_get); + #if 0 #define DEBUGP printk #else @@ -940,6 +945,8 @@ void module_put(struct module *module) if (module) { unsigned int cpu = get_cpu(); local_dec(__module_ref_addr(module, cpu)); + trace_module_put(module, _RET_IP_, + local_read(__module_ref_addr(module, cpu))); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); @@ -1491,6 +1498,8 @@ static int __unlink_module(void *_mod) /* Free a module, remove from lists, etc (must hold module_mutex). */ static void free_module(struct module *mod) { + trace_module_free(mod); + /* Delete from various lists */ stop_machine(__unlink_module, mod, NULL); remove_notes_attrs(mod); @@ -2358,6 +2367,8 @@ static noinline struct module *load_module(void __user *umod, /* Get rid of temporary copy */ vfree(hdr); + trace_module_load(mod); + /* Done! */ return mod; -- cgit v1.2.3 From 10a5b66f625904ad5a2867cf7a28073e1236ff32 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 19 Aug 2009 15:53:05 +0800 Subject: tracing/syscalls: Add fields format for exit events Add "format" file for syscall exit events: # cat events/syscalls/sys_exit_open/format name: sys_exit_open ID: 344 format: field:unsigned short common_type; offset:0; size:2; field:unsigned char common_flags; offset:2; size:1; field:unsigned char common_preempt_count; offset:3; size:1; field:int common_pid; offset:4; size:4; field:int common_tgid; offset:8; size:4; field:int nr; offset:12; size:4; field:unsigned long ret; offset:16; size:4; Signed-off-by: Li Zefan Cc: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4A8BAF61.3060307@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 3 ++- include/trace/syscall.h | 6 ++++-- kernel/trace/trace_syscalls.c | 18 +++++++++++++++++- 3 files changed, 23 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 87d06c173ddc..8d57f77794ee 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -189,7 +189,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .system = "syscalls", \ .event = &event_syscall_enter, \ .raw_init = init_enter_##sname, \ - .show_format = ftrace_format_syscall, \ + .show_format = syscall_enter_format, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ .data = "sys"#sname, \ @@ -225,6 +225,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .system = "syscalls", \ .event = &event_syscall_exit, \ .raw_init = init_exit_##sname, \ + .show_format = syscall_exit_format, \ .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ .data = "sys"#sname, \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 0cb03625edd9..5ce85d75d31b 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -55,8 +55,10 @@ extern int reg_event_syscall_enter(void *ptr); extern void unreg_event_syscall_enter(void *ptr); extern int reg_event_syscall_exit(void *ptr); extern void unreg_event_syscall_exit(void *ptr); -extern int -ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); +extern int syscall_enter_format(struct ftrace_event_call *call, + struct trace_seq *s); +extern int syscall_exit_format(struct ftrace_event_call *call, + struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index d10daf0922b5..7336b6c265d7 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -97,7 +97,7 @@ extern char *__bad_type_size(void); __bad_type_size() : \ #type, #name, offsetof(typeof(trace), name), sizeof(trace.name) -int ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s) +int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) { int i; int nr; @@ -149,6 +149,22 @@ int ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s) return ret; } +int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) +{ + int ret; + struct syscall_trace_exit trace; + + ret = trace_seq_printf(s, + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", + SYSCALL_FIELD(int, nr), + SYSCALL_FIELD(unsigned long, ret)); + if (!ret) + return 0; + + return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); +} + void ftrace_syscall_enter(struct pt_regs *regs, long id) { struct syscall_trace_enter *entry; -- cgit v1.2.3 From 14be96c9716cb8c46dca94bd890defd7856e0734 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 19 Aug 2009 15:53:52 +0800 Subject: tracing/events: Add ftrace_event_call param to define_fields() This parameter is needed by syscall events to add define_fields() handler. Signed-off-by: Li Zefan Cc: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4A8BAF90.6060801@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 2 +- include/trace/ftrace.h | 3 +-- kernel/trace/trace_events.c | 2 +- kernel/trace/trace_export.c | 5 ++--- 4 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 189806b6e69e..35b3a4a5ba86 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -118,7 +118,7 @@ struct ftrace_event_call { int (*raw_init)(void); int (*show_format)(struct ftrace_event_call *call, struct trace_seq *s); - int (*define_fields)(void); + int (*define_fields)(struct ftrace_event_call *); struct list_head fields; int filter_active; struct event_filter *filter; diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index b250b0616571..4e81c9b37515 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -294,10 +294,9 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ int \ -ftrace_define_fields_##call(void) \ +ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ struct ftrace_raw_##call field; \ - struct ftrace_event_call *event_call = &event_##call; \ int ret; \ \ __common_field(int, type, 1); \ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index b568ade8f453..af8fb8ebef0b 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -941,7 +941,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, id); if (call->define_fields) { - ret = call->define_fields(); + ret = call->define_fields(call); if (ret < 0) { pr_warning("Could not initialize trace point" " events/%s\n", call->name); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 956d4bc675e5..cf2c752a25bf 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -119,7 +119,7 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ -int ftrace_define_fields_##call(void); \ +int ftrace_define_fields_##call(struct ftrace_event_call *event_call); \ static int ftrace_raw_init_event_##call(void); \ \ struct ftrace_event_call __used \ @@ -184,9 +184,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ int \ -ftrace_define_fields_##call(void) \ +ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ - struct ftrace_event_call *event_call = &event_##call; \ struct args field; \ int ret; \ \ -- cgit v1.2.3 From e647d6b314266adb904d4b84973eda0afa856946 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 19 Aug 2009 15:54:32 +0800 Subject: tracing/events: Add trace_define_common_fields() Extract duplicate code. Also prepare for the later patch. Signed-off-by: Li Zefan Cc: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4A8BAFB8.1010304@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 8 +------- include/trace/ftrace.h | 8 +++----- kernel/trace/trace_events.c | 22 ++++++++++++++++++++++ kernel/trace/trace_export.c | 8 +++----- 4 files changed, 29 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 35b3a4a5ba86..427cbae47f84 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -142,6 +142,7 @@ extern int filter_current_check_discard(struct ftrace_event_call *call, extern int trace_define_field(struct ftrace_event_call *call, char *type, char *name, int offset, int size, int is_signed); +extern int trace_define_common_fields(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < 0) @@ -166,11 +167,4 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) -#define __common_field(type, item, is_signed) \ - ret = trace_define_field(event_call, #type, "common_" #item, \ - offsetof(typeof(field.ent), item), \ - sizeof(field.ent.item), is_signed); \ - if (ret) \ - return ret; - #endif /* _LINUX_FTRACE_EVENT_H */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 4e81c9b37515..127400255e4c 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -299,11 +299,9 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ struct ftrace_raw_##call field; \ int ret; \ \ - __common_field(int, type, 1); \ - __common_field(unsigned char, flags, 0); \ - __common_field(unsigned char, preempt_count, 0); \ - __common_field(int, pid, 1); \ - __common_field(int, tgid, 1); \ + ret = trace_define_common_fields(event_call); \ + if (ret) \ + return ret; \ \ tstruct; \ \ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index af8fb8ebef0b..9c7ecfb3416f 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -62,6 +62,28 @@ err: } EXPORT_SYMBOL_GPL(trace_define_field); +#define __common_field(type, item) \ + ret = trace_define_field(call, #type, "common_" #item, \ + offsetof(typeof(ent), item), \ + sizeof(ent.item), \ + is_signed_type(type)); \ + if (ret) \ + return ret; + +int trace_define_common_fields(struct ftrace_event_call *call) +{ + int ret; + struct trace_entry ent; + + __common_field(unsigned short, type); + __common_field(unsigned char, flags); + __common_field(unsigned char, preempt_count); + __common_field(int, pid); + __common_field(int, tgid); + + return ret; +} + #ifdef CONFIG_MODULES static void trace_destroy_fields(struct ftrace_event_call *call) diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index cf2c752a25bf..70875303ae46 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -189,11 +189,9 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ struct args field; \ int ret; \ \ - __common_field(unsigned char, type, 0); \ - __common_field(unsigned char, flags, 0); \ - __common_field(unsigned char, preempt_count, 0); \ - __common_field(int, pid, 1); \ - __common_field(int, tgid, 1); \ + ret = trace_define_common_fields(event_call); \ + if (ret) \ + return ret; \ \ tstruct; \ \ -- cgit v1.2.3 From 540b7b8d65575c80162f2a0f38e1d313c92a6042 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 19 Aug 2009 15:54:51 +0800 Subject: tracing/syscalls: Add filtering support Add filtering support for syscall events: # echo 'mode == 0666' > events/syscalls/sys_enter_open # echo 'ret == 0' > events/syscalls/sys_exit_open # echo 1 > events/syscalls/sys_enter_open # echo 1 > events/syscalls/sys_exit_open # cat trace ... modprobe-3084 [001] 117.463140: sys_open(filename: 917d3e8, flags: 0, mode: 1b6) modprobe-3084 [001] 117.463176: sys_open -> 0x0 less-3086 [001] 117.510455: sys_open(filename: 9c6bdb8, flags: 8000, mode: 1b6) sendmail-2574 [001] 122.145840: sys_open(filename: b807a365, flags: 0, mode: 1b6) ... Signed-off-by: Li Zefan Cc: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4A8BAFCB.1040006@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 5 +++-- include/linux/syscalls.h | 16 +++++++++----- include/trace/syscall.h | 7 ++++++ kernel/trace/trace_events.c | 5 +++-- kernel/trace/trace_syscalls.c | 51 +++++++++++++++++++++++++++++++++++++++---- 5 files changed, 71 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 427cbae47f84..df5b085c4150 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -140,8 +140,9 @@ extern int filter_current_check_discard(struct ftrace_event_call *call, void *rec, struct ring_buffer_event *event); -extern int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size, int is_signed); +extern int trace_define_field(struct ftrace_event_call *call, + const char *type, const char *name, + int offset, int size, int is_signed); extern int trace_define_common_fields(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < 0) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 8d57f77794ee..f124c8995555 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -190,6 +190,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .event = &event_syscall_enter, \ .raw_init = init_enter_##sname, \ .show_format = syscall_enter_format, \ + .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ .data = "sys"#sname, \ @@ -226,6 +227,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .event = &event_syscall_exit, \ .raw_init = init_exit_##sname, \ .show_format = syscall_exit_format, \ + .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ .data = "sys"#sname, \ @@ -233,6 +235,8 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ } #define SYSCALL_METADATA(sname, nb) \ + SYSCALL_TRACE_ENTER_EVENT(sname); \ + SYSCALL_TRACE_EXIT_EVENT(sname); \ static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ __attribute__((section("__syscalls_metadata"))) \ @@ -241,20 +245,22 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .nb_args = nb, \ .types = types_##sname, \ .args = args_##sname, \ - }; \ - SYSCALL_TRACE_ENTER_EVENT(sname); \ - SYSCALL_TRACE_EXIT_EVENT(sname); + .enter_event = &event_enter_##sname, \ + .exit_event = &event_exit_##sname, \ + }; #define SYSCALL_DEFINE0(sname) \ + SYSCALL_TRACE_ENTER_EVENT(_##sname); \ + SYSCALL_TRACE_EXIT_EVENT(_##sname); \ static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ __attribute__((section("__syscalls_metadata"))) \ __syscall_meta_##sname = { \ .name = "sys_"#sname, \ .nb_args = 0, \ + .enter_event = &event_enter__##sname, \ + .exit_event = &event_exit__##sname, \ }; \ - SYSCALL_TRACE_ENTER_EVENT(_##sname); \ - SYSCALL_TRACE_EXIT_EVENT(_##sname); \ asmlinkage long sys_##sname(void) #else #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5ce85d75d31b..9661dd406b93 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -34,6 +34,8 @@ DECLARE_TRACE_WITH_CALLBACK(syscall_exit, * @args: list of args as strings (args[i] matches types[i]) * @enter_id: associated ftrace enter event id * @exit_id: associated ftrace exit event id + * @enter_event: associated syscall_enter trace event + * @exit_event: associated syscall_exit trace event */ struct syscall_metadata { const char *name; @@ -42,6 +44,9 @@ struct syscall_metadata { const char **args; int enter_id; int exit_id; + + struct ftrace_event_call *enter_event; + struct ftrace_event_call *exit_event; }; #ifdef CONFIG_FTRACE_SYSCALLS @@ -59,6 +64,8 @@ extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); extern int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s); +extern int syscall_enter_define_fields(struct ftrace_event_call *call); +extern int syscall_exit_define_fields(struct ftrace_event_call *call); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 9c7ecfb3416f..79d352027a61 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,8 +27,8 @@ DEFINE_MUTEX(event_mutex); LIST_HEAD(ftrace_events); -int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size, int is_signed) +int trace_define_field(struct ftrace_event_call *call, const char *type, + const char *name, int offset, int size, int is_signed) { struct ftrace_event_field *field; @@ -83,6 +83,7 @@ int trace_define_common_fields(struct ftrace_event_call *call) return ret; } +EXPORT_SYMBOL_GPL(trace_define_common_fields); #ifdef CONFIG_MODULES diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 7336b6c265d7..28e4dae4af21 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -165,6 +165,49 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); } +int syscall_enter_define_fields(struct ftrace_event_call *call) +{ + struct syscall_trace_enter trace; + struct syscall_metadata *meta; + int ret; + int nr; + int i; + int offset = offsetof(typeof(trace), args); + + nr = syscall_name_to_nr(call->data); + meta = syscall_nr_to_meta(nr); + + if (!meta) + return 0; + + ret = trace_define_common_fields(call); + if (ret) + return ret; + + for (i = 0; i < meta->nb_args; i++) { + ret = trace_define_field(call, meta->types[i], + meta->args[i], offset, + sizeof(unsigned long), 0); + offset += sizeof(unsigned long); + } + + return ret; +} + +int syscall_exit_define_fields(struct ftrace_event_call *call) +{ + struct syscall_trace_exit trace; + int ret; + + ret = trace_define_common_fields(call); + if (ret) + return ret; + + ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0); + + return ret; +} + void ftrace_syscall_enter(struct pt_regs *regs, long id) { struct syscall_trace_enter *entry; @@ -192,8 +235,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) entry->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); - trace_current_buffer_unlock_commit(event, 0, 0); - trace_wake_up(); + if (!filter_current_check_discard(sys_data->enter_event, entry, event)) + trace_current_buffer_unlock_commit(event, 0, 0); } void ftrace_syscall_exit(struct pt_regs *regs, long ret) @@ -220,8 +263,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) entry->nr = syscall_nr; entry->ret = syscall_get_return_value(current, regs); - trace_current_buffer_unlock_commit(event, 0, 0); - trace_wake_up(); + if (!filter_current_check_discard(sys_data->exit_event, entry, event)) + trace_current_buffer_unlock_commit(event, 0, 0); } int reg_event_syscall_enter(void *ptr) -- cgit v1.2.3 From 97419875865859fd2403e66266c02ce028e2f5ab Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Mon, 24 Aug 2009 14:43:13 -0700 Subject: tracing: Move tracepoint callbacks from declaration to definition It's not strictly correct for the tracepoint reg/unreg callbacks to occur when a client is hooking up, because the actual tracepoint may not be present yet. This happens to be fine for syscall, since that's in the core kernel, but it would cause problems for tracepoints defined in a module that hasn't been loaded yet. It also means the reg/unreg has to be EXPORTed for any modules to use the tracepoint (as in SystemTap). This patch removes DECLARE_TRACE_WITH_CALLBACK, and instead introduces DEFINE_TRACE_FN which stores the callbacks in struct tracepoint. The callbacks are used now when the active state of the tracepoint changes in set_tracepoint & disable_tracepoint. This also introduces TRACE_EVENT_FN, so ftrace events can also provide registration callbacks if needed. Signed-off-by: Josh Stone Cc: Jason Baron Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Li Zefan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Lai Jiangshan Cc: Paul Mundt Cc: Martin Schwidefsky Cc: Heiko Carstens LKML-Reference: <1251150194-1713-4-git-send-email-jistone@redhat.com> Signed-off-by: Frederic Weisbecker --- arch/s390/kernel/ptrace.c | 4 ++-- arch/x86/kernel/ptrace.c | 4 ++-- include/linux/tracepoint.h | 46 +++++++++++++++++--------------------------- include/trace/define_trace.h | 5 +++++ include/trace/ftrace.h | 9 +++++++++ include/trace/syscall.h | 12 ++++-------- kernel/tracepoint.c | 14 +++++++++----- 7 files changed, 49 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 9d3dcfa79ea2..c05b44b80c23 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -51,8 +51,8 @@ #include "compat_ptrace.h" #endif -DEFINE_TRACE(syscall_enter); -DEFINE_TRACE(syscall_exit); +DEFINE_TRACE_FN(syscall_enter, syscall_regfunc, syscall_unregfunc); +DEFINE_TRACE_FN(syscall_exit, syscall_regfunc, syscall_unregfunc); enum s390_regset { REGSET_GENERAL, diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a909afef44f4..31e9b97ec4d6 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -37,8 +37,8 @@ #include -DEFINE_TRACE(syscall_enter); -DEFINE_TRACE(syscall_exit); +DEFINE_TRACE_FN(syscall_enter, syscall_regfunc, syscall_unregfunc); +DEFINE_TRACE_FN(syscall_exit, syscall_regfunc, syscall_unregfunc); #include "tls.h" diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 5984ed04c03b..846a4ae501eb 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -23,6 +23,8 @@ struct tracepoint; struct tracepoint { const char *name; /* Tracepoint name */ int state; /* State. */ + void (*regfunc)(void); + void (*unregfunc)(void); void **funcs; } __attribute__((aligned(32))); /* * Aligned on 32 bytes because it is @@ -60,10 +62,8 @@ struct tracepoint { * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. - * An optional set of (un)registration functions can be passed to perform any - * additional (un)registration work. */ -#define DECLARE_TRACE_WITH_CALLBACK(name, proto, args, reg, unreg) \ +#define DECLARE_TRACE(name, proto, args) \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ @@ -73,36 +73,23 @@ struct tracepoint { } \ static inline int register_trace_##name(void (*probe)(proto)) \ { \ - int ret; \ - void (*func)(void) = reg; \ - \ - ret = tracepoint_probe_register(#name, (void *)probe); \ - if (func && !ret) \ - func(); \ - return ret; \ + return tracepoint_probe_register(#name, (void *)probe); \ } \ static inline int unregister_trace_##name(void (*probe)(proto)) \ { \ - int ret; \ - void (*func)(void) = unreg; \ - \ - ret = tracepoint_probe_unregister(#name, (void *)probe);\ - if (func && !ret) \ - func(); \ - return ret; \ + return tracepoint_probe_unregister(#name, (void *)probe);\ } -#define DECLARE_TRACE(name, proto, args) \ - DECLARE_TRACE_WITH_CALLBACK(name, TP_PROTO(proto), TP_ARGS(args),\ - NULL, NULL); - -#define DEFINE_TRACE(name) \ +#define DEFINE_TRACE_FN(name, reg, unreg) \ static const char __tpstrtab_##name[] \ __attribute__((section("__tracepoints_strings"))) = #name; \ struct tracepoint __tracepoint_##name \ __attribute__((section("__tracepoints"), aligned(32))) = \ - { __tpstrtab_##name, 0, NULL } + { __tpstrtab_##name, 0, reg, unreg, NULL } + +#define DEFINE_TRACE(name) \ + DEFINE_TRACE_FN(name, NULL, NULL); #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ EXPORT_SYMBOL_GPL(__tracepoint_##name) @@ -113,7 +100,7 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end); #else /* !CONFIG_TRACEPOINTS */ -#define DECLARE_TRACE_WITH_CALLBACK(name, proto, args, reg, unreg) \ +#define DECLARE_TRACE(name, proto, args) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ { } \ static inline void trace_##name(proto) \ @@ -127,10 +114,7 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, return -ENOSYS; \ } -#define DECLARE_TRACE(name, proto, args) \ - DECLARE_TRACE_WITH_CALLBACK(name, TP_PROTO(proto), TP_ARGS(args),\ - NULL, NULL); - +#define DEFINE_TRACE_FN(name, reg, unreg) #define DEFINE_TRACE(name) #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) @@ -282,10 +266,16 @@ static inline void tracepoint_synchronize_unregister(void) * can also by used by generic instrumentation like SystemTap), and * it is also used to expose a structured trace record in * /sys/kernel/debug/tracing/events/. + * + * A set of (un)registration functions can be passed to the variant + * TRACE_EVENT_FN to perform any (un)registration work. */ #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define TRACE_EVENT_FN(name, proto, args, struct, \ + assign, print, reg, unreg) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #endif #endif diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index f7a7ae1e8f90..2a969850736d 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -26,6 +26,11 @@ #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ DEFINE_TRACE(name) +#undef TRACE_EVENT_FN +#define TRACE_EVENT_FN(name, proto, args, tstruct, \ + assign, print, reg, unreg) \ + DEFINE_TRACE_FN(name, reg, unreg) + #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 127400255e4c..3a0b44bdabf7 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -42,6 +42,15 @@ }; \ static struct ftrace_event_call event_##name +/* Callbacks are meaningless to ftrace. */ +#undef TRACE_EVENT_FN +#define TRACE_EVENT_FN(name, proto, args, tstruct, \ + assign, print, reg, unreg) \ + TRACE_EVENT(name, TP_PROTO(proto), TP_ARGS(args), \ + TP_STRUCT__entry(tstruct), \ + TP_fast_assign(assign), \ + TP_printk(print)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5dcb7e3a544c..4e1943001854 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -13,18 +13,14 @@ extern void syscall_regfunc(void); extern void syscall_unregfunc(void); -DECLARE_TRACE_WITH_CALLBACK(syscall_enter, +DECLARE_TRACE(syscall_enter, TP_PROTO(struct pt_regs *regs, long id), - TP_ARGS(regs, id), - syscall_regfunc, - syscall_unregfunc + TP_ARGS(regs, id) ); -DECLARE_TRACE_WITH_CALLBACK(syscall_exit, +DECLARE_TRACE(syscall_exit, TP_PROTO(struct pt_regs *regs, long ret), - TP_ARGS(regs, ret), - syscall_regfunc, - syscall_unregfunc + TP_ARGS(regs, ret) ); #endif diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 9e0a36f0e2a9..1a6a453b7efb 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -243,6 +243,11 @@ static void set_tracepoint(struct tracepoint_entry **entry, { WARN_ON(strcmp((*entry)->name, elem->name) != 0); + if (elem->regfunc && !elem->state && active) + elem->regfunc(); + else if (elem->unregfunc && elem->state && !active) + elem->unregfunc(); + /* * rcu_assign_pointer has a smp_wmb() which makes sure that the new * probe callbacks array is consistent before setting a pointer to it. @@ -262,6 +267,9 @@ static void set_tracepoint(struct tracepoint_entry **entry, */ static void disable_tracepoint(struct tracepoint *elem) { + if (elem->unregfunc && elem->state) + elem->unregfunc(); + elem->state = 0; rcu_assign_pointer(elem->funcs, NULL); } @@ -578,7 +586,7 @@ __initcall(init_tracepoints); #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS -static DEFINE_MUTEX(regfunc_mutex); +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */ static int sys_tracepoint_refcount; void syscall_regfunc(void) @@ -586,7 +594,6 @@ void syscall_regfunc(void) unsigned long flags; struct task_struct *g, *t; - mutex_lock(®func_mutex); if (!sys_tracepoint_refcount) { read_lock_irqsave(&tasklist_lock, flags); do_each_thread(g, t) { @@ -595,7 +602,6 @@ void syscall_regfunc(void) read_unlock_irqrestore(&tasklist_lock, flags); } sys_tracepoint_refcount++; - mutex_unlock(®func_mutex); } void syscall_unregfunc(void) @@ -603,7 +609,6 @@ void syscall_unregfunc(void) unsigned long flags; struct task_struct *g, *t; - mutex_lock(®func_mutex); sys_tracepoint_refcount--; if (!sys_tracepoint_refcount) { read_lock_irqsave(&tasklist_lock, flags); @@ -612,6 +617,5 @@ void syscall_unregfunc(void) } while_each_thread(g, t); read_unlock_irqrestore(&tasklist_lock, flags); } - mutex_unlock(®func_mutex); } #endif -- cgit v1.2.3 From 43b51ead3f752a3935116e5b1a94254b8573734f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 7 Aug 2009 10:33:22 +0800 Subject: tracing/filters: Add __field_ext() to TRACE_EVENT Add __field_ext(), so a field can be assigned to a specific filter_type, which matches a corresponding filter function. For example, a later patch will allow this: __field_ext(const char *, str, FILTER_PTR_STR); Signed-off-by: Li Zefan LKML-Reference: <4A7B9272.6050709@cn.fujitsu.com> [ Fixed a -1 to FILTER_OTHER Forward ported to latest kernel. ] Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 9 ++++++++- include/trace/ftrace.h | 31 ++++++++++++++++++++++++------- kernel/trace/trace_events.c | 11 ++++++++--- kernel/trace/trace_events_filter.c | 6 ------ kernel/trace/trace_export.c | 8 +++++--- kernel/trace/trace_syscalls.c | 6 ++++-- 6 files changed, 49 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index df5b085c4150..0440bea8f6bb 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -140,9 +140,16 @@ extern int filter_current_check_discard(struct ftrace_event_call *call, void *rec, struct ring_buffer_event *event); +enum { + FILTER_OTHER = 0, + FILTER_STATIC_STRING, + FILTER_DYN_STRING, +}; + extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, - int offset, int size, int is_signed); + int offset, int size, int is_signed, + int filter_type); extern int trace_define_common_fields(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < 0) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 127400255e4c..1b1f742a6045 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -21,6 +21,9 @@ #undef __field #define __field(type, item) type item; +#undef __field_ext +#define __field_ext(type, item, filter_type) type item; + #undef __array #define __array(type, item, len) type item[len]; @@ -62,7 +65,10 @@ */ #undef __field -#define __field(type, item); +#define __field(type, item) + +#undef __field_ext +#define __field_ext(type, item, filter_type) #undef __array #define __array(type, item, len) @@ -110,6 +116,9 @@ if (!ret) \ return 0; +#undef __field_ext +#define __field_ext(type, item, filter_type) __field(type, item) + #undef __array #define __array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ @@ -265,28 +274,33 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef __field -#define __field(type, item) \ +#undef __field_ext +#define __field_ext(type, item, filter_type) \ ret = trace_define_field(event_call, #type, #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), is_signed_type(type)); \ + sizeof(field.item), \ + is_signed_type(type), filter_type); \ if (ret) \ return ret; +#undef __field +#define __field(type, item) __field_ext(type, item, FILTER_OTHER) + #undef __array #define __array(type, item, len) \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0); \ + sizeof(field.item), 0, FILTER_OTHER); \ if (ret) \ return ret; #undef __dynamic_array #define __dynamic_array(type, item, len) \ ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \ - offsetof(typeof(field), __data_loc_##item), \ - sizeof(field.__data_loc_##item), 0); + offsetof(typeof(field), __data_loc_##item), \ + sizeof(field.__data_loc_##item), 0, \ + FILTER_OTHER); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) @@ -320,6 +334,9 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef __field #define __field(type, item) +#undef __field_ext +#define __field_ext(type, item, filter_type) + #undef __array #define __array(type, item, len) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 5740e90f4ca1..d33bcdeffe69 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -28,7 +28,8 @@ DEFINE_MUTEX(event_mutex); LIST_HEAD(ftrace_events); int trace_define_field(struct ftrace_event_call *call, const char *type, - const char *name, int offset, int size, int is_signed) + const char *name, int offset, int size, int is_signed, + int filter_type) { struct ftrace_event_field *field; @@ -44,7 +45,11 @@ int trace_define_field(struct ftrace_event_call *call, const char *type, if (!field->type) goto err; - field->filter_type = filter_assign_type(type); + if (filter_type == FILTER_OTHER) + field->filter_type = filter_assign_type(type); + else + field->filter_type = filter_type; + field->offset = offset; field->size = size; field->is_signed = is_signed; @@ -68,7 +73,7 @@ EXPORT_SYMBOL_GPL(trace_define_field); ret = trace_define_field(call, #type, "common_" #item, \ offsetof(typeof(ent), item), \ sizeof(ent.item), \ - is_signed_type(type)); \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 22e6d822bbaa..8a8e576733fc 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -475,12 +475,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps, return 0; } -enum { - FILTER_OTHER = 0, - FILTER_STATIC_STRING, - FILTER_DYN_STRING, -}; - int filter_assign_type(const char *type) { if (strstr(type, "__data_loc") && strstr(type, "char")) diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 70875303ae46..029a91f42287 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -158,7 +158,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #define TRACE_FIELD(type, item, assign) \ ret = trace_define_field(event_call, #type, #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), is_signed_type(type)); \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; @@ -166,7 +167,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #define TRACE_FIELD_SPECIAL(type, item, len, cmd) \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0); \ + sizeof(field.item), 0, FILTER_OTHER); \ if (ret) \ return ret; @@ -174,7 +175,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ ret = trace_define_field(event_call, #type, #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), is_signed); \ + sizeof(field.item), is_signed, \ + FILTER_OTHER); \ if (ret) \ return ret; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 46c1b977a2cb..97a2454760b0 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -194,7 +194,8 @@ int syscall_enter_define_fields(struct ftrace_event_call *call) for (i = 0; i < meta->nb_args; i++) { ret = trace_define_field(call, meta->types[i], meta->args[i], offset, - sizeof(unsigned long), 0); + sizeof(unsigned long), 0, + FILTER_OTHER); offset += sizeof(unsigned long); } @@ -210,7 +211,8 @@ int syscall_exit_define_fields(struct ftrace_event_call *call) if (ret) return ret; - ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0); + ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0, + FILTER_OTHER); return ret; } -- cgit v1.2.3 From 87a342f5db69d53ea70493bb1ec69c9047677038 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 7 Aug 2009 10:33:43 +0800 Subject: tracing/filters: Support filtering for char * strings Usually, char * entries are dangerous in traces because the string can be released whereas a pointer to it can still wait to be read from the ring buffer. But sometimes we can assume it's safe, like in case of RO data (eg: __file__ or __line__, used in bkl trace event). If these RO data are in a module and so is the call to the trace event, then it's safe, because the ring buffer will be flushed once this module get unloaded. To allow char * to be treated as a string: TRACE_EVENT(..., TP_STRUCT__entry( __field_ext(const char *, name, FILTER_PTR_STRING) ... ) ... ); The filtering will not dereference "char *" unless the developer explicitly sets FILTER_PTR_STR in __field_ext. Signed-off-by: Li Zefan LKML-Reference: <4A7B9287.90205@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 + kernel/trace/trace_events_filter.c | 26 +++++++++++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 0440bea8f6bb..ace2da9e0a0d 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -144,6 +144,7 @@ enum { FILTER_OTHER = 0, FILTER_STATIC_STRING, FILTER_DYN_STRING, + FILTER_PTR_STRING, }; extern int trace_define_field(struct ftrace_event_call *call, diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 8a8e576733fc..9f03082c81d8 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -163,6 +163,20 @@ static int filter_pred_string(struct filter_pred *pred, void *event, return match; } +/* Filter predicate for char * pointers */ +static int filter_pred_pchar(struct filter_pred *pred, void *event, + int val1, int val2) +{ + char **addr = (char **)(event + pred->offset); + int cmp, match; + + cmp = strncmp(*addr, pred->str_val, pred->str_len); + + match = (!cmp) ^ pred->not; + + return match; +} + /* * Filter predicate for dynamic sized arrays of characters. * These are implemented through a list of strings at the end @@ -489,7 +503,8 @@ int filter_assign_type(const char *type) static bool is_string_field(struct ftrace_event_field *field) { return field->filter_type == FILTER_DYN_STRING || - field->filter_type == FILTER_STATIC_STRING; + field->filter_type == FILTER_STATIC_STRING || + field->filter_type == FILTER_PTR_STRING; } static int is_legal_op(struct ftrace_event_field *field, int op) @@ -579,11 +594,16 @@ static int filter_add_pred(struct filter_parse_state *ps, } if (is_string_field(field)) { + pred->str_len = field->size; + if (field->filter_type == FILTER_STATIC_STRING) fn = filter_pred_string; - else + else if (field->filter_type == FILTER_DYN_STRING) fn = filter_pred_strloc; - pred->str_len = field->size; + else { + fn = filter_pred_pchar; + pred->str_len = strlen(pred->str_val); + } } else { if (field->is_signed) ret = strict_strtoll(pred->str_val, 0, &val); -- cgit v1.2.3 From 5ac35daa9343936038a3c9c4f4d6d3fe6a2a7bd8 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 25 Aug 2009 14:06:22 +0800 Subject: tracing/events: fix the include file dependencies The TRACE_EVENT depends on the include/linux/tracepoint.h first and include/trace/ftrace.h later, if we include the ftrace.h early, a building error will occur. Both define TRACE_EVENT in trace_a.h and trace_b.h, if we include those in .c file, like this: #define CREATE_TRACE_POINTS include include The above will not work, because the TRACE_EVENT was re-defined by the previous .h file. Reported-by: Wei Yongjun Signed-off-by: Xiao Guangrong LKML-Reference: <4A937F5E.3020802@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 3 +-- include/trace/define_trace.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 5984ed04c03b..81709854f7ab 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -180,6 +180,7 @@ static inline void tracepoint_synchronize_unregister(void) } #define PARAMS(args...) args +#endif #ifndef TRACE_EVENT /* @@ -287,5 +288,3 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #endif - -#endif diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index f7a7ae1e8f90..cd150b9d8e32 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -56,6 +56,7 @@ #include #endif +#undef TRACE_EVENT #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ -- cgit v1.2.3 From 7cb2e3ee2aeec5b83ecadba929a2dc575dd4008f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 26 Aug 2009 00:32:37 -0400 Subject: tracing: add comments to explain TRACE_EVENT out of protection The commit: commit 5ac35daa9343936038a3c9c4f4d6d3fe6a2a7bd8 Author: Xiao Guangrong tracing/events: fix the include file dependencies Moved the TRACE_EVENT out of the ifdef protection of tracepoints.h but uses the define of TRACE_EVENT itself as protection. This patch adds comments to explain why. Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 81709854f7ab..0341f2e2698a 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -180,7 +180,15 @@ static inline void tracepoint_synchronize_unregister(void) } #define PARAMS(args...) args -#endif + +#endif /* _LINUX_TRACEPOINT_H */ + +/* + * Note: we keep the TRACE_EVENT outside the include file ifdef protection. + * This is due to the way trace events work. If a file includes two + * trace event headers under one "CREATE_TRACE_POINTS" the first include + * will override the TRACE_EVENT and break the second include. + */ #ifndef TRACE_EVENT /* @@ -287,4 +295,5 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -#endif + +#endif /* ifdef TRACE_EVENT (see note above) */ -- cgit v1.2.3 From 8e254c1d183f0225ad21f9049641529e56cce4da Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 31 Aug 2009 16:49:41 +0800 Subject: tracing/filters: Defer pred allocation init_preds() allocates about 5392 bytes of memory (on x86_32) for a TRACE_EVENT. With my config, at system boot total memory occupied is: 5392 * (642 + 15) == 3459KB 642 == cat available_events | wc -l 15 == number of dirs in events/ftrace That's quite a lot, so we'd better defer memory allocation util it's needed, that's when filter is used. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Tom Zanussi Cc: Masami Hiramatsu LKML-Reference: <4A9B8EA5.6020700@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 1 - include/linux/syscalls.h | 2 -- include/trace/ftrace.h | 1 - kernel/trace/trace_events_filter.c | 50 +++++++++++++++++++++++++++++++------- kernel/trace/trace_export.c | 1 - 5 files changed, 41 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index ace2da9e0a0d..755480484eb6 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -133,7 +133,6 @@ struct ftrace_event_call { #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 128 -extern int init_preds(struct ftrace_event_call *call); extern void destroy_preds(struct ftrace_event_call *call); extern int filter_match_preds(struct ftrace_event_call *call, void *rec); extern int filter_current_check_discard(struct ftrace_event_call *call, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f124c8995555..a8e37821cc60 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -177,7 +177,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ event_enter_##sname.id = id; \ set_syscall_enter_id(num, id); \ INIT_LIST_HEAD(&event_enter_##sname.fields); \ - init_preds(&event_enter_##sname); \ return 0; \ } \ TRACE_SYS_ENTER_PROFILE(sname); \ @@ -214,7 +213,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ event_exit_##sname.id = id; \ set_syscall_exit_id(num, id); \ INIT_LIST_HEAD(&event_exit_##sname.fields); \ - init_preds(&event_exit_##sname); \ return 0; \ } \ TRACE_SYS_EXIT_PROFILE(sname); \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 57c56a998ee6..bfbc842600a1 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -622,7 +622,6 @@ static int ftrace_raw_init_event_##call(void) \ return -ENODEV; \ event_##call.id = id; \ INIT_LIST_HEAD(&event_##call.fields); \ - init_preds(&event_##call); \ return 0; \ } \ \ diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 9f03082c81d8..c6b2edfb7fe9 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -309,7 +309,7 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) struct event_filter *filter = call->filter; mutex_lock(&event_mutex); - if (filter->filter_string) + if (filter && filter->filter_string) trace_seq_printf(s, "%s\n", filter->filter_string); else trace_seq_printf(s, "none\n"); @@ -322,7 +322,7 @@ void print_subsystem_event_filter(struct event_subsystem *system, struct event_filter *filter = system->filter; mutex_lock(&event_mutex); - if (filter->filter_string) + if (filter && filter->filter_string) trace_seq_printf(s, "%s\n", filter->filter_string); else trace_seq_printf(s, "none\n"); @@ -390,6 +390,9 @@ void destroy_preds(struct ftrace_event_call *call) struct event_filter *filter = call->filter; int i; + if (!filter) + return; + for (i = 0; i < MAX_FILTER_PRED; i++) { if (filter->preds[i]) filter_free_pred(filter->preds[i]); @@ -400,7 +403,7 @@ void destroy_preds(struct ftrace_event_call *call) call->filter = NULL; } -int init_preds(struct ftrace_event_call *call) +static int init_preds(struct ftrace_event_call *call) { struct event_filter *filter; struct filter_pred *pred; @@ -410,7 +413,6 @@ int init_preds(struct ftrace_event_call *call) if (!call->filter) return -ENOMEM; - call->filter_active = 0; filter->n_preds = 0; filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); @@ -432,7 +434,28 @@ oom: return -ENOMEM; } -EXPORT_SYMBOL_GPL(init_preds); + +static int init_subsystem_preds(struct event_subsystem *system) +{ + struct ftrace_event_call *call; + int err; + + list_for_each_entry(call, &ftrace_events, list) { + if (!call->define_fields) + continue; + + if (strcmp(call->system, system->name) != 0) + continue; + + if (!call->filter) { + err = init_preds(call); + if (err) + return err; + } + } + + return 0; +} enum { FILTER_DISABLE_ALL, @@ -449,6 +472,9 @@ static void filter_free_subsystem_preds(struct event_subsystem *system, if (!call->define_fields) continue; + if (strcmp(call->system, system->name) != 0) + continue; + if (flag == FILTER_INIT_NO_RESET) { call->filter->no_reset = false; continue; @@ -457,10 +483,8 @@ static void filter_free_subsystem_preds(struct event_subsystem *system, if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset) continue; - if (!strcmp(call->system, system->name)) { - filter_disable_preds(call); - remove_filter_string(call->filter); - } + filter_disable_preds(call); + remove_filter_string(call->filter); } } @@ -1094,6 +1118,10 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) mutex_lock(&event_mutex); + err = init_preds(call); + if (err) + goto out_unlock; + if (!strcmp(strstrip(filter_string), "0")) { filter_disable_preds(call); remove_filter_string(call->filter); @@ -1139,6 +1167,10 @@ int apply_subsystem_event_filter(struct event_subsystem *system, mutex_lock(&event_mutex); + err = init_subsystem_preds(system); + if (err) + goto out_unlock; + if (!strcmp(strstrip(filter_string), "0")) { filter_free_subsystem_preds(system, FILTER_DISABLE_ALL); remove_filter_string(system->filter); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 029a91f42287..df1bf6e48bb9 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -135,7 +135,6 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ static int ftrace_raw_init_event_##call(void) \ { \ INIT_LIST_HEAD(&event_##call.fields); \ - init_preds(&event_##call); \ return 0; \ } \ -- cgit v1.2.3 From dc892f7339af2d125478b800edb9081d6149665b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 3 Sep 2009 15:33:41 -0400 Subject: ring-buffer: remove ring_buffer_event_discard The function ring_buffer_event_discard can be used on any item in the ring buffer, even after the item was committed. This function provides no safety nets and is very race prone. An item may be safely removed from the ring buffer before it is committed with the ring_buffer_discard_commit. Since there are currently no users of this function, and because this function is racey and error prone, this patch removes it altogether. Note, removing this function also allows the counters to ignore all discarded events (patches will follow). Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 14 -------------- kernel/trace/ring_buffer.c | 27 ++++++--------------------- 2 files changed, 6 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 7fca71693ae7..e061b4ecdc3a 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -74,20 +74,6 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event) return event->time_delta; } -/* - * ring_buffer_event_discard can discard any event in the ring buffer. - * it is up to the caller to protect against a reader from - * consuming it or a writer from wrapping and replacing it. - * - * No external protection is needed if this is called before - * the event is commited. But in that case it would be better to - * use ring_buffer_discard_commit. - * - * Note, if an event that has not been committed is discarded - * with ring_buffer_event_discard, it must still be committed. - */ -void ring_buffer_event_discard(struct ring_buffer_event *event); - /* * ring_buffer_discard_commit will remove an event that has not * ben committed yet. If this is used, then ring_buffer_unlock_commit diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9d939e7ca924..092fe0c8fdae 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2327,32 +2327,17 @@ static inline void rb_event_discard(struct ring_buffer_event *event) event->time_delta = 1; } -/** - * ring_buffer_event_discard - discard any event in the ring buffer - * @event: the event to discard - * - * Sometimes a event that is in the ring buffer needs to be ignored. - * This function lets the user discard an event in the ring buffer - * and then that event will not be read later. - * - * Note, it is up to the user to be careful with this, and protect - * against races. If the user discards an event that has been consumed - * it is possible that it could corrupt the ring buffer. - */ -void ring_buffer_event_discard(struct ring_buffer_event *event) -{ - rb_event_discard(event); -} -EXPORT_SYMBOL_GPL(ring_buffer_event_discard); - /** * ring_buffer_commit_discard - discard an event that has not been committed * @buffer: the ring buffer * @event: non committed event to discard * - * This is similar to ring_buffer_event_discard but must only be - * performed on an event that has not been committed yet. The difference - * is that this will also try to free the event from the ring buffer + * Sometimes an event that is in the ring buffer needs to be ignored. + * This function lets the user discard an event in the ring buffer + * and then that event will not be read later. + * + * This function only works if it is called before the the item has been + * committed. It will try to free the event from the ring buffer * if another event has not been added behind it. * * If another event has been added behind it, it will set the event -- cgit v1.2.3 From e77405ad80f53966524b5c31244e13fbbbecbd84 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 2 Sep 2009 14:17:06 -0400 Subject: tracing: pass around ring buffer instead of tracer The latency tracers (irqsoff and wakeup) can swap trace buffers on the fly. If an event is happening and has reserved data on one of the buffers, and the latency tracer swaps the global buffer with the max buffer, the result is that the event may commit the data to the wrong buffer. This patch changes the API to the trace recording to be recieve the buffer that was used to reserve a commit. Then this buffer can be passed in to the commit. Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 15 +++-- include/trace/ftrace.h | 15 +++-- kernel/trace/blktrace.c | 12 ++-- kernel/trace/trace.c | 117 ++++++++++++++++++++--------------- kernel/trace/trace.h | 17 ++--- kernel/trace/trace_boot.c | 12 ++-- kernel/trace/trace_events.c | 6 +- kernel/trace/trace_functions_graph.c | 14 +++-- kernel/trace/trace_mmiotrace.c | 10 +-- kernel/trace/trace_power.c | 18 ++++-- kernel/trace/trace_sched_switch.c | 18 +++--- kernel/trace/trace_syscalls.c | 18 +++--- 12 files changed, 163 insertions(+), 109 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 755480484eb6..23f7179bf74e 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -93,13 +93,17 @@ void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, int pc); struct ring_buffer_event * -trace_current_buffer_lock_reserve(int type, unsigned long len, +trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, + int type, unsigned long len, unsigned long flags, int pc); -void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, +void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event, unsigned long flags, int pc); -void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, +void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event, unsigned long flags, int pc); -void trace_current_buffer_discard_commit(struct ring_buffer_event *event); +void trace_current_buffer_discard_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event); void tracing_record_cmdline(struct task_struct *tsk); @@ -135,7 +139,8 @@ struct ftrace_event_call { extern void destroy_preds(struct ftrace_event_call *call); extern int filter_match_preds(struct ftrace_event_call *call, void *rec); -extern int filter_current_check_discard(struct ftrace_event_call *call, +extern int filter_current_check_discard(struct ring_buffer *buffer, + struct ftrace_event_call *call, void *rec, struct ring_buffer_event *event); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index bfbc842600a1..308bafd93325 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -460,13 +460,15 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * { * struct ring_buffer_event *event; * struct ftrace_raw_ *entry; <-- defined in stage 1 + * struct ring_buffer *buffer; * unsigned long irq_flags; * int pc; * * local_save_flags(irq_flags); * pc = preempt_count(); * - * event = trace_current_buffer_lock_reserve(event_.id, + * event = trace_current_buffer_lock_reserve(&buffer, + * event_.id, * sizeof(struct ftrace_raw_), * irq_flags, pc); * if (!event) @@ -476,7 +478,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * ; <-- Here we assign the entries by the __field and * __array macros. * - * trace_current_buffer_unlock_commit(event, irq_flags, pc); + * trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc); * } * * static int ftrace_raw_reg_event_(void) @@ -568,6 +570,7 @@ static void ftrace_raw_event_##call(proto) \ struct ftrace_event_call *event_call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ + struct ring_buffer *buffer; \ unsigned long irq_flags; \ int __data_size; \ int pc; \ @@ -577,7 +580,8 @@ static void ftrace_raw_event_##call(proto) \ \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ \ - event = trace_current_buffer_lock_reserve(event_##call.id, \ + event = trace_current_buffer_lock_reserve(&buffer, \ + event_##call.id, \ sizeof(*entry) + __data_size, \ irq_flags, pc); \ if (!event) \ @@ -589,8 +593,9 @@ static void ftrace_raw_event_##call(proto) \ \ { assign; } \ \ - if (!filter_current_check_discard(event_call, entry, event)) \ - trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ + if (!filter_current_check_discard(buffer, event_call, entry, event)) \ + trace_nowake_buffer_unlock_commit(buffer, \ + event, irq_flags, pc); \ } \ \ static int ftrace_raw_reg_event_##call(void *ptr) \ diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 1090b0aed9ba..243bafc2ec90 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -65,13 +65,15 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, { struct blk_io_trace *t; struct ring_buffer_event *event = NULL; + struct ring_buffer *buffer = NULL; int pc = 0; int cpu = smp_processor_id(); bool blk_tracer = blk_tracer_enabled; if (blk_tracer) { + buffer = blk_tr->buffer; pc = preempt_count(); - event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, + event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + len, 0, pc); if (!event) @@ -96,7 +98,7 @@ record_it: memcpy((void *) t + sizeof(*t), data, len); if (blk_tracer) - trace_buffer_unlock_commit(blk_tr, event, 0, pc); + trace_buffer_unlock_commit(buffer, event, 0, pc); } } @@ -179,6 +181,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, { struct task_struct *tsk = current; struct ring_buffer_event *event = NULL; + struct ring_buffer *buffer = NULL; struct blk_io_trace *t; unsigned long flags = 0; unsigned long *sequence; @@ -204,8 +207,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, if (blk_tracer) { tracing_record_cmdline(current); + buffer = blk_tr->buffer; pc = preempt_count(); - event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, + event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + pdu_len, 0, pc); if (!event) @@ -252,7 +256,7 @@ record_it: memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); if (blk_tracer) { - trace_buffer_unlock_commit(blk_tr, event, 0, pc); + trace_buffer_unlock_commit(buffer, event, 0, pc); return; } } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0418e2650d41..0c61836e30e7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -169,10 +169,11 @@ static struct trace_array global_trace; static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); -int filter_current_check_discard(struct ftrace_event_call *call, void *rec, +int filter_current_check_discard(struct ring_buffer *buffer, + struct ftrace_event_call *call, void *rec, struct ring_buffer_event *event) { - return filter_check_discard(call, rec, global_trace.buffer, event); + return filter_check_discard(call, rec, buffer, event); } EXPORT_SYMBOL_GPL(filter_current_check_discard); @@ -887,14 +888,15 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, } EXPORT_SYMBOL_GPL(tracing_generic_entry_update); -struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, - int type, - unsigned long len, - unsigned long flags, int pc) +struct ring_buffer_event * +trace_buffer_lock_reserve(struct ring_buffer *buffer, + int type, + unsigned long len, + unsigned long flags, int pc) { struct ring_buffer_event *event; - event = ring_buffer_lock_reserve(tr->buffer, len); + event = ring_buffer_lock_reserve(buffer, len); if (event != NULL) { struct trace_entry *ent = ring_buffer_event_data(event); @@ -905,53 +907,59 @@ struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, return event; } -static inline void __trace_buffer_unlock_commit(struct trace_array *tr, - struct ring_buffer_event *event, - unsigned long flags, int pc, - int wake) +static inline void +__trace_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event, + unsigned long flags, int pc, + int wake) { - ring_buffer_unlock_commit(tr->buffer, event); + ring_buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, flags, 6, pc); - ftrace_trace_userstack(tr, flags, pc); + ftrace_trace_stack(buffer, flags, 6, pc); + ftrace_trace_userstack(buffer, flags, pc); if (wake) trace_wake_up(); } -void trace_buffer_unlock_commit(struct trace_array *tr, - struct ring_buffer_event *event, - unsigned long flags, int pc) +void trace_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event, + unsigned long flags, int pc) { - __trace_buffer_unlock_commit(tr, event, flags, pc, 1); + __trace_buffer_unlock_commit(buffer, event, flags, pc, 1); } struct ring_buffer_event * -trace_current_buffer_lock_reserve(int type, unsigned long len, +trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, + int type, unsigned long len, unsigned long flags, int pc) { - return trace_buffer_lock_reserve(&global_trace, + *current_rb = global_trace.buffer; + return trace_buffer_lock_reserve(*current_rb, type, len, flags, pc); } EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve); -void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, +void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event, unsigned long flags, int pc) { - __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); + __trace_buffer_unlock_commit(buffer, event, flags, pc, 1); } EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); -void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, - unsigned long flags, int pc) +void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event, + unsigned long flags, int pc) { - __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); + __trace_buffer_unlock_commit(buffer, event, flags, pc, 0); } EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); -void trace_current_buffer_discard_commit(struct ring_buffer_event *event) +void trace_current_buffer_discard_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event) { - ring_buffer_discard_commit(global_trace.buffer, event); + ring_buffer_discard_commit(buffer, event); } EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit); @@ -961,6 +969,7 @@ trace_function(struct trace_array *tr, int pc) { struct ftrace_event_call *call = &event_function; + struct ring_buffer *buffer = tr->buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; @@ -968,7 +977,7 @@ trace_function(struct trace_array *tr, if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry), + event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), flags, pc); if (!event) return; @@ -976,8 +985,8 @@ trace_function(struct trace_array *tr, entry->ip = ip; entry->parent_ip = parent_ip; - if (!filter_check_discard(call, entry, tr->buffer, event)) - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, buffer, event)) + ring_buffer_unlock_commit(buffer, event); } void @@ -990,7 +999,7 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, } #ifdef CONFIG_STACKTRACE -static void __ftrace_trace_stack(struct trace_array *tr, +static void __ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, int skip, int pc) { @@ -999,7 +1008,7 @@ static void __ftrace_trace_stack(struct trace_array *tr, struct stack_entry *entry; struct stack_trace trace; - event = trace_buffer_lock_reserve(tr, TRACE_STACK, + event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry), flags, pc); if (!event) return; @@ -1012,26 +1021,27 @@ static void __ftrace_trace_stack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace(&trace); - if (!filter_check_discard(call, entry, tr->buffer, event)) - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, buffer, event)) + ring_buffer_unlock_commit(buffer, event); } -void ftrace_trace_stack(struct trace_array *tr, unsigned long flags, int skip, - int pc) +void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, + int skip, int pc) { if (!(trace_flags & TRACE_ITER_STACKTRACE)) return; - __ftrace_trace_stack(tr, flags, skip, pc); + __ftrace_trace_stack(buffer, flags, skip, pc); } void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { - __ftrace_trace_stack(tr, flags, skip, pc); + __ftrace_trace_stack(tr->buffer, flags, skip, pc); } -void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags, int pc) +void +ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) { struct ftrace_event_call *call = &event_user_stack; struct ring_buffer_event *event; @@ -1041,7 +1051,7 @@ void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags, int pc) if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) return; - event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK, + event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, sizeof(*entry), flags, pc); if (!event) return; @@ -1055,8 +1065,8 @@ void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags, int pc) trace.entries = entry->caller; save_stack_trace_user(&trace); - if (!filter_check_discard(call, entry, tr->buffer, event)) - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, buffer, event)) + ring_buffer_unlock_commit(buffer, event); } #ifdef UNUSED @@ -1075,9 +1085,10 @@ ftrace_trace_special(void *__tr, { struct ring_buffer_event *event; struct trace_array *tr = __tr; + struct ring_buffer *buffer = tr->buffer; struct special_entry *entry; - event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL, + event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL, sizeof(*entry), 0, pc); if (!event) return; @@ -1085,7 +1096,7 @@ ftrace_trace_special(void *__tr, entry->arg1 = arg1; entry->arg2 = arg2; entry->arg3 = arg3; - trace_buffer_unlock_commit(tr, event, 0, pc); + trace_buffer_unlock_commit(buffer, event, 0, pc); } void @@ -1131,6 +1142,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) struct ftrace_event_call *call = &event_bprint; struct ring_buffer_event *event; + struct ring_buffer *buffer; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; struct bprint_entry *entry; @@ -1163,7 +1175,9 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) goto out_unlock; size = sizeof(*entry) + sizeof(u32) * len; - event = trace_buffer_lock_reserve(tr, TRACE_BPRINT, size, flags, pc); + buffer = tr->buffer; + event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, + flags, pc); if (!event) goto out_unlock; entry = ring_buffer_event_data(event); @@ -1171,8 +1185,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) entry->fmt = fmt; memcpy(entry->buf, trace_buf, sizeof(u32) * len); - if (!filter_check_discard(call, entry, tr->buffer, event)) - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, buffer, event)) + ring_buffer_unlock_commit(buffer, event); out_unlock: __raw_spin_unlock(&trace_buf_lock); @@ -1194,6 +1208,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) struct ftrace_event_call *call = &event_print; struct ring_buffer_event *event; + struct ring_buffer *buffer; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; int cpu, len = 0, size, pc; @@ -1222,7 +1237,9 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) trace_buf[len] = 0; size = sizeof(*entry) + len + 1; - event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc); + buffer = tr->buffer; + event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, + irq_flags, pc); if (!event) goto out_unlock; entry = ring_buffer_event_data(event); @@ -1230,8 +1247,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) memcpy(&entry->buf, trace_buf, len); entry->buf[len] = 0; - if (!filter_check_discard(call, entry, tr->buffer, event)) - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, buffer, event)) + ring_buffer_unlock_commit(buffer, event); out_unlock: __raw_spin_unlock(&trace_buf_lock); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ca070de36227..4d30414fe19a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -415,12 +415,13 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer); struct ring_buffer_event; -struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, - int type, - unsigned long len, - unsigned long flags, - int pc); -void trace_buffer_unlock_commit(struct trace_array *tr, +struct ring_buffer_event * +trace_buffer_lock_reserve(struct ring_buffer *buffer, + int type, + unsigned long len, + unsigned long flags, + int pc); +void trace_buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc); @@ -481,10 +482,10 @@ void update_max_tr_single(struct trace_array *tr, #endif /* CONFIG_TRACER_MAX_TRACE */ #ifdef CONFIG_STACKTRACE -void ftrace_trace_stack(struct trace_array *tr, unsigned long flags, +void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, int skip, int pc); -void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags, +void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc); void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 863139327816..19bfc75d467e 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -130,6 +130,7 @@ struct tracer boot_tracer __read_mostly = void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { struct ring_buffer_event *event; + struct ring_buffer *buffer; struct trace_boot_call *entry; struct trace_array *tr = boot_trace; @@ -142,13 +143,14 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) sprint_symbol(bt->func, (unsigned long)fn); preempt_disable(); - event = trace_buffer_lock_reserve(tr, TRACE_BOOT_CALL, + buffer = tr->buffer; + event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL, sizeof(*entry), 0, 0); if (!event) goto out; entry = ring_buffer_event_data(event); entry->boot_call = *bt; - trace_buffer_unlock_commit(tr, event, 0, 0); + trace_buffer_unlock_commit(buffer, event, 0, 0); out: preempt_enable(); } @@ -156,6 +158,7 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { struct ring_buffer_event *event; + struct ring_buffer *buffer; struct trace_boot_ret *entry; struct trace_array *tr = boot_trace; @@ -165,13 +168,14 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) sprint_symbol(bt->func, (unsigned long)fn); preempt_disable(); - event = trace_buffer_lock_reserve(tr, TRACE_BOOT_RET, + buffer = tr->buffer; + event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET, sizeof(*entry), 0, 0); if (!event) goto out; entry = ring_buffer_event_data(event); entry->boot_ret = *bt; - trace_buffer_unlock_commit(tr, event, 0, 0); + trace_buffer_unlock_commit(buffer, event, 0, 0); out: preempt_enable(); } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d33bcdeffe69..78b1ed230177 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1438,6 +1438,7 @@ static void function_test_events_call(unsigned long ip, unsigned long parent_ip) { struct ring_buffer_event *event; + struct ring_buffer *buffer; struct ftrace_entry *entry; unsigned long flags; long disabled; @@ -1455,7 +1456,8 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip) local_save_flags(flags); - event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry), + event = trace_current_buffer_lock_reserve(&buffer, + TRACE_FN, sizeof(*entry), flags, pc); if (!event) goto out; @@ -1463,7 +1465,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip) entry->ip = ip; entry->parent_ip = parent_ip; - trace_nowake_buffer_unlock_commit(event, flags, pc); + trace_nowake_buffer_unlock_commit(buffer, event, flags, pc); out: atomic_dec(&per_cpu(test_event_disable, cpu)); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 3f4a251b7d16..b3749a2c3132 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -173,19 +173,20 @@ static int __trace_graph_entry(struct trace_array *tr, { struct ftrace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; + struct ring_buffer *buffer = tr->buffer; struct ftrace_graph_ent_entry *entry; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return 0; - event = trace_buffer_lock_reserve(tr, TRACE_GRAPH_ENT, + event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, sizeof(*entry), flags, pc); if (!event) return 0; entry = ring_buffer_event_data(event); entry->graph_ent = *trace; - if (!filter_current_check_discard(call, entry, event)) - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_current_check_discard(buffer, call, entry, event)) + ring_buffer_unlock_commit(buffer, event); return 1; } @@ -236,19 +237,20 @@ static void __trace_graph_return(struct trace_array *tr, { struct ftrace_event_call *call = &event_funcgraph_exit; struct ring_buffer_event *event; + struct ring_buffer *buffer = tr->buffer; struct ftrace_graph_ret_entry *entry; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = trace_buffer_lock_reserve(tr, TRACE_GRAPH_RET, + event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); entry->ret = *trace; - if (!filter_current_check_discard(call, entry, event)) - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_current_check_discard(buffer, call, entry, event)) + ring_buffer_unlock_commit(buffer, event); } void trace_graph_return(struct ftrace_graph_ret *trace) diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index d53b45ed0806..c4c9bbda53d3 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -307,11 +307,12 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, struct mmiotrace_rw *rw) { + struct ring_buffer *buffer = tr->buffer; struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; int pc = preempt_count(); - event = trace_buffer_lock_reserve(tr, TRACE_MMIO_RW, + event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_RW, sizeof(*entry), 0, pc); if (!event) { atomic_inc(&dropped_count); @@ -319,7 +320,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, } entry = ring_buffer_event_data(event); entry->rw = *rw; - trace_buffer_unlock_commit(tr, event, 0, pc); + trace_buffer_unlock_commit(buffer, event, 0, pc); } void mmio_trace_rw(struct mmiotrace_rw *rw) @@ -333,11 +334,12 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data, struct mmiotrace_map *map) { + struct ring_buffer *buffer = tr->buffer; struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; int pc = preempt_count(); - event = trace_buffer_lock_reserve(tr, TRACE_MMIO_MAP, + event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_MAP, sizeof(*entry), 0, pc); if (!event) { atomic_inc(&dropped_count); @@ -345,7 +347,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, } entry = ring_buffer_event_data(event); entry->map = *map; - trace_buffer_unlock_commit(tr, event, 0, pc); + trace_buffer_unlock_commit(buffer, event, 0, pc); } void mmio_trace_mapping(struct mmiotrace_map *map) diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c index a5d5a4f7745b..fe1a00f1445a 100644 --- a/kernel/trace/trace_power.c +++ b/kernel/trace/trace_power.c @@ -38,6 +38,7 @@ static void probe_power_end(struct power_trace *it) { struct ftrace_event_call *call = &event_power; struct ring_buffer_event *event; + struct ring_buffer *buffer; struct trace_power *entry; struct trace_array_cpu *data; struct trace_array *tr = power_trace; @@ -45,18 +46,20 @@ static void probe_power_end(struct power_trace *it) if (!trace_power_enabled) return; + buffer = tr->buffer; + preempt_disable(); it->end = ktime_get(); data = tr->data[smp_processor_id()]; - event = trace_buffer_lock_reserve(tr, TRACE_POWER, + event = trace_buffer_lock_reserve(buffer, TRACE_POWER, sizeof(*entry), 0, 0); if (!event) goto out; entry = ring_buffer_event_data(event); entry->state_data = *it; - if (!filter_check_discard(call, entry, tr->buffer, event)) - trace_buffer_unlock_commit(tr, event, 0, 0); + if (!filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit(buffer, event, 0, 0); out: preempt_enable(); } @@ -66,6 +69,7 @@ static void probe_power_mark(struct power_trace *it, unsigned int type, { struct ftrace_event_call *call = &event_power; struct ring_buffer_event *event; + struct ring_buffer *buffer; struct trace_power *entry; struct trace_array_cpu *data; struct trace_array *tr = power_trace; @@ -73,6 +77,8 @@ static void probe_power_mark(struct power_trace *it, unsigned int type, if (!trace_power_enabled) return; + buffer = tr->buffer; + memset(it, 0, sizeof(struct power_trace)); it->state = level; it->type = type; @@ -81,14 +87,14 @@ static void probe_power_mark(struct power_trace *it, unsigned int type, it->end = it->stamp; data = tr->data[smp_processor_id()]; - event = trace_buffer_lock_reserve(tr, TRACE_POWER, + event = trace_buffer_lock_reserve(buffer, TRACE_POWER, sizeof(*entry), 0, 0); if (!event) goto out; entry = ring_buffer_event_data(event); entry->state_data = *it; - if (!filter_check_discard(call, entry, tr->buffer, event)) - trace_buffer_unlock_commit(tr, event, 0, 0); + if (!filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit(buffer, event, 0, 0); out: preempt_enable(); } diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index e1285d7b5488..5fca0f51fde4 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -28,10 +28,11 @@ tracing_sched_switch_trace(struct trace_array *tr, unsigned long flags, int pc) { struct ftrace_event_call *call = &event_context_switch; + struct ring_buffer *buffer = tr->buffer; struct ring_buffer_event *event; struct ctx_switch_entry *entry; - event = trace_buffer_lock_reserve(tr, TRACE_CTX, + event = trace_buffer_lock_reserve(buffer, TRACE_CTX, sizeof(*entry), flags, pc); if (!event) return; @@ -44,8 +45,8 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_state = next->state; entry->next_cpu = task_cpu(next); - if (!filter_check_discard(call, entry, tr->buffer, event)) - trace_buffer_unlock_commit(tr, event, flags, pc); + if (!filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit(buffer, event, flags, pc); } static void @@ -86,8 +87,9 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct ftrace_event_call *call = &event_wakeup; struct ring_buffer_event *event; struct ctx_switch_entry *entry; + struct ring_buffer *buffer = tr->buffer; - event = trace_buffer_lock_reserve(tr, TRACE_WAKE, + event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, sizeof(*entry), flags, pc); if (!event) return; @@ -100,10 +102,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); - if (!filter_check_discard(call, entry, tr->buffer, event)) - ring_buffer_unlock_commit(tr->buffer, event); - ftrace_trace_stack(tr, flags, 6, pc); - ftrace_trace_userstack(tr, flags, pc); + if (!filter_check_discard(call, entry, buffer, event)) + ring_buffer_unlock_commit(buffer, event); + ftrace_trace_stack(tr->buffer, flags, 6, pc); + ftrace_trace_userstack(tr->buffer, flags, pc); } static void diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 4f5fae6fad90..8712ce3c6a0e 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -223,6 +223,7 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) struct syscall_trace_enter *entry; struct syscall_metadata *sys_data; struct ring_buffer_event *event; + struct ring_buffer *buffer; int size; int syscall_nr; @@ -238,8 +239,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; - event = trace_current_buffer_lock_reserve(sys_data->enter_id, size, - 0, 0); + event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id, + size, 0, 0); if (!event) return; @@ -247,8 +248,9 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) entry->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); - if (!filter_current_check_discard(sys_data->enter_event, entry, event)) - trace_current_buffer_unlock_commit(event, 0, 0); + if (!filter_current_check_discard(buffer, sys_data->enter_event, + entry, event)) + trace_current_buffer_unlock_commit(buffer, event, 0, 0); } void ftrace_syscall_exit(struct pt_regs *regs, long ret) @@ -256,6 +258,7 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) struct syscall_trace_exit *entry; struct syscall_metadata *sys_data; struct ring_buffer_event *event; + struct ring_buffer *buffer; int syscall_nr; syscall_nr = syscall_get_nr(current, regs); @@ -268,7 +271,7 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) if (!sys_data) return; - event = trace_current_buffer_lock_reserve(sys_data->exit_id, + event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id, sizeof(*entry), 0, 0); if (!event) return; @@ -277,8 +280,9 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) entry->nr = syscall_nr; entry->ret = syscall_get_return_value(current, regs); - if (!filter_current_check_discard(sys_data->exit_event, entry, event)) - trace_current_buffer_unlock_commit(event, 0, 0); + if (!filter_current_check_discard(buffer, sys_data->exit_event, + entry, event)) + trace_current_buffer_unlock_commit(buffer, event, 0, 0); } int reg_event_syscall_enter(void *ptr) -- cgit v1.2.3 From 85bac32c4a52c592b857f2c360cc5ec93a097d70 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 4 Sep 2009 14:24:40 -0400 Subject: ring-buffer: only enable ring_buffer_swap_cpu when needed Since the ability to swap the cpu buffers adds a small overhead to the recording of a trace, we only want to add it when needed. Only the irqsoff and preemptoff tracers use this feature, and both are not recommended for production kernels. This patch disables its use when neither irqsoff nor preemptoff is configured. Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 9 +++++++++ kernel/trace/Kconfig | 8 ++++++++ kernel/trace/ring_buffer.c | 4 ++++ 3 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index e061b4ecdc3a..5fcc31ed5771 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -140,8 +140,17 @@ unsigned long ring_buffer_size(struct ring_buffer *buffer); void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu); void ring_buffer_reset(struct ring_buffer *buffer); +#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, struct ring_buffer *buffer_b, int cpu); +#else +static inline int +ring_buffer_swap_cpu(struct ring_buffer *buffer_a, + struct ring_buffer *buffer_b, int cpu) +{ + return -ENODEV; +} +#endif int ring_buffer_empty(struct ring_buffer *buffer); int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 163fbfc2f39f..1ea0d1234f4a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -62,6 +62,12 @@ config EVENT_TRACING config CONTEXT_SWITCH_TRACER bool +config RING_BUFFER_ALLOW_SWAP + bool + help + Allow the use of ring_buffer_swap_cpu. + Adds a very slight overhead to tracing when enabled. + # All tracer options should select GENERIC_TRACER. For those options that are # enabled by all tracers (context switch and event tracer) they select TRACING. # This allows those options to appear when no other tracer is selected. But the @@ -146,6 +152,7 @@ config IRQSOFF_TRACER select TRACE_IRQFLAGS select GENERIC_TRACER select TRACER_MAX_TRACE + select RING_BUFFER_ALLOW_SWAP help This option measures the time spent in irqs-off critical sections, with microsecond accuracy. @@ -167,6 +174,7 @@ config PREEMPT_TRACER depends on PREEMPT select GENERIC_TRACER select TRACER_MAX_TRACE + select RING_BUFFER_ALLOW_SWAP help This option measures the time spent in preemption off critical sections, with microsecond accuracy. diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1766c0e8db5a..454e74e718cf 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2084,6 +2084,7 @@ rb_reserve_next_event(struct ring_buffer *buffer, rb_start_commit(cpu_buffer); +#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP /* * Due to the ability to swap a cpu buffer from a buffer * it is possible it was swapped before we committed. @@ -2096,6 +2097,7 @@ rb_reserve_next_event(struct ring_buffer *buffer, local_dec(&cpu_buffer->commits); return NULL; } +#endif length = rb_calculate_event_length(length); again: @@ -3498,6 +3500,7 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) } EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); +#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP /** * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers * @buffer_a: One buffer to swap with @@ -3573,6 +3576,7 @@ out: return ret; } EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); +#endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */ /** * ring_buffer_alloc_read_page - allocate a page to read from buffer -- cgit v1.2.3