summaryrefslogtreecommitdiff
path: root/kernel/trace
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-12-15 13:49:34 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-12-15 13:49:34 -0800
commit179a7ba6806805bd4cd7a5e4574b83353c5615ad (patch)
tree58855a59ba3bd66f947c3f9781cd44a7329c7d75 /kernel/trace
parent5e176d6973bdac04d9f298ca384c39f08eb084cb (diff)
parent3dbb16b87b57bb1088044ad2a0432e4769075002 (diff)
downloadlwn-179a7ba6806805bd4cd7a5e4574b83353c5615ad.tar.gz
lwn-179a7ba6806805bd4cd7a5e4574b83353c5615ad.zip
Merge tag 'trace-v4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
Pull tracing updates from Steven Rostedt: "This release has a few updates: - STM can hook into the function tracer - Function filtering now supports more advance glob matching - Ftrace selftests updates and added tests - Softirq tag in traces now show only softirqs - ARM nop added to non traced locations at compile time - New trace_marker_raw file that allows for binary input - Optimizations to the ring buffer - Removal of kmap in trace_marker - Wakeup and irqsoff tracers now adhere to the set_graph_notrace file - Other various fixes and clean ups" * tag 'trace-v4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (42 commits) selftests: ftrace: Shift down default message verbosity kprobes/trace: Fix kprobe selftest for newer gcc tracing/kprobes: Add a helper method to return number of probe hits tracing/rb: Init the CPU mask on allocation tracing: Use SOFTIRQ_OFFSET for softirq dectection for more accurate results tracing/fgraph: Have wakeup and irqsoff tracers ignore graph functions too fgraph: Handle a case where a tracer ignores set_graph_notrace tracing: Replace kmap with copy_from_user() in trace_marker writing ftrace/x86_32: Set ftrace_stub to weak to prevent gcc from using short jumps to it tracing: Allow benchmark to be enabled at early_initcall() tracing: Have system enable return error if one of the events fail tracing: Do not start benchmark on boot up tracing: Have the reg function allow to fail ring-buffer: Force rb_end_commit() and rb_set_commit_to_write() inline ring-buffer: Froce rb_update_write_stamp() to be inlined ring-buffer: Force inline of hotpath helper functions tracing: Make __buffer_unlock_commit() always_inline tracing: Make tracepoint_printk a static_key ring-buffer: Always inline rb_event_data() ring-buffer: Make rb_reserve_next_event() always inlined ...
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig2
-rw-r--r--kernel/trace/ftrace.c4
-rw-r--r--kernel/trace/ring_buffer.c28
-rw-r--r--kernel/trace/trace.c485
-rw-r--r--kernel/trace/trace.h19
-rw-r--r--kernel/trace/trace_benchmark.c26
-rw-r--r--kernel/trace/trace_benchmark.h2
-rw-r--r--kernel/trace/trace_branch.c2
-rw-r--r--kernel/trace/trace_entries.h15
-rw-r--r--kernel/trace/trace_events.c83
-rw-r--r--kernel/trace/trace_events_filter.c119
-rw-r--r--kernel/trace/trace_functions_graph.c35
-rw-r--r--kernel/trace/trace_hwlat.c2
-rw-r--r--kernel/trace/trace_irqsoff.c12
-rw-r--r--kernel/trace/trace_kprobe.c47
-rw-r--r--kernel/trace/trace_output.c30
-rw-r--r--kernel/trace/trace_sched_wakeup.c13
17 files changed, 675 insertions, 249 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2a96b063d659..d5038005eb5d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -70,6 +70,7 @@ config FTRACE_NMI_ENTER
config EVENT_TRACING
select CONTEXT_SWITCH_TRACER
+ select GLOB
bool
config CONTEXT_SWITCH_TRACER
@@ -133,6 +134,7 @@ config FUNCTION_TRACER
select KALLSYMS
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
+ select GLOB
help
Enable the kernel to trace every kernel function. This is done
by using a compiler feature to insert a small, 5-byte No-Operation
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 33dd57f53f88..1f0f547c54da 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3511,6 +3511,10 @@ static int ftrace_match(char *str, struct ftrace_glob *g)
memcmp(str + slen - g->len, g->search, g->len) == 0)
matched = 1;
break;
+ case MATCH_GLOB:
+ if (glob_match(g->search, str))
+ matched = 1;
+ break;
}
return matched;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 89a2611a1635..a85739efcc30 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -245,7 +245,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event)
EXPORT_SYMBOL_GPL(ring_buffer_event_length);
/* inline for ring buffer fast paths */
-static void *
+static __always_inline void *
rb_event_data(struct ring_buffer_event *event)
{
if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
@@ -1798,48 +1798,48 @@ void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
}
EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
-static inline void *
+static __always_inline void *
__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
{
return bpage->data + index;
}
-static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
+static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
{
return bpage->page->data + index;
}
-static inline struct ring_buffer_event *
+static __always_inline struct ring_buffer_event *
rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
{
return __rb_page_index(cpu_buffer->reader_page,
cpu_buffer->reader_page->read);
}
-static inline struct ring_buffer_event *
+static __always_inline struct ring_buffer_event *
rb_iter_head_event(struct ring_buffer_iter *iter)
{
return __rb_page_index(iter->head_page, iter->head);
}
-static inline unsigned rb_page_commit(struct buffer_page *bpage)
+static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
{
return local_read(&bpage->page->commit);
}
/* Size is determined by what has been committed */
-static inline unsigned rb_page_size(struct buffer_page *bpage)
+static __always_inline unsigned rb_page_size(struct buffer_page *bpage)
{
return rb_page_commit(bpage);
}
-static inline unsigned
+static __always_inline unsigned
rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
{
return rb_page_commit(cpu_buffer->commit_page);
}
-static inline unsigned
+static __always_inline unsigned
rb_event_index(struct ring_buffer_event *event)
{
unsigned long addr = (unsigned long)event;
@@ -2355,7 +2355,7 @@ static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
local_inc(&cpu_buffer->commits);
}
-static void
+static __always_inline void
rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned long max_count;
@@ -2410,7 +2410,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
goto again;
}
-static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned long commits;
@@ -2455,7 +2455,7 @@ static inline void rb_event_discard(struct ring_buffer_event *event)
event->time_delta = 1;
}
-static inline bool
+static __always_inline bool
rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
@@ -2469,7 +2469,7 @@ rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
rb_commit_index(cpu_buffer) == index;
}
-static void
+static __always_inline void
rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
@@ -2702,7 +2702,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
return event;
}
-static struct ring_buffer_event *
+static __always_inline struct ring_buffer_event *
rb_reserve_next_event(struct ring_buffer *buffer,
struct ring_buffer_per_cpu *cpu_buffer,
unsigned long length)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 54d5270a5042..66f829c47bec 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -40,6 +40,7 @@
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/fs.h>
+#include <linux/trace.h>
#include <linux/sched/rt.h>
#include "trace.h"
@@ -68,6 +69,7 @@ bool __read_mostly tracing_selftest_disabled;
/* Pipe tracepoints to printk */
struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;
+static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
/* For tracers that don't implement custom flags */
static struct tracer_opt dummy_tracer_opt[] = {
@@ -738,6 +740,31 @@ static inline void ftrace_trace_stack(struct trace_array *tr,
#endif
+static __always_inline void
+trace_event_setup(struct ring_buffer_event *event,
+ int type, unsigned long flags, int pc)
+{
+ struct trace_entry *ent = ring_buffer_event_data(event);
+
+ tracing_generic_entry_update(ent, flags, pc);
+ ent->type = type;
+}
+
+static __always_inline struct ring_buffer_event *
+__trace_buffer_lock_reserve(struct ring_buffer *buffer,
+ int type,
+ unsigned long len,
+ unsigned long flags, int pc)
+{
+ struct ring_buffer_event *event;
+
+ event = ring_buffer_lock_reserve(buffer, len);
+ if (event != NULL)
+ trace_event_setup(event, type, flags, pc);
+
+ return event;
+}
+
static void tracer_tracing_on(struct trace_array *tr)
{
if (tr->trace_buffer.buffer)
@@ -767,6 +794,22 @@ void tracing_on(void)
}
EXPORT_SYMBOL_GPL(tracing_on);
+
+static __always_inline void
+__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
+{
+ __this_cpu_write(trace_cmdline_save, true);
+
+ /* If this is the temp buffer, we need to commit fully */
+ if (this_cpu_read(trace_buffered_event) == event) {
+ /* Length is in event->array[0] */
+ ring_buffer_write(buffer, event->array[0], &event->array[1]);
+ /* Release the temp buffer */
+ this_cpu_dec(trace_buffered_event_cnt);
+ } else
+ ring_buffer_unlock_commit(buffer, event);
+}
+
/**
* __trace_puts - write a constant string into the trace buffer.
* @ip: The address of the caller
@@ -794,8 +837,8 @@ int __trace_puts(unsigned long ip, const char *str, int size)
local_save_flags(irq_flags);
buffer = global_trace.trace_buffer.buffer;
- event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
- irq_flags, pc);
+ event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
+ irq_flags, pc);
if (!event)
return 0;
@@ -842,8 +885,8 @@ int __trace_bputs(unsigned long ip, const char *str)
local_save_flags(irq_flags);
buffer = global_trace.trace_buffer.buffer;
- event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
- irq_flags, pc);
+ event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
+ irq_flags, pc);
if (!event)
return 0;
@@ -1907,35 +1950,19 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
#endif
((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
- ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
+ ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
}
EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
-static __always_inline void
-trace_event_setup(struct ring_buffer_event *event,
- int type, unsigned long flags, int pc)
-{
- struct trace_entry *ent = ring_buffer_event_data(event);
-
- tracing_generic_entry_update(ent, flags, pc);
- ent->type = type;
-}
-
struct ring_buffer_event *
trace_buffer_lock_reserve(struct ring_buffer *buffer,
int type,
unsigned long len,
unsigned long flags, int pc)
{
- struct ring_buffer_event *event;
-
- event = ring_buffer_lock_reserve(buffer, len);
- if (event != NULL)
- trace_event_setup(event, type, flags, pc);
-
- return event;
+ return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
}
DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
@@ -2049,21 +2076,6 @@ void trace_buffered_event_disable(void)
preempt_enable();
}
-void
-__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
-{
- __this_cpu_write(trace_cmdline_save, true);
-
- /* If this is the temp buffer, we need to commit fully */
- if (this_cpu_read(trace_buffered_event) == event) {
- /* Length is in event->array[0] */
- ring_buffer_write(buffer, event->array[0], &event->array[1]);
- /* Release the temp buffer */
- this_cpu_dec(trace_buffered_event_cnt);
- } else
- ring_buffer_unlock_commit(buffer, event);
-}
-
static struct ring_buffer *temp_buffer;
struct ring_buffer_event *
@@ -2090,8 +2102,8 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
this_cpu_dec(trace_buffered_event_cnt);
}
- entry = trace_buffer_lock_reserve(*current_rb,
- type, len, flags, pc);
+ entry = __trace_buffer_lock_reserve(*current_rb,
+ type, len, flags, pc);
/*
* If tracing is off, but we have triggers enabled
* we still need to look at the event data. Use the temp_buffer
@@ -2100,13 +2112,88 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
*/
if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
*current_rb = temp_buffer;
- entry = trace_buffer_lock_reserve(*current_rb,
- type, len, flags, pc);
+ entry = __trace_buffer_lock_reserve(*current_rb,
+ type, len, flags, pc);
}
return entry;
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
+static DEFINE_SPINLOCK(tracepoint_iter_lock);
+static DEFINE_MUTEX(tracepoint_printk_mutex);
+
+static void output_printk(struct trace_event_buffer *fbuffer)
+{
+ struct trace_event_call *event_call;
+ struct trace_event *event;
+ unsigned long flags;
+ struct trace_iterator *iter = tracepoint_print_iter;
+
+ /* We should never get here if iter is NULL */
+ if (WARN_ON_ONCE(!iter))
+ return;
+
+ event_call = fbuffer->trace_file->event_call;
+ if (!event_call || !event_call->event.funcs ||
+ !event_call->event.funcs->trace)
+ return;
+
+ event = &fbuffer->trace_file->event_call->event;
+
+ spin_lock_irqsave(&tracepoint_iter_lock, flags);
+ trace_seq_init(&iter->seq);
+ iter->ent = fbuffer->entry;
+ event_call->event.funcs->trace(iter, 0, event);
+ trace_seq_putc(&iter->seq, 0);
+ printk("%s", iter->seq.buffer);
+
+ spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
+}
+
+int tracepoint_printk_sysctl(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int save_tracepoint_printk;
+ int ret;
+
+ mutex_lock(&tracepoint_printk_mutex);
+ save_tracepoint_printk = tracepoint_printk;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+ /*
+ * This will force exiting early, as tracepoint_printk
+ * is always zero when tracepoint_printk_iter is not allocated
+ */
+ if (!tracepoint_print_iter)
+ tracepoint_printk = 0;
+
+ if (save_tracepoint_printk == tracepoint_printk)
+ goto out;
+
+ if (tracepoint_printk)
+ static_key_enable(&tracepoint_printk_key.key);
+ else
+ static_key_disable(&tracepoint_printk_key.key);
+
+ out:
+ mutex_unlock(&tracepoint_printk_mutex);
+
+ return ret;
+}
+
+void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
+{
+ if (static_key_false(&tracepoint_printk_key.key))
+ output_printk(fbuffer);
+
+ event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
+ fbuffer->event, fbuffer->entry,
+ fbuffer->flags, fbuffer->pc);
+}
+EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
+
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
struct ring_buffer *buffer,
struct ring_buffer_event *event,
@@ -2129,6 +2216,139 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
ftrace_trace_userstack(buffer, flags, pc);
}
+/*
+ * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
+ */
+void
+trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
+ struct ring_buffer_event *event)
+{
+ __buffer_unlock_commit(buffer, event);
+}
+
+static void
+trace_process_export(struct trace_export *export,
+ struct ring_buffer_event *event)
+{
+ struct trace_entry *entry;
+ unsigned int size = 0;
+
+ entry = ring_buffer_event_data(event);
+ size = ring_buffer_event_length(event);
+ export->write(entry, size);
+}
+
+static DEFINE_MUTEX(ftrace_export_lock);
+
+static struct trace_export __rcu *ftrace_exports_list __read_mostly;
+
+static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
+
+static inline void ftrace_exports_enable(void)
+{
+ static_branch_enable(&ftrace_exports_enabled);
+}
+
+static inline void ftrace_exports_disable(void)
+{
+ static_branch_disable(&ftrace_exports_enabled);
+}
+
+void ftrace_exports(struct ring_buffer_event *event)
+{
+ struct trace_export *export;
+
+ preempt_disable_notrace();
+
+ export = rcu_dereference_raw_notrace(ftrace_exports_list);
+ while (export) {
+ trace_process_export(export, event);
+ export = rcu_dereference_raw_notrace(export->next);
+ }
+
+ preempt_enable_notrace();
+}
+
+static inline void
+add_trace_export(struct trace_export **list, struct trace_export *export)
+{
+ rcu_assign_pointer(export->next, *list);
+ /*
+ * We are entering export into the list but another
+ * CPU might be walking that list. We need to make sure
+ * the export->next pointer is valid before another CPU sees
+ * the export pointer included into the list.
+ */
+ rcu_assign_pointer(*list, export);
+}
+
+static inline int
+rm_trace_export(struct trace_export **list, struct trace_export *export)
+{
+ struct trace_export **p;
+
+ for (p = list; *p != NULL; p = &(*p)->next)
+ if (*p == export)
+ break;
+
+ if (*p != export)
+ return -1;
+
+ rcu_assign_pointer(*p, (*p)->next);
+
+ return 0;
+}
+
+static inline void
+add_ftrace_export(struct trace_export **list, struct trace_export *export)
+{
+ if (*list == NULL)
+ ftrace_exports_enable();
+
+ add_trace_export(list, export);
+}
+
+static inline int
+rm_ftrace_export(struct trace_export **list, struct trace_export *export)
+{
+ int ret;
+
+ ret = rm_trace_export(list, export);
+ if (*list == NULL)
+ ftrace_exports_disable();
+
+ return ret;
+}
+
+int register_ftrace_export(struct trace_export *export)
+{
+ if (WARN_ON_ONCE(!export->write))
+ return -1;
+
+ mutex_lock(&ftrace_export_lock);
+
+ add_ftrace_export(&ftrace_exports_list, export);
+
+ mutex_unlock(&ftrace_export_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(register_ftrace_export);
+
+int unregister_ftrace_export(struct trace_export *export)
+{
+ int ret;
+
+ mutex_lock(&ftrace_export_lock);
+
+ ret = rm_ftrace_export(&ftrace_exports_list, export);
+
+ mutex_unlock(&ftrace_export_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(unregister_ftrace_export);
+
void
trace_function(struct trace_array *tr,
unsigned long ip, unsigned long parent_ip, unsigned long flags,
@@ -2139,16 +2359,19 @@ trace_function(struct trace_array *tr,
struct ring_buffer_event *event;
struct ftrace_entry *entry;
- event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
- flags, pc);
+ event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
+ flags, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
entry->ip = ip;
entry->parent_ip = parent_ip;
- if (!call_filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event)) {
+ if (static_branch_unlikely(&ftrace_exports_enabled))
+ ftrace_exports(event);
__buffer_unlock_commit(buffer, event);
+ }
}
#ifdef CONFIG_STACKTRACE
@@ -2216,8 +2439,8 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
size *= sizeof(unsigned long);
- event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
- sizeof(*entry) + size, flags, pc);
+ event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
+ sizeof(*entry) + size, flags, pc);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
@@ -2318,8 +2541,8 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
__this_cpu_inc(user_stack_count);
- event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
- sizeof(*entry), flags, pc);
+ event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
+ sizeof(*entry), flags, pc);
if (!event)
goto out_drop_count;
entry = ring_buffer_event_data(event);
@@ -2489,8 +2712,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
local_save_flags(flags);
size = sizeof(*entry) + sizeof(u32) * len;
buffer = tr->trace_buffer.buffer;
- event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
- flags, pc);
+ event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
+ flags, pc);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
@@ -2545,8 +2768,8 @@ __trace_array_vprintk(struct ring_buffer *buffer,
local_save_flags(flags);
size = sizeof(*entry) + len + 1;
- event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
- flags, pc);
+ event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
+ flags, pc);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
@@ -4055,6 +4278,7 @@ static const char readme_msg[] =
" x86-tsc: TSC cycle counter\n"
#endif
"\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
+ "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
" tracing_cpumask\t- Limit which CPUs to trace\n"
" instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
"\t\t\t Remove sub-buffer with rmdir\n"
@@ -4066,7 +4290,7 @@ static const char readme_msg[] =
"\n available_filter_functions - list of functions that can be filtered on\n"
" set_ftrace_filter\t- echo function name in here to only trace these\n"
"\t\t\t functions\n"
- "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
+ "\t accepts: func_full_name or glob-matching-pattern\n"
"\t modules: Can select a group via module\n"
"\t Format: :mod:<module-name>\n"
"\t example: echo :mod:ext3 > set_ftrace_filter\n"
@@ -5519,21 +5743,18 @@ static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
{
- unsigned long addr = (unsigned long)ubuf;
struct trace_array *tr = filp->private_data;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
struct print_entry *entry;
unsigned long irq_flags;
- struct page *pages[2];
- void *map_page[2];
- int nr_pages = 1;
+ const char faulted[] = "<faulted>";
ssize_t written;
- int offset;
int size;
int len;
- int ret;
- int i;
+
+/* Used in tracing_mark_raw_write() as well */
+#define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
if (tracing_disabled)
return -EINVAL;
@@ -5544,60 +5765,33 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
if (cnt > TRACE_BUF_SIZE)
cnt = TRACE_BUF_SIZE;
- /*
- * Userspace is injecting traces into the kernel trace buffer.
- * We want to be as non intrusive as possible.
- * To do so, we do not want to allocate any special buffers
- * or take any locks, but instead write the userspace data
- * straight into the ring buffer.
- *
- * First we need to pin the userspace buffer into memory,
- * which, most likely it is, because it just referenced it.
- * But there's no guarantee that it is. By using get_user_pages_fast()
- * and kmap_atomic/kunmap_atomic() we can get access to the
- * pages directly. We then write the data directly into the
- * ring buffer.
- */
BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
- /* check if we cross pages */
- if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
- nr_pages = 2;
-
- offset = addr & (PAGE_SIZE - 1);
- addr &= PAGE_MASK;
-
- ret = get_user_pages_fast(addr, nr_pages, 0, pages);
- if (ret < nr_pages) {
- while (--ret >= 0)
- put_page(pages[ret]);
- written = -EFAULT;
- goto out;
- }
+ local_save_flags(irq_flags);
+ size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
- for (i = 0; i < nr_pages; i++)
- map_page[i] = kmap_atomic(pages[i]);
+ /* If less than "<faulted>", then make sure we can still add that */
+ if (cnt < FAULTED_SIZE)
+ size += FAULTED_SIZE - cnt;
- local_save_flags(irq_flags);
- size = sizeof(*entry) + cnt + 2; /* possible \n added */
buffer = tr->trace_buffer.buffer;
- event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
- irq_flags, preempt_count());
- if (!event) {
+ event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
+ irq_flags, preempt_count());
+ if (unlikely(!event))
/* Ring buffer disabled, return as if not open for write */
- written = -EBADF;
- goto out_unlock;
- }
+ return -EBADF;
entry = ring_buffer_event_data(event);
entry->ip = _THIS_IP_;
- if (nr_pages == 2) {
- len = PAGE_SIZE - offset;
- memcpy(&entry->buf, map_page[0] + offset, len);
- memcpy(&entry->buf[len], map_page[1], cnt - len);
+ len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
+ if (len) {
+ memcpy(&entry->buf, faulted, FAULTED_SIZE);
+ cnt = FAULTED_SIZE;
+ written = -EFAULT;
} else
- memcpy(&entry->buf, map_page[0] + offset, cnt);
+ written = cnt;
+ len = cnt;
if (entry->buf[cnt - 1] != '\n') {
entry->buf[cnt] = '\n';
@@ -5607,16 +5801,73 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
__buffer_unlock_commit(buffer, event);
- written = cnt;
+ if (written > 0)
+ *fpos += written;
- *fpos += written;
+ return written;
+}
+
+/* Limit it for now to 3K (including tag) */
+#define RAW_DATA_MAX_SIZE (1024*3)
+
+static ssize_t
+tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *fpos)
+{
+ struct trace_array *tr = filp->private_data;
+ struct ring_buffer_event *event;
+ struct ring_buffer *buffer;
+ struct raw_data_entry *entry;
+ const char faulted[] = "<faulted>";
+ unsigned long irq_flags;
+ ssize_t written;
+ int size;
+ int len;
+
+#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
+
+ if (tracing_disabled)
+ return -EINVAL;
+
+ if (!(tr->trace_flags & TRACE_ITER_MARKERS))
+ return -EINVAL;
+
+ /* The marker must at least have a tag id */
+ if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
+ return -EINVAL;
+
+ if (cnt > TRACE_BUF_SIZE)
+ cnt = TRACE_BUF_SIZE;
+
+ BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
+
+ local_save_flags(irq_flags);
+ size = sizeof(*entry) + cnt;
+ if (cnt < FAULT_SIZE_ID)
+ size += FAULT_SIZE_ID - cnt;
+
+ buffer = tr->trace_buffer.buffer;
+ event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
+ irq_flags, preempt_count());
+ if (!event)
+ /* Ring buffer disabled, return as if not open for write */
+ return -EBADF;
+
+ entry = ring_buffer_event_data(event);
+
+ len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
+ if (len) {
+ entry->id = -1;
+ memcpy(&entry->buf, faulted, FAULTED_SIZE);
+ written = -EFAULT;
+ } else
+ written = cnt;
+
+ __buffer_unlock_commit(buffer, event);
+
+ if (written > 0)
+ *fpos += written;
- out_unlock:
- for (i = nr_pages - 1; i >= 0; i--) {
- kunmap_atomic(map_page[i]);
- put_page(pages[i]);
- }
- out:
return written;
}
@@ -5946,6 +6197,13 @@ static const struct file_operations tracing_mark_fops = {
.release = tracing_release_generic_tr,
};
+static const struct file_operations tracing_mark_raw_fops = {
+ .open = tracing_open_generic_tr,
+ .write = tracing_mark_raw_write,
+ .llseek = generic_file_llseek,
+ .release = tracing_release_generic_tr,
+};
+
static const struct file_operations trace_clock_fops = {
.open = tracing_clock_open,
.read = seq_read,
@@ -7215,6 +7473,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
trace_create_file("trace_marker", 0220, d_tracer,
tr, &tracing_mark_fops);
+ trace_create_file("trace_marker_raw", 0220, d_tracer,
+ tr, &tracing_mark_raw_fops);
+
trace_create_file("trace_clock", 0644, d_tracer, tr,
&trace_clock_fops);
@@ -7752,6 +8013,8 @@ void __init trace_init(void)
kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
if (WARN_ON(!tracepoint_print_iter))
tracepoint_printk = 0;
+ else
+ static_key_enable(&tracepoint_printk_key.key);
}
tracer_alloc_buffers();
trace_event_init();
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fd24b1f9ac43..c2234494f40c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -15,6 +15,7 @@
#include <linux/trace_events.h>
#include <linux/compiler.h>
#include <linux/trace_seq.h>
+#include <linux/glob.h>
#ifdef CONFIG_FTRACE_SYSCALLS
#include <asm/unistd.h> /* For NR_SYSCALLS */
@@ -39,6 +40,7 @@ enum trace_type {
TRACE_BLK,
TRACE_BPUTS,
TRACE_HWLAT,
+ TRACE_RAW_DATA,
__TRACE_LAST_TYPE,
};
@@ -330,6 +332,7 @@ extern void __ftrace_bad_type(void);
IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \
IF_ASSIGN(var, ent, struct bputs_entry, TRACE_BPUTS); \
IF_ASSIGN(var, ent, struct hwlat_entry, TRACE_HWLAT); \
+ IF_ASSIGN(var, ent, struct raw_data_entry, TRACE_RAW_DATA);\
IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
TRACE_MMIO_RW); \
IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
@@ -599,8 +602,8 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
int *ent_cpu, u64 *ent_ts);
-void __buffer_unlock_commit(struct ring_buffer *buffer,
- struct ring_buffer_event *event);
+void trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
+ struct ring_buffer_event *event);
int trace_empty(struct trace_iterator *iter);
@@ -843,6 +846,17 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr)
return 0;
}
#endif /* CONFIG_DYNAMIC_FTRACE */
+
+extern unsigned int fgraph_max_depth;
+
+static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace)
+{
+ /* trace it when it is-nested-in or is a function enabled. */
+ return !(trace->depth || ftrace_graph_addr(trace->func)) ||
+ (trace->depth < 0) ||
+ (fgraph_max_depth && trace->depth >= fgraph_max_depth);
+}
+
#else /* CONFIG_FUNCTION_GRAPH_TRACER */
static inline enum print_line_t
print_graph_function_flags(struct trace_iterator *iter, u32 flags)
@@ -1257,6 +1271,7 @@ enum regex_type {
MATCH_FRONT_ONLY,
MATCH_MIDDLE_ONLY,
MATCH_END_ONLY,
+ MATCH_GLOB,
};
struct regex {
diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c
index 0f109c4130d3..e3b488825ae3 100644
--- a/kernel/trace/trace_benchmark.c
+++ b/kernel/trace/trace_benchmark.c
@@ -21,6 +21,8 @@ static u64 bm_stddev;
static unsigned int bm_avg;
static unsigned int bm_std;
+static bool ok_to_run;
+
/*
* This gets called in a loop recording the time it took to write
* the tracepoint. What it writes is the time statistics of the last
@@ -164,11 +166,21 @@ static int benchmark_event_kthread(void *arg)
* When the benchmark tracepoint is enabled, it calls this
* function and the thread that calls the tracepoint is created.
*/
-void trace_benchmark_reg(void)
+int trace_benchmark_reg(void)
{
+ if (!ok_to_run) {
+ pr_warning("trace benchmark cannot be started via kernel command line\n");
+ return -EBUSY;
+ }
+
bm_event_thread = kthread_run(benchmark_event_kthread,
NULL, "event_benchmark");
- WARN_ON(!bm_event_thread);
+ if (!bm_event_thread) {
+ pr_warning("trace benchmark failed to create kernel thread\n");
+ return -ENOMEM;
+ }
+
+ return 0;
}
/*
@@ -182,6 +194,7 @@ void trace_benchmark_unreg(void)
return;
kthread_stop(bm_event_thread);
+ bm_event_thread = NULL;
strcpy(bm_str, "START");
bm_total = 0;
@@ -196,3 +209,12 @@ void trace_benchmark_unreg(void)
bm_avg = 0;
bm_stddev = 0;
}
+
+static __init int ok_to_run_trace_benchmark(void)
+{
+ ok_to_run = true;
+
+ return 0;
+}
+
+early_initcall(ok_to_run_trace_benchmark);
diff --git a/kernel/trace/trace_benchmark.h b/kernel/trace/trace_benchmark.h
index 3c1df1df4e29..ebdbfc2f2a64 100644
--- a/kernel/trace/trace_benchmark.h
+++ b/kernel/trace/trace_benchmark.h
@@ -6,7 +6,7 @@
#include <linux/tracepoint.h>
-extern void trace_benchmark_reg(void);
+extern int trace_benchmark_reg(void);
extern void trace_benchmark_unreg(void);
#define BENCHMARK_EVENT_STRLEN 128
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 3a2a73716a5b..75489de546b6 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -81,7 +81,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
entry->correct = val == expect;
if (!call_filter_check_discard(call, entry, buffer, event))
- __buffer_unlock_commit(buffer, event);
+ trace_buffer_unlock_commit_nostack(buffer, event);
out:
current->trace_recursion &= ~TRACE_BRANCH_BIT;
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index d1cc37e78f99..eb7396b7e7c3 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -244,6 +244,21 @@ FTRACE_ENTRY(print, print_entry,
FILTER_OTHER
);
+FTRACE_ENTRY(raw_data, raw_data_entry,
+
+ TRACE_RAW_DATA,
+
+ F_STRUCT(
+ __field( unsigned int, id )
+ __dynamic_array( char, buf )
+ ),
+
+ F_printk("id:%04x %08x",
+ __entry->id, (int)__entry->buf[0]),
+
+ FILTER_OTHER
+);
+
FTRACE_ENTRY(bputs, bputs_entry,
TRACE_BPUTS,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 03c0a48c3ac4..93116549a284 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -283,46 +283,6 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
}
EXPORT_SYMBOL_GPL(trace_event_buffer_reserve);
-static DEFINE_SPINLOCK(tracepoint_iter_lock);
-
-static void output_printk(struct trace_event_buffer *fbuffer)
-{
- struct trace_event_call *event_call;
- struct trace_event *event;
- unsigned long flags;
- struct trace_iterator *iter = tracepoint_print_iter;
-
- if (!iter)
- return;
-
- event_call = fbuffer->trace_file->event_call;
- if (!event_call || !event_call->event.funcs ||
- !event_call->event.funcs->trace)
- return;
-
- event = &fbuffer->trace_file->event_call->event;
-
- spin_lock_irqsave(&tracepoint_iter_lock, flags);
- trace_seq_init(&iter->seq);
- iter->ent = fbuffer->entry;
- event_call->event.funcs->trace(iter, 0, event);
- trace_seq_putc(&iter->seq, 0);
- printk("%s", iter->seq.buffer);
-
- spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
-}
-
-void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
-{
- if (tracepoint_printk)
- output_printk(fbuffer);
-
- event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
- fbuffer->event, fbuffer->entry,
- fbuffer->flags, fbuffer->pc);
-}
-EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
-
int trace_event_reg(struct trace_event_call *call,
enum trace_reg type, void *data)
{
@@ -742,6 +702,7 @@ __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
struct trace_event_call *call;
const char *name;
int ret = -EINVAL;
+ int eret = 0;
list_for_each_entry(file, &tr->events, list) {
@@ -765,9 +726,17 @@ __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
if (event && strcmp(event, name) != 0)
continue;
- ftrace_event_enable_disable(file, set);
+ ret = ftrace_event_enable_disable(file, set);
- ret = 0;
+ /*
+ * Save the first error and return that. Some events
+ * may still have been enabled, but let the user
+ * know that something went wrong.
+ */
+ if (ret && !eret)
+ eret = ret;
+
+ ret = eret;
}
return ret;
@@ -2843,20 +2812,32 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
return -ENOMEM;
}
+ entry = trace_create_file("enable", 0644, d_events,
+ tr, &ftrace_tr_enable_fops);
+ if (!entry) {
+ pr_warn("Could not create tracefs 'enable' entry\n");
+ return -ENOMEM;
+ }
+
+ /* There are not as crucial, just warn if they are not created */
+
entry = tracefs_create_file("set_event_pid", 0644, parent,
tr, &ftrace_set_event_pid_fops);
+ if (!entry)
+ pr_warn("Could not create tracefs 'set_event_pid' entry\n");
/* ring buffer internal formats */
- trace_create_file("header_page", 0444, d_events,
- ring_buffer_print_page_header,
- &ftrace_show_header_fops);
-
- trace_create_file("header_event", 0444, d_events,
- ring_buffer_print_entry_header,
- &ftrace_show_header_fops);
+ entry = trace_create_file("header_page", 0444, d_events,
+ ring_buffer_print_page_header,
+ &ftrace_show_header_fops);
+ if (!entry)
+ pr_warn("Could not create tracefs 'header_page' entry\n");
- trace_create_file("enable", 0644, d_events,
- tr, &ftrace_tr_enable_fops);
+ entry = trace_create_file("header_event", 0444, d_events,
+ ring_buffer_print_entry_header,
+ &ftrace_show_header_fops);
+ if (!entry)
+ pr_warn("Could not create tracefs 'header_event' entry\n");
tr->event_dir = d_events;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 9daa9b3bc6d9..59a411ff60c7 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -108,12 +108,12 @@ static char *err_text[] = {
};
struct opstack_op {
- int op;
+ enum filter_op_ids op;
struct list_head list;
};
struct postfix_elt {
- int op;
+ enum filter_op_ids op;
char *operand;
struct list_head list;
};
@@ -145,34 +145,50 @@ struct pred_stack {
/* If not of not match is equal to not of not, then it is a match */
#define DEFINE_COMPARISON_PRED(type) \
-static int filter_pred_##type(struct filter_pred *pred, void *event) \
+static int filter_pred_LT_##type(struct filter_pred *pred, void *event) \
{ \
type *addr = (type *)(event + pred->offset); \
type val = (type)pred->val; \
- int match = 0; \
- \
- switch (pred->op) { \
- case OP_LT: \
- match = (*addr < val); \
- break; \
- case OP_LE: \
- match = (*addr <= val); \
- break; \
- case OP_GT: \
- match = (*addr > val); \
- break; \
- case OP_GE: \
- match = (*addr >= val); \
- break; \
- case OP_BAND: \
- match = (*addr & val); \
- break; \
- default: \
- break; \
- } \
- \
+ int match = (*addr < val); \
return !!match == !pred->not; \
-}
+} \
+static int filter_pred_LE_##type(struct filter_pred *pred, void *event) \
+{ \
+ type *addr = (type *)(event + pred->offset); \
+ type val = (type)pred->val; \
+ int match = (*addr <= val); \
+ return !!match == !pred->not; \
+} \
+static int filter_pred_GT_##type(struct filter_pred *pred, void *event) \
+{ \
+ type *addr = (type *)(event + pred->offset); \
+ type val = (type)pred->val; \
+ int match = (*addr > val); \
+ return !!match == !pred->not; \
+} \
+static int filter_pred_GE_##type(struct filter_pred *pred, void *event) \
+{ \
+ type *addr = (type *)(event + pred->offset); \
+ type val = (type)pred->val; \
+ int match = (*addr >= val); \
+ return !!match == !pred->not; \
+} \
+static int filter_pred_BAND_##type(struct filter_pred *pred, void *event) \
+{ \
+ type *addr = (type *)(event + pred->offset); \
+ type val = (type)pred->val; \
+ int match = !!(*addr & val); \
+ return match == !pred->not; \
+} \
+static const filter_pred_fn_t pred_funcs_##type[] = { \
+ filter_pred_LT_##type, \
+ filter_pred_LE_##type, \
+ filter_pred_GT_##type, \
+ filter_pred_GE_##type, \
+ filter_pred_BAND_##type, \
+};
+
+#define PRED_FUNC_START OP_LT
#define DEFINE_EQUALITY_PRED(size) \
static int filter_pred_##size(struct filter_pred *pred, void *event) \
@@ -344,6 +360,12 @@ static int regex_match_end(char *str, struct regex *r, int len)
return 0;
}
+static int regex_match_glob(char *str, struct regex *r, int len __maybe_unused)
+{
+ if (glob_match(r->pattern, str))
+ return 1;
+ return 0;
+}
/**
* filter_parse_regex - parse a basic regex
* @buff: the raw regex
@@ -380,14 +402,20 @@ enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not)
if (!i) {
*search = buff + 1;
type = MATCH_END_ONLY;
- } else {
+ } else if (i == len - 1) {
if (type == MATCH_END_ONLY)
type = MATCH_MIDDLE_ONLY;
else
type = MATCH_FRONT_ONLY;
buff[i] = 0;
break;
+ } else { /* pattern continues, use full glob */
+ type = MATCH_GLOB;
+ break;
}
+ } else if (strchr("[?\\", buff[i])) {
+ type = MATCH_GLOB;
+ break;
}
}
@@ -420,6 +448,9 @@ static void filter_build_regex(struct filter_pred *pred)
case MATCH_END_ONLY:
r->match = regex_match_end;
break;
+ case MATCH_GLOB:
+ r->match = regex_match_glob;
+ break;
}
pred->not ^= not;
@@ -946,7 +977,7 @@ int filter_assign_type(const char *type)
return FILTER_OTHER;
}
-static bool is_legal_op(struct ftrace_event_field *field, int op)
+static bool is_legal_op(struct ftrace_event_field *field, enum filter_op_ids op)
{
if (is_string_field(field) &&
(op != OP_EQ && op != OP_NE && op != OP_GLOB))
@@ -957,8 +988,8 @@ static bool is_legal_op(struct ftrace_event_field *field, int op)
return true;
}
-static filter_pred_fn_t select_comparison_fn(int op, int field_size,
- int field_is_signed)
+static filter_pred_fn_t select_comparison_fn(enum filter_op_ids op,
+ int field_size, int field_is_signed)
{
filter_pred_fn_t fn = NULL;
@@ -967,33 +998,33 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
if (op == OP_EQ || op == OP_NE)
fn = filter_pred_64;
else if (field_is_signed)
- fn = filter_pred_s64;
+ fn = pred_funcs_s64[op - PRED_FUNC_START];
else
- fn = filter_pred_u64;
+ fn = pred_funcs_u64[op - PRED_FUNC_START];
break;
case 4:
if (op == OP_EQ || op == OP_NE)
fn = filter_pred_32;
else if (field_is_signed)
- fn = filter_pred_s32;
+ fn = pred_funcs_s32[op - PRED_FUNC_START];
else
- fn = filter_pred_u32;
+ fn = pred_funcs_u32[op - PRED_FUNC_START];
break;
case 2:
if (op == OP_EQ || op == OP_NE)
fn = filter_pred_16;
else if (field_is_signed)
- fn = filter_pred_s16;
+ fn = pred_funcs_s16[op - PRED_FUNC_START];
else
- fn = filter_pred_u16;
+ fn = pred_funcs_u16[op - PRED_FUNC_START];
break;
case 1:
if (op == OP_EQ || op == OP_NE)
fn = filter_pred_8;
else if (field_is_signed)
- fn = filter_pred_s8;
+ fn = pred_funcs_s8[op - PRED_FUNC_START];
else
- fn = filter_pred_u8;
+ fn = pred_funcs_u8[op - PRED_FUNC_START];
break;
}
@@ -1166,7 +1197,8 @@ static inline int append_operand_char(struct filter_parse_state *ps, char c)
return 0;
}
-static int filter_opstack_push(struct filter_parse_state *ps, int op)
+static int filter_opstack_push(struct filter_parse_state *ps,
+ enum filter_op_ids op)
{
struct opstack_op *opstack_op;
@@ -1200,7 +1232,7 @@ static int filter_opstack_top(struct filter_parse_state *ps)
static int filter_opstack_pop(struct filter_parse_state *ps)
{
struct opstack_op *opstack_op;
- int op;
+ enum filter_op_ids op;
if (filter_opstack_empty(ps))
return OP_NONE;
@@ -1245,7 +1277,7 @@ static int postfix_append_operand(struct filter_parse_state *ps, char *operand)
return 0;
}
-static int postfix_append_op(struct filter_parse_state *ps, int op)
+static int postfix_append_op(struct filter_parse_state *ps, enum filter_op_ids op)
{
struct postfix_elt *elt;
@@ -1275,8 +1307,8 @@ static void postfix_clear(struct filter_parse_state *ps)
static int filter_parse(struct filter_parse_state *ps)
{
+ enum filter_op_ids op, top_op;
int in_string = 0;
- int op, top_op;
char ch;
while ((ch = infix_next(ps))) {
@@ -1367,7 +1399,8 @@ parse_operand:
static struct filter_pred *create_pred(struct filter_parse_state *ps,
struct trace_event_call *call,
- int op, char *operand1, char *operand2)
+ enum filter_op_ids op,
+ char *operand1, char *operand2)
{
struct ftrace_event_field *field;
static struct filter_pred pred;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 4e480e870474..d56123cdcc89 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -65,7 +65,7 @@ struct fgraph_data {
#define TRACE_GRAPH_INDENT 2
-static unsigned int max_depth;
+unsigned int fgraph_max_depth;
static struct tracer_opt trace_opts[] = {
/* Display overruns? (for self-debug purpose) */
@@ -358,7 +358,7 @@ int __trace_graph_entry(struct trace_array *tr,
entry = ring_buffer_event_data(event);
entry->graph_ent = *trace;
if (!call_filter_check_discard(call, entry, buffer, event))
- __buffer_unlock_commit(buffer, event);
+ trace_buffer_unlock_commit_nostack(buffer, event);
return 1;
}
@@ -384,10 +384,10 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
if (!ftrace_trace_task(tr))
return 0;
- /* trace it when it is-nested-in or is a function enabled. */
- if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
- ftrace_graph_ignore_irqs()) || (trace->depth < 0) ||
- (max_depth && trace->depth >= max_depth))
+ if (ftrace_graph_ignore_func(trace))
+ return 0;
+
+ if (ftrace_graph_ignore_irqs())
return 0;
/*
@@ -469,7 +469,7 @@ void __trace_graph_return(struct trace_array *tr,
entry = ring_buffer_event_data(event);
entry->ret = *trace;
if (!call_filter_check_discard(call, entry, buffer, event))
- __buffer_unlock_commit(buffer, event);
+ trace_buffer_unlock_commit_nostack(buffer, event);
}
void trace_graph_return(struct ftrace_graph_ret *trace)
@@ -842,6 +842,10 @@ print_graph_entry_leaf(struct trace_iterator *iter,
cpu_data = per_cpu_ptr(data->cpu_data, cpu);
+ /* If a graph tracer ignored set_graph_notrace */
+ if (call->depth < -1)
+ call->depth += FTRACE_NOTRACE_DEPTH;
+
/*
* Comments display at + 1 to depth. Since
* this is a leaf function, keep the comments
@@ -850,7 +854,8 @@ print_graph_entry_leaf(struct trace_iterator *iter,
cpu_data->depth = call->depth - 1;
/* No need to keep this function around for this depth */
- if (call->depth < FTRACE_RETFUNC_DEPTH)
+ if (call->depth < FTRACE_RETFUNC_DEPTH &&
+ !WARN_ON_ONCE(call->depth < 0))
cpu_data->enter_funcs[call->depth] = 0;
}
@@ -880,11 +885,16 @@ print_graph_entry_nested(struct trace_iterator *iter,
struct fgraph_cpu_data *cpu_data;
int cpu = iter->cpu;
+ /* If a graph tracer ignored set_graph_notrace */
+ if (call->depth < -1)
+ call->depth += FTRACE_NOTRACE_DEPTH;
+
cpu_data = per_cpu_ptr(data->cpu_data, cpu);
cpu_data->depth = call->depth;
/* Save this function pointer to see if the exit matches */
- if (call->depth < FTRACE_RETFUNC_DEPTH)
+ if (call->depth < FTRACE_RETFUNC_DEPTH &&
+ !WARN_ON_ONCE(call->depth < 0))
cpu_data->enter_funcs[call->depth] = call->func;
}
@@ -1114,7 +1124,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
*/
cpu_data->depth = trace->depth - 1;
- if (trace->depth < FTRACE_RETFUNC_DEPTH) {
+ if (trace->depth < FTRACE_RETFUNC_DEPTH &&
+ !WARN_ON_ONCE(trace->depth < 0)) {
if (cpu_data->enter_funcs[trace->depth] != trace->func)
func_match = 0;
cpu_data->enter_funcs[trace->depth] = 0;
@@ -1489,7 +1500,7 @@ graph_depth_write(struct file *filp, const char __user *ubuf, size_t cnt,
if (ret)
return ret;
- max_depth = val;
+ fgraph_max_depth = val;
*ppos += cnt;
@@ -1503,7 +1514,7 @@ graph_depth_read(struct file *filp, char __user *ubuf, size_t cnt,
char buf[15]; /* More than enough to hold UINT_MAX + "\n"*/
int n;
- n = sprintf(buf, "%d\n", max_depth);
+ n = sprintf(buf, "%d\n", fgraph_max_depth);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
}
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index b97286c48735..775569ec50d0 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -127,7 +127,7 @@ static void trace_hwlat_sample(struct hwlat_sample *sample)
entry->nmi_count = sample->nmi_count;
if (!call_filter_check_discard(call, entry, buffer, event))
- __buffer_unlock_commit(buffer, event);
+ trace_buffer_unlock_commit_nostack(buffer, event);
}
/* Macros to encapsulate the time capturing infrastructure */
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 03cdff84d026..86654d7e1afe 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -175,6 +175,18 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
int ret;
int pc;
+ if (ftrace_graph_ignore_func(trace))
+ return 0;
+ /*
+ * Do not trace a function if it's filtered by set_graph_notrace.
+ * Make the index of ret stack negative to indicate that it should
+ * ignore further functions. But it needs its own ret stack entry
+ * to recover the original index in order to continue tracing after
+ * returning from the function.
+ */
+ if (ftrace_graph_notrace_addr(trace->func))
+ return 1;
+
if (!func_prolog_dec(tr, &data, &flags))
return 0;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index eb6c9f1d3a93..a133ecd741e4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -73,6 +73,17 @@ static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
return !!strchr(trace_kprobe_symbol(tk), ':');
}
+static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
+{
+ unsigned long nhit = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ nhit += *per_cpu_ptr(tk->nhit, cpu);
+
+ return nhit;
+}
+
static int register_kprobe_event(struct trace_kprobe *tk);
static int unregister_kprobe_event(struct trace_kprobe *tk);
@@ -882,14 +893,10 @@ static const struct file_operations kprobe_events_ops = {
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
struct trace_kprobe *tk = v;
- unsigned long nhit = 0;
- int cpu;
-
- for_each_possible_cpu(cpu)
- nhit += *per_cpu_ptr(tk->nhit, cpu);
seq_printf(m, " %-44s %15lu %15lu\n",
- trace_event_name(&tk->tp.call), nhit,
+ trace_event_name(&tk->tp.call),
+ trace_kprobe_nhit(tk),
tk->rp.kp.nmissed);
return 0;
@@ -1354,18 +1361,18 @@ fs_initcall(init_kprobe_trace);
#ifdef CONFIG_FTRACE_STARTUP_TEST
-
/*
* The "__used" keeps gcc from removing the function symbol
- * from the kallsyms table.
+ * from the kallsyms table. 'noinline' makes sure that there
+ * isn't an inlined version used by the test method below
*/
-static __used int kprobe_trace_selftest_target(int a1, int a2, int a3,
- int a4, int a5, int a6)
+static __used __init noinline int
+kprobe_trace_selftest_target(int a1, int a2, int a3, int a4, int a5, int a6)
{
return a1 + a2 + a3 + a4 + a5 + a6;
}
-static struct trace_event_file *
+static struct __init trace_event_file *
find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)
{
struct trace_event_file *file;
@@ -1443,12 +1450,25 @@ static __init int kprobe_trace_self_tests_init(void)
ret = target(1, 2, 3, 4, 5, 6);
+ /*
+ * Not expecting an error here, the check is only to prevent the
+ * optimizer from removing the call to target() as otherwise there
+ * are no side-effects and the call is never performed.
+ */
+ if (ret != 21)
+ warn++;
+
/* Disable trace points before removing it */
tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
if (WARN_ON_ONCE(tk == NULL)) {
pr_warn("error on getting test probe.\n");
warn++;
} else {
+ if (trace_kprobe_nhit(tk) != 1) {
+ pr_warn("incorrect number of testprobe hits\n");
+ warn++;
+ }
+
file = find_trace_probe_file(tk, top_trace_array());
if (WARN_ON_ONCE(file == NULL)) {
pr_warn("error on getting probe file.\n");
@@ -1462,6 +1482,11 @@ static __init int kprobe_trace_self_tests_init(void)
pr_warn("error on getting 2nd test probe.\n");
warn++;
} else {
+ if (trace_kprobe_nhit(tk) != 1) {
+ pr_warn("incorrect number of testprobe2 hits\n");
+ warn++;
+ }
+
file = find_trace_probe_file(tk, top_trace_array());
if (WARN_ON_ONCE(file == NULL)) {
pr_warn("error on getting probe file.\n");
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 3fc20422c166..5d33a7352919 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1288,6 +1288,35 @@ static struct trace_event trace_print_event = {
.funcs = &trace_print_funcs,
};
+static enum print_line_t trace_raw_data(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct raw_data_entry *field;
+ int i;
+
+ trace_assign_type(field, iter->ent);
+
+ trace_seq_printf(&iter->seq, "# %x buf:", field->id);
+
+ for (i = 0; i < iter->ent_size - offsetof(struct raw_data_entry, buf); i++)
+ trace_seq_printf(&iter->seq, " %02x",
+ (unsigned char)field->buf[i]);
+
+ trace_seq_putc(&iter->seq, '\n');
+
+ return trace_handle_return(&iter->seq);
+}
+
+static struct trace_event_functions trace_raw_data_funcs = {
+ .trace = trace_raw_data,
+ .raw = trace_raw_data,
+};
+
+static struct trace_event trace_raw_data_event = {
+ .type = TRACE_RAW_DATA,
+ .funcs = &trace_raw_data_funcs,
+};
+
static struct trace_event *events[] __initdata = {
&trace_fn_event,
@@ -1299,6 +1328,7 @@ static struct trace_event *events[] __initdata = {
&trace_bprint_event,
&trace_print_event,
&trace_hwlat_event,
+ &trace_raw_data_event,
NULL
};
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 9d4399b553a3..5d0bb025bb21 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -239,6 +239,18 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
unsigned long flags;
int pc, ret = 0;
+ if (ftrace_graph_ignore_func(trace))
+ return 0;
+ /*
+ * Do not trace a function if it's filtered by set_graph_notrace.
+ * Make the index of ret stack negative to indicate that it should
+ * ignore further functions. But it needs its own ret stack entry
+ * to recover the original index in order to continue tracing after
+ * returning from the function.
+ */
+ if (ftrace_graph_notrace_addr(trace->func))
+ return 1;
+
if (!func_prolog_preempt_disable(tr, &data, &pc))
return 0;
@@ -790,6 +802,7 @@ static struct tracer wakeup_dl_tracer __read_mostly =
#endif
.open = wakeup_trace_open,
.close = wakeup_trace_close,
+ .allow_instances = true,
.use_max_tr = true,
};