diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-15 13:49:34 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-15 13:49:34 -0800 |
commit | 179a7ba6806805bd4cd7a5e4574b83353c5615ad (patch) | |
tree | 58855a59ba3bd66f947c3f9781cd44a7329c7d75 | |
parent | 5e176d6973bdac04d9f298ca384c39f08eb084cb (diff) | |
parent | 3dbb16b87b57bb1088044ad2a0432e4769075002 (diff) | |
download | lwn-179a7ba6806805bd4cd7a5e4574b83353c5615ad.tar.gz lwn-179a7ba6806805bd4cd7a5e4574b83353c5615ad.zip |
Merge tag 'trace-v4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
Pull tracing updates from Steven Rostedt:
"This release has a few updates:
- STM can hook into the function tracer
- Function filtering now supports more advanced glob matching
- Ftrace selftests updates and added tests
- Softirq tag in traces now shows only softirqs
- ARM nop added to non traced locations at compile time
- New trace_marker_raw file that allows for binary input
- Optimizations to the ring buffer
- Removal of kmap in trace_marker
- Wakeup and irqsoff tracers now adhere to the set_graph_notrace file
- Other various fixes and clean ups"
* tag 'trace-v4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (42 commits)
selftests: ftrace: Shift down default message verbosity
kprobes/trace: Fix kprobe selftest for newer gcc
tracing/kprobes: Add a helper method to return number of probe hits
tracing/rb: Init the CPU mask on allocation
tracing: Use SOFTIRQ_OFFSET for softirq dectection for more accurate results
tracing/fgraph: Have wakeup and irqsoff tracers ignore graph functions too
fgraph: Handle a case where a tracer ignores set_graph_notrace
tracing: Replace kmap with copy_from_user() in trace_marker writing
ftrace/x86_32: Set ftrace_stub to weak to prevent gcc from using short jumps to it
tracing: Allow benchmark to be enabled at early_initcall()
tracing: Have system enable return error if one of the events fail
tracing: Do not start benchmark on boot up
tracing: Have the reg function allow to fail
ring-buffer: Force rb_end_commit() and rb_set_commit_to_write() inline
ring-buffer: Froce rb_update_write_stamp() to be inlined
ring-buffer: Force inline of hotpath helper functions
tracing: Make __buffer_unlock_commit() always_inline
tracing: Make tracepoint_printk a static_key
ring-buffer: Always inline rb_event_data()
ring-buffer: Make rb_reserve_next_event() always inlined
...
52 files changed, 1064 insertions, 300 deletions
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt index 08d74d75150d..2cc08d4a326e 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -189,16 +189,13 @@ And for string fields they are: ==, !=, ~ -The glob (~) only accepts a wild card character (*) at the start and or -end of the string. For example: +The glob (~) accepts a wild card character (*,?) and character classes +([). For example: prev_comm ~ "*sh" prev_comm ~ "sh*" prev_comm ~ "*sh*" - -But does not allow for it to be within the string: - - prev_comm ~ "ba*sh" <-- is invalid + prev_comm ~ "ba*sh" 5.2 Setting filters ------------------- diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 5596e2d71d6d..006f47c7d913 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -416,6 +416,12 @@ of ftrace. Here is a list of some of the key files: trace_fd = open("trace_marker", WR_ONLY); + trace_marker_raw: + + This is similar to trace_marker above, but is meant for for binary data + to be written to it, where a tool can be used to parse the data + from trace_pipe_raw. + uprobe_events: Add dynamic tracepoints in programs. @@ -2238,16 +2244,13 @@ hrtimer_interrupt sys_nanosleep -Perhaps this is not enough. The filters also allow simple wild -cards. Only the following are currently available +Perhaps this is not enough. The filters also allow glob(7) matching. <match>* - will match functions that begin with <match> *<match> - will match functions that end with <match> *<match>* - will match functions that have <match> in it - -These are the only wild cards which are supported. - - <match>*<match> will not work. 
+ <match1>*<match2> - will match functions that begin with + <match1> and end with <match2> Note: It is better to use quotes to enclose the wild cards, otherwise the shell may expand the parameters into names diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index 32e36b16773f..c05cef6ee06c 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -54,7 +54,7 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit, ); #ifdef CONFIG_PPC_PSERIES -extern void hcall_tracepoint_regfunc(void); +extern int hcall_tracepoint_regfunc(void); extern void hcall_tracepoint_unregfunc(void); TRACE_EVENT_FN_COND(hcall_entry, @@ -104,7 +104,7 @@ TRACE_EVENT_FN_COND(hcall_exit, #endif #ifdef CONFIG_PPC_POWERNV -extern void opal_tracepoint_regfunc(void); +extern int opal_tracepoint_regfunc(void); extern void opal_tracepoint_unregfunc(void); TRACE_EVENT_FN(opal_entry, diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index 1e496b780efd..3c447002edff 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -6,9 +6,10 @@ #ifdef HAVE_JUMP_LABEL struct static_key opal_tracepoint_key = STATIC_KEY_INIT; -void opal_tracepoint_regfunc(void) +int opal_tracepoint_regfunc(void) { static_key_slow_inc(&opal_tracepoint_key); + return 0; } void opal_tracepoint_unregfunc(void) @@ -25,9 +26,10 @@ void opal_tracepoint_unregfunc(void) /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ extern long opal_tracepoint_refcount; -void opal_tracepoint_regfunc(void) +int opal_tracepoint_regfunc(void) { opal_tracepoint_refcount++; + return 0; } void opal_tracepoint_unregfunc(void) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index f2c98f6c1c9c..a78da511ffeb 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ 
-661,9 +661,10 @@ EXPORT_SYMBOL(arch_free_page); #ifdef HAVE_JUMP_LABEL struct static_key hcall_tracepoint_key = STATIC_KEY_INIT; -void hcall_tracepoint_regfunc(void) +int hcall_tracepoint_regfunc(void) { static_key_slow_inc(&hcall_tracepoint_key); + return 0; } void hcall_tracepoint_unregfunc(void) @@ -680,9 +681,10 @@ void hcall_tracepoint_unregfunc(void) /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ extern long hcall_tracepoint_refcount; -void hcall_tracepoint_regfunc(void) +int hcall_tracepoint_regfunc(void) { hcall_tracepoint_refcount++; + return 0; } void hcall_tracepoint_unregfunc(void) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index acc0c6f36f3f..701d29f8e4d3 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -926,8 +926,8 @@ ftrace_graph_call: jmp ftrace_stub #endif -.globl ftrace_stub -ftrace_stub: +/* This is weak to keep gas from relaxing the jumps */ +WEAK(ftrace_stub) ret END(ftrace_caller) diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h index 2fbc66c7885b..2422b14c50a7 100644 --- a/arch/x86/include/asm/trace/exceptions.h +++ b/arch/x86/include/asm/trace/exceptions.h @@ -6,7 +6,7 @@ #include <linux/tracepoint.h> -extern void trace_irq_vector_regfunc(void); +extern int trace_irq_vector_regfunc(void); extern void trace_irq_vector_unregfunc(void); DECLARE_EVENT_CLASS(x86_exceptions, diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 38a09a13a9bc..32dd6a9e343c 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -6,7 +6,7 @@ #include <linux/tracepoint.h> -extern void trace_irq_vector_regfunc(void); +extern int trace_irq_vector_regfunc(void); extern void trace_irq_vector_unregfunc(void); DECLARE_EVENT_CLASS(x86_irq_vector, diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c index 1c113db9ed57..15515132bf0d 100644 
--- a/arch/x86/kernel/tracepoint.c +++ b/arch/x86/kernel/tracepoint.c @@ -34,7 +34,7 @@ static void switch_idt(void *arg) local_irq_restore(flags); } -void trace_irq_vector_regfunc(void) +int trace_irq_vector_regfunc(void) { mutex_lock(&irq_vector_mutex); if (!trace_irq_vector_refcount) { @@ -44,6 +44,7 @@ void trace_irq_vector_regfunc(void) } trace_irq_vector_refcount++; mutex_unlock(&irq_vector_mutex); + return 0; } void trace_irq_vector_unregfunc(void) diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index 944c17b48d23..e4c55c5f9988 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -406,7 +406,7 @@ static long stm_generic_set_options(struct stm_data *stm_data, return 0; } -static ssize_t stm_generic_packet(struct stm_data *stm_data, +static ssize_t notrace stm_generic_packet(struct stm_data *stm_data, unsigned int master, unsigned int channel, unsigned int packet, diff --git a/drivers/hwtracing/intel_th/sth.c b/drivers/hwtracing/intel_th/sth.c index e1aee61dd7b3..b03444624648 100644 --- a/drivers/hwtracing/intel_th/sth.c +++ b/drivers/hwtracing/intel_th/sth.c @@ -67,10 +67,13 @@ static void sth_iowrite(void __iomem *dest, const unsigned char *payload, } } -static ssize_t sth_stm_packet(struct stm_data *stm_data, unsigned int master, - unsigned int channel, unsigned int packet, - unsigned int flags, unsigned int size, - const unsigned char *payload) +static ssize_t notrace sth_stm_packet(struct stm_data *stm_data, + unsigned int master, + unsigned int channel, + unsigned int packet, + unsigned int flags, + unsigned int size, + const unsigned char *payload) { struct sth_device *sth = container_of(stm_data, struct sth_device, stm); struct intel_th_channel __iomem *out = diff --git a/drivers/hwtracing/stm/Kconfig b/drivers/hwtracing/stm/Kconfig index 847a39b35307..723e2d90083d 100644 --- a/drivers/hwtracing/stm/Kconfig +++ b/drivers/hwtracing/stm/Kconfig 
@@ -39,4 +39,15 @@ config STM_SOURCE_HEARTBEAT If you want to send heartbeat messages over STM devices, say Y. +config STM_SOURCE_FTRACE + tristate "Copy the output from kernel Ftrace to STM engine" + depends on FUNCTION_TRACER + help + This option can be used to copy the output from kernel Ftrace + to STM engine. Enabling this option will introduce a slight + timing effect. + + If you want to send kernel Ftrace messages over STM devices, + say Y. + endif diff --git a/drivers/hwtracing/stm/Makefile b/drivers/hwtracing/stm/Makefile index a9ce3d487e57..3abd84ce13d4 100644 --- a/drivers/hwtracing/stm/Makefile +++ b/drivers/hwtracing/stm/Makefile @@ -6,6 +6,8 @@ obj-$(CONFIG_STM_DUMMY) += dummy_stm.o obj-$(CONFIG_STM_SOURCE_CONSOLE) += stm_console.o obj-$(CONFIG_STM_SOURCE_HEARTBEAT) += stm_heartbeat.o +obj-$(CONFIG_STM_SOURCE_FTRACE) += stm_ftrace.o stm_console-y := console.o stm_heartbeat-y := heartbeat.o +stm_ftrace-y := ftrace.o diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index a6ea387b5b00..0e731143f6a4 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -427,7 +427,7 @@ static int stm_file_assign(struct stm_file *stmf, char *id, unsigned int width) return ret; } -static ssize_t stm_write(struct stm_data *data, unsigned int master, +static ssize_t notrace stm_write(struct stm_data *data, unsigned int master, unsigned int channel, const char *buf, size_t count) { unsigned int flags = STP_PACKET_TIMESTAMPED; @@ -1123,8 +1123,9 @@ void stm_source_unregister_device(struct stm_source_data *data) } EXPORT_SYMBOL_GPL(stm_source_unregister_device); -int stm_source_write(struct stm_source_data *data, unsigned int chan, - const char *buf, size_t count) +int notrace stm_source_write(struct stm_source_data *data, + unsigned int chan, + const char *buf, size_t count) { struct stm_source_device *src = data->src; struct stm_device *stm; diff --git a/drivers/hwtracing/stm/dummy_stm.c b/drivers/hwtracing/stm/dummy_stm.c 
index a86612d989f9..c5f94ca31c4d 100644 --- a/drivers/hwtracing/stm/dummy_stm.c +++ b/drivers/hwtracing/stm/dummy_stm.c @@ -21,7 +21,7 @@ #include <linux/slab.h> #include <linux/stm.h> -static ssize_t +static ssize_t notrace dummy_stm_packet(struct stm_data *stm_data, unsigned int master, unsigned int channel, unsigned int packet, unsigned int flags, unsigned int size, const unsigned char *payload) diff --git a/drivers/hwtracing/stm/ftrace.c b/drivers/hwtracing/stm/ftrace.c new file mode 100644 index 000000000000..bd126a7c6da2 --- /dev/null +++ b/drivers/hwtracing/stm/ftrace.c @@ -0,0 +1,87 @@ +/* + * Simple kernel driver to link kernel Ftrace and an STM device + * Copyright (c) 2016, Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * STM Ftrace will be registered as a trace_export. 
+ */ + +#include <linux/module.h> +#include <linux/stm.h> +#include <linux/trace.h> + +#define STM_FTRACE_NR_CHANNELS 1 +#define STM_FTRACE_CHAN 0 + +static int stm_ftrace_link(struct stm_source_data *data); +static void stm_ftrace_unlink(struct stm_source_data *data); + +static struct stm_ftrace { + struct stm_source_data data; + struct trace_export ftrace; +} stm_ftrace = { + .data = { + .name = "ftrace", + .nr_chans = STM_FTRACE_NR_CHANNELS, + .link = stm_ftrace_link, + .unlink = stm_ftrace_unlink, + }, +}; + +/** + * stm_ftrace_write() - write data to STM via 'stm_ftrace' source + * @buf: buffer containing the data packet + * @len: length of the data packet + */ +static void notrace +stm_ftrace_write(const void *buf, unsigned int len) +{ + stm_source_write(&stm_ftrace.data, STM_FTRACE_CHAN, buf, len); +} + +static int stm_ftrace_link(struct stm_source_data *data) +{ + struct stm_ftrace *sf = container_of(data, struct stm_ftrace, data); + + sf->ftrace.write = stm_ftrace_write; + + return register_ftrace_export(&sf->ftrace); +} + +static void stm_ftrace_unlink(struct stm_source_data *data) +{ + struct stm_ftrace *sf = container_of(data, struct stm_ftrace, data); + + unregister_ftrace_export(&sf->ftrace); +} + +static int __init stm_ftrace_init(void) +{ + int ret; + + ret = stm_source_register_device(NULL, &stm_ftrace.data); + if (ret) + pr_err("Failed to register stm_source - ftrace.\n"); + + return ret; +} + +static void __exit stm_ftrace_exit(void) +{ + stm_source_unregister_device(&stm_ftrace.data); +} + +module_init(stm_ftrace_init); +module_exit(stm_ftrace_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("stm_ftrace driver"); +MODULE_AUTHOR("Chunyan Zhang <zhang.chunyan@linaro.org>"); diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 3a1bc9c4efc7..3de95a29024c 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -80,9 +80,10 @@ static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver); static struct 
static_key i2c_trace_msg = STATIC_KEY_INIT_FALSE; static bool is_registered; -void i2c_transfer_trace_reg(void) +int i2c_transfer_trace_reg(void) { static_key_slow_inc(&i2c_trace_msg); + return 0; } void i2c_transfer_trace_unreg(void) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index d4a884db16a3..3633e8beff39 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -947,6 +947,10 @@ extern int __disable_trace_on_warning; #define INIT_TRACE_RECURSION .trace_recursion = 0, #endif +int tracepoint_printk_sysctl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + #else /* CONFIG_TRACING */ static inline void disable_trace_on_warning(void) { } #endif /* CONFIG_TRACING */ diff --git a/include/linux/stm.h b/include/linux/stm.h index 8369d8a8cabd..210ff2292361 100644 --- a/include/linux/stm.h +++ b/include/linux/stm.h @@ -133,7 +133,7 @@ int stm_source_register_device(struct device *parent, struct stm_source_data *data); void stm_source_unregister_device(struct stm_source_data *data); -int stm_source_write(struct stm_source_data *data, unsigned int chan, - const char *buf, size_t count); +int notrace stm_source_write(struct stm_source_data *data, unsigned int chan, + const char *buf, size_t count); #endif /* _STM_H_ */ diff --git a/include/linux/trace.h b/include/linux/trace.h new file mode 100644 index 000000000000..9330a58e2651 --- /dev/null +++ b/include/linux/trace.h @@ -0,0 +1,28 @@ +#ifndef _LINUX_TRACE_H +#define _LINUX_TRACE_H + +#ifdef CONFIG_TRACING +/* + * The trace export - an export of Ftrace output. The trace_export + * can process traces and export them to a registered destination as + * an addition to the current only output of Ftrace - i.e. ring buffer. + * + * If you want traces to be sent to some other place rather than ring + * buffer only, just need to register a new trace_export and implement + * its own .write() function for writing traces to the storage. 
+ * + * next - pointer to the next trace_export + * write - copy traces which have been delt with ->commit() to + * the destination + */ +struct trace_export { + struct trace_export __rcu *next; + void (*write)(const void *, unsigned int); +}; + +int register_ftrace_export(struct trace_export *export); +int unregister_ftrace_export(struct trace_export *export); + +#endif /* CONFIG_TRACING */ + +#endif /* _LINUX_TRACE_H */ diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 4ac89acb6136..a03192052066 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -29,7 +29,7 @@ struct tracepoint_func { struct tracepoint { const char *name; /* Tracepoint name */ struct static_key key; - void (*regfunc)(void); + int (*regfunc)(void); void (*unregfunc)(void); struct tracepoint_func __rcu *funcs; }; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index be586c632a0c..f72fcfe0e66a 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -81,7 +81,7 @@ static inline void tracepoint_synchronize_unregister(void) } #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS -extern void syscall_regfunc(void); +extern int syscall_regfunc(void); extern void syscall_unregfunc(void); #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ diff --git a/include/trace/events/i2c.h b/include/trace/events/i2c.h index fe17187df65d..4abb8eab34d3 100644 --- a/include/trace/events/i2c.h +++ b/include/trace/events/i2c.h @@ -20,7 +20,7 @@ /* * drivers/i2c/i2c-core.c */ -extern void i2c_transfer_trace_reg(void); +extern int i2c_transfer_trace_reg(void); extern void i2c_transfer_trace_unreg(void); /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1475d2545b7e..1a292ebcbbb6 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -627,7 +627,7 @@ static struct ctl_table kern_table[] = { .data = &tracepoint_printk, .maxlen = sizeof(tracepoint_printk), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = 
tracepoint_printk_sysctl, }, #endif #ifdef CONFIG_KEXEC_CORE diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 2a96b063d659..d5038005eb5d 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -70,6 +70,7 @@ config FTRACE_NMI_ENTER config EVENT_TRACING select CONTEXT_SWITCH_TRACER + select GLOB bool config CONTEXT_SWITCH_TRACER @@ -133,6 +134,7 @@ config FUNCTION_TRACER select KALLSYMS select GENERIC_TRACER select CONTEXT_SWITCH_TRACER + select GLOB help Enable the kernel to trace every kernel function. This is done by using a compiler feature to insert a small, 5-byte No-Operation diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 33dd57f53f88..1f0f547c54da 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3511,6 +3511,10 @@ static int ftrace_match(char *str, struct ftrace_glob *g) memcmp(str + slen - g->len, g->search, g->len) == 0) matched = 1; break; + case MATCH_GLOB: + if (glob_match(g->search, str)) + matched = 1; + break; } return matched; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 89a2611a1635..a85739efcc30 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -245,7 +245,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event) EXPORT_SYMBOL_GPL(ring_buffer_event_length); /* inline for ring buffer fast paths */ -static void * +static __always_inline void * rb_event_data(struct ring_buffer_event *event) { if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) @@ -1798,48 +1798,48 @@ void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val) } EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite); -static inline void * +static __always_inline void * __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) { return bpage->data + index; } -static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) +static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) { return 
bpage->page->data + index; } -static inline struct ring_buffer_event * +static __always_inline struct ring_buffer_event * rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) { return __rb_page_index(cpu_buffer->reader_page, cpu_buffer->reader_page->read); } -static inline struct ring_buffer_event * +static __always_inline struct ring_buffer_event * rb_iter_head_event(struct ring_buffer_iter *iter) { return __rb_page_index(iter->head_page, iter->head); } -static inline unsigned rb_page_commit(struct buffer_page *bpage) +static __always_inline unsigned rb_page_commit(struct buffer_page *bpage) { return local_read(&bpage->page->commit); } /* Size is determined by what has been committed */ -static inline unsigned rb_page_size(struct buffer_page *bpage) +static __always_inline unsigned rb_page_size(struct buffer_page *bpage) { return rb_page_commit(bpage); } -static inline unsigned +static __always_inline unsigned rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer) { return rb_page_commit(cpu_buffer->commit_page); } -static inline unsigned +static __always_inline unsigned rb_event_index(struct ring_buffer_event *event) { unsigned long addr = (unsigned long)event; @@ -2355,7 +2355,7 @@ static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) local_inc(&cpu_buffer->commits); } -static void +static __always_inline void rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) { unsigned long max_count; @@ -2410,7 +2410,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) goto again; } -static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) +static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) { unsigned long commits; @@ -2455,7 +2455,7 @@ static inline void rb_event_discard(struct ring_buffer_event *event) event->time_delta = 1; } -static inline bool +static __always_inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) { @@ -2469,7 
+2469,7 @@ rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, rb_commit_index(cpu_buffer) == index; } -static void +static __always_inline void rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) { @@ -2702,7 +2702,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, return event; } -static struct ring_buffer_event * +static __always_inline struct ring_buffer_event * rb_reserve_next_event(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer, unsigned long length) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 54d5270a5042..66f829c47bec 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -40,6 +40,7 @@ #include <linux/poll.h> #include <linux/nmi.h> #include <linux/fs.h> +#include <linux/trace.h> #include <linux/sched/rt.h> #include "trace.h" @@ -68,6 +69,7 @@ bool __read_mostly tracing_selftest_disabled; /* Pipe tracepoints to printk */ struct trace_iterator *tracepoint_print_iter; int tracepoint_printk; +static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key); /* For tracers that don't implement custom flags */ static struct tracer_opt dummy_tracer_opt[] = { @@ -738,6 +740,31 @@ static inline void ftrace_trace_stack(struct trace_array *tr, #endif +static __always_inline void +trace_event_setup(struct ring_buffer_event *event, + int type, unsigned long flags, int pc) +{ + struct trace_entry *ent = ring_buffer_event_data(event); + + tracing_generic_entry_update(ent, flags, pc); + ent->type = type; +} + +static __always_inline struct ring_buffer_event * +__trace_buffer_lock_reserve(struct ring_buffer *buffer, + int type, + unsigned long len, + unsigned long flags, int pc) +{ + struct ring_buffer_event *event; + + event = ring_buffer_lock_reserve(buffer, len); + if (event != NULL) + trace_event_setup(event, type, flags, pc); + + return event; +} + static void tracer_tracing_on(struct trace_array *tr) { if (tr->trace_buffer.buffer) @@ -767,6 +794,22 @@ void 
tracing_on(void) } EXPORT_SYMBOL_GPL(tracing_on); + +static __always_inline void +__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) +{ + __this_cpu_write(trace_cmdline_save, true); + + /* If this is the temp buffer, we need to commit fully */ + if (this_cpu_read(trace_buffered_event) == event) { + /* Length is in event->array[0] */ + ring_buffer_write(buffer, event->array[0], &event->array[1]); + /* Release the temp buffer */ + this_cpu_dec(trace_buffered_event_cnt); + } else + ring_buffer_unlock_commit(buffer, event); +} + /** * __trace_puts - write a constant string into the trace buffer. * @ip: The address of the caller @@ -794,8 +837,8 @@ int __trace_puts(unsigned long ip, const char *str, int size) local_save_flags(irq_flags); buffer = global_trace.trace_buffer.buffer; - event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, - irq_flags, pc); + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, + irq_flags, pc); if (!event) return 0; @@ -842,8 +885,8 @@ int __trace_bputs(unsigned long ip, const char *str) local_save_flags(irq_flags); buffer = global_trace.trace_buffer.buffer; - event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, - irq_flags, pc); + event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, + irq_flags, pc); if (!event) return 0; @@ -1907,35 +1950,19 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, #endif ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | - ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | + ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | (test_preempt_need_resched() ? 
TRACE_FLAG_PREEMPT_RESCHED : 0); } EXPORT_SYMBOL_GPL(tracing_generic_entry_update); -static __always_inline void -trace_event_setup(struct ring_buffer_event *event, - int type, unsigned long flags, int pc) -{ - struct trace_entry *ent = ring_buffer_event_data(event); - - tracing_generic_entry_update(ent, flags, pc); - ent->type = type; -} - struct ring_buffer_event * trace_buffer_lock_reserve(struct ring_buffer *buffer, int type, unsigned long len, unsigned long flags, int pc) { - struct ring_buffer_event *event; - - event = ring_buffer_lock_reserve(buffer, len); - if (event != NULL) - trace_event_setup(event, type, flags, pc); - - return event; + return __trace_buffer_lock_reserve(buffer, type, len, flags, pc); } DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); @@ -2049,21 +2076,6 @@ void trace_buffered_event_disable(void) preempt_enable(); } -void -__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) -{ - __this_cpu_write(trace_cmdline_save, true); - - /* If this is the temp buffer, we need to commit fully */ - if (this_cpu_read(trace_buffered_event) == event) { - /* Length is in event->array[0] */ - ring_buffer_write(buffer, event->array[0], &event->array[1]); - /* Release the temp buffer */ - this_cpu_dec(trace_buffered_event_cnt); - } else - ring_buffer_unlock_commit(buffer, event); -} - static struct ring_buffer *temp_buffer; struct ring_buffer_event * @@ -2090,8 +2102,8 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, this_cpu_dec(trace_buffered_event_cnt); } - entry = trace_buffer_lock_reserve(*current_rb, - type, len, flags, pc); + entry = __trace_buffer_lock_reserve(*current_rb, + type, len, flags, pc); /* * If tracing is off, but we have triggers enabled * we still need to look at the event data. 
Use the temp_buffer @@ -2100,13 +2112,88 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, */ if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { *current_rb = temp_buffer; - entry = trace_buffer_lock_reserve(*current_rb, - type, len, flags, pc); + entry = __trace_buffer_lock_reserve(*current_rb, + type, len, flags, pc); } return entry; } EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); +static DEFINE_SPINLOCK(tracepoint_iter_lock); +static DEFINE_MUTEX(tracepoint_printk_mutex); + +static void output_printk(struct trace_event_buffer *fbuffer) +{ + struct trace_event_call *event_call; + struct trace_event *event; + unsigned long flags; + struct trace_iterator *iter = tracepoint_print_iter; + + /* We should never get here if iter is NULL */ + if (WARN_ON_ONCE(!iter)) + return; + + event_call = fbuffer->trace_file->event_call; + if (!event_call || !event_call->event.funcs || + !event_call->event.funcs->trace) + return; + + event = &fbuffer->trace_file->event_call->event; + + spin_lock_irqsave(&tracepoint_iter_lock, flags); + trace_seq_init(&iter->seq); + iter->ent = fbuffer->entry; + event_call->event.funcs->trace(iter, 0, event); + trace_seq_putc(&iter->seq, 0); + printk("%s", iter->seq.buffer); + + spin_unlock_irqrestore(&tracepoint_iter_lock, flags); +} + +int tracepoint_printk_sysctl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int save_tracepoint_printk; + int ret; + + mutex_lock(&tracepoint_printk_mutex); + save_tracepoint_printk = tracepoint_printk; + + ret = proc_dointvec(table, write, buffer, lenp, ppos); + + /* + * This will force exiting early, as tracepoint_printk + * is always zero when tracepoint_printk_iter is not allocated + */ + if (!tracepoint_print_iter) + tracepoint_printk = 0; + + if (save_tracepoint_printk == tracepoint_printk) + goto out; + + if (tracepoint_printk) + static_key_enable(&tracepoint_printk_key.key); + else + 
static_key_disable(&tracepoint_printk_key.key); + + out: + mutex_unlock(&tracepoint_printk_mutex); + + return ret; +} + +void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) +{ + if (static_key_false(&tracepoint_printk_key.key)) + output_printk(fbuffer); + + event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer, + fbuffer->event, fbuffer->entry, + fbuffer->flags, fbuffer->pc); +} +EXPORT_SYMBOL_GPL(trace_event_buffer_commit); + void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct ring_buffer *buffer, struct ring_buffer_event *event, @@ -2129,6 +2216,139 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, ftrace_trace_userstack(buffer, flags, pc); } +/* + * Similar to trace_buffer_unlock_commit_regs() but do not dump stack. + */ +void +trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer, + struct ring_buffer_event *event) +{ + __buffer_unlock_commit(buffer, event); +} + +static void +trace_process_export(struct trace_export *export, + struct ring_buffer_event *event) +{ + struct trace_entry *entry; + unsigned int size = 0; + + entry = ring_buffer_event_data(event); + size = ring_buffer_event_length(event); + export->write(entry, size); +} + +static DEFINE_MUTEX(ftrace_export_lock); + +static struct trace_export __rcu *ftrace_exports_list __read_mostly; + +static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled); + +static inline void ftrace_exports_enable(void) +{ + static_branch_enable(&ftrace_exports_enabled); +} + +static inline void ftrace_exports_disable(void) +{ + static_branch_disable(&ftrace_exports_enabled); +} + +void ftrace_exports(struct ring_buffer_event *event) +{ + struct trace_export *export; + + preempt_disable_notrace(); + + export = rcu_dereference_raw_notrace(ftrace_exports_list); + while (export) { + trace_process_export(export, event); + export = rcu_dereference_raw_notrace(export->next); + } + + preempt_enable_notrace(); +} + +static inline void +add_trace_export(struct trace_export 
**list, struct trace_export *export) +{ + rcu_assign_pointer(export->next, *list); + /* + * We are entering export into the list but another + * CPU might be walking that list. We need to make sure + * the export->next pointer is valid before another CPU sees + * the export pointer included into the list. + */ + rcu_assign_pointer(*list, export); +} + +static inline int +rm_trace_export(struct trace_export **list, struct trace_export *export) +{ + struct trace_export **p; + + for (p = list; *p != NULL; p = &(*p)->next) + if (*p == export) + break; + + if (*p != export) + return -1; + + rcu_assign_pointer(*p, (*p)->next); + + return 0; +} + +static inline void +add_ftrace_export(struct trace_export **list, struct trace_export *export) +{ + if (*list == NULL) + ftrace_exports_enable(); + + add_trace_export(list, export); +} + +static inline int +rm_ftrace_export(struct trace_export **list, struct trace_export *export) +{ + int ret; + + ret = rm_trace_export(list, export); + if (*list == NULL) + ftrace_exports_disable(); + + return ret; +} + +int register_ftrace_export(struct trace_export *export) +{ + if (WARN_ON_ONCE(!export->write)) + return -1; + + mutex_lock(&ftrace_export_lock); + + add_ftrace_export(&ftrace_exports_list, export); + + mutex_unlock(&ftrace_export_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(register_ftrace_export); + +int unregister_ftrace_export(struct trace_export *export) +{ + int ret; + + mutex_lock(&ftrace_export_lock); + + ret = rm_ftrace_export(&ftrace_exports_list, export); + + mutex_unlock(&ftrace_export_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(unregister_ftrace_export); + void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, @@ -2139,16 +2359,19 @@ trace_function(struct trace_array *tr, struct ring_buffer_event *event; struct ftrace_entry *entry; - event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), - flags, pc); + event = __trace_buffer_lock_reserve(buffer, 
TRACE_FN, sizeof(*entry), + flags, pc); if (!event) return; entry = ring_buffer_event_data(event); entry->ip = ip; entry->parent_ip = parent_ip; - if (!call_filter_check_discard(call, entry, buffer, event)) + if (!call_filter_check_discard(call, entry, buffer, event)) { + if (static_branch_unlikely(&ftrace_exports_enabled)) + ftrace_exports(event); __buffer_unlock_commit(buffer, event); + } } #ifdef CONFIG_STACKTRACE @@ -2216,8 +2439,8 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, size *= sizeof(unsigned long); - event = trace_buffer_lock_reserve(buffer, TRACE_STACK, - sizeof(*entry) + size, flags, pc); + event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, + sizeof(*entry) + size, flags, pc); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -2318,8 +2541,8 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) __this_cpu_inc(user_stack_count); - event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, - sizeof(*entry), flags, pc); + event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, + sizeof(*entry), flags, pc); if (!event) goto out_drop_count; entry = ring_buffer_event_data(event); @@ -2489,8 +2712,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) local_save_flags(flags); size = sizeof(*entry) + sizeof(u32) * len; buffer = tr->trace_buffer.buffer; - event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, - flags, pc); + event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, + flags, pc); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -2545,8 +2768,8 @@ __trace_array_vprintk(struct ring_buffer *buffer, local_save_flags(flags); size = sizeof(*entry) + len + 1; - event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - flags, pc); + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, + flags, pc); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -4055,6 +4278,7 @@ static const char readme_msg[] = " 
x86-tsc: TSC cycle counter\n" #endif "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" + "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n" " tracing_cpumask\t- Limit which CPUs to trace\n" " instances\t\t- Make sub-buffers with: mkdir instances/foo\n" "\t\t\t Remove sub-buffer with rmdir\n" @@ -4066,7 +4290,7 @@ static const char readme_msg[] = "\n available_filter_functions - list of functions that can be filtered on\n" " set_ftrace_filter\t- echo function name in here to only trace these\n" "\t\t\t functions\n" - "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" + "\t accepts: func_full_name or glob-matching-pattern\n" "\t modules: Can select a group via module\n" "\t Format: :mod:<module-name>\n" "\t example: echo :mod:ext3 > set_ftrace_filter\n" @@ -5519,21 +5743,18 @@ static ssize_t tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { - unsigned long addr = (unsigned long)ubuf; struct trace_array *tr = filp->private_data; struct ring_buffer_event *event; struct ring_buffer *buffer; struct print_entry *entry; unsigned long irq_flags; - struct page *pages[2]; - void *map_page[2]; - int nr_pages = 1; + const char faulted[] = "<faulted>"; ssize_t written; - int offset; int size; int len; - int ret; - int i; + +/* Used in tracing_mark_raw_write() as well */ +#define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */ if (tracing_disabled) return -EINVAL; @@ -5544,60 +5765,33 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (cnt > TRACE_BUF_SIZE) cnt = TRACE_BUF_SIZE; - /* - * Userspace is injecting traces into the kernel trace buffer. - * We want to be as non intrusive as possible. - * To do so, we do not want to allocate any special buffers - * or take any locks, but instead write the userspace data - * straight into the ring buffer. 
- * - * First we need to pin the userspace buffer into memory, - * which, most likely it is, because it just referenced it. - * But there's no guarantee that it is. By using get_user_pages_fast() - * and kmap_atomic/kunmap_atomic() we can get access to the - * pages directly. We then write the data directly into the - * ring buffer. - */ BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); - /* check if we cross pages */ - if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK)) - nr_pages = 2; - - offset = addr & (PAGE_SIZE - 1); - addr &= PAGE_MASK; - - ret = get_user_pages_fast(addr, nr_pages, 0, pages); - if (ret < nr_pages) { - while (--ret >= 0) - put_page(pages[ret]); - written = -EFAULT; - goto out; - } + local_save_flags(irq_flags); + size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */ - for (i = 0; i < nr_pages; i++) - map_page[i] = kmap_atomic(pages[i]); + /* If less than "<faulted>", then make sure we can still add that */ + if (cnt < FAULTED_SIZE) + size += FAULTED_SIZE - cnt; - local_save_flags(irq_flags); - size = sizeof(*entry) + cnt + 2; /* possible \n added */ buffer = tr->trace_buffer.buffer; - event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - irq_flags, preempt_count()); - if (!event) { + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, + irq_flags, preempt_count()); + if (unlikely(!event)) /* Ring buffer disabled, return as if not open for write */ - written = -EBADF; - goto out_unlock; - } + return -EBADF; entry = ring_buffer_event_data(event); entry->ip = _THIS_IP_; - if (nr_pages == 2) { - len = PAGE_SIZE - offset; - memcpy(&entry->buf, map_page[0] + offset, len); - memcpy(&entry->buf[len], map_page[1], cnt - len); + len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt); + if (len) { + memcpy(&entry->buf, faulted, FAULTED_SIZE); + cnt = FAULTED_SIZE; + written = -EFAULT; } else - memcpy(&entry->buf, map_page[0] + offset, cnt); + written = cnt; + len = cnt; if (entry->buf[cnt - 1] != '\n') { entry->buf[cnt] = 
'\n'; @@ -5607,16 +5801,73 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, __buffer_unlock_commit(buffer, event); - written = cnt; + if (written > 0) + *fpos += written; - *fpos += written; + return written; +} + +/* Limit it for now to 3K (including tag) */ +#define RAW_DATA_MAX_SIZE (1024*3) + +static ssize_t +tracing_mark_raw_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *fpos) +{ + struct trace_array *tr = filp->private_data; + struct ring_buffer_event *event; + struct ring_buffer *buffer; + struct raw_data_entry *entry; + const char faulted[] = "<faulted>"; + unsigned long irq_flags; + ssize_t written; + int size; + int len; + +#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int)) + + if (tracing_disabled) + return -EINVAL; + + if (!(tr->trace_flags & TRACE_ITER_MARKERS)) + return -EINVAL; + + /* The marker must at least have a tag id */ + if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE) + return -EINVAL; + + if (cnt > TRACE_BUF_SIZE) + cnt = TRACE_BUF_SIZE; + + BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); + + local_save_flags(irq_flags); + size = sizeof(*entry) + cnt; + if (cnt < FAULT_SIZE_ID) + size += FAULT_SIZE_ID - cnt; + + buffer = tr->trace_buffer.buffer; + event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, + irq_flags, preempt_count()); + if (!event) + /* Ring buffer disabled, return as if not open for write */ + return -EBADF; + + entry = ring_buffer_event_data(event); + + len = __copy_from_user_inatomic(&entry->id, ubuf, cnt); + if (len) { + entry->id = -1; + memcpy(&entry->buf, faulted, FAULTED_SIZE); + written = -EFAULT; + } else + written = cnt; + + __buffer_unlock_commit(buffer, event); + + if (written > 0) + *fpos += written; - out_unlock: - for (i = nr_pages - 1; i >= 0; i--) { - kunmap_atomic(map_page[i]); - put_page(pages[i]); - } - out: return written; } @@ -5946,6 +6197,13 @@ static const struct file_operations tracing_mark_fops = { .release = tracing_release_generic_tr, }; 
+static const struct file_operations tracing_mark_raw_fops = { + .open = tracing_open_generic_tr, + .write = tracing_mark_raw_write, + .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, +}; + static const struct file_operations trace_clock_fops = { .open = tracing_clock_open, .read = seq_read, @@ -7215,6 +7473,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) trace_create_file("trace_marker", 0220, d_tracer, tr, &tracing_mark_fops); + trace_create_file("trace_marker_raw", 0220, d_tracer, + tr, &tracing_mark_raw_fops); + trace_create_file("trace_clock", 0644, d_tracer, tr, &trace_clock_fops); @@ -7752,6 +8013,8 @@ void __init trace_init(void) kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL); if (WARN_ON(!tracepoint_print_iter)) tracepoint_printk = 0; + else + static_key_enable(&tracepoint_printk_key.key); } tracer_alloc_buffers(); trace_event_init(); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index fd24b1f9ac43..c2234494f40c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -15,6 +15,7 @@ #include <linux/trace_events.h> #include <linux/compiler.h> #include <linux/trace_seq.h> +#include <linux/glob.h> #ifdef CONFIG_FTRACE_SYSCALLS #include <asm/unistd.h> /* For NR_SYSCALLS */ @@ -39,6 +40,7 @@ enum trace_type { TRACE_BLK, TRACE_BPUTS, TRACE_HWLAT, + TRACE_RAW_DATA, __TRACE_LAST_TYPE, }; @@ -330,6 +332,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ IF_ASSIGN(var, ent, struct bputs_entry, TRACE_BPUTS); \ IF_ASSIGN(var, ent, struct hwlat_entry, TRACE_HWLAT); \ + IF_ASSIGN(var, ent, struct raw_data_entry, TRACE_RAW_DATA);\ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ @@ -599,8 +602,8 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts); -void 
__buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event); +void trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer, + struct ring_buffer_event *event); int trace_empty(struct trace_iterator *iter); @@ -843,6 +846,17 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr) return 0; } #endif /* CONFIG_DYNAMIC_FTRACE */ + +extern unsigned int fgraph_max_depth; + +static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace) +{ + /* trace it when it is-nested-in or is a function enabled. */ + return !(trace->depth || ftrace_graph_addr(trace->func)) || + (trace->depth < 0) || + (fgraph_max_depth && trace->depth >= fgraph_max_depth); +} + #else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags) @@ -1257,6 +1271,7 @@ enum regex_type { MATCH_FRONT_ONLY, MATCH_MIDDLE_ONLY, MATCH_END_ONLY, + MATCH_GLOB, }; struct regex { diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c index 0f109c4130d3..e3b488825ae3 100644 --- a/kernel/trace/trace_benchmark.c +++ b/kernel/trace/trace_benchmark.c @@ -21,6 +21,8 @@ static u64 bm_stddev; static unsigned int bm_avg; static unsigned int bm_std; +static bool ok_to_run; + /* * This gets called in a loop recording the time it took to write * the tracepoint. What it writes is the time statistics of the last @@ -164,11 +166,21 @@ static int benchmark_event_kthread(void *arg) * When the benchmark tracepoint is enabled, it calls this * function and the thread that calls the tracepoint is created. 
*/ -void trace_benchmark_reg(void) +int trace_benchmark_reg(void) { + if (!ok_to_run) { + pr_warning("trace benchmark cannot be started via kernel command line\n"); + return -EBUSY; + } + bm_event_thread = kthread_run(benchmark_event_kthread, NULL, "event_benchmark"); - WARN_ON(!bm_event_thread); + if (!bm_event_thread) { + pr_warning("trace benchmark failed to create kernel thread\n"); + return -ENOMEM; + } + + return 0; } /* @@ -182,6 +194,7 @@ void trace_benchmark_unreg(void) return; kthread_stop(bm_event_thread); + bm_event_thread = NULL; strcpy(bm_str, "START"); bm_total = 0; @@ -196,3 +209,12 @@ void trace_benchmark_unreg(void) bm_avg = 0; bm_stddev = 0; } + +static __init int ok_to_run_trace_benchmark(void) +{ + ok_to_run = true; + + return 0; +} + +early_initcall(ok_to_run_trace_benchmark); diff --git a/kernel/trace/trace_benchmark.h b/kernel/trace/trace_benchmark.h index 3c1df1df4e29..ebdbfc2f2a64 100644 --- a/kernel/trace/trace_benchmark.h +++ b/kernel/trace/trace_benchmark.h @@ -6,7 +6,7 @@ #include <linux/tracepoint.h> -extern void trace_benchmark_reg(void); +extern int trace_benchmark_reg(void); extern void trace_benchmark_unreg(void); #define BENCHMARK_EVENT_STRLEN 128 diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 3a2a73716a5b..75489de546b6 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -81,7 +81,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) entry->correct = val == expect; if (!call_filter_check_discard(call, entry, buffer, event)) - __buffer_unlock_commit(buffer, event); + trace_buffer_unlock_commit_nostack(buffer, event); out: current->trace_recursion &= ~TRACE_BRANCH_BIT; diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index d1cc37e78f99..eb7396b7e7c3 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -244,6 +244,21 @@ FTRACE_ENTRY(print, print_entry, FILTER_OTHER ); +FTRACE_ENTRY(raw_data, 
raw_data_entry, + + TRACE_RAW_DATA, + + F_STRUCT( + __field( unsigned int, id ) + __dynamic_array( char, buf ) + ), + + F_printk("id:%04x %08x", + __entry->id, (int)__entry->buf[0]), + + FILTER_OTHER +); + FTRACE_ENTRY(bputs, bputs_entry, TRACE_BPUTS, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 03c0a48c3ac4..93116549a284 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -283,46 +283,6 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, } EXPORT_SYMBOL_GPL(trace_event_buffer_reserve); -static DEFINE_SPINLOCK(tracepoint_iter_lock); - -static void output_printk(struct trace_event_buffer *fbuffer) -{ - struct trace_event_call *event_call; - struct trace_event *event; - unsigned long flags; - struct trace_iterator *iter = tracepoint_print_iter; - - if (!iter) - return; - - event_call = fbuffer->trace_file->event_call; - if (!event_call || !event_call->event.funcs || - !event_call->event.funcs->trace) - return; - - event = &fbuffer->trace_file->event_call->event; - - spin_lock_irqsave(&tracepoint_iter_lock, flags); - trace_seq_init(&iter->seq); - iter->ent = fbuffer->entry; - event_call->event.funcs->trace(iter, 0, event); - trace_seq_putc(&iter->seq, 0); - printk("%s", iter->seq.buffer); - - spin_unlock_irqrestore(&tracepoint_iter_lock, flags); -} - -void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) -{ - if (tracepoint_printk) - output_printk(fbuffer); - - event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer, - fbuffer->event, fbuffer->entry, - fbuffer->flags, fbuffer->pc); -} -EXPORT_SYMBOL_GPL(trace_event_buffer_commit); - int trace_event_reg(struct trace_event_call *call, enum trace_reg type, void *data) { @@ -742,6 +702,7 @@ __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, struct trace_event_call *call; const char *name; int ret = -EINVAL; + int eret = 0; list_for_each_entry(file, &tr->events, list) { @@ -765,9 +726,17 @@ 
__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, if (event && strcmp(event, name) != 0) continue; - ftrace_event_enable_disable(file, set); + ret = ftrace_event_enable_disable(file, set); - ret = 0; + /* + * Save the first error and return that. Some events + * may still have been enabled, but let the user + * know that something went wrong. + */ + if (ret && !eret) + eret = ret; + + ret = eret; } return ret; @@ -2843,20 +2812,32 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) return -ENOMEM; } + entry = trace_create_file("enable", 0644, d_events, + tr, &ftrace_tr_enable_fops); + if (!entry) { + pr_warn("Could not create tracefs 'enable' entry\n"); + return -ENOMEM; + } + + /* There are not as crucial, just warn if they are not created */ + entry = tracefs_create_file("set_event_pid", 0644, parent, tr, &ftrace_set_event_pid_fops); + if (!entry) + pr_warn("Could not create tracefs 'set_event_pid' entry\n"); /* ring buffer internal formats */ - trace_create_file("header_page", 0444, d_events, - ring_buffer_print_page_header, - &ftrace_show_header_fops); - - trace_create_file("header_event", 0444, d_events, - ring_buffer_print_entry_header, - &ftrace_show_header_fops); + entry = trace_create_file("header_page", 0444, d_events, + ring_buffer_print_page_header, + &ftrace_show_header_fops); + if (!entry) + pr_warn("Could not create tracefs 'header_page' entry\n"); - trace_create_file("enable", 0644, d_events, - tr, &ftrace_tr_enable_fops); + entry = trace_create_file("header_event", 0444, d_events, + ring_buffer_print_entry_header, + &ftrace_show_header_fops); + if (!entry) + pr_warn("Could not create tracefs 'header_event' entry\n"); tr->event_dir = d_events; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 9daa9b3bc6d9..59a411ff60c7 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -108,12 +108,12 @@ static char *err_text[] = { }; 
struct opstack_op { - int op; + enum filter_op_ids op; struct list_head list; }; struct postfix_elt { - int op; + enum filter_op_ids op; char *operand; struct list_head list; }; @@ -145,34 +145,50 @@ struct pred_stack { /* If not of not match is equal to not of not, then it is a match */ #define DEFINE_COMPARISON_PRED(type) \ -static int filter_pred_##type(struct filter_pred *pred, void *event) \ +static int filter_pred_LT_##type(struct filter_pred *pred, void *event) \ { \ type *addr = (type *)(event + pred->offset); \ type val = (type)pred->val; \ - int match = 0; \ - \ - switch (pred->op) { \ - case OP_LT: \ - match = (*addr < val); \ - break; \ - case OP_LE: \ - match = (*addr <= val); \ - break; \ - case OP_GT: \ - match = (*addr > val); \ - break; \ - case OP_GE: \ - match = (*addr >= val); \ - break; \ - case OP_BAND: \ - match = (*addr & val); \ - break; \ - default: \ - break; \ - } \ - \ + int match = (*addr < val); \ return !!match == !pred->not; \ -} +} \ +static int filter_pred_LE_##type(struct filter_pred *pred, void *event) \ +{ \ + type *addr = (type *)(event + pred->offset); \ + type val = (type)pred->val; \ + int match = (*addr <= val); \ + return !!match == !pred->not; \ +} \ +static int filter_pred_GT_##type(struct filter_pred *pred, void *event) \ +{ \ + type *addr = (type *)(event + pred->offset); \ + type val = (type)pred->val; \ + int match = (*addr > val); \ + return !!match == !pred->not; \ +} \ +static int filter_pred_GE_##type(struct filter_pred *pred, void *event) \ +{ \ + type *addr = (type *)(event + pred->offset); \ + type val = (type)pred->val; \ + int match = (*addr >= val); \ + return !!match == !pred->not; \ +} \ +static int filter_pred_BAND_##type(struct filter_pred *pred, void *event) \ +{ \ + type *addr = (type *)(event + pred->offset); \ + type val = (type)pred->val; \ + int match = !!(*addr & val); \ + return match == !pred->not; \ +} \ +static const filter_pred_fn_t pred_funcs_##type[] = { \ + filter_pred_LT_##type, \ + 
filter_pred_LE_##type, \ + filter_pred_GT_##type, \ + filter_pred_GE_##type, \ + filter_pred_BAND_##type, \ +}; + +#define PRED_FUNC_START OP_LT #define DEFINE_EQUALITY_PRED(size) \ static int filter_pred_##size(struct filter_pred *pred, void *event) \ @@ -344,6 +360,12 @@ static int regex_match_end(char *str, struct regex *r, int len) return 0; } +static int regex_match_glob(char *str, struct regex *r, int len __maybe_unused) +{ + if (glob_match(r->pattern, str)) + return 1; + return 0; +} /** * filter_parse_regex - parse a basic regex * @buff: the raw regex @@ -380,14 +402,20 @@ enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not) if (!i) { *search = buff + 1; type = MATCH_END_ONLY; - } else { + } else if (i == len - 1) { if (type == MATCH_END_ONLY) type = MATCH_MIDDLE_ONLY; else type = MATCH_FRONT_ONLY; buff[i] = 0; break; + } else { /* pattern continues, use full glob */ + type = MATCH_GLOB; + break; } + } else if (strchr("[?\\", buff[i])) { + type = MATCH_GLOB; + break; } } @@ -420,6 +448,9 @@ static void filter_build_regex(struct filter_pred *pred) case MATCH_END_ONLY: r->match = regex_match_end; break; + case MATCH_GLOB: + r->match = regex_match_glob; + break; } pred->not ^= not; @@ -946,7 +977,7 @@ int filter_assign_type(const char *type) return FILTER_OTHER; } -static bool is_legal_op(struct ftrace_event_field *field, int op) +static bool is_legal_op(struct ftrace_event_field *field, enum filter_op_ids op) { if (is_string_field(field) && (op != OP_EQ && op != OP_NE && op != OP_GLOB)) @@ -957,8 +988,8 @@ static bool is_legal_op(struct ftrace_event_field *field, int op) return true; } -static filter_pred_fn_t select_comparison_fn(int op, int field_size, - int field_is_signed) +static filter_pred_fn_t select_comparison_fn(enum filter_op_ids op, + int field_size, int field_is_signed) { filter_pred_fn_t fn = NULL; @@ -967,33 +998,33 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size, if (op == OP_EQ || op == 
OP_NE) fn = filter_pred_64; else if (field_is_signed) - fn = filter_pred_s64; + fn = pred_funcs_s64[op - PRED_FUNC_START]; else - fn = filter_pred_u64; + fn = pred_funcs_u64[op - PRED_FUNC_START]; break; case 4: if (op == OP_EQ || op == OP_NE) fn = filter_pred_32; else if (field_is_signed) - fn = filter_pred_s32; + fn = pred_funcs_s32[op - PRED_FUNC_START]; else - fn = filter_pred_u32; + fn = pred_funcs_u32[op - PRED_FUNC_START]; break; case 2: if (op == OP_EQ || op == OP_NE) fn = filter_pred_16; else if (field_is_signed) - fn = filter_pred_s16; + fn = pred_funcs_s16[op - PRED_FUNC_START]; else - fn = filter_pred_u16; + fn = pred_funcs_u16[op - PRED_FUNC_START]; break; case 1: if (op == OP_EQ || op == OP_NE) fn = filter_pred_8; else if (field_is_signed) - fn = filter_pred_s8; + fn = pred_funcs_s8[op - PRED_FUNC_START]; else - fn = filter_pred_u8; + fn = pred_funcs_u8[op - PRED_FUNC_START]; break; } @@ -1166,7 +1197,8 @@ static inline int append_operand_char(struct filter_parse_state *ps, char c) return 0; } -static int filter_opstack_push(struct filter_parse_state *ps, int op) +static int filter_opstack_push(struct filter_parse_state *ps, + enum filter_op_ids op) { struct opstack_op *opstack_op; @@ -1200,7 +1232,7 @@ static int filter_opstack_top(struct filter_parse_state *ps) static int filter_opstack_pop(struct filter_parse_state *ps) { struct opstack_op *opstack_op; - int op; + enum filter_op_ids op; if (filter_opstack_empty(ps)) return OP_NONE; @@ -1245,7 +1277,7 @@ static int postfix_append_operand(struct filter_parse_state *ps, char *operand) return 0; } -static int postfix_append_op(struct filter_parse_state *ps, int op) +static int postfix_append_op(struct filter_parse_state *ps, enum filter_op_ids op) { struct postfix_elt *elt; @@ -1275,8 +1307,8 @@ static void postfix_clear(struct filter_parse_state *ps) static int filter_parse(struct filter_parse_state *ps) { + enum filter_op_ids op, top_op; int in_string = 0; - int op, top_op; char ch; while ((ch = 
infix_next(ps))) { @@ -1367,7 +1399,8 @@ parse_operand: static struct filter_pred *create_pred(struct filter_parse_state *ps, struct trace_event_call *call, - int op, char *operand1, char *operand2) + enum filter_op_ids op, + char *operand1, char *operand2) { struct ftrace_event_field *field; static struct filter_pred pred; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 4e480e870474..d56123cdcc89 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -65,7 +65,7 @@ struct fgraph_data { #define TRACE_GRAPH_INDENT 2 -static unsigned int max_depth; +unsigned int fgraph_max_depth; static struct tracer_opt trace_opts[] = { /* Display overruns? (for self-debug purpose) */ @@ -358,7 +358,7 @@ int __trace_graph_entry(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->graph_ent = *trace; if (!call_filter_check_discard(call, entry, buffer, event)) - __buffer_unlock_commit(buffer, event); + trace_buffer_unlock_commit_nostack(buffer, event); return 1; } @@ -384,10 +384,10 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) if (!ftrace_trace_task(tr)) return 0; - /* trace it when it is-nested-in or is a function enabled. 
*/ - if ((!(trace->depth || ftrace_graph_addr(trace->func)) || - ftrace_graph_ignore_irqs()) || (trace->depth < 0) || - (max_depth && trace->depth >= max_depth)) + if (ftrace_graph_ignore_func(trace)) + return 0; + + if (ftrace_graph_ignore_irqs()) return 0; /* @@ -469,7 +469,7 @@ void __trace_graph_return(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->ret = *trace; if (!call_filter_check_discard(call, entry, buffer, event)) - __buffer_unlock_commit(buffer, event); + trace_buffer_unlock_commit_nostack(buffer, event); } void trace_graph_return(struct ftrace_graph_ret *trace) @@ -842,6 +842,10 @@ print_graph_entry_leaf(struct trace_iterator *iter, cpu_data = per_cpu_ptr(data->cpu_data, cpu); + /* If a graph tracer ignored set_graph_notrace */ + if (call->depth < -1) + call->depth += FTRACE_NOTRACE_DEPTH; + /* * Comments display at + 1 to depth. Since * this is a leaf function, keep the comments @@ -850,7 +854,8 @@ print_graph_entry_leaf(struct trace_iterator *iter, cpu_data->depth = call->depth - 1; /* No need to keep this function around for this depth */ - if (call->depth < FTRACE_RETFUNC_DEPTH) + if (call->depth < FTRACE_RETFUNC_DEPTH && + !WARN_ON_ONCE(call->depth < 0)) cpu_data->enter_funcs[call->depth] = 0; } @@ -880,11 +885,16 @@ print_graph_entry_nested(struct trace_iterator *iter, struct fgraph_cpu_data *cpu_data; int cpu = iter->cpu; + /* If a graph tracer ignored set_graph_notrace */ + if (call->depth < -1) + call->depth += FTRACE_NOTRACE_DEPTH; + cpu_data = per_cpu_ptr(data->cpu_data, cpu); cpu_data->depth = call->depth; /* Save this function pointer to see if the exit matches */ - if (call->depth < FTRACE_RETFUNC_DEPTH) + if (call->depth < FTRACE_RETFUNC_DEPTH && + !WARN_ON_ONCE(call->depth < 0)) cpu_data->enter_funcs[call->depth] = call->func; } @@ -1114,7 +1124,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, */ cpu_data->depth = trace->depth - 1; - if (trace->depth < FTRACE_RETFUNC_DEPTH) { + if 
(trace->depth < FTRACE_RETFUNC_DEPTH && + !WARN_ON_ONCE(trace->depth < 0)) { if (cpu_data->enter_funcs[trace->depth] != trace->func) func_match = 0; cpu_data->enter_funcs[trace->depth] = 0; @@ -1489,7 +1500,7 @@ graph_depth_write(struct file *filp, const char __user *ubuf, size_t cnt, if (ret) return ret; - max_depth = val; + fgraph_max_depth = val; *ppos += cnt; @@ -1503,7 +1514,7 @@ graph_depth_read(struct file *filp, char __user *ubuf, size_t cnt, char buf[15]; /* More than enough to hold UINT_MAX + "\n"*/ int n; - n = sprintf(buf, "%d\n", max_depth); + n = sprintf(buf, "%d\n", fgraph_max_depth); return simple_read_from_buffer(ubuf, cnt, ppos, buf, n); } diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index b97286c48735..775569ec50d0 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -127,7 +127,7 @@ static void trace_hwlat_sample(struct hwlat_sample *sample) entry->nmi_count = sample->nmi_count; if (!call_filter_check_discard(call, entry, buffer, event)) - __buffer_unlock_commit(buffer, event); + trace_buffer_unlock_commit_nostack(buffer, event); } /* Macros to encapsulate the time capturing infrastructure */ diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 03cdff84d026..86654d7e1afe 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -175,6 +175,18 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) int ret; int pc; + if (ftrace_graph_ignore_func(trace)) + return 0; + /* + * Do not trace a function if it's filtered by set_graph_notrace. + * Make the index of ret stack negative to indicate that it should + * ignore further functions. But it needs its own ret stack entry + * to recover the original index in order to continue tracing after + * returning from the function. 
+ */ + if (ftrace_graph_notrace_addr(trace->func)) + return 1; + if (!func_prolog_dec(tr, &data, &flags)) return 0; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index eb6c9f1d3a93..a133ecd741e4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -73,6 +73,17 @@ static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk) return !!strchr(trace_kprobe_symbol(tk), ':'); } +static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) +{ + unsigned long nhit = 0; + int cpu; + + for_each_possible_cpu(cpu) + nhit += *per_cpu_ptr(tk->nhit, cpu); + + return nhit; +} + static int register_kprobe_event(struct trace_kprobe *tk); static int unregister_kprobe_event(struct trace_kprobe *tk); @@ -882,14 +893,10 @@ static const struct file_operations kprobe_events_ops = { static int probes_profile_seq_show(struct seq_file *m, void *v) { struct trace_kprobe *tk = v; - unsigned long nhit = 0; - int cpu; - - for_each_possible_cpu(cpu) - nhit += *per_cpu_ptr(tk->nhit, cpu); seq_printf(m, " %-44s %15lu %15lu\n", - trace_event_name(&tk->tp.call), nhit, + trace_event_name(&tk->tp.call), + trace_kprobe_nhit(tk), tk->rp.kp.nmissed); return 0; @@ -1354,18 +1361,18 @@ fs_initcall(init_kprobe_trace); #ifdef CONFIG_FTRACE_STARTUP_TEST - /* * The "__used" keeps gcc from removing the function symbol - * from the kallsyms table. + * from the kallsyms table. 
'noinline' makes sure that there + * isn't an inlined version used by the test method below */ -static __used int kprobe_trace_selftest_target(int a1, int a2, int a3, - int a4, int a5, int a6) +static __used __init noinline int +kprobe_trace_selftest_target(int a1, int a2, int a3, int a4, int a5, int a6) { return a1 + a2 + a3 + a4 + a5 + a6; } -static struct trace_event_file * +static struct __init trace_event_file * find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr) { struct trace_event_file *file; @@ -1443,12 +1450,25 @@ static __init int kprobe_trace_self_tests_init(void) ret = target(1, 2, 3, 4, 5, 6); + /* + * Not expecting an error here, the check is only to prevent the + * optimizer from removing the call to target() as otherwise there + * are no side-effects and the call is never performed. + */ + if (ret != 21) + warn++; + /* Disable trace points before removing it */ tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM); if (WARN_ON_ONCE(tk == NULL)) { pr_warn("error on getting test probe.\n"); warn++; } else { + if (trace_kprobe_nhit(tk) != 1) { + pr_warn("incorrect number of testprobe hits\n"); + warn++; + } + file = find_trace_probe_file(tk, top_trace_array()); if (WARN_ON_ONCE(file == NULL)) { pr_warn("error on getting probe file.\n"); @@ -1462,6 +1482,11 @@ static __init int kprobe_trace_self_tests_init(void) pr_warn("error on getting 2nd test probe.\n"); warn++; } else { + if (trace_kprobe_nhit(tk) != 1) { + pr_warn("incorrect number of testprobe2 hits\n"); + warn++; + } + file = find_trace_probe_file(tk, top_trace_array()); if (WARN_ON_ONCE(file == NULL)) { pr_warn("error on getting probe file.\n"); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 3fc20422c166..5d33a7352919 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1288,6 +1288,35 @@ static struct trace_event trace_print_event = { .funcs = &trace_print_funcs, }; +static enum print_line_t trace_raw_data(struct 
trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct raw_data_entry *field; + int i; + + trace_assign_type(field, iter->ent); + + trace_seq_printf(&iter->seq, "# %x buf:", field->id); + + for (i = 0; i < iter->ent_size - offsetof(struct raw_data_entry, buf); i++) + trace_seq_printf(&iter->seq, " %02x", + (unsigned char)field->buf[i]); + + trace_seq_putc(&iter->seq, '\n'); + + return trace_handle_return(&iter->seq); +} + +static struct trace_event_functions trace_raw_data_funcs = { + .trace = trace_raw_data, + .raw = trace_raw_data, +}; + +static struct trace_event trace_raw_data_event = { + .type = TRACE_RAW_DATA, + .funcs = &trace_raw_data_funcs, +}; + static struct trace_event *events[] __initdata = { &trace_fn_event, @@ -1299,6 +1328,7 @@ static struct trace_event *events[] __initdata = { &trace_bprint_event, &trace_print_event, &trace_hwlat_event, + &trace_raw_data_event, NULL }; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 9d4399b553a3..5d0bb025bb21 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -239,6 +239,18 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace) unsigned long flags; int pc, ret = 0; + if (ftrace_graph_ignore_func(trace)) + return 0; + /* + * Do not trace a function if it's filtered by set_graph_notrace. + * Make the index of ret stack negative to indicate that it should + * ignore further functions. But it needs its own ret stack entry + * to recover the original index in order to continue tracing after + * returning from the function. 
+ */ + if (ftrace_graph_notrace_addr(trace->func)) + return 1; + if (!func_prolog_preempt_disable(tr, &data, &pc)) return 0; @@ -790,6 +802,7 @@ static struct tracer wakeup_dl_tracer __read_mostly = #endif .open = wakeup_trace_open, .close = wakeup_trace_close, + .allow_instances = true, .use_max_tr = true, }; diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index d0639d917899..1f9a31f934a4 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -194,9 +194,13 @@ static int tracepoint_add_func(struct tracepoint *tp, struct tracepoint_func *func, int prio) { struct tracepoint_func *old, *tp_funcs; + int ret; - if (tp->regfunc && !static_key_enabled(&tp->key)) - tp->regfunc(); + if (tp->regfunc && !static_key_enabled(&tp->key)) { + ret = tp->regfunc(); + if (ret < 0) + return ret; + } tp_funcs = rcu_dereference_protected(tp->funcs, lockdep_is_held(&tracepoints_mutex)); @@ -529,7 +533,7 @@ EXPORT_SYMBOL_GPL(for_each_kernel_tracepoint); /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ static int sys_tracepoint_refcount; -void syscall_regfunc(void) +int syscall_regfunc(void) { struct task_struct *p, *t; @@ -541,6 +545,8 @@ void syscall_regfunc(void) read_unlock(&tasklist_lock); } sys_tracepoint_refcount++; + + return 0; } void syscall_unregfunc(void) diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 880a7d1d27d2..30e282d33d4d 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -79,7 +79,7 @@ static int simple_thread_fn(void *arg) static DEFINE_MUTEX(thread_mutex); -void foo_bar_reg(void) +int foo_bar_reg(void) { pr_info("Starting thread for foo_bar_fn\n"); /* @@ -90,6 +90,7 @@ void foo_bar_reg(void) mutex_lock(&thread_mutex); simple_tsk_fn = kthread_run(simple_thread_fn, NULL, "event-sample-fn"); mutex_unlock(&thread_mutex); + return 0; } void foo_bar_unreg(void) diff --git a/samples/trace_events/trace-events-sample.h 
b/samples/trace_events/trace-events-sample.h index d6b75bb495b3..76a75ab7a608 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -354,7 +354,7 @@ TRACE_EVENT_CONDITION(foo_bar_with_cond, TP_printk("foo %s %d", __get_str(foo), __entry->bar) ); -void foo_bar_reg(void); +int foo_bar_reg(void); void foo_bar_unreg(void); /* diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 5423a58d1b06..aeb34223167c 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -213,6 +213,59 @@ static int make_nop_x86(void *map, size_t const offset) return 0; } +static unsigned char ideal_nop4_arm_le[4] = { 0x00, 0x00, 0xa0, 0xe1 }; /* mov r0, r0 */ +static unsigned char ideal_nop4_arm_be[4] = { 0xe1, 0xa0, 0x00, 0x00 }; /* mov r0, r0 */ +static unsigned char *ideal_nop4_arm; + +static unsigned char bl_mcount_arm_le[4] = { 0xfe, 0xff, 0xff, 0xeb }; /* bl */ +static unsigned char bl_mcount_arm_be[4] = { 0xeb, 0xff, 0xff, 0xfe }; /* bl */ +static unsigned char *bl_mcount_arm; + +static unsigned char push_arm_le[4] = { 0x04, 0xe0, 0x2d, 0xe5 }; /* push {lr} */ +static unsigned char push_arm_be[4] = { 0xe5, 0x2d, 0xe0, 0x04 }; /* push {lr} */ +static unsigned char *push_arm; + +static unsigned char ideal_nop2_thumb_le[2] = { 0x00, 0xbf }; /* nop */ +static unsigned char ideal_nop2_thumb_be[2] = { 0xbf, 0x00 }; /* nop */ +static unsigned char *ideal_nop2_thumb; + +static unsigned char push_bl_mcount_thumb_le[6] = { 0x00, 0xb5, 0xff, 0xf7, 0xfe, 0xff }; /* push {lr}, bl */ +static unsigned char push_bl_mcount_thumb_be[6] = { 0xb5, 0x00, 0xf7, 0xff, 0xff, 0xfe }; /* push {lr}, bl */ +static unsigned char *push_bl_mcount_thumb; + +static int make_nop_arm(void *map, size_t const offset) +{ + char *ptr; + int cnt = 1; + int nop_size; + size_t off = offset; + + ptr = map + offset; + if (memcmp(ptr, bl_mcount_arm, 4) == 0) { + if (memcmp(ptr - 4, push_arm, 4) == 0) { + off -= 4; + cnt = 2; + } + ideal_nop = 
ideal_nop4_arm; + nop_size = 4; + } else if (memcmp(ptr - 2, push_bl_mcount_thumb, 6) == 0) { + cnt = 3; + nop_size = 2; + off -= 2; + ideal_nop = ideal_nop2_thumb; + } else + return -1; + + /* Convert to nop */ + ulseek(fd_map, off, SEEK_SET); + + do { + uwrite(fd_map, ideal_nop, nop_size); + } while (--cnt > 0); + + return 0; +} + static unsigned char ideal_nop4_arm64[4] = {0x1f, 0x20, 0x03, 0xd5}; static int make_nop_arm64(void *map, size_t const offset) { @@ -430,6 +483,11 @@ do_file(char const *const fname) w2 = w2rev; w8 = w8rev; } + ideal_nop4_arm = ideal_nop4_arm_le; + bl_mcount_arm = bl_mcount_arm_le; + push_arm = push_arm_le; + ideal_nop2_thumb = ideal_nop2_thumb_le; + push_bl_mcount_thumb = push_bl_mcount_thumb_le; break; case ELFDATA2MSB: if (*(unsigned char const *)&endian != 0) { @@ -438,6 +496,11 @@ do_file(char const *const fname) w2 = w2rev; w8 = w8rev; } + ideal_nop4_arm = ideal_nop4_arm_be; + bl_mcount_arm = bl_mcount_arm_be; + push_arm = push_arm_be; + ideal_nop2_thumb = ideal_nop2_thumb_be; + push_bl_mcount_thumb = push_bl_mcount_thumb_be; break; } /* end switch */ if (memcmp(ELFMAG, ehdr->e_ident, SELFMAG) != 0 @@ -463,6 +526,8 @@ do_file(char const *const fname) break; case EM_ARM: reltype = R_ARM_ABS32; altmcount = "__gnu_mcount_nc"; + make_nop = make_nop_arm; + rel_type_nop = R_ARM_NONE; break; case EM_AARCH64: reltype = R_AARCH64_ABS64; diff --git a/tools/testing/selftests/ftrace/.gitignore b/tools/testing/selftests/ftrace/.gitignore new file mode 100644 index 000000000000..98d8a5a63049 --- /dev/null +++ b/tools/testing/selftests/ftrace/.gitignore @@ -0,0 +1 @@ +logs diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 4c6a0bf8ba79..52e3c4df28d6 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -13,7 +13,8 @@ echo "Usage: ftracetest [options] [testcase(s)] [testcase-directory(s)]" echo " Options:" echo " -h|--help Show help message" 
echo " -k|--keep Keep passed test logs" -echo " -v|--verbose Show all stdout messages in testcases" +echo " -v|--verbose Increase verbosity of test messages" +echo " -vv Alias of -v -v (Show all results in stdout)" echo " -d|--debug Debug mode (trace all shell commands)" exit $1 } @@ -54,8 +55,9 @@ parse_opts() { # opts KEEP_LOG=1 shift 1 ;; - --verbose|-v) - VERBOSE=1 + --verbose|-v|-vv) + VERBOSE=$((VERBOSE + 1)) + [ $1 == '-vv' ] && VERBOSE=$((VERBOSE + 1)) shift 1 ;; --debug|-d) @@ -228,7 +230,7 @@ trap 'SIG_RESULT=$XFAIL' $SIG_XFAIL __run_test() { # testfile # setup PID and PPID, $$ is not updated. - (cd $TRACING_DIR; read PID _ < /proc/self/stat ; set -e; set -x; . $1) + (cd $TRACING_DIR; read PID _ < /proc/self/stat; set -e; set -x; initialize_ftrace; . $1) [ $? -ne 0 ] && kill -s $SIG_FAIL $SIG_PID } @@ -236,10 +238,11 @@ __run_test() { # testfile run_test() { # testfile local testname=`basename $1` local testlog=`mktemp $LOG_DIR/${testname}-log.XXXXXX` + export TMPDIR=`mktemp -d /tmp/ftracetest-dir.XXXXXX` testcase $1 echo "execute: "$1 > $testlog SIG_RESULT=0 - if [ $VERBOSE -ne 0 ]; then + if [ $VERBOSE -ge 2 ]; then __run_test $1 2>> $testlog | tee -a $testlog else __run_test $1 >> $testlog 2>&1 @@ -249,9 +252,10 @@ run_test() { # testfile # Remove test log if the test was done as it was expected. [ $KEEP_LOG -eq 0 ] && rm $testlog else - catlog $testlog + [ $VERBOSE -ge 1 ] && catlog $testlog TOTAL_RESULT=1 fi + rm -rf $TMPDIR } # load in the helper functions diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc new file mode 100644 index 000000000000..9dcd0ca1f49c --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc @@ -0,0 +1,49 @@ +#!/bin/sh +# description: ftrace - function glob filters + +# Make sure that function glob matching filter works. + +if ! 
grep -q function available_tracers; then + echo "no function tracer configured" + exit_unsupported +fi + +disable_tracing +clear_trace + +# filter by ?, schedule is always good +if ! echo "sch?dule" > set_ftrace_filter; then + # test for powerpc 64 + if ! echo ".sch?dule" > set_ftrace_filter; then + fail "can not enable schedule filter" + fi + cat set_ftrace_filter | grep '^.schedule$' +else + cat set_ftrace_filter | grep '^schedule$' +fi + +ftrace_filter_check() { # glob grep + echo "$1" > set_ftrace_filter + cut -f1 -d" " set_ftrace_filter > $TMPDIR/actual + cut -f1 -d" " available_filter_functions | grep "$2" > $TMPDIR/expected + DIFF=`diff $TMPDIR/actual $TMPDIR/expected` + test -z "$DIFF" +} + +# filter by *, front match +ftrace_filter_check '*schedule' '^.*schedule$' + +# filter by *, middle match +ftrace_filter_check '*schedule*' '^.*schedule.*$' + +# filter by *, end match +ftrace_filter_check 'schedule*' '^schedule.*$' + +# filter by *, both side match +ftrace_filter_check 'sch*ule' '^sch.*ule$' + +# filter by char class. +ftrace_filter_check '[Ss]y[Ss]_*' '^[Ss]y[Ss]_.*$' + +echo > set_ftrace_filter +enable_tracing diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index c37262f6c269..91de1a8e4f19 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -23,3 +23,31 @@ reset_trigger() { # reset all current setting triggers done } +reset_events_filter() { # reset all current setting filters + grep -v ^none events/*/*/filter | + while read line; do + echo 0 > `echo $line | cut -f1 -d:` + done +} + +disable_events() { + echo 0 > events/enable +} + +initialize_ftrace() { # Reset ftrace to initial-state +# As the initial state, ftrace will be set to nop tracer, +# no events, no triggers, no filters, no function filters, +# no probes, and tracing on. 
+ disable_tracing + reset_tracer + reset_trigger + reset_events_filter + disable_events + echo > set_event_pid # event tracer is always on + [ -f set_ftrace_filter ] && echo | tee set_ftrace_* + [ -f set_graph_function ] && echo | tee set_graph_* + [ -f stack_trace_filter ] && echo > stack_trace_filter + [ -f kprobe_events ] && echo > kprobe_events + [ -f uprobe_events ] && echo > uprobe_events + enable_tracing +} diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc new file mode 100644 index 000000000000..0a78705b43b2 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc @@ -0,0 +1,37 @@ +#!/bin/sh +# description: Kprobes event arguments with types + +[ -f kprobe_events ] || exit_unsupported # this is configurable + +grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue + +echo 0 > events/enable +echo > kprobe_events +enable_tracing + +echo 'p:testprobe _do_fork $stack0:s32 $stack0:u32 $stack0:x32 $stack0:b8@4/32' > kprobe_events +grep testprobe kprobe_events +test -d events/kprobes/testprobe + +echo 1 > events/kprobes/testprobe/enable +( echo "forked") +echo 0 > events/kprobes/testprobe/enable +ARGS=`tail -n 1 trace | sed -e 's/.* arg1=\(.*\) arg2=\(.*\) arg3=\(.*\) arg4=\(.*\)/\1 \2 \3 \4/'` + +check_types() { + X1=`printf "%x" $1 | tail -c 8` + X2=`printf "%x" $2` + X3=`printf "%x" $3` + test $X1 = $X2 + test $X2 = $X3 + test 0x$X3 = $3 + + B4=`printf "%x" $4` + B3=`echo -n $X3 | tail -c 3 | head -c 2` + test $B3 = $B4 +} +check_types $ARGS + +echo "-:testprobe" >> kprobe_events +clear_trace +test -d events/kprobes/testprobe && exit 1 || exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc index 0bf5085281f3..400e98b64948 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc +++ 
b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc @@ -56,7 +56,7 @@ echo "Test histogram with syscall modifier" echo 'hist:keys=id.syscall' > events/raw_syscalls/sys_exit/trigger for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done -grep "id: sys_" events/raw_syscalls/sys_exit/hist > /dev/null || \ +grep "id: \(unknown_\|sys_\)" events/raw_syscalls/sys_exit/hist > /dev/null || \ fail "syscall modifier on raw_syscalls/sys_exit did not work" diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc index f84b80d551a2..ed94f0c4e0e4 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc @@ -23,6 +23,11 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then exit_unsupported fi +if [ ! -f snapshot ]; then + echo "snapshot is not supported" + exit_unsupported +fi + reset_tracer do_reset |