diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-22 14:07:15 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-22 14:07:15 -0800 |
commit | c9584234709aff90fbf38f71904a068ee9e8bce3 (patch) | |
tree | 85d0d76c25767b49470eb3f6f2cb9142914c5dae /kernel | |
parent | 3a36281a17199737b468befb826d4a23eb774445 (diff) | |
parent | c85c9a2c6e368dc94907e63babb18a9788e5c9b6 (diff) | |
download | lwn-c9584234709aff90fbf38f71904a068ee9e8bce3.tar.gz lwn-c9584234709aff90fbf38f71904a068ee9e8bce3.zip |
Merge tag 'trace-v5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
Pull tracing updates from Steven Rostedt:
- Update to the way irqs and preemption is tracked via the trace event
PC field
- Fix handling of unregistering event failing due to allocate memory.
This is only triggered by failure injection, as it is pretty much
guaranteed to have less than a page allocation succeed.
- Do not show the useless "filter" or "enable" files for the "ftrace"
trace system, as they have no effect on doing anything.
- Add a warning if kprobes are registered more than once.
- Synthetic events now have their fields parsed by semicolons. Old
formats without semicolons will still work, but new features will
require them.
- New option to allow trace events to show %p without hashing in trace
file. The trace file can only be read by root, and reading the raw
event buffer did not have any pointers hashed, so this does not
expose anything new.
- New directory in tools called tools/tracing, where a new tool that
reads sequential latency reports from the ftrace latency tracers.
- Other minor fixes and cleanups.
* tag 'trace-v5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (33 commits)
kprobes: Fix to delay the kprobes jump optimization
tracing/tools: Add the latency-collector to tools directory
tracing: Make hash-ptr option default
tracing: Add ptr-hash option to show the hashed pointer value
tracing: Update the stage 3 of trace event macro comment
tracing: Show real address for trace event arguments
selftests/ftrace: Add '!event' synthetic event syntax check
selftests/ftrace: Update synthetic event syntax errors
tracing: Add a backward-compatibility check for synthetic event creation
tracing: Update synth command errors
tracing: Rework synthetic event command parsing
tracing/dynevent: Delegate parsing to create function
kprobes: Warn if the kprobe is reregistered
ftrace: Remove unused ftrace_force_update()
tracepoints: Code clean up
tracepoints: Do not punish non static call users
tracepoints: Remove unnecessary "data_args" macro parameter
tracing: Do not create "enable" or "filter" files for ftrace event subsystem
kernel: trace: preemptirq_delay_test: add cpu affinity
tracepoint: Do not fail unregistering a probe due to memory failure
...
Diffstat (limited to 'kernel')
27 files changed, 770 insertions, 564 deletions
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d5a3eb74a657..745f08fdd7a6 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -861,7 +861,6 @@ out: cpus_read_unlock(); } -#ifdef CONFIG_SYSCTL static void optimize_all_kprobes(void) { struct hlist_head *head; @@ -887,6 +886,7 @@ out: mutex_unlock(&kprobe_mutex); } +#ifdef CONFIG_SYSCTL static void unoptimize_all_kprobes(void) { struct hlist_head *head; @@ -1520,13 +1520,16 @@ valid: return ap; } -/* Return error if the kprobe is being re-registered */ -static inline int check_kprobe_rereg(struct kprobe *p) +/* + * Warn and return error if the kprobe is being re-registered since + * there must be a software bug. + */ +static inline int warn_kprobe_rereg(struct kprobe *p) { int ret = 0; mutex_lock(&kprobe_mutex); - if (__get_valid_kprobe(p)) + if (WARN_ON_ONCE(__get_valid_kprobe(p))) ret = -EINVAL; mutex_unlock(&kprobe_mutex); @@ -1614,7 +1617,7 @@ int register_kprobe(struct kprobe *p) return PTR_ERR(addr); p->addr = addr; - ret = check_kprobe_rereg(p); + ret = warn_kprobe_rereg(p); if (ret) return ret; @@ -1995,7 +1998,7 @@ int register_kretprobe(struct kretprobe *rp) return ret; /* If only rp->kp.addr is specified, check reregistering kprobes */ - if (rp->kp.addr && check_kprobe_rereg(&rp->kp)) + if (rp->kp.addr && warn_kprobe_rereg(&rp->kp)) return -EINVAL; if (kretprobe_blacklist_size) { @@ -2497,18 +2500,14 @@ static int __init init_kprobes(void) } } -#if defined(CONFIG_OPTPROBES) -#if defined(__ARCH_WANT_KPROBES_INSN_SLOT) - /* Init kprobe_optinsn_slots */ - kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; -#endif - /* By default, kprobes can be optimized */ - kprobes_allow_optimization = true; -#endif - /* By default, kprobes are armed */ kprobes_all_disarmed = false; +#if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT) + /* Init kprobe_optinsn_slots for allocation */ + kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; +#endif + err = arch_init_kprobes(); if (!err) err = register_die_notifier(&kprobe_exceptions_nb); @@ -2523,6 +2522,21 @@ static int __init init_kprobes(void) } early_initcall(init_kprobes); +#if defined(CONFIG_OPTPROBES) +static int __init init_optprobes(void) +{ + /* + * Enable kprobe optimization - this kicks the optimizer which + * depends on synchronize_rcu_tasks() and ksoftirqd, that is + * not spawned in early initcall. So delay the optimization. + */ + optimize_all_kprobes(); + + return 0; +} +subsys_initcall(init_optprobes); +#endif + #ifdef CONFIG_DEBUG_FS static void report_probe(struct seq_file *pi, struct kprobe *p, const char *sym, int offset, char *modname, struct kprobe *pp) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index c1a62ae7e812..799dbcfe65ad 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -545,7 +545,7 @@ config KPROBE_EVENTS_ON_NOTRACE using kprobe events. If kprobes can use ftrace instead of breakpoint, ftrace related - functions are protected from kprobe-events to prevent an infinit + functions are protected from kprobe-events to prevent an infinite recursion or any unexpected execution path which leads to a kernel crash. @@ -886,6 +886,10 @@ config PREEMPTIRQ_DELAY_TEST irq-disabled critical sections for 500us: modprobe preemptirq_delay_test test_mode=irq delay=500 burst_size=3 + What's more, if you want to attach the test on the cpu which the latency + tracer is running on, specify cpu_affinity=cpu_num at the end of the + command. + If unsure, say N config SYNTH_EVENT_GEN_TEST diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 9e9ee4945043..c286c13bd31a 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -72,17 +72,17 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, struct blk_io_trace *t; struct ring_buffer_event *event = NULL; struct trace_buffer *buffer = NULL; - int pc = 0; + unsigned int trace_ctx = 0; int cpu = smp_processor_id(); bool blk_tracer = blk_tracer_enabled; ssize_t cgid_len = cgid ? sizeof(cgid) : 0; if (blk_tracer) { buffer = blk_tr->array_buffer.buffer; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + len + cgid_len, - 0, pc); + trace_ctx); if (!event) return; t = ring_buffer_event_data(event); @@ -107,7 +107,7 @@ record_it: memcpy((void *) t + sizeof(*t) + cgid_len, data, len); if (blk_tracer) - trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx); } } @@ -222,8 +222,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, struct blk_io_trace *t; unsigned long flags = 0; unsigned long *sequence; + unsigned int trace_ctx = 0; pid_t pid; - int cpu, pc = 0; + int cpu; bool blk_tracer = blk_tracer_enabled; ssize_t cgid_len = cgid ? sizeof(cgid) : 0; @@ -252,10 +253,10 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, tracing_record_cmdline(current); buffer = blk_tr->array_buffer.buffer; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + pdu_len + cgid_len, - 0, pc); + trace_ctx); if (!event) return; t = ring_buffer_event_data(event); @@ -301,7 +302,7 @@ record_it: memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len); if (blk_tracer) { - trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx); return; } } diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index 312d1a0ca3b6..8c4ffd076162 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -21,13 +21,16 @@ static ulong delay = 100; static char test_mode[12] = "irq"; static uint burst_size = 1; +static int cpu_affinity = -1; module_param_named(delay, delay, ulong, 0444); module_param_string(test_mode, test_mode, 12, 0444); module_param_named(burst_size, burst_size, uint, 0444); +module_param_named(cpu_affinity, cpu_affinity, int, 0444); MODULE_PARM_DESC(delay, "Period in microseconds (100 us default)"); MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt, irq, or alternate (default irq)"); MODULE_PARM_DESC(burst_size, "The size of a burst (default 1)"); +MODULE_PARM_DESC(cpu_affinity, "Cpu num test is running on"); static struct completion done; @@ -36,7 +39,9 @@ static struct completion done; static void busy_wait(ulong time) { u64 start, end; + start = trace_clock_local(); + do { end = trace_clock_local(); if (kthread_should_stop()) @@ -47,6 +52,7 @@ static void busy_wait(ulong time) static __always_inline void irqoff_test(void) { unsigned long flags; + local_irq_save(flags); busy_wait(delay); local_irq_restore(flags); @@ -113,6 +119,14 @@ static int preemptirq_delay_run(void *data) { int i; int s = MIN(burst_size, NR_TEST_FUNCS); + struct cpumask cpu_mask; + + if (cpu_affinity > -1) { + cpumask_clear(&cpu_mask); + cpumask_set_cpu(cpu_affinity, &cpu_mask); + if (set_cpus_allowed_ptr(current, &cpu_mask)) + pr_err("cpu_affinity:%d, failed\n", cpu_affinity); + } for (i = 0; i < s; i++) (testfuncs[i])(i); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index ec08f948dd80..b9dad3500041 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1112,8 +1112,7 @@ static struct list_head *rb_list_head(struct list_head *list) * its flags will be non zero. */ static inline int -rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, - struct buffer_page *page, struct list_head *list) +rb_is_head_page(struct buffer_page *page, struct list_head *list) { unsigned long val; @@ -1142,8 +1141,7 @@ static bool rb_is_reader_page(struct buffer_page *page) /* * rb_set_list_to_head - set a list_head to be pointing to head. */ -static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer, - struct list_head *list) +static void rb_set_list_to_head(struct list_head *list) { unsigned long *ptr; @@ -1166,7 +1164,7 @@ static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer) /* * Set the previous list pointer to have the HEAD flag. */ - rb_set_list_to_head(cpu_buffer, head->list.prev); + rb_set_list_to_head(head->list.prev); } static void rb_list_head_clear(struct list_head *list) @@ -1241,8 +1239,7 @@ static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer, old_flag, RB_PAGE_NORMAL); } -static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, - struct buffer_page **bpage) +static inline void rb_inc_page(struct buffer_page **bpage) { struct list_head *p = rb_list_head((*bpage)->list.next); @@ -1274,11 +1271,11 @@ rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer) */ for (i = 0; i < 3; i++) { do { - if (rb_is_head_page(cpu_buffer, page, page->list.prev)) { + if (rb_is_head_page(page, page->list.prev)) { cpu_buffer->head_page = page; return page; } - rb_inc_page(cpu_buffer, &page); + rb_inc_page(&page); } while (page != head); } @@ -1824,7 +1821,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) cond_resched(); to_remove_page = tmp_iter_page; - rb_inc_page(cpu_buffer, &tmp_iter_page); + rb_inc_page(&tmp_iter_page); /* update the counters */ page_entries = rb_page_entries(to_remove_page); @@ -2062,10 +2059,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, put_online_cpus(); } else { - /* Make sure this CPU has been initialized */ - if (!cpumask_test_cpu(cpu_id, buffer->cpumask)) - goto out; - cpu_buffer = buffer->buffers[cpu_id]; if (nr_pages == cpu_buffer->nr_pages) @@ -2271,7 +2264,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) if (iter->head_page == cpu_buffer->reader_page) iter->head_page = rb_set_head_page(cpu_buffer); else - rb_inc_page(cpu_buffer, &iter->head_page); + rb_inc_page(&iter->head_page); iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp; iter->head = 0; @@ -2374,7 +2367,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, * want the outer most commit to reset it. */ new_head = next_page; - rb_inc_page(cpu_buffer, &new_head); + rb_inc_page(&new_head); ret = rb_head_page_set_head(cpu_buffer, new_head, next_page, RB_PAGE_NORMAL); @@ -2526,7 +2519,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, next_page = tail_page; - rb_inc_page(cpu_buffer, &next_page); + rb_inc_page(&next_page); /* * If for some reason, we had an interrupt storm that made @@ -2552,7 +2545,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, * the buffer, unless the commit page is still on the * reader page. */ - if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) { + if (rb_is_head_page(next_page, &tail_page->list)) { /* * If the commit is not on the reader page, then @@ -2583,7 +2576,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, * have filled up the buffer with events * from interrupts and such, and wrapped. * - * Note, if the tail page is also the on the + * Note, if the tail page is also on the * reader_page, we let it move out. */ if (unlikely((cpu_buffer->commit_page != @@ -2879,7 +2872,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) return; local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); - rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); + rb_inc_page(&cpu_buffer->commit_page); /* add barrier to keep gcc from optimizing too much */ barrier(); } @@ -3638,14 +3631,14 @@ rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer, * Because the commit page may be on the reader page we * start with the next page and check the end loop there. */ - rb_inc_page(cpu_buffer, &bpage); + rb_inc_page(&bpage); start = bpage; do { if (bpage->page == (void *)addr) { local_dec(&bpage->entries); return; } - rb_inc_page(cpu_buffer, &bpage); + rb_inc_page(&bpage); } while (bpage != start); /* commit not part of this buffer?? */ @@ -4367,7 +4360,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->pages = reader->list.prev; /* The reader page will be pointing to the new head */ - rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); + rb_set_list_to_head(&cpu_buffer->reader_page->list); /* * We want to make sure we read the overruns after we set up our @@ -4406,7 +4399,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) * Now make the new head point back to the reader page. */ rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list; - rb_inc_page(cpu_buffer, &cpu_buffer->head_page); + rb_inc_page(&cpu_buffer->head_page); local_inc(&cpu_buffer->pages_read); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b5815a022ecc..e295c413580e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -176,7 +176,7 @@ static union trace_eval_map_item *trace_eval_maps; int tracing_set_tracer(struct trace_array *tr, const char *buf); static void ftrace_trace_userstack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, int pc); + unsigned int trace_ctx); #define MAX_TRACER_SIZE 100 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; @@ -408,7 +408,8 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_export); TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \ TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \ TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \ - TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS) + TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \ + TRACE_ITER_HASH_PTR) /* trace_options that are only supported by global_trace */ #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \ @@ -454,6 +455,7 @@ static void __trace_array_put(struct trace_array *this_tr) /** * trace_array_put - Decrement the reference counter for this trace array. + * @this_tr : pointer to the trace array * * NOTE: Use this when we no longer need the trace array returned by * trace_array_get_by_name(). This ensures the trace array can be later @@ -530,6 +532,7 @@ trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) /** * trace_ignore_this_task - should a task be ignored for tracing * @filtered_pids: The list of pids to check + * @filtered_no_pids: The list of pids not to be traced * @task: The task that should be ignored if not filtered * * Checks if @task should be traced or not from @filtered_pids. @@ -780,7 +783,7 @@ u64 ftrace_now(int cpu) } /** - * tracing_is_enabled - Show if global_trace has been disabled + * tracing_is_enabled - Show if global_trace has been enabled * * Shows if the global trace has been enabled or not. It uses the * mirror flag "buffer_disabled" to be used in fast paths such as for @@ -905,23 +908,23 @@ static inline void trace_access_lock_init(void) #ifdef CONFIG_STACKTRACE static void __ftrace_trace_stack(struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs); + unsigned int trace_ctx, + int skip, struct pt_regs *regs); static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs); + unsigned int trace_ctx, + int skip, struct pt_regs *regs); #else static inline void __ftrace_trace_stack(struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { } static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned long trace_ctx, + int skip, struct pt_regs *regs) { } @@ -929,24 +932,24 @@ static inline void ftrace_trace_stack(struct trace_array *tr, static __always_inline void trace_event_setup(struct ring_buffer_event *event, - int type, unsigned long flags, int pc) + int type, unsigned int trace_ctx) { struct trace_entry *ent = ring_buffer_event_data(event); - tracing_generic_entry_update(ent, type, flags, pc); + tracing_generic_entry_update(ent, type, trace_ctx); } static __always_inline struct ring_buffer_event * __trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct ring_buffer_event *event; event = ring_buffer_lock_reserve(buffer, len); if (event != NULL) - trace_event_setup(event, type, flags, pc); + trace_event_setup(event, type, trace_ctx); return event; } @@ -1007,25 +1010,22 @@ int __trace_puts(unsigned long ip, const char *str, int size) struct ring_buffer_event *event; struct trace_buffer *buffer; struct print_entry *entry; - unsigned long irq_flags; + unsigned int trace_ctx; int alloc; - int pc; if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) return 0; - pc = preempt_count(); - if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; alloc = sizeof(*entry) + size + 2; /* possible \n added */ - local_save_flags(irq_flags); + trace_ctx = tracing_gen_ctx(); buffer = global_trace.array_buffer.buffer; ring_buffer_nest_start(buffer); - event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, - irq_flags, pc); + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, + trace_ctx); if (!event) { size = 0; goto out; @@ -1044,7 +1044,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) entry->buf[size] = '\0'; __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); + ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL); out: ring_buffer_nest_end(buffer); return size; @@ -1061,25 +1061,22 @@ int __trace_bputs(unsigned long ip, const char *str) struct ring_buffer_event *event; struct trace_buffer *buffer; struct bputs_entry *entry; - unsigned long irq_flags; + unsigned int trace_ctx; int size = sizeof(struct bputs_entry); int ret = 0; - int pc; if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) return 0; - pc = preempt_count(); - if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; - local_save_flags(irq_flags); + trace_ctx = tracing_gen_ctx(); buffer = global_trace.array_buffer.buffer; ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, - irq_flags, pc); + trace_ctx); if (!event) goto out; @@ -1088,7 +1085,7 @@ int __trace_bputs(unsigned long ip, const char *str) entry->str = str; __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); + ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL); ret = 1; out: @@ -2584,36 +2581,34 @@ enum print_line_t trace_handle_return(struct trace_seq *s) } EXPORT_SYMBOL_GPL(trace_handle_return); -void -tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, - unsigned long flags, int pc) +unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) { - struct task_struct *tsk = current; + unsigned int trace_flags = irqs_status; + unsigned int pc; - entry->preempt_count = pc & 0xff; - entry->pid = (tsk) ? tsk->pid : 0; - entry->type = type; - entry->flags = -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT - (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | -#else - TRACE_FLAG_IRQS_NOSUPPORT | -#endif - ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | - ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | - ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | - (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); + pc = preempt_count(); + + if (pc & NMI_MASK) + trace_flags |= TRACE_FLAG_NMI; + if (pc & HARDIRQ_MASK) + trace_flags |= TRACE_FLAG_HARDIRQ; + if (in_serving_softirq()) + trace_flags |= TRACE_FLAG_SOFTIRQ; + + if (tif_need_resched()) + trace_flags |= TRACE_FLAG_NEED_RESCHED; + if (test_preempt_need_resched()) + trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; + return (trace_flags << 16) | (pc & 0xff); } -EXPORT_SYMBOL_GPL(tracing_generic_entry_update); struct ring_buffer_event * trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, - unsigned long flags, int pc) + unsigned int trace_ctx) { - return __trace_buffer_lock_reserve(buffer, type, len, flags, pc); + return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx); } DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); @@ -2733,7 +2728,7 @@ struct ring_buffer_event * trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, struct trace_event_file *trace_file, int type, unsigned long len, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct ring_buffer_event *entry; int val; @@ -2746,15 +2741,15 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, /* Try to use the per cpu buffer first */ val = this_cpu_inc_return(trace_buffered_event_cnt); if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) { - trace_event_setup(entry, type, flags, pc); + trace_event_setup(entry, type, trace_ctx); entry->array[0] = len; return entry; } this_cpu_dec(trace_buffered_event_cnt); } - entry = __trace_buffer_lock_reserve(*current_rb, - type, len, flags, pc); + entry = __trace_buffer_lock_reserve(*current_rb, type, len, + trace_ctx); /* * If tracing is off, but we have triggers enabled * we still need to look at the event data. Use the temp_buffer @@ -2763,8 +2758,8 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, */ if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { *current_rb = temp_buffer; - entry = __trace_buffer_lock_reserve(*current_rb, - type, len, flags, pc); + entry = __trace_buffer_lock_reserve(*current_rb, type, len, + trace_ctx); } return entry; } @@ -2850,7 +2845,7 @@ void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer, fbuffer->event, fbuffer->entry, - fbuffer->flags, fbuffer->pc, fbuffer->regs); + fbuffer->trace_ctx, fbuffer->regs); } EXPORT_SYMBOL_GPL(trace_event_buffer_commit); @@ -2866,7 +2861,7 @@ EXPORT_SYMBOL_GPL(trace_event_buffer_commit); void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, - unsigned long flags, int pc, + unsigned int trace_ctx, struct pt_regs *regs) { __buffer_unlock_commit(buffer, event); @@ -2877,8 +2872,8 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, * and mmiotrace, but that's ok if they lose a function or * two. They are not that meaningful. */ - ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs); - ftrace_trace_userstack(tr, buffer, flags, pc); + ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs); + ftrace_trace_userstack(tr, buffer, trace_ctx); } /* @@ -2892,9 +2887,8 @@ trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, } void -trace_function(struct trace_array *tr, - unsigned long ip, unsigned long parent_ip, unsigned long flags, - int pc) +trace_function(struct trace_array *tr, unsigned long ip, unsigned long + parent_ip, unsigned int trace_ctx) { struct trace_event_call *call = &event_function; struct trace_buffer *buffer = tr->array_buffer.buffer; @@ -2902,7 +2896,7 @@ trace_function(struct trace_array *tr, struct ftrace_entry *entry; event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), - flags, pc); + trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); @@ -2936,8 +2930,8 @@ static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); static DEFINE_PER_CPU(int, ftrace_stack_reserve); static void __ftrace_trace_stack(struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { struct trace_event_call *call = &event_kernel_stack; struct ring_buffer_event *event; @@ -2984,7 +2978,7 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, size = nr_entries * sizeof(unsigned long); event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, - sizeof(*entry) + size, flags, pc); + sizeof(*entry) + size, trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3005,22 +2999,22 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { if (!(tr->trace_flags & TRACE_ITER_STACKTRACE)) return; - __ftrace_trace_stack(buffer, flags, skip, pc, regs); + __ftrace_trace_stack(buffer, trace_ctx, skip, regs); } -void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, - int pc) +void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, + int skip) { struct trace_buffer *buffer = tr->array_buffer.buffer; if (rcu_is_watching()) { - __ftrace_trace_stack(buffer, flags, skip, pc, NULL); + __ftrace_trace_stack(buffer, trace_ctx, skip, NULL); return; } @@ -3034,7 +3028,7 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, return; rcu_irq_enter_irqson(); - __ftrace_trace_stack(buffer, flags, skip, pc, NULL); + __ftrace_trace_stack(buffer, trace_ctx, skip, NULL); rcu_irq_exit_irqson(); } @@ -3044,19 +3038,15 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, */ void trace_dump_stack(int skip) { - unsigned long flags; - if (tracing_disabled || tracing_selftest_running) return; - local_save_flags(flags); - #ifndef CONFIG_UNWINDER_ORC /* Skip 1 to skip this function. */ skip++; #endif __ftrace_trace_stack(global_trace.array_buffer.buffer, - flags, skip, preempt_count(), NULL); + tracing_gen_ctx(), skip, NULL); } EXPORT_SYMBOL_GPL(trace_dump_stack); @@ -3065,7 +3055,7 @@ static DEFINE_PER_CPU(int, user_stack_count); static void ftrace_trace_userstack(struct trace_array *tr, - struct trace_buffer *buffer, unsigned long flags, int pc) + struct trace_buffer *buffer, unsigned int trace_ctx) { struct trace_event_call *call = &event_user_stack; struct ring_buffer_event *event; @@ -3092,7 +3082,7 @@ ftrace_trace_userstack(struct trace_array *tr, __this_cpu_inc(user_stack_count); event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) goto out_drop_count; entry = ring_buffer_event_data(event); @@ -3112,7 +3102,7 @@ ftrace_trace_userstack(struct trace_array *tr, #else /* CONFIG_USER_STACKTRACE_SUPPORT */ static void ftrace_trace_userstack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, int pc) + unsigned int trace_ctx) { } #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */ @@ -3242,9 +3232,9 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) struct trace_buffer *buffer; struct trace_array *tr = &global_trace; struct bprint_entry *entry; - unsigned long flags; + unsigned int trace_ctx; char *tbuffer; - int len = 0, size, pc; + int len = 0, size; if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; @@ -3252,7 +3242,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) /* Don't pollute graph traces with trace_vprintk internals */ pause_graph_tracing(); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); tbuffer = get_trace_buf(); @@ -3266,12 +3256,11 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) goto out_put; - local_save_flags(flags); size = sizeof(*entry) + sizeof(u32) * len; buffer = tr->array_buffer.buffer; ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, - flags, pc); + trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3281,7 +3270,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) memcpy(entry->buf, tbuffer, sizeof(u32) * len); if (!call_filter_check_discard(call, entry, buffer, event)) { __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL); + ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); } out: @@ -3304,9 +3293,9 @@ __trace_array_vprintk(struct trace_buffer *buffer, { struct trace_event_call *call = &event_print; struct ring_buffer_event *event; - int len = 0, size, pc; + int len = 0, size; struct print_entry *entry; - unsigned long flags; + unsigned int trace_ctx; char *tbuffer; if (tracing_disabled || tracing_selftest_running) @@ -3315,7 +3304,7 @@ __trace_array_vprintk(struct trace_buffer *buffer, /* Don't pollute graph traces with trace_vprintk internals */ pause_graph_tracing(); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); @@ -3327,11 +3316,10 @@ __trace_array_vprintk(struct trace_buffer *buffer, len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); - local_save_flags(flags); size = sizeof(*entry) + len + 1; ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - flags, pc); + trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3340,7 +3328,7 @@ __trace_array_vprintk(struct trace_buffer *buffer, memcpy(&entry->buf, tbuffer, len + 1); if (!call_filter_check_discard(call, entry, buffer, event)) { __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL); + ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL); } out: @@ -3543,6 +3531,65 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, return next; } +#define STATIC_FMT_BUF_SIZE 128 +static char static_fmt_buf[STATIC_FMT_BUF_SIZE]; + +static char *trace_iter_expand_format(struct trace_iterator *iter) +{ + char *tmp; + + if (iter->fmt == static_fmt_buf) + return NULL; + + tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE, + GFP_KERNEL); + if (tmp) { + iter->fmt_size += STATIC_FMT_BUF_SIZE; + iter->fmt = tmp; + } + + return tmp; +} + +const char *trace_event_format(struct trace_iterator *iter, const char *fmt) +{ + const char *p, *new_fmt; + char *q; + + if (WARN_ON_ONCE(!fmt)) + return fmt; + + if (iter->tr->trace_flags & TRACE_ITER_HASH_PTR) + return fmt; + + p = fmt; + new_fmt = q = iter->fmt; + while (*p) { + if (unlikely(q - new_fmt + 3 > iter->fmt_size)) { + if (!trace_iter_expand_format(iter)) + return fmt; + + q += iter->fmt - new_fmt; + new_fmt = iter->fmt; + } + + *q++ = *p++; + + /* Replace %p with %px */ + if (p[-1] == '%') { + if (p[0] == '%') { + *q++ = *p++; + } else if (p[0] == 'p' && !isalnum(p[1])) { + *q++ = *p++; + *q++ = 'x'; + } + } + } + *q = '\0'; + + return new_fmt; +} + #define STATIC_TEMP_BUF_SIZE 128 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); @@ -4336,6 +4383,16 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) iter->temp_size = 128; /* + * trace_event_printf() may need to modify given format + * string to replace %p with %px so that it shows real address + * instead of hash value. However, that is only for the event + * tracing, other tracer may not need. Defer the allocation + * until it is needed. + */ + iter->fmt = NULL; + iter->fmt_size = 0; + + /* * We make a copy of the current tracer to avoid concurrent * changes on it while we are reading. */ @@ -4486,6 +4543,7 @@ static int tracing_release(struct inode *inode, struct file *file) mutex_destroy(&iter->mutex); free_cpumask_var(iter->started); + kfree(iter->fmt); kfree(iter->temp); kfree(iter->trace); kfree(iter->buffer_iter); @@ -6653,7 +6711,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, enum event_trigger_type tt = ETT_NONE; struct trace_buffer *buffer; struct print_entry *entry; - unsigned long irq_flags; ssize_t written; int size; int len; @@ -6673,7 +6730,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); - local_save_flags(irq_flags); size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */ /* If less than "<faulted>", then make sure we can still add that */ @@ -6682,7 +6738,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - irq_flags, preempt_count()); + tracing_gen_ctx()); if (unlikely(!event)) /* Ring buffer disabled, return as if not open for write */ return -EBADF; @@ -6734,7 +6790,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, struct ring_buffer_event *event; struct trace_buffer *buffer; struct raw_data_entry *entry; - unsigned long irq_flags; ssize_t written; int size; int len; @@ -6756,14 +6811,13 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); - local_save_flags(irq_flags); size = sizeof(*entry) + cnt; if (cnt < FAULT_SIZE_ID) size += FAULT_SIZE_ID - cnt; buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, - irq_flags, preempt_count()); + tracing_gen_ctx()); if (!event) /* Ring buffer disabled, return as if not open for write */ return -EBADF; @@ -9348,9 +9402,11 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) /* Simulate the iterator */ trace_init_global_iter(&iter); - /* Can not use kmalloc for iter.temp */ + /* Can not use kmalloc for iter.temp and iter.fmt */ iter.temp = static_temp_buf; iter.temp_size = STATIC_TEMP_BUF_SIZE; + iter.fmt = static_fmt_buf; + iter.fmt_size = STATIC_FMT_BUF_SIZE; for_each_tracing_cpu(cpu) { atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); @@ -9429,30 +9485,11 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) } EXPORT_SYMBOL_GPL(ftrace_dump); -int trace_run_command(const char *buf, int (*createfn)(int, char **)) -{ - char **argv; - int argc, ret; - - argc = 0; - ret = 0; - argv = argv_split(GFP_KERNEL, buf, &argc); - if (!argv) - return -ENOMEM; - - if (argc) - ret = createfn(argc, argv); - - argv_free(argv); - - return ret; -} - #define WRITE_BUFSIZE 4096 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, - int (*createfn)(int, char **)) + int (*createfn)(const char *)) { char *kbuf, *buf, *tmp; int ret = 0; @@ -9500,7 +9537,7 @@ ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, if (tmp) *tmp = '\0'; - ret = trace_run_command(buf, createfn); + ret = createfn(buf); if (ret) goto out; buf += size; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e448d2da0b99..dec13ff66077 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -136,25 +136,6 @@ struct kretprobe_trace_entry_head { unsigned long ret_ip; }; -/* - * trace_flag_type is an enumeration that holds different - * states when a trace occurs. These are: - * IRQS_OFF - interrupts were disabled - * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags - * NEED_RESCHED - reschedule is requested - * HARDIRQ - inside an interrupt handler - * SOFTIRQ - inside a softirq handler - */ -enum trace_flag_type { - TRACE_FLAG_IRQS_OFF = 0x01, - TRACE_FLAG_IRQS_NOSUPPORT = 0x02, - TRACE_FLAG_NEED_RESCHED = 0x04, - TRACE_FLAG_HARDIRQ = 0x08, - TRACE_FLAG_SOFTIRQ = 0x10, - TRACE_FLAG_PREEMPT_RESCHED = 0x20, - TRACE_FLAG_NMI = 0x40, -}; - #define TRACE_BUF_SIZE 1024 struct trace_array; @@ -589,8 +570,7 @@ struct ring_buffer_event * trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, - unsigned long flags, - int pc); + unsigned int trace_ctx); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); @@ -601,6 +581,8 @@ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event); +const char *trace_event_format(struct trace_iterator *iter, const char *fmt); + int trace_empty(struct trace_iterator *iter); void *trace_find_next_entry_inc(struct trace_iterator *iter); @@ -615,11 +597,11 @@ unsigned long trace_total_entries(struct trace_array *tr); void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc); + unsigned int trace_ctx); void trace_graph_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc); + unsigned int trace_ctx); void trace_latency_header(struct seq_file *m); void trace_default_header(struct seq_file *m); void print_trace_header(struct seq_file *m, struct trace_iterator *iter); @@ -687,11 +669,10 @@ static inline void latency_fsnotify(struct trace_array *tr) { } #endif #ifdef CONFIG_STACKTRACE -void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, - int pc); +void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, int skip); #else -static inline void __trace_stack(struct trace_array *tr, unsigned long flags, - int skip, int pc) +static inline void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, + int skip) { } #endif /* CONFIG_STACKTRACE */ @@ -831,10 +812,10 @@ extern void graph_trace_open(struct trace_iterator *iter); extern void graph_trace_close(struct trace_iterator *iter); extern int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, - unsigned long flags, int pc); + unsigned int trace_ctx); extern void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, - unsigned long flags, int pc); + unsigned int trace_ctx); #ifdef CONFIG_DYNAMIC_FTRACE extern struct ftrace_hash __rcu *ftrace_graph_hash; @@ -1194,6 +1175,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, C(MARKERS, "markers"), \ C(EVENT_FORK, "event-fork"), \ C(PAUSE_ON_TRACE, "pause-on-trace"), \ + C(HASH_PTR, "hash-ptr"), /* Print hashed pointer */ \ FUNCTION_FLAGS \ FGRAPH_FLAGS \ STACK_FLAGS \ @@ -1297,15 +1279,15 @@ extern int call_filter_check_discard(struct trace_event_call *call, void *rec, void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, - unsigned long flags, int pc, + unsigned int trcace_ctx, struct pt_regs *regs); static inline void trace_buffer_unlock_commit(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, - unsigned long flags, int pc) + unsigned int trace_ctx) { - trace_buffer_unlock_commit_regs(tr, buffer, event, flags, pc, NULL); + trace_buffer_unlock_commit_regs(tr, buffer, event, trace_ctx, NULL); } DECLARE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); @@ -1366,8 +1348,7 @@ __event_trigger_test_discard(struct trace_event_file *file, * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself - * @irq_flags: The state of the interrupts at the start of the event - * @pc: The state of the preempt count at the start of the event. + * @trace_ctx: The tracing context flags. * * This is a helper function to handle triggers that require data * from the event itself. It also tests the event against filters and @@ -1377,12 +1358,12 @@ static inline void event_trigger_unlock_commit(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, - void *entry, unsigned long irq_flags, int pc) + void *entry, unsigned int trace_ctx) { enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) - trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); + trace_buffer_unlock_commit(file->tr, buffer, event, trace_ctx); if (tt) event_triggers_post_call(file, tt); @@ -1394,8 +1375,7 @@ event_trigger_unlock_commit(struct trace_event_file *file, * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself - * @irq_flags: The state of the interrupts at the start of the event - * @pc: The state of the preempt count at the start of the event. + * @trace_ctx: The tracing context flags. * * This is a helper function to handle triggers that require data * from the event itself. It also tests the event against filters and @@ -1408,14 +1388,14 @@ static inline void event_trigger_unlock_commit_regs(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, - void *entry, unsigned long irq_flags, int pc, + void *entry, unsigned int trace_ctx, struct pt_regs *regs) { enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) trace_buffer_unlock_commit_regs(file->tr, buffer, event, - irq_flags, pc, regs); + trace_ctx, regs); if (tt) event_triggers_post_call(file, tt); @@ -1830,10 +1810,9 @@ extern int tracing_set_cpumask(struct trace_array *tr, #define MAX_EVENT_NAME_LEN 64 -extern int trace_run_command(const char *buf, int (*createfn)(int, char**)); extern ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, - int (*createfn)(int, char**)); + int (*createfn)(const char *)); extern unsigned int err_pos(char *cmd, const char *str); extern void tracing_log_err(struct trace_array *tr, diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index eff099123aa2..e47fdb4c92fb 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -37,7 +37,7 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) struct ring_buffer_event *event; struct trace_branch *entry; unsigned long flags; - int pc; + unsigned int trace_ctx; const char *p; if (current->trace_recursion & TRACE_BRANCH_BIT) @@ -59,10 +59,10 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) if (atomic_read(&data->disabled)) goto out; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx_flags(flags); buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) goto out; diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 4f967d5cd917..dc971a68dda4 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -31,23 +31,31 @@ int dyn_event_register(struct dyn_event_operations *ops) return 0; } -int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) +int dyn_event_release(const char *raw_command, struct dyn_event_operations *type) { struct dyn_event *pos, *n; char *system = NULL, *event, *p; - int ret = -ENOENT; + int argc, ret = -ENOENT; + char **argv; + + argv = argv_split(GFP_KERNEL, raw_command, &argc); + if (!argv) + return -ENOMEM; if (argv[0][0] == '-') { - if (argv[0][1] != ':') - return -EINVAL; + if (argv[0][1] != ':') { + ret = -EINVAL; + goto out; + } event = &argv[0][2]; } else { event = strchr(argv[0], ':'); - if (!event) - return -EINVAL; + if (!event) { + ret = -EINVAL; + goto out; + } event++; } - argc--; argv++; p = strchr(event, '/'); if (p) { @@ -63,7 +71,7 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) if (type && type != pos->ops) continue; if (!pos->ops->match(system, event, - argc, (const char **)argv, pos)) + argc - 1, (const char **)argv + 1, pos)) continue; ret = pos->ops->free(pos); @@ -71,21 +79,22 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) break; } mutex_unlock(&event_mutex); - +out: + argv_free(argv); return ret; } -static int create_dyn_event(int argc, char **argv) +static int create_dyn_event(const char *raw_command) { struct dyn_event_operations *ops; int ret = -ENODEV; - if (argv[0][0] == '-' || argv[0][0] == '!') - return dyn_event_release(argc, argv, NULL); + if (raw_command[0] == '-' || raw_command[0] == '!') + return dyn_event_release(raw_command, NULL); mutex_lock(&dyn_event_ops_mutex); list_for_each_entry(ops, &dyn_event_ops_list, list) { - ret = ops->create(argc, (const char **)argv); + ret = ops->create(raw_command); if (!ret || ret != -ECANCELED) break; } diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h index d6f72dcb7269..7754936b57ee 100644 --- a/kernel/trace/trace_dynevent.h +++ b/kernel/trace/trace_dynevent.h @@ -39,7 +39,7 @@ struct dyn_event; */ struct dyn_event_operations { struct list_head list; - int (*create)(int argc, const char *argv[]); + int (*create)(const char *raw_command); int (*show)(struct seq_file *m, struct dyn_event *ev); bool (*is_busy)(struct dyn_event *ev); int (*free)(struct dyn_event *ev); @@ -97,7 +97,7 @@ void *dyn_event_seq_start(struct seq_file *m, loff_t *pos); void *dyn_event_seq_next(struct seq_file *m, void *v, loff_t *pos); void dyn_event_seq_stop(struct seq_file *m, void *v); int dyn_events_release_all(struct dyn_event_operations *type); -int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type); +int dyn_event_release(const char *raw_command, struct dyn_event_operations *type); /* * for_each_dyn_event - iterate over the dyn_event list diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index a71181655958..288ad2c274fb 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -421,11 +421,8 @@ NOKPROBE_SYMBOL(perf_trace_buf_alloc); void perf_trace_buf_update(void *record, u16 type) { struct trace_entry *entry = record; - int pc = preempt_count(); - unsigned long flags; - local_save_flags(flags); - tracing_generic_entry_update(entry, type, flags, pc); + tracing_generic_entry_update(entry, type, tracing_gen_ctx()); } NOKPROBE_SYMBOL(perf_trace_buf_update); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d387b774ceeb..a3563afd412d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -258,22 +258,19 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, trace_event_ignore_this_pid(trace_file)) return NULL; - local_save_flags(fbuffer->flags); - fbuffer->pc = preempt_count(); /* * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables * preemption (adding one to the preempt_count). Since we are * interested in the preempt_count at the time the tracepoint was * hit, we need to subtract one to offset the increment. */ - if (IS_ENABLED(CONFIG_PREEMPTION)) - fbuffer->pc--; + fbuffer->trace_ctx = tracing_gen_ctx_dec(); fbuffer->trace_file = trace_file; fbuffer->event = trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file, event_call->event.type, len, - fbuffer->flags, fbuffer->pc); + fbuffer->trace_ctx); if (!fbuffer->event) return NULL; @@ -2101,16 +2098,20 @@ event_subsystem_dir(struct trace_array *tr, const char *name, dir->subsystem = system; file->system = dir; - entry = tracefs_create_file("filter", 0644, dir->entry, dir, - &ftrace_subsystem_filter_fops); - if (!entry) { - kfree(system->filter); - system->filter = NULL; - pr_warn("Could not create tracefs '%s/filter' entry\n", name); - } + /* the ftrace system is special, do not create enable or filter files */ + if (strcmp(name, "ftrace") != 0) { - trace_create_file("enable", 0644, dir->entry, dir, - &ftrace_system_enable_fops); + entry = tracefs_create_file("filter", 0644, dir->entry, dir, + &ftrace_subsystem_filter_fops); + if (!entry) { + kfree(system->filter); + system->filter = NULL; + pr_warn("Could not create tracefs '%s/filter' entry\n", name); + } + + trace_create_file("enable", 0644, dir->entry, dir, + &ftrace_system_enable_fops); + } list_add(&dir->list, &tr->systems); @@ -3679,12 +3680,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, struct trace_buffer *buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; - unsigned long flags; + unsigned int trace_ctx; long disabled; int cpu; - int pc; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); cpu = raw_smp_processor_id(); disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); @@ -3692,11 +3692,9 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, if (disabled != 1) goto out; - local_save_flags(flags); - event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file, TRACE_FN, sizeof(*entry), - flags, pc); + trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3704,7 +3702,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, entry->parent_ip = parent_ip; event_trigger_unlock_commit(&event_trace_file, buffer, event, - entry, flags, pc); + entry, trace_ctx); out: atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); preempt_enable_notrace(); diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c index 22bcf7c51d1e..c188045c5f97 100644 --- a/kernel/trace/trace_events_inject.c +++ b/kernel/trace/trace_events_inject.c @@ -192,7 +192,6 @@ static void *trace_alloc_entry(struct trace_event_call *call, int *size) static int parse_entry(char *str, struct trace_event_call *call, void **pentry) { struct ftrace_event_field *field; - unsigned long irq_flags; void *entry = NULL; int entry_size; u64 val = 0; @@ -203,9 +202,8 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry) if (!entry) return -ENOMEM; - local_save_flags(irq_flags); - tracing_generic_entry_update(entry, call->event.type, irq_flags, - preempt_count()); + tracing_generic_entry_update(entry, call->event.type, + tracing_gen_ctx()); while ((len = parse_field(str, call, &field, &val)) > 0) { if (is_function_field(field)) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 5a8bc0b421f1..2979a96595b4 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -23,13 +23,14 @@ #undef ERRORS #define ERRORS \ C(BAD_NAME, "Illegal name"), \ - C(CMD_INCOMPLETE, "Incomplete command"), \ + C(INVALID_CMD, "Command must be of the form: <name> field[;field] ..."),\ + C(INVALID_DYN_CMD, "Command must be of the form: s or -:[synthetic/]<name> field[;field] ..."),\ C(EVENT_EXISTS, "Event already exists"), \ C(TOO_MANY_FIELDS, "Too many fields"), \ C(INCOMPLETE_TYPE, "Incomplete type"), \ C(INVALID_TYPE, "Invalid type"), \ - C(INVALID_FIELD, "Invalid field"), \ - C(CMD_TOO_LONG, "Command too long"), + C(INVALID_FIELD, "Invalid field"), \ + C(INVALID_ARRAY_SPEC, "Invalid array specification"), #undef C #define C(a, b) SYNTH_ERR_##a @@ -48,7 +49,7 @@ static int errpos(const char *str) return err_pos(last_cmd, str); } -static void last_cmd_set(char *str) +static void last_cmd_set(const char *str) { if (!str) return; @@ -62,7 +63,7 @@ static void synth_err(u8 err_type, u8 err_pos) err_type, err_pos); } -static int create_synth_event(int argc, const char **argv); +static int create_synth_event(const char *raw_command); static int synth_event_show(struct seq_file *m, struct dyn_event *ev); static int synth_event_release(struct dyn_event *ev); static bool synth_event_is_busy(struct dyn_event *ev); @@ -579,18 +580,32 @@ static void free_synth_field(struct synth_field *field) kfree(field); } -static struct synth_field *parse_synth_field(int argc, const char **argv, - int *consumed) +static int check_field_version(const char *prefix, const char *field_type, + const char *field_name) +{ + /* + * For backward compatibility, the old synthetic event command + * format did not require semicolons, and in order to not + * break user space, that old format must still work. If a new + * feature is added, then the format that uses the new feature + * will be required to have semicolons, as nothing that uses + * the old format would be using the new, yet to be created, + * feature. When a new feature is added, this will detect it, + * and return a number greater than 1, and require the format + * to use semicolons. + */ + return 1; +} + +static struct synth_field *parse_synth_field(int argc, char **argv, + int *consumed, int *field_version) { - struct synth_field *field; const char *prefix = NULL, *field_type = argv[0], *field_name, *array; + struct synth_field *field; int len, ret = -ENOMEM; struct seq_buf s; ssize_t size; - if (field_type[0] == ';') - field_type++; - if (!strcmp(field_type, "unsigned")) { if (argc < 3) { synth_err(SYNTH_ERR_INCOMPLETE_TYPE, errpos(field_type)); @@ -599,12 +614,19 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, prefix = "unsigned "; field_type = argv[1]; field_name = argv[2]; - *consumed = 3; + *consumed += 3; } else { field_name = argv[1]; - *consumed = 2; + *consumed += 2; } + if (!field_name) { + synth_err(SYNTH_ERR_INVALID_FIELD, errpos(field_type)); + return ERR_PTR(-EINVAL); + } + + *field_version = check_field_version(prefix, field_type, field_name); + field = kzalloc(sizeof(*field), GFP_KERNEL); if (!field) return ERR_PTR(-ENOMEM); @@ -613,8 +635,6 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, array = strchr(field_name, '['); if (array) len -= strlen(array); - else if (field_name[len - 1] == ';') - len--; field->name = kmemdup_nul(field_name, len, GFP_KERNEL); if (!field->name) @@ -626,8 +646,6 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, goto free; } - if (field_type[0] == ';') - field_type++; len = strlen(field_type) + 1; if (array) @@ -644,11 +662,8 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, if (prefix) seq_buf_puts(&s, prefix); seq_buf_puts(&s, field_type); - if (array) { + if (array) seq_buf_puts(&s, array); - if (s.buffer[s.len - 1] == ';') - s.len--; - } if (WARN_ON_ONCE(!seq_buf_buffer_left(&s))) goto free; @@ -656,7 +671,10 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, size = synth_field_size(field->type); if (size < 0) { - synth_err(SYNTH_ERR_INVALID_TYPE, errpos(field_type)); + if (array) + synth_err(SYNTH_ERR_INVALID_ARRAY_SPEC, errpos(field_name)); + else + synth_err(SYNTH_ERR_INVALID_TYPE, errpos(field_type)); ret = -EINVAL; goto free; } else if (size == 0) { @@ -1160,46 +1178,13 @@ int synth_event_gen_cmd_array_start(struct dynevent_cmd *cmd, const char *name, } EXPORT_SYMBOL_GPL(synth_event_gen_cmd_array_start); -static int save_cmdstr(int argc, const char *name, const char **argv) -{ - struct seq_buf s; - char *buf; - int i; - - buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - seq_buf_init(&s, buf, MAX_DYNEVENT_CMD_LEN); - - seq_buf_puts(&s, name); - - for (i = 0; i < argc; i++) { - seq_buf_putc(&s, ' '); - seq_buf_puts(&s, argv[i]); - } - - if (!seq_buf_buffer_left(&s)) { - synth_err(SYNTH_ERR_CMD_TOO_LONG, 0); - kfree(buf); - return -EINVAL; - } - buf[s.len] = 0; - last_cmd_set(buf); - - kfree(buf); - return 0; -} - -static int __create_synth_event(int argc, const char *name, const char **argv) +static int __create_synth_event(const char *name, const char *raw_fields) { + char **argv, *field_str, *tmp_fields, *saved_fields = NULL; struct synth_field *field, *fields[SYNTH_FIELDS_MAX]; + int consumed, cmd_version = 1, n_fields_this_loop; + int i, argc, n_fields = 0, ret = 0; struct synth_event *event = NULL; - int i, consumed = 0, n_fields = 0, ret = 0; - - ret = save_cmdstr(argc, name, argv); - if (ret) - return ret; /* * Argument syntax: @@ -1208,46 +1193,99 @@ static int __create_synth_event(int argc, const char *name, const char **argv) * where 'field' = type field_name */ - if (name[0] == '\0' || argc < 1) { - synth_err(SYNTH_ERR_CMD_INCOMPLETE, 0); + if (name[0] == '\0') { + synth_err(SYNTH_ERR_INVALID_CMD, 0); return -EINVAL; } - mutex_lock(&event_mutex); - if (!is_good_name(name)) { synth_err(SYNTH_ERR_BAD_NAME, errpos(name)); - ret = -EINVAL; - goto out; + return -EINVAL; } + mutex_lock(&event_mutex); + event = find_synth_event(name); if (event) { synth_err(SYNTH_ERR_EVENT_EXISTS, errpos(name)); ret = -EEXIST; - goto out; + goto err; } - for (i = 0; i < argc - 1; i++) { - if (strcmp(argv[i], ";") == 0) - continue; - if (n_fields == SYNTH_FIELDS_MAX) { - synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0); - ret = -EINVAL; + tmp_fields = saved_fields = kstrdup(raw_fields, GFP_KERNEL); + if (!tmp_fields) { + ret = -ENOMEM; + goto err; + } + + while ((field_str = strsep(&tmp_fields, ";")) != NULL) { + argv = argv_split(GFP_KERNEL, field_str, &argc); + if (!argv) { + ret = -ENOMEM; goto err; } - field = parse_synth_field(argc - i, &argv[i], &consumed); - if (IS_ERR(field)) { - ret = PTR_ERR(field); + if (!argc) + continue; + + n_fields_this_loop = 0; + consumed = 0; + while (argc > consumed) { + int field_version; + + field = parse_synth_field(argc - consumed, + argv + consumed, &consumed, + &field_version); + if (IS_ERR(field)) { + argv_free(argv); + ret = PTR_ERR(field); + goto err; + } + + /* + * Track the highest version of any field we + * found in the command. + */ + if (field_version > cmd_version) + cmd_version = field_version; + + /* + * Now sort out what is and isn't valid for + * each supported version. + * + * If we see more than 1 field per loop, it + * means we have multiple fields between + * semicolons, and that's something we no + * longer support in a version 2 or greater + * command. + */ + if (cmd_version > 1 && n_fields_this_loop >= 1) { + synth_err(SYNTH_ERR_INVALID_CMD, errpos(field_str)); + ret = -EINVAL; + goto err; + } + + fields[n_fields++] = field; + if (n_fields == SYNTH_FIELDS_MAX) { + synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0); + ret = -EINVAL; + goto err; + } + + n_fields_this_loop++; + } + + if (consumed < argc) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); + ret = -EINVAL; goto err; } - fields[n_fields++] = field; - i += consumed - 1; + + argv_free(argv); } - if (i < argc && strcmp(argv[i], ";") != 0) { - synth_err(SYNTH_ERR_INVALID_FIELD, errpos(argv[i])); + if (n_fields == 0) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); ret = -EINVAL; goto err; } @@ -1266,6 +1304,8 @@ static int __create_synth_event(int argc, const char *name, const char **argv) out: mutex_unlock(&event_mutex); + kfree(saved_fields); + return ret; err: for (i = 0; i < n_fields; i++) @@ -1383,19 +1423,79 @@ int synth_event_delete(const char *event_name) } EXPORT_SYMBOL_GPL(synth_event_delete); -static int create_or_delete_synth_event(int argc, char **argv) +static int check_command(const char *raw_command) { - const char *name = argv[0]; - int ret; + char **argv = NULL, *cmd, *saved_cmd, *name_and_field; + int argc, ret = 0; + + cmd = saved_cmd = kstrdup(raw_command, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + name_and_field = strsep(&cmd, ";"); + if (!name_and_field) { + ret = -EINVAL; + goto free; + } + + if (name_and_field[0] == '!') + goto free; + + argv = argv_split(GFP_KERNEL, name_and_field, &argc); + if (!argv) { + ret = -ENOMEM; + goto free; + } + argv_free(argv); + + if (argc < 3) + ret = -EINVAL; +free: + kfree(saved_cmd); + + return ret; +} + +static int create_or_delete_synth_event(const char *raw_command) +{ + char *name = NULL, *fields, *p; + int ret = 0; + + raw_command = skip_spaces(raw_command); + if (raw_command[0] == '\0') + return ret; + + last_cmd_set(raw_command); + + ret = check_command(raw_command); + if (ret) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); + return ret; + } + + p = strpbrk(raw_command, " \t"); + if (!p && raw_command[0] != '!') { + synth_err(SYNTH_ERR_INVALID_CMD, 0); + ret = -EINVAL; + goto free; + } + + name = kmemdup_nul(raw_command, p ? p - raw_command : strlen(raw_command), GFP_KERNEL); + if (!name) + return -ENOMEM; - /* trace_run_command() ensures argc != 0 */ if (name[0] == '!') { ret = synth_event_delete(name + 1); - return ret; + goto free; } - ret = __create_synth_event(argc - 1, name, (const char **)argv + 1); - return ret == -ECANCELED ? -EINVAL : ret; + fields = skip_spaces(p); + + ret = __create_synth_event(name, fields); +free: + kfree(name); + + return ret; } static int synth_event_run_command(struct dynevent_cmd *cmd) @@ -1403,7 +1503,7 @@ static int synth_event_run_command(struct dynevent_cmd *cmd) struct synth_event *se; int ret; - ret = trace_run_command(cmd->seq.buffer, create_or_delete_synth_event); + ret = create_or_delete_synth_event(cmd->seq.buffer); if (ret) return ret; @@ -1939,10 +2039,27 @@ int synth_event_trace_end(struct synth_event_trace_state *trace_state) } EXPORT_SYMBOL_GPL(synth_event_trace_end); -static int create_synth_event(int argc, const char **argv) +static int create_synth_event(const char *raw_command) { - const char *name = argv[0]; - int len; + char *fields, *p; + const char *name; + int len, ret = 0; + + raw_command = skip_spaces(raw_command); + if (raw_command[0] == '\0') + return ret; + + last_cmd_set(raw_command); + + p = strpbrk(raw_command, " \t"); + if (!p) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); + return -EINVAL; + } + + fields = skip_spaces(p); + + name = raw_command; if (name[0] != 's' || name[1] != ':') return -ECANCELED; @@ -1951,11 +2068,30 @@ static int create_synth_event(int argc, const char **argv) /* This interface accepts group name prefix */ if (strchr(name, '/')) { len = str_has_prefix(name, SYNTH_SYSTEM "/"); - if (len == 0) + if (len == 0) { + synth_err(SYNTH_ERR_INVALID_DYN_CMD, 0); return -EINVAL; + } name += len; } - return __create_synth_event(argc - 1, name, argv + 1); + + len = name - raw_command; + + ret = check_command(raw_command + len); + if (ret) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); + return ret; + } + + name = kmemdup_nul(raw_command + len, p - raw_command - len, GFP_KERNEL); + if (!name) + return -ENOMEM; + + ret = __create_synth_event(name, fields); + + kfree(name); + + return ret; } static int synth_event_release(struct dyn_event *ev) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index c5095dd28e20..f93723ca66bc 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -106,8 +106,7 @@ static int function_trace_init(struct trace_array *tr) ftrace_init_array_ops(tr, func); - tr->array_buffer.cpu = get_cpu(); - put_cpu(); + tr->array_buffer.cpu = raw_smp_processor_id(); tracing_start_cmdline_record(); tracing_start_function_trace(tr); @@ -132,10 +131,9 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, { struct trace_array *tr = op->private; struct trace_array_cpu *data; - unsigned long flags; + unsigned int trace_ctx; int bit; int cpu; - int pc; if (unlikely(!tr->function_enabled)) return; @@ -144,15 +142,14 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); cpu = smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); - if (!atomic_read(&data->disabled)) { - local_save_flags(flags); - trace_function(tr, ip, parent_ip, flags, pc); - } + if (!atomic_read(&data->disabled)) + trace_function(tr, ip, parent_ip, trace_ctx); + ftrace_test_recursion_unlock(bit); preempt_enable_notrace(); } @@ -184,7 +181,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, unsigned long flags; long disabled; int cpu; - int pc; + unsigned int trace_ctx; if (unlikely(!tr->function_enabled)) return; @@ -199,9 +196,9 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { - pc = preempt_count(); - trace_function(tr, ip, parent_ip, flags, pc); - __trace_stack(tr, flags, STACK_SKIP, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + trace_function(tr, ip, parent_ip, trace_ctx); + __trace_stack(tr, trace_ctx, STACK_SKIP); } atomic_dec(&data->disabled); @@ -404,13 +401,11 @@ ftrace_traceoff(unsigned long ip, unsigned long parent_ip, static __always_inline void trace_stack(struct trace_array *tr) { - unsigned long flags; - int pc; + unsigned int trace_ctx; - local_save_flags(flags); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); - __trace_stack(tr, flags, FTRACE_STACK_SKIP, pc); + __trace_stack(tr, trace_ctx, FTRACE_STACK_SKIP); } static void diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index d874dec87131..0aa6e6faa943 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -96,8 +96,7 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration, int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, - unsigned long flags, - int pc) + unsigned int trace_ctx) { struct trace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; @@ -105,7 +104,7 @@ int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) return 0; entry = ring_buffer_event_data(event); @@ -129,10 +128,10 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) struct trace_array *tr = graph_array; struct trace_array_cpu *data; unsigned long flags; + unsigned int trace_ctx; long disabled; int ret; int cpu; - int pc; if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) return 0; @@ -174,8 +173,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { - pc = preempt_count(); - ret = __trace_graph_entry(tr, trace, flags, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + ret = __trace_graph_entry(tr, trace, trace_ctx); } else { ret = 0; } @@ -188,7 +187,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) static void __trace_graph_function(struct trace_array *tr, - unsigned long ip, unsigned long flags, int pc) + unsigned long ip, unsigned int trace_ctx) { u64 time = trace_clock_local(); struct ftrace_graph_ent ent = { @@ -202,22 +201,21 @@ __trace_graph_function(struct trace_array *tr, .rettime = time, }; - __trace_graph_entry(tr, &ent, flags, pc); - __trace_graph_return(tr, &ret, flags, pc); + __trace_graph_entry(tr, &ent, trace_ctx); + __trace_graph_return(tr, &ret, trace_ctx); } void trace_graph_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc) + unsigned int trace_ctx) { - __trace_graph_function(tr, ip, flags, pc); + __trace_graph_function(tr, ip, trace_ctx); } void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, - unsigned long flags, - int pc) + unsigned int trace_ctx) { struct trace_event_call *call = &event_funcgraph_exit; struct ring_buffer_event *event; @@ -225,7 +223,7 @@ void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); @@ -239,9 +237,9 @@ void trace_graph_return(struct ftrace_graph_ret *trace) struct trace_array *tr = graph_array; struct trace_array_cpu *data; unsigned long flags; + unsigned int trace_ctx; long disabled; int cpu; - int pc; ftrace_graph_addr_finish(trace); @@ -255,8 +253,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace) data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { - pc = preempt_count(); - __trace_graph_return(tr, trace, flags, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + __trace_graph_return(tr, trace, trace_ctx); } atomic_dec(&data->disabled); local_irq_restore(flags); diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index c0df9b97f147..34dc1a712dcb 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -108,14 +108,9 @@ static void trace_hwlat_sample(struct hwlat_sample *sample) struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct hwlat_entry *entry; - unsigned long flags; - int pc; - - pc = preempt_count(); - local_save_flags(flags); event = trace_buffer_lock_reserve(buffer, TRACE_HWLAT, sizeof(*entry), - flags, pc); + tracing_gen_ctx()); if (!event) return; entry = ring_buffer_event_data(event); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 6756379b661f..590b3d51afae 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -143,11 +143,14 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; unsigned long flags; + unsigned int trace_ctx; if (!func_prolog_dec(tr, &data, &flags)) return; - trace_function(tr, ip, parent_ip, flags, preempt_count()); + trace_ctx = tracing_gen_ctx_flags(flags); + + trace_function(tr, ip, parent_ip, trace_ctx); atomic_dec(&data->disabled); } @@ -177,8 +180,8 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; unsigned long flags; + unsigned int trace_ctx; int ret; - int pc; if (ftrace_graph_ignore_func(trace)) return 0; @@ -195,8 +198,8 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) if (!func_prolog_dec(tr, &data, &flags)) return 0; - pc = preempt_count(); - ret = __trace_graph_entry(tr, trace, flags, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + ret = __trace_graph_entry(tr, trace, trace_ctx); atomic_dec(&data->disabled); return ret; @@ -207,15 +210,15 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace) struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; unsigned long flags; - int pc; + unsigned int trace_ctx; ftrace_graph_addr_finish(trace); if (!func_prolog_dec(tr, &data, &flags)) return; - pc = preempt_count(); - __trace_graph_return(tr, trace, flags, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + __trace_graph_return(tr, trace, trace_ctx); atomic_dec(&data->disabled); } @@ -267,12 +270,12 @@ static void irqsoff_print_header(struct seq_file *s) static void __trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc) + unsigned int trace_ctx) { if (is_graph(tr)) - trace_graph_function(tr, ip, parent_ip, flags, pc); + trace_graph_function(tr, ip, parent_ip, trace_ctx); else - trace_function(tr, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, trace_ctx); } #else @@ -322,15 +325,13 @@ check_critical_timing(struct trace_array *tr, { u64 T0, T1, delta; unsigned long flags; - int pc; + unsigned int trace_ctx; T0 = data->preempt_timestamp; T1 = ftrace_now(cpu); delta = T1-T0; - local_save_flags(flags); - - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); if (!report_latency(tr, delta)) goto out; @@ -341,9 +342,9 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(tr, delta)) goto out_unlock; - __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + __trace_function(tr, CALLER_ADDR0, parent_ip, trace_ctx); /* Skip 5 functions to get to the irq/preempt enable function */ - __trace_stack(tr, flags, 5, pc); + __trace_stack(tr, trace_ctx, 5); if (data->critical_sequence != max_sequence) goto out_unlock; @@ -363,16 +364,15 @@ out_unlock: out: data->critical_sequence = max_sequence; data->preempt_timestamp = ftrace_now(cpu); - __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + __trace_function(tr, CALLER_ADDR0, parent_ip, trace_ctx); } static nokprobe_inline void -start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) +start_critical_timing(unsigned long ip, unsigned long parent_ip) { int cpu; struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; - unsigned long flags; if (!tracer_enabled || !tracing_is_enabled()) return; @@ -393,9 +393,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) data->preempt_timestamp = ftrace_now(cpu); data->critical_start = parent_ip ? : ip; - local_save_flags(flags); - - __trace_function(tr, ip, parent_ip, flags, pc); + __trace_function(tr, ip, parent_ip, tracing_gen_ctx()); per_cpu(tracing_cpu, cpu) = 1; @@ -403,12 +401,12 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) } static nokprobe_inline void -stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) +stop_critical_timing(unsigned long ip, unsigned long parent_ip) { int cpu; struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; - unsigned long flags; + unsigned int trace_ctx; cpu = raw_smp_processor_id(); /* Always clear the tracing cpu on stopping the trace */ @@ -428,8 +426,8 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) atomic_inc(&data->disabled); - local_save_flags(flags); - __trace_function(tr, ip, parent_ip, flags, pc); + trace_ctx = tracing_gen_ctx(); + __trace_function(tr, ip, parent_ip, trace_ctx); check_critical_timing(tr, data, parent_ip ? : ip, cpu); data->critical_start = 0; atomic_dec(&data->disabled); @@ -438,20 +436,16 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) /* start and stop critical timings used to for stoppage (in idle) */ void start_critical_timings(void) { - int pc = preempt_count(); - - if (preempt_trace(pc) || irq_trace()) - start_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc); + if (preempt_trace(preempt_count()) || irq_trace()) + start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL_GPL(start_critical_timings); NOKPROBE_SYMBOL(start_critical_timings); void stop_critical_timings(void) { - int pc = preempt_count(); - - if (preempt_trace(pc) || irq_trace()) - stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc); + if (preempt_trace(preempt_count()) || irq_trace()) + stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL_GPL(stop_critical_timings); NOKPROBE_SYMBOL(stop_critical_timings); @@ -613,19 +607,15 @@ static void irqsoff_tracer_stop(struct trace_array *tr) */ void tracer_hardirqs_on(unsigned long a0, unsigned long a1) { - unsigned int pc = preempt_count(); - - if (!preempt_trace(pc) && irq_trace()) - stop_critical_timing(a0, a1, pc); + if (!preempt_trace(preempt_count()) && irq_trace()) + stop_critical_timing(a0, a1); } NOKPROBE_SYMBOL(tracer_hardirqs_on); void tracer_hardirqs_off(unsigned long a0, unsigned long a1) { - unsigned int pc = preempt_count(); - - if (!preempt_trace(pc) && irq_trace()) - start_critical_timing(a0, a1, pc); + if (!preempt_trace(preempt_count()) && irq_trace()) + start_critical_timing(a0, a1); } NOKPROBE_SYMBOL(tracer_hardirqs_off); @@ -665,18 +655,14 @@ static struct tracer irqsoff_tracer __read_mostly = #ifdef CONFIG_PREEMPT_TRACER void tracer_preempt_on(unsigned long a0, unsigned long a1) { - int pc = preempt_count(); - - if (preempt_trace(pc) && !irq_trace()) - stop_critical_timing(a0, a1, pc); + if (preempt_trace(preempt_count()) && !irq_trace()) + stop_critical_timing(a0, a1); } void tracer_preempt_off(unsigned long a0, unsigned long a1) { - int pc = preempt_count(); - - if (preempt_trace(pc) && !irq_trace()) - start_critical_timing(a0, a1, pc); + if (preempt_trace(preempt_count()) && !irq_trace()) + start_critical_timing(a0, a1); } static int preemptoff_tracer_init(struct trace_array *tr) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 56c7fbff7bd7..8a1cb0878cbc 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -35,7 +35,7 @@ static int __init set_kprobe_boot_events(char *str) } __setup("kprobe_event=", set_kprobe_boot_events); -static int trace_kprobe_create(int argc, const char **argv); +static int trace_kprobe_create(const char *raw_command); static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev); static int trace_kprobe_release(struct dyn_event *ev); static bool trace_kprobe_is_busy(struct dyn_event *ev); @@ -711,7 +711,7 @@ static inline void sanitize_event_name(char *name) *name = '_'; } -static int trace_kprobe_create(int argc, const char *argv[]) +static int __trace_kprobe_create(int argc, const char *argv[]) { /* * Argument syntax: @@ -910,20 +910,25 @@ error: goto out; } -static int create_or_delete_trace_kprobe(int argc, char **argv) +static int trace_kprobe_create(const char *raw_command) +{ + return trace_probe_create(raw_command, __trace_kprobe_create); +} + +static int create_or_delete_trace_kprobe(const char *raw_command) { int ret; - if (argv[0][0] == '-') - return dyn_event_release(argc, argv, &trace_kprobe_ops); + if (raw_command[0] == '-') + return dyn_event_release(raw_command, &trace_kprobe_ops); - ret = trace_kprobe_create(argc, (const char **)argv); + ret = trace_kprobe_create(raw_command); return ret == -ECANCELED ? -EINVAL : ret; } static int trace_kprobe_run_command(struct dynevent_cmd *cmd) { - return trace_run_command(cmd->seq.buffer, create_or_delete_trace_kprobe); + return create_or_delete_trace_kprobe(cmd->seq.buffer); } /** @@ -1084,7 +1089,7 @@ int kprobe_event_delete(const char *name) snprintf(buf, MAX_EVENT_NAME_LEN, "-:%s", name); - return trace_run_command(buf, create_or_delete_trace_kprobe); + return create_or_delete_trace_kprobe(buf); } EXPORT_SYMBOL_GPL(kprobe_event_delete); @@ -1386,8 +1391,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, if (trace_trigger_soft_disabled(trace_file)) return; - local_save_flags(fbuffer.flags); - fbuffer.pc = preempt_count(); + fbuffer.trace_ctx = tracing_gen_ctx(); fbuffer.trace_file = trace_file; dsize = __get_data_size(&tk->tp, regs); @@ -1396,7 +1400,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, call->event.type, sizeof(*entry) + tk->tp.size + dsize, - fbuffer.flags, fbuffer.pc); + fbuffer.trace_ctx); if (!fbuffer.event) return; @@ -1434,8 +1438,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, if (trace_trigger_soft_disabled(trace_file)) return; - local_save_flags(fbuffer.flags); - fbuffer.pc = preempt_count(); + fbuffer.trace_ctx = tracing_gen_ctx(); fbuffer.trace_file = trace_file; dsize = __get_data_size(&tk->tp, regs); @@ -1443,7 +1446,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, call->event.type, sizeof(*entry) + tk->tp.size + dsize, - fbuffer.flags, fbuffer.pc); + fbuffer.trace_ctx); if (!fbuffer.event) return; @@ -1888,7 +1891,7 @@ static __init void setup_boot_kprobe_events(void) if (p) *p++ = '\0'; - ret = trace_run_command(cmd, create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe(cmd); if (ret) pr_warn("Failed to add event(%d): %s\n", ret, cmd); @@ -1982,8 +1985,7 @@ static __init int kprobe_trace_self_tests_init(void) pr_info("Testing kprobe tracing: "); - ret = trace_run_command("p:testprobe kprobe_trace_selftest_target $stack $stack0 +0($stack)", - create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe("p:testprobe kprobe_trace_selftest_target $stack $stack0 +0($stack)"); if (WARN_ON_ONCE(ret)) { pr_warn("error on probing function entry.\n"); warn++; @@ -2004,8 +2006,7 @@ static __init int kprobe_trace_self_tests_init(void) } } - ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target $retval", - create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe("r:testprobe2 kprobe_trace_selftest_target $retval"); if (WARN_ON_ONCE(ret)) { pr_warn("error on probing function return.\n"); warn++; @@ -2078,13 +2079,13 @@ static __init int kprobe_trace_self_tests_init(void) trace_probe_event_call(&tk->tp), file); } - ret = trace_run_command("-:testprobe", create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe("-:testprobe"); if (WARN_ON_ONCE(ret)) { pr_warn("error on deleting a probe.\n"); warn++; } - ret = trace_run_command("-:testprobe2", create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe("-:testprobe2"); if (WARN_ON_ONCE(ret)) { pr_warn("error on deleting a probe.\n"); warn++; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 84582bf1ed5f..64e77b513697 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -5,8 +5,6 @@ * Copyright (C) 2008 Pekka Paalanen <pq@iki.fi> */ -#define DEBUG 1 - #include <linux/kernel.h> #include <linux/mmiotrace.h> #include <linux/pci.h> @@ -300,10 +298,11 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; - int pc = preempt_count(); + unsigned int trace_ctx; + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_RW, - sizeof(*entry), 0, pc); + sizeof(*entry), trace_ctx); if (!event) { atomic_inc(&dropped_count); return; @@ -312,7 +311,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, entry->rw = *rw; if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } void mmio_trace_rw(struct mmiotrace_rw *rw) @@ -330,10 +329,11 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; - int pc = preempt_count(); + unsigned int trace_ctx; + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_MAP, - sizeof(*entry), 0, pc); + sizeof(*entry), trace_ctx); if (!event) { atomic_inc(&dropped_count); return; @@ -342,7 +342,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, entry->map = *map; if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } void mmio_trace_mapping(struct mmiotrace_map *map) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 92b1575ae0ca..61255bad7e01 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -312,13 +312,23 @@ int trace_raw_output_prep(struct trace_iterator *iter, } EXPORT_SYMBOL(trace_raw_output_prep); +void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + trace_seq_vprintf(&iter->seq, trace_event_format(iter, fmt), ap); + va_end(ap); +} +EXPORT_SYMBOL(trace_event_printf); + static int trace_output_raw(struct trace_iterator *iter, char *name, char *fmt, va_list ap) { struct trace_seq *s = &iter->seq; trace_seq_printf(s, "%s: ", name); - trace_seq_vprintf(s, fmt, ap); + trace_seq_vprintf(s, trace_event_format(iter, fmt), ap); return trace_handle_return(s); } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index d2867ccc6aca..ec589a4612df 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1134,3 +1134,20 @@ bool trace_probe_match_command_args(struct trace_probe *tp, } return true; } + +int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **)) +{ + int argc = 0, ret = 0; + char **argv; + + argv = argv_split(GFP_KERNEL, raw_command, &argc); + if (!argv) + return -ENOMEM; + + if (argc) + ret = createfn(argc, (const char **)argv); + + argv_free(argv); + + return ret; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 2f703a20c724..7ce4027089ee 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -341,6 +341,7 @@ struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp, int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b); bool trace_probe_match_command_args(struct trace_probe *tp, int argc, const char **argv); +int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **)); #define trace_probe_for_each_link(pos, tp) \ list_for_each_entry(pos, &(tp)->event->files, list) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index c0181066dbe9..e5778d1d7a5b 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -67,7 +67,7 @@ static bool function_enabled; static int func_prolog_preempt_disable(struct trace_array *tr, struct trace_array_cpu **data, - int *pc) + unsigned int *trace_ctx) { long disabled; int cpu; @@ -75,7 +75,7 @@ func_prolog_preempt_disable(struct trace_array *tr, if (likely(!wakeup_task)) return 0; - *pc = preempt_count(); + *trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); cpu = raw_smp_processor_id(); @@ -116,8 +116,8 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; - unsigned long flags; - int pc, ret = 0; + unsigned int trace_ctx; + int ret = 0; if (ftrace_graph_ignore_func(trace)) return 0; @@ -131,11 +131,10 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace) if (ftrace_graph_notrace_addr(trace->func)) return 1; - if (!func_prolog_preempt_disable(tr, &data, &pc)) + if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) return 0; - local_save_flags(flags); - ret = __trace_graph_entry(tr, trace, flags, pc); + ret = __trace_graph_entry(tr, trace, trace_ctx); atomic_dec(&data->disabled); preempt_enable_notrace(); @@ -146,16 +145,14 @@ static void wakeup_graph_return(struct ftrace_graph_ret *trace) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; - unsigned long flags; - int pc; + unsigned int trace_ctx; ftrace_graph_addr_finish(trace); - if (!func_prolog_preempt_disable(tr, &data, &pc)) + if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) return; - local_save_flags(flags); - __trace_graph_return(tr, trace, flags, pc); + __trace_graph_return(tr, trace, trace_ctx); atomic_dec(&data->disabled); preempt_enable_notrace(); @@ -217,13 +214,13 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; unsigned long flags; - int pc; + unsigned int trace_ctx; - if (!func_prolog_preempt_disable(tr, &data, &pc)) + if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) return; local_irq_save(flags); - trace_function(tr, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, trace_ctx); local_irq_restore(flags); atomic_dec(&data->disabled); @@ -303,12 +300,12 @@ static void wakeup_print_header(struct seq_file *s) static void __trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc) + unsigned int trace_ctx) { if (is_graph(tr)) - trace_graph_function(tr, ip, parent_ip, flags, pc); + trace_graph_function(tr, ip, parent_ip, trace_ctx); else - trace_function(tr, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, trace_ctx); } static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set) @@ -375,7 +372,7 @@ static void tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *prev, struct task_struct *next, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct trace_event_call *call = &event_context_switch; struct trace_buffer *buffer = tr->array_buffer.buffer; @@ -383,7 +380,7 @@ tracing_sched_switch_trace(struct trace_array *tr, struct ctx_switch_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_CTX, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); @@ -396,14 +393,14 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_cpu = task_cpu(next); if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, flags, pc); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } static void tracing_sched_wakeup_trace(struct trace_array *tr, struct task_struct *wakee, struct task_struct *curr, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct trace_event_call *call = &event_wakeup; struct ring_buffer_event *event; @@ -411,7 +408,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct trace_buffer *buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); @@ -424,7 +421,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_cpu = task_cpu(wakee); if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, flags, pc); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } static void notrace @@ -436,7 +433,7 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, unsigned long flags; long disabled; int cpu; - int pc; + unsigned int trace_ctx; tracing_record_cmdline(prev); @@ -455,8 +452,6 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, if (next != wakeup_task) return; - pc = preempt_count(); - /* disable local data, not wakeup_cpu data */ cpu = raw_smp_processor_id(); disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); @@ -464,6 +459,8 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, goto out; local_irq_save(flags); + trace_ctx = tracing_gen_ctx_flags(flags); + arch_spin_lock(&wakeup_lock); /* We could race with grabbing wakeup_lock */ @@ -473,9 +470,9 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, /* The task we are waiting for is waking up */ data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); - __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); - tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); - __trace_stack(wakeup_trace, flags, 0, pc); + __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, trace_ctx); + tracing_sched_switch_trace(wakeup_trace, prev, next, trace_ctx); + __trace_stack(wakeup_trace, trace_ctx, 0); T0 = data->preempt_timestamp; T1 = ftrace_now(cpu); @@ -527,9 +524,8 @@ probe_wakeup(void *ignore, struct task_struct *p) { struct trace_array_cpu *data; int cpu = smp_processor_id(); - unsigned long flags; long disabled; - int pc; + unsigned int trace_ctx; if (likely(!tracer_enabled)) return; @@ -550,11 +546,12 @@ probe_wakeup(void *ignore, struct task_struct *p) (!dl_task(p) && (p->prio >= wakeup_prio || p->prio >= current->prio))) return; - pc = preempt_count(); disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); if (unlikely(disabled != 1)) goto out; + trace_ctx = tracing_gen_ctx(); + /* interrupts should be off from try_to_wake_up */ arch_spin_lock(&wakeup_lock); @@ -581,19 +578,17 @@ probe_wakeup(void *ignore, struct task_struct *p) wakeup_task = get_task_struct(p); - local_save_flags(flags); - data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); data->preempt_timestamp = ftrace_now(cpu); - tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); - __trace_stack(wakeup_trace, flags, 0, pc); + tracing_sched_wakeup_trace(wakeup_trace, p, current, trace_ctx); + __trace_stack(wakeup_trace, trace_ctx, 0); /* * We must be careful in using CALLER_ADDR2. But since wake_up * is not called by an assembly function (where as schedule is) * it should be safe to use it here. */ - __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); + __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, trace_ctx); out_locked: arch_spin_unlock(&wakeup_lock); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index d85a2f0f316b..8bfcd3b09422 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -298,9 +298,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct trace_buffer *buffer; - unsigned long irq_flags; + unsigned int trace_ctx; unsigned long args[6]; - int pc; int syscall_nr; int size; @@ -322,12 +321,11 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; - local_save_flags(irq_flags); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, - sys_data->enter_event->event.type, size, irq_flags, pc); + sys_data->enter_event->event.type, size, trace_ctx); if (!event) return; @@ -337,7 +335,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args); event_trigger_unlock_commit(trace_file, buffer, event, entry, - irq_flags, pc); + trace_ctx); } static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) @@ -348,8 +346,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct trace_buffer *buffer; - unsigned long irq_flags; - int pc; + unsigned int trace_ctx; int syscall_nr; syscall_nr = trace_get_syscall_nr(current, regs); @@ -368,13 +365,12 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!sys_data) return; - local_save_flags(irq_flags); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, sys_data->exit_event->event.type, sizeof(*entry), - irq_flags, pc); + trace_ctx); if (!event) return; @@ -383,7 +379,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) entry->ret = syscall_get_return_value(current, regs); event_trigger_unlock_commit(trace_file, buffer, event, entry, - irq_flags, pc); + trace_ctx); } static int reg_event_syscall_enter(struct trace_event_file *file, diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 3cf7128e1ad3..9b50869a5ddb 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -34,7 +34,7 @@ struct uprobe_trace_entry_head { #define DATAOF_TRACE_ENTRY(entry, is_return) \ ((void*)(entry) + SIZEOF_TRACE_ENTRY(is_return)) -static int trace_uprobe_create(int argc, const char **argv); +static int trace_uprobe_create(const char *raw_command); static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev); static int trace_uprobe_release(struct dyn_event *ev); static bool trace_uprobe_is_busy(struct dyn_event *ev); @@ -530,7 +530,7 @@ end: * Argument syntax: * - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET[%return][(REF)] [FETCHARGS] */ -static int trace_uprobe_create(int argc, const char **argv) +static int __trace_uprobe_create(int argc, const char **argv) { struct trace_uprobe *tu; const char *event = NULL, *group = UPROBE_EVENT_SYSTEM; @@ -716,14 +716,19 @@ fail_address_parse: return ret; } -static int create_or_delete_trace_uprobe(int argc, char **argv) +int trace_uprobe_create(const char *raw_command) +{ + return trace_probe_create(raw_command, __trace_uprobe_create); +} + +static int create_or_delete_trace_uprobe(const char *raw_command) { int ret; - if (argv[0][0] == '-') - return dyn_event_release(argc, argv, &trace_uprobe_ops); + if (raw_command[0] == '-') + return dyn_event_release(raw_command, &trace_uprobe_ops); - ret = trace_uprobe_create(argc, (const char **)argv); + ret = trace_uprobe_create(raw_command); return ret == -ECANCELED ? -EINVAL : ret; } @@ -961,7 +966,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + tu->tp.size + dsize; event = trace_event_buffer_lock_reserve(&buffer, trace_file, - call->event.type, size, 0, 0); + call->event.type, size, 0); if (!event) return; @@ -977,7 +982,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, memcpy(data, ucb->buf, tu->tp.size + dsize); - event_trigger_unlock_commit(trace_file, buffer, event, entry, 0, 0); + event_trigger_unlock_commit(trace_file, buffer, event, entry, 0); } /* uprobe handler */ @@ -1635,7 +1640,7 @@ void destroy_local_trace_uprobe(struct trace_event_call *event_call) } #endif /* CONFIG_PERF_EVENTS */ -/* Make a trace interface for controling probe points */ +/* Make a trace interface for controlling probe points */ static __init int init_uprobe_trace(void) { int ret; diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 7261fa0f5e3c..9f478d29b926 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -53,6 +53,12 @@ struct tp_probes { struct tracepoint_func probes[]; }; +/* Called in removal of a func but failed to allocate a new tp_funcs */ +static void tp_stub_func(void) +{ + return; +} + static inline void *allocate_probes(int count) { struct tp_probes *p = kmalloc(struct_size(p, probes, count), @@ -130,8 +136,9 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, int prio) { struct tracepoint_func *old, *new; - int nr_probes = 0; - int pos = -1; + int iter_probes; /* Iterate over old probe array. */ + int nr_probes = 0; /* Counter for probes */ + int pos = -1; /* Insertion position into new array */ if (WARN_ON(!tp_func->func)) return ERR_PTR(-EINVAL); @@ -140,13 +147,13 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, old = *funcs; if (old) { /* (N -> N+1), (N != 0, 1) probes */ - for (nr_probes = 0; old[nr_probes].func; nr_probes++) { - /* Insert before probes of lower priority */ - if (pos < 0 && old[nr_probes].prio < prio) - pos = nr_probes; - if (old[nr_probes].func == tp_func->func && - old[nr_probes].data == tp_func->data) + for (iter_probes = 0; old[iter_probes].func; iter_probes++) { + if (old[iter_probes].func == tp_stub_func) + continue; /* Skip stub functions. */ + if (old[iter_probes].func == tp_func->func && + old[iter_probes].data == tp_func->data) return ERR_PTR(-EEXIST); + nr_probes++; } } /* + 2 : one for new probe, one for NULL func */ @@ -154,20 +161,24 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, if (new == NULL) return ERR_PTR(-ENOMEM); if (old) { - if (pos < 0) { - pos = nr_probes; - memcpy(new, old, nr_probes * sizeof(struct tracepoint_func)); - } else { - /* Copy higher priority probes ahead of the new probe */ - memcpy(new, old, pos * sizeof(struct tracepoint_func)); - /* Copy the rest after it. */ - memcpy(new + pos + 1, old + pos, - (nr_probes - pos) * sizeof(struct tracepoint_func)); + nr_probes = 0; + for (iter_probes = 0; old[iter_probes].func; iter_probes++) { + if (old[iter_probes].func == tp_stub_func) + continue; + /* Insert before probes of lower priority */ + if (pos < 0 && old[iter_probes].prio < prio) + pos = nr_probes++; + new[nr_probes++] = old[iter_probes]; } - } else + if (pos < 0) + pos = nr_probes++; + /* nr_probes now points to the end of the new array */ + } else { pos = 0; + nr_probes = 1; /* must point at end of array */ + } new[pos] = *tp_func; - new[nr_probes + 1].func = NULL; + new[nr_probes].func = NULL; *funcs = new; debug_print_probes(*funcs); return old; @@ -188,8 +199,9 @@ static void *func_remove(struct tracepoint_func **funcs, /* (N -> M), (N > 1, M >= 0) probes */ if (tp_func->func) { for (nr_probes = 0; old[nr_probes].func; nr_probes++) { - if (old[nr_probes].func == tp_func->func && - old[nr_probes].data == tp_func->data) + if ((old[nr_probes].func == tp_func->func && + old[nr_probes].data == tp_func->data) || + old[nr_probes].func == tp_stub_func) nr_del++; } } @@ -208,14 +220,27 @@ static void *func_remove(struct tracepoint_func **funcs, /* N -> M, (N > 1, M > 0) */ /* + 1 for NULL */ new = allocate_probes(nr_probes - nr_del + 1); - if (new == NULL) - return ERR_PTR(-ENOMEM); - for (i = 0; old[i].func; i++) - if (old[i].func != tp_func->func - || old[i].data != tp_func->data) - new[j++] = old[i]; - new[nr_probes - nr_del].func = NULL; - *funcs = new; + if (new) { + for (i = 0; old[i].func; i++) { + if ((old[i].func != tp_func->func || + old[i].data != tp_func->data) && + old[i].func != tp_stub_func) + new[j++] = old[i]; + } + new[nr_probes - nr_del].func = NULL; + *funcs = new; + } else { + /* + * Failed to allocate, replace the old function + * with calls to tp_stub_func. + */ + for (i = 0; old[i].func; i++) { + if (old[i].func == tp_func->func && + old[i].data == tp_func->data) + WRITE_ONCE(old[i].func, tp_stub_func); + } + *funcs = old; + } } debug_print_probes(*funcs); return old; @@ -295,10 +320,12 @@ static int tracepoint_remove_func(struct tracepoint *tp, tp_funcs = rcu_dereference_protected(tp->funcs, lockdep_is_held(&tracepoints_mutex)); old = func_remove(&tp_funcs, func); - if (IS_ERR(old)) { - WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM); + if (WARN_ON_ONCE(IS_ERR(old))) return PTR_ERR(old); - } + + if (tp_funcs == old) + /* Failed allocating new tp_funcs, replaced func with stub */ + return 0; if (!tp_funcs) { /* Removed last function */ |