diff options
Diffstat (limited to 'kernel/trace')
66 files changed, 2413 insertions, 406 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d570b8b9c0a9..033fba0633cf 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -263,6 +263,18 @@ config FUNCTION_GRAPH_RETADDR the function is called. This feature is off by default, and you can enable it via the trace option funcgraph-retaddr. +config FUNCTION_TRACE_ARGS + bool + depends on HAVE_FUNCTION_ARG_ACCESS_API + depends on DEBUG_INFO_BTF + default y + help + If supported with function argument access API and BTF, then + the function tracer and function graph tracer will support printing + of function arguments. This feature is off by default, and can be + enabled via the trace option func-args (for the function tracer) and + funcgraph-args (for the function graph tracer) + config DYNAMIC_FTRACE bool "enable/disable function tracing dynamically" depends on FUNCTION_TRACER diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index adc947587eb8..187dc37d61d4 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -392,7 +392,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = { .arg2_type = ARG_CONST_SIZE, }; -static void __set_printk_clr_event(void) +static void __set_printk_clr_event(struct work_struct *work) { /* * This program might be calling bpf_trace_printk, @@ -405,10 +405,11 @@ static void __set_printk_clr_event(void) if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1)) pr_warn_ratelimited("could not enable bpf_trace_printk events"); } +static DECLARE_WORK(set_printk_work, __set_printk_clr_event); const struct bpf_func_proto *bpf_get_trace_printk_proto(void) { - __set_printk_clr_event(); + schedule_work(&set_printk_work); return &bpf_trace_printk_proto; } @@ -451,7 +452,7 @@ static const struct bpf_func_proto bpf_trace_vprintk_proto = { const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void) { - __set_printk_clr_event(); + schedule_work(&set_printk_work); return &bpf_trace_vprintk_proto; } @@ -606,6 +607,11 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = { .arg4_type = ARG_CONST_SIZE, }; +const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void) +{ + return &bpf_perf_event_read_value_proto; +} + static __always_inline u64 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, u64 flags, struct perf_raw_record *raw, @@ -843,7 +849,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struc if (unlikely(is_global_init(task))) return -EPERM; - if (!preemptible()) { + if (preempt_count() != 0 || irqs_disabled()) { /* Do an early check on signal validity. Otherwise, * the error is lost in deferred irq_work. */ @@ -1038,27 +1044,14 @@ static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = { .arg1_type = ARG_PTR_TO_CTX, }; -#ifdef CONFIG_X86_KERNEL_IBT -static unsigned long get_entry_ip(unsigned long fentry_ip) +static inline unsigned long get_entry_ip(unsigned long fentry_ip) { - u32 instr; - - /* We want to be extra safe in case entry ip is on the page edge, - * but otherwise we need to avoid get_kernel_nofault()'s overhead. - */ - if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) { - if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE))) - return fentry_ip; - } else { - instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE); - } - if (is_endbr(instr)) +#ifdef CONFIG_X86_KERNEL_IBT + if (is_endbr((void *)(fentry_ip - ENDBR_INSN_SIZE))) fentry_ip -= ENDBR_INSN_SIZE; +#endif return fentry_ip; } -#else -#define get_entry_ip(fentry_ip) fentry_ip -#endif BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs) { @@ -2345,10 +2338,9 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) { struct module *mod; - preempt_disable(); + guard(rcu)(); mod = __module_address((unsigned long)btp); module_put(mod); - preempt_enable(); } static __always_inline @@ -2932,18 +2924,21 @@ static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u3 u32 i, err = 0; for (i = 0; i < addrs_cnt; i++) { + bool skip_add = false; struct module *mod; - preempt_disable(); - mod = __module_address(addrs[i]); - /* Either no module or we it's already stored */ - if (!mod || has_module(&arr, mod)) { - preempt_enable(); - continue; + scoped_guard(rcu) { + mod = __module_address(addrs[i]); + /* Either no module or it's already stored */ + if (!mod || has_module(&arr, mod)) { + skip_add = true; + break; /* scoped_guard */ + } + if (!try_module_get(mod)) + err = -EINVAL; } - if (!try_module_get(mod)) - err = -EINVAL; - preempt_enable(); + if (skip_add) + continue; if (err) break; err = add_module(&arr, mod); diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 5dddfc2149f6..8d925cbdce3a 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -865,7 +865,7 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe } /* - * After all architecures have selected HAVE_FUNCTION_GRAPH_FREGS, we can + * After all architectures have selected HAVE_FUNCTION_GRAPH_FREGS, we can * leave only ftrace_return_to_handler(fregs). */ #ifdef CONFIG_HAVE_FUNCTION_GRAPH_FREGS diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fc88e0688daf..92015de6203d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1293,6 +1293,8 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash) void ftrace_free_filter(struct ftrace_ops *ops) { ftrace_ops_init(ops); + if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) + return; free_ftrace_hash(ops->func_hash->filter_hash); free_ftrace_hash(ops->func_hash->notrace_hash); } @@ -7016,6 +7018,7 @@ static int ftrace_process_locs(struct module *mod, unsigned long *p; unsigned long addr; unsigned long flags = 0; /* Shut up gcc */ + unsigned long pages; int ret = -ENOMEM; count = end - start; @@ -7023,6 +7026,8 @@ static int ftrace_process_locs(struct module *mod, if (!count) return 0; + pages = DIV_ROUND_UP(count, ENTRIES_PER_PAGE); + /* * Sorting mcount in vmlinux at build time depend on * CONFIG_BUILDTIME_MCOUNT_SORT, while mcount loc in @@ -7067,7 +7072,9 @@ static int ftrace_process_locs(struct module *mod, pg = start_pg; while (p < end) { unsigned long end_offset; - addr = ftrace_call_adjust(*p++); + + addr = *p++; + /* * Some architecture linkers will pad between * the different mcount_loc sections of different @@ -7079,6 +7086,19 @@ static int ftrace_process_locs(struct module *mod, continue; } + /* + * If this is core kernel, make sure the address is in core + * or inittext, as weak functions get zeroed and KASLR can + * move them to something other than zero. It just will not + * move it to an area where kernel text is. + */ + if (!mod && !(is_kernel_text(addr) || is_kernel_inittext(addr))) { + skipped++; + continue; + } + + addr = ftrace_call_adjust(addr); + end_offset = (pg->index+1) * sizeof(pg->records[0]); if (end_offset > PAGE_SIZE << pg->order) { /* We should have allocated enough */ @@ -7118,11 +7138,41 @@ static int ftrace_process_locs(struct module *mod, /* We should have used all pages unless we skipped some */ if (pg_unuse) { - WARN_ON(!skipped); + unsigned long pg_remaining, remaining = 0; + unsigned long skip; + + /* Count the number of entries unused and compare it to skipped. */ + pg_remaining = (ENTRIES_PER_PAGE << pg->order) - pg->index; + + if (!WARN(skipped < pg_remaining, "Extra allocated pages for ftrace")) { + + skip = skipped - pg_remaining; + + for (pg = pg_unuse; pg; pg = pg->next) + remaining += 1 << pg->order; + + pages -= remaining; + + skip = DIV_ROUND_UP(skip, ENTRIES_PER_PAGE); + + /* + * Check to see if the number of pages remaining would + * just fit the number of entries skipped. + */ + WARN(skip != remaining, "Extra allocated pages for ftrace: %lu with %lu skipped", + remaining, skipped); + } /* Need to synchronize with ftrace_location_range() */ synchronize_rcu(); ftrace_free_pages(pg_unuse); } + + if (!mod) { + count -= skipped; + pr_info("ftrace: allocating %ld entries in %ld pages\n", + count, pages); + } + return ret; } @@ -7768,9 +7818,6 @@ void __init ftrace_init(void) goto failed; } - pr_info("ftrace: allocating %ld entries in %ld pages\n", - count, DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); - ret = ftrace_process_locs(NULL, __start_mcount_loc, __stop_mcount_loc); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bb6089c2951e..d8d7b28e2c2f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -31,6 +31,7 @@ #include <asm/local64.h> #include <asm/local.h> +#include <asm/setup.h> #include "trace.h" @@ -48,9 +49,12 @@ static void update_pages_handler(struct work_struct *work); struct ring_buffer_meta { int magic; - int struct_size; - unsigned long text_addr; - unsigned long data_addr; + int struct_sizes; + unsigned long total_size; + unsigned long buffers_offset; +}; + +struct ring_buffer_cpu_meta { unsigned long first_buffer; unsigned long head_buffer; unsigned long commit_buffer; @@ -517,7 +521,7 @@ struct ring_buffer_per_cpu { struct mutex mapping_lock; unsigned long *subbuf_ids; /* ID to subbuf VA */ struct trace_buffer_meta *meta_page; - struct ring_buffer_meta *ring_meta; + struct ring_buffer_cpu_meta *ring_meta; /* ring buffer pages to update, > 0 to add, < 0 to remove */ long nr_pages_to_update; @@ -550,8 +554,7 @@ struct trace_buffer { unsigned long range_addr_start; unsigned long range_addr_end; - long last_text_delta; - long last_data_delta; + struct ring_buffer_meta *meta; unsigned int subbuf_size; unsigned int subbuf_order; @@ -1271,7 +1274,7 @@ static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer) rb_set_list_to_head(head->list.prev); if (cpu_buffer->ring_meta) { - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; meta->head_buffer = (unsigned long)head->page; } } @@ -1569,7 +1572,7 @@ out_locked: static unsigned long rb_range_align_subbuf(unsigned long addr, int subbuf_size, int nr_subbufs) { - addr += sizeof(struct ring_buffer_meta) + + addr += sizeof(struct ring_buffer_cpu_meta) + sizeof(int) * nr_subbufs; return ALIGN(addr, subbuf_size); } @@ -1580,19 +1583,22 @@ rb_range_align_subbuf(unsigned long addr, int subbuf_size, int nr_subbufs) static void *rb_range_meta(struct trace_buffer *buffer, int nr_pages, int cpu) { int subbuf_size = buffer->subbuf_size + BUF_PAGE_HDR_SIZE; - unsigned long ptr = buffer->range_addr_start; - struct ring_buffer_meta *meta; + struct ring_buffer_cpu_meta *meta; + struct ring_buffer_meta *bmeta; + unsigned long ptr; int nr_subbufs; - if (!ptr) + bmeta = buffer->meta; + if (!bmeta) return NULL; + ptr = (unsigned long)bmeta + bmeta->buffers_offset; + meta = (struct ring_buffer_cpu_meta *)ptr; + /* When nr_pages passed in is zero, the first meta has already been initialized */ if (!nr_pages) { - meta = (struct ring_buffer_meta *)ptr; nr_subbufs = meta->nr_subbufs; } else { - meta = NULL; /* Include the reader page */ nr_subbufs = nr_pages + 1; } @@ -1624,7 +1630,7 @@ static void *rb_range_meta(struct trace_buffer *buffer, int nr_pages, int cpu) } /* Return the start of subbufs given the meta pointer */ -static void *rb_subbufs_from_meta(struct ring_buffer_meta *meta) +static void *rb_subbufs_from_meta(struct ring_buffer_cpu_meta *meta) { int subbuf_size = meta->subbuf_size; unsigned long ptr; @@ -1640,7 +1646,7 @@ static void *rb_subbufs_from_meta(struct ring_buffer_meta *meta) */ static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx) { - struct ring_buffer_meta *meta; + struct ring_buffer_cpu_meta *meta; unsigned long ptr; int subbuf_size; @@ -1666,14 +1672,77 @@ static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx) } /* + * See if the existing memory contains a valid meta section. + * if so, use that, otherwise initialize it. + */ +static bool rb_meta_init(struct trace_buffer *buffer, int scratch_size) +{ + unsigned long ptr = buffer->range_addr_start; + struct ring_buffer_meta *bmeta; + unsigned long total_size; + int struct_sizes; + + bmeta = (struct ring_buffer_meta *)ptr; + buffer->meta = bmeta; + + total_size = buffer->range_addr_end - buffer->range_addr_start; + + struct_sizes = sizeof(struct ring_buffer_cpu_meta); + struct_sizes |= sizeof(*bmeta) << 16; + + /* The first buffer will start word size after the meta page */ + ptr += sizeof(*bmeta); + ptr = ALIGN(ptr, sizeof(long)); + ptr += scratch_size; + + if (bmeta->magic != RING_BUFFER_META_MAGIC) { + pr_info("Ring buffer boot meta mismatch of magic\n"); + goto init; + } + + if (bmeta->struct_sizes != struct_sizes) { + pr_info("Ring buffer boot meta mismatch of struct size\n"); + goto init; + } + + if (bmeta->total_size != total_size) { + pr_info("Ring buffer boot meta mismatch of total size\n"); + goto init; + } + + if (bmeta->buffers_offset > bmeta->total_size) { + pr_info("Ring buffer boot meta mismatch of offset outside of total size\n"); + goto init; + } + + if (bmeta->buffers_offset != (void *)ptr - (void *)bmeta) { + pr_info("Ring buffer boot meta mismatch of first buffer offset\n"); + goto init; + } + + return true; + + init: + bmeta->magic = RING_BUFFER_META_MAGIC; + bmeta->struct_sizes = struct_sizes; + bmeta->total_size = total_size; + bmeta->buffers_offset = (void *)ptr - (void *)bmeta; + + /* Zero out the scatch pad */ + memset((void *)bmeta + sizeof(*bmeta), 0, bmeta->buffers_offset - sizeof(*bmeta)); + + return false; +} + +/* * See if the existing memory contains valid ring buffer data. * As the previous kernel must be the same as this kernel, all * the calculations (size of buffers and number of buffers) * must be the same. */ -static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu, - struct trace_buffer *buffer, int nr_pages, - unsigned long *subbuf_mask) +static bool rb_cpu_meta_valid(struct ring_buffer_cpu_meta *meta, int cpu, + struct trace_buffer *buffer, int nr_pages, + unsigned long *subbuf_mask) { int subbuf_size = PAGE_SIZE; struct buffer_data_page *subbuf; @@ -1684,20 +1753,6 @@ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu, if (!subbuf_mask) return false; - /* Check the meta magic and meta struct size */ - if (meta->magic != RING_BUFFER_META_MAGIC || - meta->struct_size != sizeof(*meta)) { - pr_info("Ring buffer boot meta[%d] mismatch of magic or struct size\n", cpu); - return false; - } - - /* The subbuffer's size and number of subbuffers must match */ - if (meta->subbuf_size != subbuf_size || - meta->nr_subbufs != nr_pages + 1) { - pr_info("Ring buffer boot meta [%d] mismatch of subbuf_size/nr_pages\n", cpu); - return false; - } - buffers_start = meta->first_buffer; buffers_end = meta->first_buffer + (subbuf_size * meta->nr_subbufs); @@ -1743,7 +1798,7 @@ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu, return true; } -static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf); +static int rb_meta_subbuf_idx(struct ring_buffer_cpu_meta *meta, void *subbuf); static int rb_read_data_buffer(struct buffer_data_page *dpage, int tail, int cpu, unsigned long long *timestamp, u64 *delta_ptr) @@ -1810,7 +1865,7 @@ static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu) /* If the meta data has been validated, now validate the events */ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer) { - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; struct buffer_page *head_page; unsigned long entry_bytes = 0; unsigned long entries = 0; @@ -1891,24 +1946,13 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer) } } -/* Used to calculate data delta */ -static char rb_data_ptr[] = ""; - -#define THIS_TEXT_PTR ((unsigned long)rb_meta_init_text_addr) -#define THIS_DATA_PTR ((unsigned long)rb_data_ptr) - -static void rb_meta_init_text_addr(struct ring_buffer_meta *meta) -{ - meta->text_addr = THIS_TEXT_PTR; - meta->data_addr = THIS_DATA_PTR; -} - -static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages) +static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages, int scratch_size) { - struct ring_buffer_meta *meta; + struct ring_buffer_cpu_meta *meta; unsigned long *subbuf_mask; unsigned long delta; void *subbuf; + bool valid = false; int cpu; int i; @@ -1916,20 +1960,21 @@ static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages) subbuf_mask = bitmap_alloc(nr_pages + 1, GFP_KERNEL); /* If subbuf_mask fails to allocate, then rb_meta_valid() will return false */ + if (rb_meta_init(buffer, scratch_size)) + valid = true; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { void *next_meta; meta = rb_range_meta(buffer, nr_pages, cpu); - if (rb_meta_valid(meta, cpu, buffer, nr_pages, subbuf_mask)) { + if (valid && rb_cpu_meta_valid(meta, cpu, buffer, nr_pages, subbuf_mask)) { /* Make the mappings match the current address */ subbuf = rb_subbufs_from_meta(meta); delta = (unsigned long)subbuf - meta->first_buffer; meta->first_buffer += delta; meta->head_buffer += delta; meta->commit_buffer += delta; - buffer->last_text_delta = THIS_TEXT_PTR - meta->text_addr; - buffer->last_data_delta = THIS_DATA_PTR - meta->data_addr; continue; } @@ -1940,16 +1985,12 @@ static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages) memset(meta, 0, next_meta - (void *)meta); - meta->magic = RING_BUFFER_META_MAGIC; - meta->struct_size = sizeof(*meta); - meta->nr_subbufs = nr_pages + 1; meta->subbuf_size = PAGE_SIZE; subbuf = rb_subbufs_from_meta(meta); meta->first_buffer = (unsigned long)subbuf; - rb_meta_init_text_addr(meta); /* * The buffers[] array holds the order of the sub-buffers @@ -1971,7 +2012,7 @@ static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages) static void *rbm_start(struct seq_file *m, loff_t *pos) { struct ring_buffer_per_cpu *cpu_buffer = m->private; - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; unsigned long val; if (!meta) @@ -1996,7 +2037,7 @@ static void *rbm_next(struct seq_file *m, void *v, loff_t *pos) static int rbm_show(struct seq_file *m, void *v) { struct ring_buffer_per_cpu *cpu_buffer = m->private; - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; unsigned long val = (unsigned long)v; if (val == 1) { @@ -2045,7 +2086,7 @@ int ring_buffer_meta_seq_init(struct file *file, struct trace_buffer *buffer, in static void rb_meta_buffer_update(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *bpage) { - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; if (meta->head_buffer == (unsigned long)bpage->page) cpu_buffer->head_page = bpage; @@ -2060,7 +2101,7 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, long nr_pages, struct list_head *pages) { struct trace_buffer *buffer = cpu_buffer->buffer; - struct ring_buffer_meta *meta = NULL; + struct ring_buffer_cpu_meta *meta = NULL; struct buffer_page *bpage, *tmp; bool user_thread = current->mm != NULL; gfp_t mflags; @@ -2184,7 +2225,7 @@ static struct ring_buffer_per_cpu * rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; - struct ring_buffer_meta *meta; + struct ring_buffer_cpu_meta *meta; struct buffer_page *bpage; struct page *page; int ret; @@ -2313,6 +2354,7 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags, int order, unsigned long start, unsigned long end, + unsigned long scratch_size, struct lock_class_key *key) { struct trace_buffer *buffer; @@ -2355,10 +2397,23 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags, /* If start/end are specified, then that overrides size */ if (start && end) { + unsigned long buffers_start; unsigned long ptr; int n; - size = end - start; + /* Make sure that start is word aligned */ + start = ALIGN(start, sizeof(long)); + + /* scratch_size needs to be aligned too */ + scratch_size = ALIGN(scratch_size, sizeof(long)); + + /* Subtract the buffer meta data and word aligned */ + buffers_start = start + sizeof(struct ring_buffer_cpu_meta); + buffers_start = ALIGN(buffers_start, sizeof(long)); + buffers_start += scratch_size; + + /* Calculate the size for the per CPU data */ + size = end - buffers_start; size = size / nr_cpu_ids; /* @@ -2368,7 +2423,7 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags, * needed, plus account for the integer array index that * will be appended to the meta data. */ - nr_pages = (size - sizeof(struct ring_buffer_meta)) / + nr_pages = (size - sizeof(struct ring_buffer_cpu_meta)) / (subbuf_size + sizeof(int)); /* Need at least two pages plus the reader page */ if (nr_pages < 3) @@ -2376,8 +2431,8 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags, again: /* Make sure that the size fits aligned */ - for (n = 0, ptr = start; n < nr_cpu_ids; n++) { - ptr += sizeof(struct ring_buffer_meta) + + for (n = 0, ptr = buffers_start; n < nr_cpu_ids; n++) { + ptr += sizeof(struct ring_buffer_cpu_meta) + sizeof(int) * nr_pages; ptr = ALIGN(ptr, subbuf_size); ptr += subbuf_size * nr_pages; @@ -2394,7 +2449,7 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags, buffer->range_addr_start = start; buffer->range_addr_end = end; - rb_range_meta_init(buffer, nr_pages); + rb_range_meta_init(buffer, nr_pages, scratch_size); } else { /* need at least two pages */ @@ -2447,7 +2502,7 @@ struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key) { /* Default buffer page size - one system page */ - return alloc_buffer(size, flags, 0, 0, 0,key); + return alloc_buffer(size, flags, 0, 0, 0, 0, key); } EXPORT_SYMBOL_GPL(__ring_buffer_alloc); @@ -2459,6 +2514,7 @@ EXPORT_SYMBOL_GPL(__ring_buffer_alloc); * @order: sub-buffer order * @start: start of allocated range * @range_size: size of allocated range + * @scratch_size: size of scratch area (for preallocated memory buffers) * @key: ring buffer reader_lock_key. * * Currently the only flag that is available is the RB_FL_OVERWRITE @@ -2469,32 +2525,29 @@ EXPORT_SYMBOL_GPL(__ring_buffer_alloc); struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flags, int order, unsigned long start, unsigned long range_size, + unsigned long scratch_size, struct lock_class_key *key) { - return alloc_buffer(size, flags, order, start, start + range_size, key); + return alloc_buffer(size, flags, order, start, start + range_size, + scratch_size, key); } -/** - * ring_buffer_last_boot_delta - return the delta offset from last boot - * @buffer: The buffer to return the delta from - * @text: Return text delta - * @data: Return data delta - * - * Returns: The true if the delta is non zero - */ -bool ring_buffer_last_boot_delta(struct trace_buffer *buffer, long *text, - long *data) +void *ring_buffer_meta_scratch(struct trace_buffer *buffer, unsigned int *size) { - if (!buffer) - return false; + struct ring_buffer_meta *meta; + void *ptr; - if (!buffer->last_text_delta) - return false; + if (!buffer || !buffer->meta) + return NULL; - *text = buffer->last_text_delta; - *data = buffer->last_data_delta; + meta = buffer->meta; - return true; + ptr = (void *)ALIGN((unsigned long)meta + sizeof(*meta), sizeof(long)); + + if (size) + *size = (void *)meta + meta->buffers_offset - ptr; + + return ptr; } /** @@ -3105,7 +3158,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) } /* Return the index into the sub-buffers for a given sub-buffer */ -static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf) +static int rb_meta_subbuf_idx(struct ring_buffer_cpu_meta *meta, void *subbuf) { void *subbuf_array; @@ -3117,7 +3170,7 @@ static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf) static void rb_update_meta_head(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *next_page) { - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; unsigned long old_head = (unsigned long)next_page->page; unsigned long new_head; @@ -3134,7 +3187,7 @@ static void rb_update_meta_head(struct ring_buffer_per_cpu *cpu_buffer, static void rb_update_meta_reader(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *reader) { - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; void *old_reader = cpu_buffer->reader_page->page; void *new_reader = reader->page; int id; @@ -3763,7 +3816,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) rb_page_write(cpu_buffer->commit_page)); rb_inc_page(&cpu_buffer->commit_page); if (cpu_buffer->ring_meta) { - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; meta->commit_buffer = (unsigned long)cpu_buffer->commit_page->page; } /* add barrier to keep gcc from optimizing too much */ @@ -5318,7 +5371,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) * moving it. The page before the header page has the * flag bit '1' set if it is pointing to the page we want. * but if the writer is in the process of moving it - * than it will be '2' or already moved '0'. + * then it will be '2' or already moved '0'. */ ret = rb_head_page_replace(reader, cpu_buffer->reader_page); @@ -6016,7 +6069,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) if (cpu_buffer->mapped) { rb_update_meta_page(cpu_buffer); if (cpu_buffer->ring_meta) { - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; meta->commit_buffer = meta->head_buffer; } } @@ -6050,7 +6103,6 @@ static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; - struct ring_buffer_meta *meta; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return; @@ -6069,11 +6121,6 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu) atomic_dec(&cpu_buffer->record_disabled); atomic_dec(&cpu_buffer->resize_disabled); - /* Make sure persistent meta now uses this buffer's addresses */ - meta = rb_range_meta(buffer, 0, cpu_buffer->cpu); - if (meta) - rb_meta_init_text_addr(meta); - mutex_unlock(&buffer->mutex); } EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); @@ -6088,7 +6135,6 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); void ring_buffer_reset_online_cpus(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; - struct ring_buffer_meta *meta; int cpu; /* prevent another thread from changing buffer sizes */ @@ -6116,11 +6162,6 @@ void ring_buffer_reset_online_cpus(struct trace_buffer *buffer) reset_disabled_cpu_buffer(cpu_buffer); - /* Make sure persistent meta now uses this buffer's addresses */ - meta = rb_range_meta(buffer, 0, cpu_buffer->cpu); - if (meta) - rb_meta_init_text_addr(meta); - atomic_dec(&cpu_buffer->record_disabled); atomic_sub(RESET_BIT, &cpu_buffer->resize_disabled); } @@ -7411,9 +7452,9 @@ static __init int rb_write_something(struct rb_test_data *data, bool nested) /* Ignore dropped events before test starts. */ if (started) { if (nested) - data->bytes_dropped += len; - else data->bytes_dropped_nested += len; + else + data->bytes_dropped += len; } return len; } diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig index 8226352a0062..b39f36013ef2 100644 --- a/kernel/trace/rv/Kconfig +++ b/kernel/trace/rv/Kconfig @@ -27,6 +27,13 @@ menuconfig RV source "kernel/trace/rv/monitors/wip/Kconfig" source "kernel/trace/rv/monitors/wwnr/Kconfig" +source "kernel/trace/rv/monitors/sched/Kconfig" +source "kernel/trace/rv/monitors/tss/Kconfig" +source "kernel/trace/rv/monitors/sco/Kconfig" +source "kernel/trace/rv/monitors/snroc/Kconfig" +source "kernel/trace/rv/monitors/scpd/Kconfig" +source "kernel/trace/rv/monitors/snep/Kconfig" +source "kernel/trace/rv/monitors/sncid/Kconfig" # Add new monitors here config RV_REACTORS diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile index 188b64668e1f..f9b2cd0483c3 100644 --- a/kernel/trace/rv/Makefile +++ b/kernel/trace/rv/Makefile @@ -5,6 +5,13 @@ ccflags-y += -I $(src) # needed for trace events obj-$(CONFIG_RV) += rv.o obj-$(CONFIG_RV_MON_WIP) += monitors/wip/wip.o obj-$(CONFIG_RV_MON_WWNR) += monitors/wwnr/wwnr.o +obj-$(CONFIG_RV_MON_SCHED) += monitors/sched/sched.o +obj-$(CONFIG_RV_MON_TSS) += monitors/tss/tss.o +obj-$(CONFIG_RV_MON_SCO) += monitors/sco/sco.o +obj-$(CONFIG_RV_MON_SNROC) += monitors/snroc/snroc.o +obj-$(CONFIG_RV_MON_SCPD) += monitors/scpd/scpd.o +obj-$(CONFIG_RV_MON_SNEP) += monitors/snep/snep.o +obj-$(CONFIG_RV_MON_SNCID) += monitors/sncid/sncid.o # Add new monitors here obj-$(CONFIG_RV_REACTORS) += rv_reactors.o obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o diff --git a/kernel/trace/rv/monitors/sched/Kconfig b/kernel/trace/rv/monitors/sched/Kconfig new file mode 100644 index 000000000000..ae3eb410abd7 --- /dev/null +++ b/kernel/trace/rv/monitors/sched/Kconfig @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +config RV_MON_SCHED + depends on RV + bool "sched monitor" + help + Collection of monitors to check the scheduler behaves according to specifications. + Enable this to enable all scheduler specification supported by the current kernel. + + For further information, see: + Documentation/trace/rv/monitor_sched.rst diff --git a/kernel/trace/rv/monitors/sched/sched.c b/kernel/trace/rv/monitors/sched/sched.c new file mode 100644 index 000000000000..905e03c3c934 --- /dev/null +++ b/kernel/trace/rv/monitors/sched/sched.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> + +#define MODULE_NAME "sched" + +#include "sched.h" + +struct rv_monitor rv_sched; + +struct rv_monitor rv_sched = { + .name = "sched", + .description = "container for several scheduler monitor specifications.", + .enable = NULL, + .disable = NULL, + .reset = NULL, + .enabled = 0, +}; + +static int __init register_sched(void) +{ + rv_register_monitor(&rv_sched, NULL); + return 0; +} + +static void __exit unregister_sched(void) +{ + rv_unregister_monitor(&rv_sched); +} + +module_init(register_sched); +module_exit(unregister_sched); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>"); +MODULE_DESCRIPTION("sched: container for several scheduler monitor specifications."); diff --git a/kernel/trace/rv/monitors/sched/sched.h b/kernel/trace/rv/monitors/sched/sched.h new file mode 100644 index 000000000000..ba148dd8d48b --- /dev/null +++ b/kernel/trace/rv/monitors/sched/sched.h @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +extern struct rv_monitor rv_sched; diff --git a/kernel/trace/rv/monitors/sco/Kconfig b/kernel/trace/rv/monitors/sco/Kconfig new file mode 100644 index 000000000000..097c96cccdd7 --- /dev/null +++ b/kernel/trace/rv/monitors/sco/Kconfig @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +config RV_MON_SCO + depends on RV + depends on RV_MON_SCHED + default y + select DA_MON_EVENTS_IMPLICIT + bool "sco monitor" + help + Monitor to ensure sched_set_state happens only in thread context. + This monitor is part of the sched monitors collection. + + For further information, see: + Documentation/trace/rv/monitor_sched.rst diff --git a/kernel/trace/rv/monitors/sco/sco.c b/kernel/trace/rv/monitors/sco/sco.c new file mode 100644 index 000000000000..4cff59220bfc --- /dev/null +++ b/kernel/trace/rv/monitors/sco/sco.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <rv/instrumentation.h> +#include <rv/da_monitor.h> + +#define MODULE_NAME "sco" + +#include <trace/events/sched.h> +#include <rv_trace.h> +#include <monitors/sched/sched.h> + +#include "sco.h" + +static struct rv_monitor rv_sco; +DECLARE_DA_MON_PER_CPU(sco, unsigned char); + +static void handle_sched_set_state(void *data, struct task_struct *tsk, int state) +{ + da_handle_start_event_sco(sched_set_state_sco); +} + +static void handle_schedule_entry(void *data, bool preempt, unsigned long ip) +{ + da_handle_event_sco(schedule_entry_sco); +} + +static void handle_schedule_exit(void *data, bool is_switch, unsigned long ip) +{ + da_handle_start_event_sco(schedule_exit_sco); +} + +static int enable_sco(void) +{ + int retval; + + retval = da_monitor_init_sco(); + if (retval) + return retval; + + rv_attach_trace_probe("sco", sched_set_state_tp, handle_sched_set_state); + rv_attach_trace_probe("sco", sched_entry_tp, handle_schedule_entry); + rv_attach_trace_probe("sco", sched_exit_tp, handle_schedule_exit); + + return 0; +} + +static void disable_sco(void) +{ + rv_sco.enabled = 0; + + rv_detach_trace_probe("sco", sched_set_state_tp, handle_sched_set_state); + rv_detach_trace_probe("sco", sched_entry_tp, handle_schedule_entry); + rv_detach_trace_probe("sco", sched_exit_tp, handle_schedule_exit); + + da_monitor_destroy_sco(); +} + +static struct rv_monitor rv_sco = { + .name = "sco", + .description = "scheduling context operations.", + .enable = enable_sco, + .disable = disable_sco, + .reset = da_monitor_reset_all_sco, + .enabled = 0, +}; + +static int __init register_sco(void) +{ + rv_register_monitor(&rv_sco, &rv_sched); + return 0; +} + +static void __exit unregister_sco(void) +{ + rv_unregister_monitor(&rv_sco); +} + +module_init(register_sco); +module_exit(unregister_sco); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>"); +MODULE_DESCRIPTION("sco: scheduling context operations."); diff --git a/kernel/trace/rv/monitors/sco/sco.h b/kernel/trace/rv/monitors/sco/sco.h new file mode 100644 index 000000000000..7a4c1f2d5ca1 --- /dev/null +++ b/kernel/trace/rv/monitors/sco/sco.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Automatically generated C representation of sco automaton + * For further information about this format, see kernel documentation: + * Documentation/trace/rv/deterministic_automata.rst + */ + +enum states_sco { + thread_context_sco = 0, + scheduling_context_sco, + state_max_sco +}; + +#define INVALID_STATE state_max_sco + +enum events_sco { + sched_set_state_sco = 0, + schedule_entry_sco, + schedule_exit_sco, + event_max_sco +}; + +struct automaton_sco { + char *state_names[state_max_sco]; + char *event_names[event_max_sco]; + unsigned char function[state_max_sco][event_max_sco]; + unsigned char initial_state; + bool final_states[state_max_sco]; +}; + +static const struct automaton_sco automaton_sco = { + .state_names = { + "thread_context", + "scheduling_context" + }, + .event_names = { + "sched_set_state", + "schedule_entry", + "schedule_exit" + }, + .function = { + { thread_context_sco, scheduling_context_sco, INVALID_STATE }, + { INVALID_STATE, INVALID_STATE, thread_context_sco }, + }, + .initial_state = thread_context_sco, + .final_states = { 1, 0 }, +}; diff --git a/kernel/trace/rv/monitors/sco/sco_trace.h b/kernel/trace/rv/monitors/sco/sco_trace.h new file mode 100644 index 000000000000..b711cd9024ec --- /dev/null +++ b/kernel/trace/rv/monitors/sco/sco_trace.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Snippet to be included in rv_trace.h + */ + +#ifdef CONFIG_RV_MON_SCO +DEFINE_EVENT(event_da_monitor, event_sco, + TP_PROTO(char *state, char *event, char *next_state, bool final_state), + TP_ARGS(state, event, next_state, final_state)); + +DEFINE_EVENT(error_da_monitor, error_sco, + TP_PROTO(char *state, char *event), + TP_ARGS(state, event)); +#endif /* CONFIG_RV_MON_SCO */ diff --git a/kernel/trace/rv/monitors/scpd/Kconfig b/kernel/trace/rv/monitors/scpd/Kconfig new file mode 100644 index 000000000000..b9114fbf680f --- /dev/null +++ b/kernel/trace/rv/monitors/scpd/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +config RV_MON_SCPD + depends on RV + depends on PREEMPT_TRACER + depends on RV_MON_SCHED + default y + select DA_MON_EVENTS_IMPLICIT + bool "scpd monitor" + help + Monitor to ensure schedule is called with preemption disabled. + This monitor is part of the sched monitors collection. + + For further information, see: + Documentation/trace/rv/monitor_sched.rst diff --git a/kernel/trace/rv/monitors/scpd/scpd.c b/kernel/trace/rv/monitors/scpd/scpd.c new file mode 100644 index 000000000000..cbdd6a5f8d7f --- /dev/null +++ b/kernel/trace/rv/monitors/scpd/scpd.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <rv/instrumentation.h> +#include <rv/da_monitor.h> + +#define MODULE_NAME "scpd" + +#include <trace/events/sched.h> +#include <trace/events/preemptirq.h> +#include <rv_trace.h> +#include <monitors/sched/sched.h> + +#include "scpd.h" + +static struct rv_monitor rv_scpd; +DECLARE_DA_MON_PER_CPU(scpd, unsigned char); + +static void handle_preempt_disable(void *data, unsigned long ip, unsigned long parent_ip) +{ + da_handle_event_scpd(preempt_disable_scpd); +} + +static void handle_preempt_enable(void *data, unsigned long ip, unsigned long parent_ip) +{ + da_handle_start_event_scpd(preempt_enable_scpd); +} + +static void handle_schedule_entry(void *data, bool preempt, unsigned long ip) +{ + da_handle_event_scpd(schedule_entry_scpd); +} + +static void handle_schedule_exit(void *data, bool is_switch, unsigned long ip) +{ + da_handle_event_scpd(schedule_exit_scpd); +} + +static int enable_scpd(void) +{ + int retval; + + retval = da_monitor_init_scpd(); + if (retval) + return retval; + + rv_attach_trace_probe("scpd", preempt_disable, handle_preempt_disable); + rv_attach_trace_probe("scpd", preempt_enable, handle_preempt_enable); + rv_attach_trace_probe("scpd", sched_entry_tp, handle_schedule_entry); + rv_attach_trace_probe("scpd", sched_exit_tp, handle_schedule_exit); + + return 0; +} + +static void disable_scpd(void) +{ + rv_scpd.enabled = 0; + + rv_detach_trace_probe("scpd", preempt_disable, handle_preempt_disable); + rv_detach_trace_probe("scpd", preempt_enable, handle_preempt_enable); + rv_detach_trace_probe("scpd", sched_entry_tp, handle_schedule_entry); + rv_detach_trace_probe("scpd", sched_exit_tp, handle_schedule_exit); + + da_monitor_destroy_scpd(); +} + +static struct rv_monitor rv_scpd = { + .name = "scpd", + .description = "schedule called with preemption disabled.", + .enable = enable_scpd, + .disable = disable_scpd, + .reset = da_monitor_reset_all_scpd, + .enabled = 0, +}; + +static int __init register_scpd(void) +{ + rv_register_monitor(&rv_scpd, &rv_sched); + return 0; +} + +static void __exit unregister_scpd(void) +{ + rv_unregister_monitor(&rv_scpd); +} + +module_init(register_scpd); +module_exit(unregister_scpd); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>"); +MODULE_DESCRIPTION("scpd: schedule called with preemption disabled."); diff --git a/kernel/trace/rv/monitors/scpd/scpd.h b/kernel/trace/rv/monitors/scpd/scpd.h new file mode 100644 index 000000000000..295f735a5811 --- /dev/null +++ b/kernel/trace/rv/monitors/scpd/scpd.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Automatically generated C representation of scpd automaton + * For further information about this format, see kernel documentation: + * Documentation/trace/rv/deterministic_automata.rst + */ + +enum states_scpd { + cant_sched_scpd = 0, + can_sched_scpd, + state_max_scpd +}; + +#define INVALID_STATE state_max_scpd + +enum events_scpd { + preempt_disable_scpd = 0, + preempt_enable_scpd, + schedule_entry_scpd, + schedule_exit_scpd, + event_max_scpd +}; + +struct automaton_scpd { + char *state_names[state_max_scpd]; + char *event_names[event_max_scpd]; + unsigned char function[state_max_scpd][event_max_scpd]; + unsigned char initial_state; + bool final_states[state_max_scpd]; +}; + +static const struct automaton_scpd automaton_scpd = { + .state_names = { + "cant_sched", + "can_sched" + }, + .event_names = { + "preempt_disable", + "preempt_enable", + "schedule_entry", + "schedule_exit" + }, + .function = { + { can_sched_scpd, INVALID_STATE, INVALID_STATE, INVALID_STATE }, + { INVALID_STATE, cant_sched_scpd, can_sched_scpd, can_sched_scpd }, + }, + .initial_state = cant_sched_scpd, + .final_states = { 1, 0 }, +}; diff --git a/kernel/trace/rv/monitors/scpd/scpd_trace.h b/kernel/trace/rv/monitors/scpd/scpd_trace.h new file mode 100644 index 000000000000..6b0f4aa4732e --- /dev/null +++ b/kernel/trace/rv/monitors/scpd/scpd_trace.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Snippet to be included in rv_trace.h + */ + +#ifdef CONFIG_RV_MON_SCPD +DEFINE_EVENT(event_da_monitor, event_scpd, + TP_PROTO(char *state, char *event, char *next_state, bool final_state), + TP_ARGS(state, event, next_state, final_state)); + +DEFINE_EVENT(error_da_monitor, error_scpd, + TP_PROTO(char *state, char *event), + TP_ARGS(state, event)); +#endif /* CONFIG_RV_MON_SCPD */ diff --git a/kernel/trace/rv/monitors/sncid/Kconfig b/kernel/trace/rv/monitors/sncid/Kconfig new file mode 100644 index 000000000000..76bcfef4fd10 --- /dev/null +++ b/kernel/trace/rv/monitors/sncid/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +config RV_MON_SNCID + depends on RV + depends on IRQSOFF_TRACER + depends on RV_MON_SCHED + default y + select DA_MON_EVENTS_IMPLICIT + bool "sncid monitor" + help + Monitor to ensure schedule is not called with interrupt disabled. + This monitor is part of the sched monitors collection. + + For further information, see: + Documentation/trace/rv/monitor_sched.rst diff --git a/kernel/trace/rv/monitors/sncid/sncid.c b/kernel/trace/rv/monitors/sncid/sncid.c new file mode 100644 index 000000000000..f5037cd6214c --- /dev/null +++ b/kernel/trace/rv/monitors/sncid/sncid.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <rv/instrumentation.h> +#include <rv/da_monitor.h> + +#define MODULE_NAME "sncid" + +#include <trace/events/sched.h> +#include <trace/events/preemptirq.h> +#include <rv_trace.h> +#include <monitors/sched/sched.h> + +#include "sncid.h" + +static struct rv_monitor rv_sncid; +DECLARE_DA_MON_PER_CPU(sncid, unsigned char); + +static void handle_irq_disable(void *data, unsigned long ip, unsigned long parent_ip) +{ + da_handle_event_sncid(irq_disable_sncid); +} + +static void handle_irq_enable(void *data, unsigned long ip, unsigned long parent_ip) +{ + da_handle_start_event_sncid(irq_enable_sncid); +} + +static void handle_schedule_entry(void *data, bool preempt, unsigned long ip) +{ + da_handle_start_event_sncid(schedule_entry_sncid); +} + +static void handle_schedule_exit(void *data, bool is_switch, unsigned long ip) +{ + da_handle_start_event_sncid(schedule_exit_sncid); +} + +static int enable_sncid(void) +{ + int retval; + + retval = da_monitor_init_sncid(); + if (retval) + return retval; + + rv_attach_trace_probe("sncid", irq_disable, handle_irq_disable); + rv_attach_trace_probe("sncid", irq_enable, handle_irq_enable); + rv_attach_trace_probe("sncid", sched_entry_tp, handle_schedule_entry); + rv_attach_trace_probe("sncid", sched_exit_tp, handle_schedule_exit); + + return 0; +} + +static void disable_sncid(void) +{ + rv_sncid.enabled = 0; + + rv_detach_trace_probe("sncid", irq_disable, handle_irq_disable); + rv_detach_trace_probe("sncid", irq_enable, handle_irq_enable); + rv_detach_trace_probe("sncid", sched_entry_tp, handle_schedule_entry); + rv_detach_trace_probe("sncid", sched_exit_tp, handle_schedule_exit); + + da_monitor_destroy_sncid(); +} + +static struct rv_monitor rv_sncid = { + .name = "sncid", + .description = "schedule not called with interrupt disabled.", + .enable = enable_sncid, + .disable = disable_sncid, + .reset = da_monitor_reset_all_sncid, + .enabled = 0, +}; + +static int __init register_sncid(void) +{ + rv_register_monitor(&rv_sncid, &rv_sched); + return 0; +} + +static void __exit unregister_sncid(void) +{ + rv_unregister_monitor(&rv_sncid); +} + +module_init(register_sncid); +module_exit(unregister_sncid); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>"); +MODULE_DESCRIPTION("sncid: schedule not called with interrupt disabled."); diff --git a/kernel/trace/rv/monitors/sncid/sncid.h b/kernel/trace/rv/monitors/sncid/sncid.h new file mode 100644 index 000000000000..21304725142b --- /dev/null +++ b/kernel/trace/rv/monitors/sncid/sncid.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Automatically generated C representation of sncid automaton + * For further information about this format, see kernel documentation: + * Documentation/trace/rv/deterministic_automata.rst + */ + +enum states_sncid { + can_sched_sncid = 0, + cant_sched_sncid, + state_max_sncid +}; + +#define INVALID_STATE state_max_sncid + +enum events_sncid { + irq_disable_sncid = 0, + irq_enable_sncid, + schedule_entry_sncid, + schedule_exit_sncid, + event_max_sncid +}; + +struct automaton_sncid { + char *state_names[state_max_sncid]; + char *event_names[event_max_sncid]; + unsigned char function[state_max_sncid][event_max_sncid]; + unsigned char initial_state; + bool final_states[state_max_sncid]; +}; + +static const struct automaton_sncid automaton_sncid = { + .state_names = { + "can_sched", + "cant_sched" + }, + .event_names = { + "irq_disable", + "irq_enable", + "schedule_entry", + "schedule_exit" + }, + .function = { + { cant_sched_sncid, INVALID_STATE, can_sched_sncid, can_sched_sncid }, + { INVALID_STATE, can_sched_sncid, INVALID_STATE, INVALID_STATE }, + }, + .initial_state = can_sched_sncid, + .final_states = { 1, 0 }, +}; diff --git a/kernel/trace/rv/monitors/sncid/sncid_trace.h b/kernel/trace/rv/monitors/sncid/sncid_trace.h new file mode 100644 index 000000000000..3ce42a57671d --- /dev/null +++ b/kernel/trace/rv/monitors/sncid/sncid_trace.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Snippet to be included in rv_trace.h + */ + +#ifdef CONFIG_RV_MON_SNCID +DEFINE_EVENT(event_da_monitor, event_sncid, + TP_PROTO(char *state, char *event, char *next_state, bool final_state), + TP_ARGS(state, event, next_state, final_state)); + +DEFINE_EVENT(error_da_monitor, error_sncid, + TP_PROTO(char *state, char *event), + TP_ARGS(state, event)); +#endif /* CONFIG_RV_MON_SNCID */ diff --git a/kernel/trace/rv/monitors/snep/Kconfig b/kernel/trace/rv/monitors/snep/Kconfig new file mode 100644 index 000000000000..77527f971232 --- /dev/null +++ b/kernel/trace/rv/monitors/snep/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +config RV_MON_SNEP + depends on RV + depends on PREEMPT_TRACER + depends on RV_MON_SCHED + default y + select DA_MON_EVENTS_IMPLICIT + bool "snep monitor" + help + Monitor to ensure schedule does not enable preempt. + This monitor is part of the sched monitors collection. + + For further information, see: + Documentation/trace/rv/monitor_sched.rst diff --git a/kernel/trace/rv/monitors/snep/snep.c b/kernel/trace/rv/monitors/snep/snep.c new file mode 100644 index 000000000000..0076ba6d7ea4 --- /dev/null +++ b/kernel/trace/rv/monitors/snep/snep.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <rv/instrumentation.h> +#include <rv/da_monitor.h> + +#define MODULE_NAME "snep" + +#include <trace/events/sched.h> +#include <trace/events/preemptirq.h> +#include <rv_trace.h> +#include <monitors/sched/sched.h> + +#include "snep.h" + +static struct rv_monitor rv_snep; +DECLARE_DA_MON_PER_CPU(snep, unsigned char); + +static void handle_preempt_disable(void *data, unsigned long ip, unsigned long parent_ip) +{ + da_handle_start_event_snep(preempt_disable_snep); +} + +static void handle_preempt_enable(void *data, unsigned long ip, unsigned long parent_ip) +{ + da_handle_start_event_snep(preempt_enable_snep); +} + +static void handle_schedule_entry(void *data, bool preempt, unsigned long ip) +{ + da_handle_event_snep(schedule_entry_snep); +} + +static void handle_schedule_exit(void *data, bool is_switch, unsigned long ip) +{ + da_handle_start_event_snep(schedule_exit_snep); +} + +static int enable_snep(void) +{ + int retval; + + retval = da_monitor_init_snep(); + if (retval) + return retval; + + rv_attach_trace_probe("snep", preempt_disable, handle_preempt_disable); + rv_attach_trace_probe("snep", preempt_enable, handle_preempt_enable); + rv_attach_trace_probe("snep", sched_entry_tp, handle_schedule_entry); + rv_attach_trace_probe("snep", sched_exit_tp, handle_schedule_exit); + + return 0; +} + +static void disable_snep(void) +{ + rv_snep.enabled = 0; + + rv_detach_trace_probe("snep", preempt_disable, handle_preempt_disable); + rv_detach_trace_probe("snep", preempt_enable, handle_preempt_enable); + rv_detach_trace_probe("snep", sched_entry_tp, handle_schedule_entry); + rv_detach_trace_probe("snep", sched_exit_tp, handle_schedule_exit); + + da_monitor_destroy_snep(); +} + +static struct rv_monitor rv_snep = { + .name = "snep", + .description = "schedule does not enable preempt.", + .enable = enable_snep, + .disable = disable_snep, + .reset = da_monitor_reset_all_snep, + .enabled = 0, +}; + +static int __init register_snep(void) +{ + rv_register_monitor(&rv_snep, &rv_sched); + return 0; +} + +static void __exit unregister_snep(void) +{ + rv_unregister_monitor(&rv_snep); +} + +module_init(register_snep); +module_exit(unregister_snep); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>"); +MODULE_DESCRIPTION("snep: schedule does not enable preempt."); diff --git a/kernel/trace/rv/monitors/snep/snep.h b/kernel/trace/rv/monitors/snep/snep.h new file mode 100644 index 000000000000..6d16b9ad931e --- /dev/null +++ b/kernel/trace/rv/monitors/snep/snep.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Automatically generated C representation of snep automaton + * For further information about this format, see kernel documentation: + * Documentation/trace/rv/deterministic_automata.rst + */ + +enum states_snep { + non_scheduling_context_snep = 0, + scheduling_contex_snep, + state_max_snep +}; + +#define INVALID_STATE state_max_snep + +enum events_snep { + preempt_disable_snep = 0, + preempt_enable_snep, + schedule_entry_snep, + schedule_exit_snep, + event_max_snep +}; + +struct automaton_snep { + char *state_names[state_max_snep]; + char *event_names[event_max_snep]; + unsigned char function[state_max_snep][event_max_snep]; + unsigned char initial_state; + bool final_states[state_max_snep]; +}; + +static const struct automaton_snep automaton_snep = { + .state_names = { + "non_scheduling_context", + "scheduling_contex" + }, + .event_names = { + "preempt_disable", + "preempt_enable", + "schedule_entry", + "schedule_exit" + }, + .function = { + { non_scheduling_context_snep, non_scheduling_context_snep, scheduling_contex_snep, INVALID_STATE }, + { INVALID_STATE, INVALID_STATE, INVALID_STATE, non_scheduling_context_snep }, + }, + .initial_state = non_scheduling_context_snep, + .final_states = { 1, 0 }, +}; diff --git a/kernel/trace/rv/monitors/snep/snep_trace.h b/kernel/trace/rv/monitors/snep/snep_trace.h new file mode 100644 index 000000000000..01aad49a949a --- /dev/null +++ b/kernel/trace/rv/monitors/snep/snep_trace.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Snippet to be included in rv_trace.h + */ + +#ifdef CONFIG_RV_MON_SNEP +DEFINE_EVENT(event_da_monitor, event_snep, + TP_PROTO(char *state, char *event, char *next_state, bool final_state), + TP_ARGS(state, event, next_state, final_state)); + +DEFINE_EVENT(error_da_monitor, error_snep, + TP_PROTO(char *state, char *event), + TP_ARGS(state, event)); +#endif /* CONFIG_RV_MON_SNEP */ diff --git a/kernel/trace/rv/monitors/snroc/Kconfig b/kernel/trace/rv/monitors/snroc/Kconfig new file mode 100644 index 000000000000..6e4365a2fea3 --- /dev/null +++ b/kernel/trace/rv/monitors/snroc/Kconfig @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +config RV_MON_SNROC + depends on RV + depends on RV_MON_SCHED + default y + select DA_MON_EVENTS_ID + bool "snroc monitor" + help + Monitor to ensure sched_set_state happens only in the respective task's context. + This monitor is part of the sched monitors collection. + + For further information, see: + Documentation/trace/rv/monitor_sched.rst diff --git a/kernel/trace/rv/monitors/snroc/snroc.c b/kernel/trace/rv/monitors/snroc/snroc.c new file mode 100644 index 000000000000..bb1f60d55296 --- /dev/null +++ b/kernel/trace/rv/monitors/snroc/snroc.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <rv/instrumentation.h> +#include <rv/da_monitor.h> + +#define MODULE_NAME "snroc" + +#include <trace/events/sched.h> +#include <rv_trace.h> +#include <monitors/sched/sched.h> + +#include "snroc.h" + +static struct rv_monitor rv_snroc; +DECLARE_DA_MON_PER_TASK(snroc, unsigned char); + +static void handle_sched_set_state(void *data, struct task_struct *tsk, int state) +{ + da_handle_event_snroc(tsk, sched_set_state_snroc); +} + +static void handle_sched_switch(void *data, bool preempt, + struct task_struct *prev, + struct task_struct *next, + unsigned int prev_state) +{ + da_handle_start_event_snroc(prev, sched_switch_out_snroc); + da_handle_event_snroc(next, sched_switch_in_snroc); +} + +static int enable_snroc(void) +{ + int retval; + + retval = da_monitor_init_snroc(); + if (retval) + return retval; + + rv_attach_trace_probe("snroc", sched_set_state_tp, handle_sched_set_state); + rv_attach_trace_probe("snroc", sched_switch, handle_sched_switch); + + return 0; +} + +static void disable_snroc(void) +{ + rv_snroc.enabled = 0; + + rv_detach_trace_probe("snroc", sched_set_state_tp, handle_sched_set_state); + rv_detach_trace_probe("snroc", sched_switch, handle_sched_switch); + + da_monitor_destroy_snroc(); +} + +static struct rv_monitor rv_snroc = { + .name = "snroc", + .description = "set non runnable on its own context.", + .enable = enable_snroc, + .disable = disable_snroc, + .reset = da_monitor_reset_all_snroc, + .enabled = 0, +}; + +static int __init register_snroc(void) +{ + rv_register_monitor(&rv_snroc, &rv_sched); + return 0; +} + +static void __exit unregister_snroc(void) +{ + rv_unregister_monitor(&rv_snroc); +} + +module_init(register_snroc); +module_exit(unregister_snroc); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>"); +MODULE_DESCRIPTION("snroc: set non runnable on its own context."); diff --git a/kernel/trace/rv/monitors/snroc/snroc.h b/kernel/trace/rv/monitors/snroc/snroc.h new file mode 100644 index 000000000000..c3650a2b1b10 --- /dev/null +++ b/kernel/trace/rv/monitors/snroc/snroc.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Automatically generated C representation of snroc automaton + * For further information about this format, see kernel documentation: + * Documentation/trace/rv/deterministic_automata.rst + */ + +enum states_snroc { + other_context_snroc = 0, + own_context_snroc, + state_max_snroc +}; + +#define INVALID_STATE state_max_snroc + +enum events_snroc { + sched_set_state_snroc = 0, + sched_switch_in_snroc, + sched_switch_out_snroc, + event_max_snroc +}; + +struct automaton_snroc { + char *state_names[state_max_snroc]; + char *event_names[event_max_snroc]; + unsigned char function[state_max_snroc][event_max_snroc]; + unsigned char initial_state; + bool final_states[state_max_snroc]; +}; + +static const struct automaton_snroc automaton_snroc = { + .state_names = { + "other_context", + "own_context" + }, + .event_names = { + "sched_set_state", + "sched_switch_in", + "sched_switch_out" + }, + .function = { + { INVALID_STATE, own_context_snroc, INVALID_STATE }, + { own_context_snroc, INVALID_STATE, other_context_snroc }, + }, + .initial_state = other_context_snroc, + .final_states = { 1, 0 }, +}; diff --git a/kernel/trace/rv/monitors/snroc/snroc_trace.h b/kernel/trace/rv/monitors/snroc/snroc_trace.h new file mode 100644 index 000000000000..50114cef5122 --- /dev/null +++ b/kernel/trace/rv/monitors/snroc/snroc_trace.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Snippet to be included in rv_trace.h + */ + +#ifdef CONFIG_RV_MON_SNROC +DEFINE_EVENT(event_da_monitor_id, event_snroc, + TP_PROTO(int id, char *state, char *event, char *next_state, bool final_state), + TP_ARGS(id, state, event, next_state, final_state)); + +DEFINE_EVENT(error_da_monitor_id, error_snroc, + TP_PROTO(int id, char *state, char *event), + TP_ARGS(id, state, event)); +#endif /* CONFIG_RV_MON_SNROC */ diff --git a/kernel/trace/rv/monitors/tss/Kconfig b/kernel/trace/rv/monitors/tss/Kconfig new file mode 100644 index 000000000000..479f86f52e60 --- /dev/null +++ b/kernel/trace/rv/monitors/tss/Kconfig @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +config RV_MON_TSS + depends on RV + depends on RV_MON_SCHED + default y + select DA_MON_EVENTS_IMPLICIT + bool "tss monitor" + help + Monitor to ensure sched_switch happens only in scheduling context. + This monitor is part of the sched monitors collection. + + For further information, see: + Documentation/trace/rv/monitor_sched.rst diff --git a/kernel/trace/rv/monitors/tss/tss.c b/kernel/trace/rv/monitors/tss/tss.c new file mode 100644 index 000000000000..542787e6524f --- /dev/null +++ b/kernel/trace/rv/monitors/tss/tss.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <rv/instrumentation.h> +#include <rv/da_monitor.h> + +#define MODULE_NAME "tss" + +#include <trace/events/sched.h> +#include <rv_trace.h> +#include <monitors/sched/sched.h> + +#include "tss.h" + +static struct rv_monitor rv_tss; +DECLARE_DA_MON_PER_CPU(tss, unsigned char); + +static void handle_sched_switch(void *data, bool preempt, + struct task_struct *prev, + struct task_struct *next, + unsigned int prev_state) +{ + da_handle_event_tss(sched_switch_tss); +} + +static void handle_schedule_entry(void *data, bool preempt, unsigned long ip) +{ + da_handle_event_tss(schedule_entry_tss); +} + +static void handle_schedule_exit(void *data, bool is_switch, unsigned long ip) +{ + da_handle_start_event_tss(schedule_exit_tss); +} + +static int enable_tss(void) +{ + int retval; + + retval = da_monitor_init_tss(); + if (retval) + return retval; + + rv_attach_trace_probe("tss", sched_switch, handle_sched_switch); + rv_attach_trace_probe("tss", sched_entry_tp, handle_schedule_entry); + rv_attach_trace_probe("tss", sched_exit_tp, handle_schedule_exit); + + return 0; +} + +static void disable_tss(void) +{ + rv_tss.enabled = 0; + + rv_detach_trace_probe("tss", sched_switch, handle_sched_switch); + rv_detach_trace_probe("tss", sched_entry_tp, handle_schedule_entry); + rv_detach_trace_probe("tss", sched_exit_tp, handle_schedule_exit); + + da_monitor_destroy_tss(); +} + +static struct rv_monitor rv_tss = { + .name = "tss", + .description = "task switch while scheduling.", + .enable = enable_tss, + .disable = disable_tss, + .reset = da_monitor_reset_all_tss, + .enabled = 0, +}; + +static int __init register_tss(void) +{ + rv_register_monitor(&rv_tss, &rv_sched); + return 0; +} + +static void __exit unregister_tss(void) +{ + rv_unregister_monitor(&rv_tss); +} + +module_init(register_tss); +module_exit(unregister_tss); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>"); +MODULE_DESCRIPTION("tss: task switch while scheduling."); diff --git a/kernel/trace/rv/monitors/tss/tss.h b/kernel/trace/rv/monitors/tss/tss.h new file mode 100644 index 000000000000..f0a36fda1b87 --- /dev/null +++ b/kernel/trace/rv/monitors/tss/tss.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Automatically generated C representation of tss automaton + * For further information about this format, see kernel documentation: + * Documentation/trace/rv/deterministic_automata.rst + */ + +enum states_tss { + thread_tss = 0, + sched_tss, + state_max_tss +}; + +#define INVALID_STATE state_max_tss + +enum events_tss { + sched_switch_tss = 0, + schedule_entry_tss, + schedule_exit_tss, + event_max_tss +}; + +struct automaton_tss { + char *state_names[state_max_tss]; + char *event_names[event_max_tss]; + unsigned char function[state_max_tss][event_max_tss]; + unsigned char initial_state; + bool final_states[state_max_tss]; +}; + +static const struct automaton_tss automaton_tss = { + .state_names = { + "thread", + "sched" + }, + .event_names = { + "sched_switch", + "schedule_entry", + "schedule_exit" + }, + .function = { + { INVALID_STATE, sched_tss, INVALID_STATE }, + { sched_tss, INVALID_STATE, thread_tss }, + }, + .initial_state = thread_tss, + .final_states = { 1, 0 }, +}; diff --git a/kernel/trace/rv/monitors/tss/tss_trace.h b/kernel/trace/rv/monitors/tss/tss_trace.h new file mode 100644 index 000000000000..4619dbb50cc0 --- /dev/null +++ b/kernel/trace/rv/monitors/tss/tss_trace.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Snippet to be included in rv_trace.h + */ + +#ifdef CONFIG_RV_MON_TSS +DEFINE_EVENT(event_da_monitor, event_tss, + TP_PROTO(char *state, char *event, char *next_state, bool final_state), + TP_ARGS(state, event, next_state, final_state)); + +DEFINE_EVENT(error_da_monitor, error_tss, + TP_PROTO(char *state, char *event), + TP_ARGS(state, event)); +#endif /* CONFIG_RV_MON_TSS */ diff --git a/kernel/trace/rv/monitors/wip/Kconfig b/kernel/trace/rv/monitors/wip/Kconfig index 3ef664b5cd90..e464b9294865 100644 --- a/kernel/trace/rv/monitors/wip/Kconfig +++ b/kernel/trace/rv/monitors/wip/Kconfig @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +# config RV_MON_WIP depends on RV depends on PREEMPT_TRACER diff --git a/kernel/trace/rv/monitors/wip/wip.c b/kernel/trace/rv/monitors/wip/wip.c index db7389157c87..ed758fec8608 100644 --- a/kernel/trace/rv/monitors/wip/wip.c +++ b/kernel/trace/rv/monitors/wip/wip.c @@ -71,7 +71,7 @@ static struct rv_monitor rv_wip = { static int __init register_wip(void) { - rv_register_monitor(&rv_wip); + rv_register_monitor(&rv_wip, NULL); return 0; } diff --git a/kernel/trace/rv/monitors/wip/wip.h b/kernel/trace/rv/monitors/wip/wip.h index 2e373f2c65ed..c7193748bf36 100644 --- a/kernel/trace/rv/monitors/wip/wip.h +++ b/kernel/trace/rv/monitors/wip/wip.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Automatically generated C representation of wip automaton * For further information about this format, see kernel documentation: diff --git a/kernel/trace/rv/monitors/wwnr/Kconfig b/kernel/trace/rv/monitors/wwnr/Kconfig index ee741aa6d6b8..d3bfc20037db 100644 --- a/kernel/trace/rv/monitors/wwnr/Kconfig +++ b/kernel/trace/rv/monitors/wwnr/Kconfig @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +# config RV_MON_WWNR depends on RV select DA_MON_EVENTS_ID diff --git a/kernel/trace/rv/monitors/wwnr/wwnr.c b/kernel/trace/rv/monitors/wwnr/wwnr.c index 3b16994a9984..172f31c4b0f3 100644 --- a/kernel/trace/rv/monitors/wwnr/wwnr.c +++ b/kernel/trace/rv/monitors/wwnr/wwnr.c @@ -70,7 +70,7 @@ static struct rv_monitor rv_wwnr = { static int __init register_wwnr(void) { - rv_register_monitor(&rv_wwnr); + rv_register_monitor(&rv_wwnr, NULL); return 0; } diff --git a/kernel/trace/rv/monitors/wwnr/wwnr.h b/kernel/trace/rv/monitors/wwnr/wwnr.h index d0d9c4b8121b..0a59d23edf61 100644 --- a/kernel/trace/rv/monitors/wwnr/wwnr.h +++ b/kernel/trace/rv/monitors/wwnr/wwnr.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Automatically generated C representation of wwnr automaton * For further information about this format, see kernel documentation: diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index 8657fc8806e7..50344aa9f7f9 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -162,7 +162,7 @@ struct dentry *get_monitors_root(void) /* * Interface for the monitor register. */ -static LIST_HEAD(rv_monitors_list); +LIST_HEAD(rv_monitors_list); static int task_monitor_count; static bool task_monitor_slots[RV_PER_TASK_MONITORS]; @@ -207,6 +207,30 @@ void rv_put_task_monitor_slot(int slot) } /* + * Monitors with a parent are nested, + * Monitors without a parent could be standalone or containers. + */ +bool rv_is_nested_monitor(struct rv_monitor_def *mdef) +{ + return mdef->parent != NULL; +} + +/* + * We set our list to have nested monitors listed after their parent + * if a monitor has a child element its a container. + * Containers can be also identified based on their function pointers: + * as they are not real monitors they do not need function definitions + * for enable()/disable(). Use this condition to find empty containers. + * Keep both conditions in case we have some non-compliant containers. + */ +bool rv_is_container_monitor(struct rv_monitor_def *mdef) +{ + struct rv_monitor_def *next = list_next_entry(mdef, list); + + return next->parent == mdef->monitor || !mdef->monitor->enable; +} + +/* * This section collects the monitor/ files and folders. */ static ssize_t monitor_enable_read_data(struct file *filp, char __user *user_buf, size_t count, @@ -229,7 +253,8 @@ static int __rv_disable_monitor(struct rv_monitor_def *mdef, bool sync) if (mdef->monitor->enabled) { mdef->monitor->enabled = 0; - mdef->monitor->disable(); + if (mdef->monitor->disable) + mdef->monitor->disable(); /* * Wait for the execution of all events to finish. @@ -243,6 +268,60 @@ static int __rv_disable_monitor(struct rv_monitor_def *mdef, bool sync) return 0; } +static void rv_disable_single(struct rv_monitor_def *mdef) +{ + __rv_disable_monitor(mdef, true); +} + +static int rv_enable_single(struct rv_monitor_def *mdef) +{ + int retval; + + lockdep_assert_held(&rv_interface_lock); + + if (mdef->monitor->enabled) + return 0; + + retval = mdef->monitor->enable(); + + if (!retval) + mdef->monitor->enabled = 1; + + return retval; +} + +static void rv_disable_container(struct rv_monitor_def *mdef) +{ + struct rv_monitor_def *p = mdef; + int enabled = 0; + + list_for_each_entry_continue(p, &rv_monitors_list, list) { + if (p->parent != mdef->monitor) + break; + enabled += __rv_disable_monitor(p, false); + } + if (enabled) + tracepoint_synchronize_unregister(); + mdef->monitor->enabled = 0; +} + +static int rv_enable_container(struct rv_monitor_def *mdef) +{ + struct rv_monitor_def *p = mdef; + int retval = 0; + + list_for_each_entry_continue(p, &rv_monitors_list, list) { + if (retval || p->parent != mdef->monitor) + break; + retval = rv_enable_single(p); + } + if (retval) + rv_disable_container(mdef); + else + mdef->monitor->enabled = 1; + return retval; +} + /** * rv_disable_monitor - disable a given runtime monitor * @mdef: Pointer to the monitor definition structure. @@ -251,7 +330,11 @@ static int __rv_disable_monitor(struct rv_monitor_def *mdef, bool sync) */ int rv_disable_monitor(struct rv_monitor_def *mdef) { - __rv_disable_monitor(mdef, true); + if (rv_is_container_monitor(mdef)) + rv_disable_container(mdef); + else + rv_disable_single(mdef); + return 0; } @@ -265,15 +348,10 @@ int rv_enable_monitor(struct rv_monitor_def *mdef) { int retval; - lockdep_assert_held(&rv_interface_lock); - - if (mdef->monitor->enabled) - return 0; - - retval = mdef->monitor->enable(); - - if (!retval) - mdef->monitor->enabled = 1; + if (rv_is_container_monitor(mdef)) + retval = rv_enable_container(mdef); + else + retval = rv_enable_single(mdef); return retval; } @@ -336,9 +414,9 @@ static const struct file_operations interface_desc_fops = { * the monitor dir, where the specific options of the monitor * are exposed. */ -static int create_monitor_dir(struct rv_monitor_def *mdef) +static int create_monitor_dir(struct rv_monitor_def *mdef, struct rv_monitor_def *parent) { - struct dentry *root = get_monitors_root(); + struct dentry *root = parent ? parent->root_d : get_monitors_root(); const char *name = mdef->monitor->name; struct dentry *tmp; int retval; @@ -377,7 +455,11 @@ static int monitors_show(struct seq_file *m, void *p) { struct rv_monitor_def *mon_def = p; - seq_printf(m, "%s\n", mon_def->monitor->name); + if (mon_def->parent) + seq_printf(m, "%s:%s\n", mon_def->parent->name, + mon_def->monitor->name); + else + seq_printf(m, "%s\n", mon_def->monitor->name); return 0; } @@ -514,7 +596,7 @@ static ssize_t enabled_monitors_write(struct file *filp, const char __user *user struct rv_monitor_def *mdef; int retval = -EINVAL; bool enable = true; - char *ptr; + char *ptr, *tmp; int len; if (count < 1 || count > MAX_RV_MONITOR_NAME_SIZE + 1) @@ -541,6 +623,11 @@ static ssize_t enabled_monitors_write(struct file *filp, const char __user *user retval = -EINVAL; + /* we support 1 nesting level, trim the parent */ + tmp = strstr(ptr, ":"); + if (tmp) + ptr = tmp+1; + list_for_each_entry(mdef, &rv_monitors_list, list) { if (strcmp(ptr, mdef->monitor->name) != 0) continue; @@ -613,7 +700,7 @@ static void reset_all_monitors(void) struct rv_monitor_def *mdef; list_for_each_entry(mdef, &rv_monitors_list, list) { - if (mdef->monitor->enabled) + if (mdef->monitor->enabled && mdef->monitor->reset) mdef->monitor->reset(); } } @@ -685,18 +772,19 @@ static void destroy_monitor_dir(struct rv_monitor_def *mdef) /** * rv_register_monitor - register a rv monitor. * @monitor: The rv_monitor to be registered. + * @parent: The parent of the monitor to be registered, NULL if not nested. * * Returns 0 if successful, error otherwise. */ -int rv_register_monitor(struct rv_monitor *monitor) +int rv_register_monitor(struct rv_monitor *monitor, struct rv_monitor *parent) { - struct rv_monitor_def *r; + struct rv_monitor_def *r, *p = NULL; int retval = 0; if (strlen(monitor->name) >= MAX_RV_MONITOR_NAME_SIZE) { pr_info("Monitor %s has a name longer than %d\n", monitor->name, MAX_RV_MONITOR_NAME_SIZE); - return -1; + return -EINVAL; } mutex_lock(&rv_interface_lock); @@ -704,11 +792,26 @@ int rv_register_monitor(struct rv_monitor *monitor) list_for_each_entry(r, &rv_monitors_list, list) { if (strcmp(monitor->name, r->monitor->name) == 0) { pr_info("Monitor %s is already registered\n", monitor->name); - retval = -1; + retval = -EEXIST; goto out_unlock; } } + if (parent) { + list_for_each_entry(r, &rv_monitors_list, list) { + if (strcmp(parent->name, r->monitor->name) == 0) { + p = r; + break; + } + } + } + + if (p && rv_is_nested_monitor(p)) { + pr_info("Parent monitor %s is already nested, cannot nest further\n", + parent->name); + return -EINVAL; + } + r = kzalloc(sizeof(struct rv_monitor_def), GFP_KERNEL); if (!r) { retval = -ENOMEM; @@ -716,14 +819,19 @@ int rv_register_monitor(struct rv_monitor *monitor) } r->monitor = monitor; + r->parent = parent; - retval = create_monitor_dir(r); + retval = create_monitor_dir(r, p); if (retval) { kfree(r); goto out_unlock; } - list_add_tail(&r->list, &rv_monitors_list); + /* keep children close to the parent for easier visualisation */ + if (p) + list_add(&r->list, &p->list); + else + list_add_tail(&r->list, &rv_monitors_list); out_unlock: mutex_unlock(&rv_interface_lock); diff --git a/kernel/trace/rv/rv.h b/kernel/trace/rv/rv.h index db6cb0913dbd..98fca0a1adbc 100644 --- a/kernel/trace/rv/rv.h +++ b/kernel/trace/rv/rv.h @@ -21,6 +21,7 @@ struct rv_interface { #define MAX_RV_REACTOR_NAME_SIZE 32 extern struct mutex rv_interface_lock; +extern struct list_head rv_monitors_list; #ifdef CONFIG_RV_REACTORS struct rv_reactor_def { @@ -34,6 +35,7 @@ struct rv_reactor_def { struct rv_monitor_def { struct list_head list; struct rv_monitor *monitor; + struct rv_monitor *parent; struct dentry *root_d; #ifdef CONFIG_RV_REACTORS struct rv_reactor_def *rdef; @@ -45,6 +47,8 @@ struct rv_monitor_def { struct dentry *get_monitors_root(void); int rv_disable_monitor(struct rv_monitor_def *mdef); int rv_enable_monitor(struct rv_monitor_def *mdef); +bool rv_is_container_monitor(struct rv_monitor_def *mdef); +bool rv_is_nested_monitor(struct rv_monitor_def *mdef); #ifdef CONFIG_RV_REACTORS int reactor_populate_monitor(struct rv_monitor_def *mdef); diff --git a/kernel/trace/rv/rv_reactors.c b/kernel/trace/rv/rv_reactors.c index 7b49cbe388d4..9501ca886d83 100644 --- a/kernel/trace/rv/rv_reactors.c +++ b/kernel/trace/rv/rv_reactors.c @@ -158,8 +158,9 @@ static const struct seq_operations monitor_reactors_seq_ops = { .show = monitor_reactor_show }; -static void monitor_swap_reactors(struct rv_monitor_def *mdef, struct rv_reactor_def *rdef, - bool reacting) +static void monitor_swap_reactors_single(struct rv_monitor_def *mdef, + struct rv_reactor_def *rdef, + bool reacting, bool nested) { bool monitor_enabled; @@ -179,10 +180,31 @@ static void monitor_swap_reactors(struct rv_monitor_def *mdef, struct rv_reactor mdef->reacting = reacting; mdef->monitor->react = rdef->reactor->react; - if (monitor_enabled) + /* enable only once if iterating through a container */ + if (monitor_enabled && !nested) rv_enable_monitor(mdef); } +static void monitor_swap_reactors(struct rv_monitor_def *mdef, + struct rv_reactor_def *rdef, bool reacting) +{ + struct rv_monitor_def *p = mdef; + + if (rv_is_container_monitor(mdef)) + list_for_each_entry_continue(p, &rv_monitors_list, list) { + if (p->parent != mdef->monitor) + break; + monitor_swap_reactors_single(p, rdef, reacting, true); + } + /* + * This call enables and disables the monitor if they were active. + * In case of a container, we already disabled all and will enable all. + * All nested monitors are enabled also if they were off, we may refine + * this logic in the future. + */ + monitor_swap_reactors_single(mdef, rdef, reacting, false); +} + static ssize_t monitor_reactors_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) diff --git a/kernel/trace/rv/rv_trace.h b/kernel/trace/rv/rv_trace.h index 5e65097423ba..422b75f58891 100644 --- a/kernel/trace/rv/rv_trace.h +++ b/kernel/trace/rv/rv_trace.h @@ -58,6 +58,11 @@ DECLARE_EVENT_CLASS(error_da_monitor, ); #include <monitors/wip/wip_trace.h> +#include <monitors/tss/tss_trace.h> +#include <monitors/sco/sco_trace.h> +#include <monitors/scpd/scpd_trace.h> +#include <monitors/snep/snep_trace.h> +#include <monitors/sncid/sncid_trace.h> // Add new monitors based on CONFIG_DA_MON_EVENTS_IMPLICIT here #endif /* CONFIG_DA_MON_EVENTS_IMPLICIT */ @@ -118,6 +123,7 @@ DECLARE_EVENT_CLASS(error_da_monitor_id, ); #include <monitors/wwnr/wwnr_trace.h> +#include <monitors/snroc/snroc_trace.h> // Add new monitors based on CONFIG_DA_MON_EVENTS_ID here #endif /* CONFIG_DA_MON_EVENTS_ID */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0e6d517e74e0..103b193875b3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -49,6 +49,7 @@ #include <linux/fsnotify.h> #include <linux/irq_work.h> #include <linux/workqueue.h> +#include <linux/sort.h> #include <asm/setup.h> /* COMMAND_LINE_SIZE */ @@ -87,6 +88,7 @@ void __init disable_tracing_selftest(const char *reason) static struct trace_iterator *tracepoint_print_iter; int tracepoint_printk; static bool tracepoint_printk_stop_on_boot __initdata; +static bool traceoff_after_boot __initdata; static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key); /* For tracers that don't implement custom flags */ @@ -330,6 +332,13 @@ static int __init set_tracepoint_printk_stop(char *str) } __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop); +static int __init set_traceoff_after_boot(char *str) +{ + traceoff_after_boot = true; + return 1; +} +__setup("traceoff_after_boot", set_traceoff_after_boot); + unsigned long long ns2usecs(u64 nsec) { nsec += 500; @@ -2878,13 +2887,16 @@ trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, void trace_function(struct trace_array *tr, unsigned long ip, unsigned long - parent_ip, unsigned int trace_ctx) + parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs) { struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; + int size = sizeof(*entry); + + size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long); - event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), + event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size, trace_ctx); if (!event) return; @@ -2892,6 +2904,13 @@ trace_function(struct trace_array *tr, unsigned long ip, unsigned long entry->ip = ip; entry->parent_ip = parent_ip; +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API + if (fregs) { + for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++) + entry->args[i] = ftrace_regs_get_argument(fregs, i); + } +#endif + if (static_branch_unlikely(&trace_function_exports_enabled)) ftrace_exports(event, TRACE_EXPORT_FUNCTION); __buffer_unlock_commit(buffer, event); @@ -4100,12 +4119,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) entries, total, buf->cpu, - preempt_model_none() ? "server" : - preempt_model_voluntary() ? "desktop" : - preempt_model_full() ? "preempt" : - preempt_model_lazy() ? "lazy" : - preempt_model_rt() ? "preempt_rt" : - "unknown", + preempt_model_str(), /* These are reserved for later use */ 0, 0, 0, 0); #ifdef CONFIG_SMP @@ -4193,7 +4207,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) * safe to use if the array has delta offsets * Force printing via the fields. */ - if ((tr->text_delta || tr->data_delta) && + if ((tr->text_delta) && event->type > __TRACE_LAST_TYPE) return print_event_fields(iter, event); @@ -5988,11 +6002,130 @@ ssize_t tracing_resize_ring_buffer(struct trace_array *tr, return __tracing_resize_ring_buffer(tr, size, cpu_id); } +struct trace_mod_entry { + unsigned long mod_addr; + char mod_name[MODULE_NAME_LEN]; +}; + +struct trace_scratch { + unsigned long text_addr; + unsigned long nr_entries; + struct trace_mod_entry entries[]; +}; + +static DEFINE_MUTEX(scratch_mutex); + +static int cmp_mod_entry(const void *key, const void *pivot) +{ + unsigned long addr = (unsigned long)key; + const struct trace_mod_entry *ent = pivot; + + if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr) + return 0; + else + return addr - ent->mod_addr; +} + +/** + * trace_adjust_address() - Adjust prev boot address to current address. + * @tr: Persistent ring buffer's trace_array. + * @addr: Address in @tr which is adjusted. + */ +unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr) +{ + struct trace_module_delta *module_delta; + struct trace_scratch *tscratch; + struct trace_mod_entry *entry; + int idx = 0, nr_entries; + + /* If we don't have last boot delta, return the address */ + if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) + return addr; + + /* tr->module_delta must be protected by rcu. */ + guard(rcu)(); + tscratch = tr->scratch; + /* if there is no tscrach, module_delta must be NULL. */ + module_delta = READ_ONCE(tr->module_delta); + if (!module_delta || tscratch->entries[0].mod_addr > addr) + return addr + tr->text_delta; + + /* Note that entries must be sorted. */ + nr_entries = tscratch->nr_entries; + if (nr_entries == 1 || + tscratch->entries[nr_entries - 1].mod_addr < addr) + idx = nr_entries - 1; + else { + entry = __inline_bsearch((void *)addr, + tscratch->entries, + nr_entries - 1, + sizeof(tscratch->entries[0]), + cmp_mod_entry); + if (entry) + idx = entry - tscratch->entries; + } + + return addr + module_delta->delta[idx]; +} + +#ifdef CONFIG_MODULES +static int save_mod(struct module *mod, void *data) +{ + struct trace_array *tr = data; + struct trace_scratch *tscratch; + struct trace_mod_entry *entry; + unsigned int size; + + tscratch = tr->scratch; + if (!tscratch) + return -1; + size = tr->scratch_size; + + if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size) + return -1; + + entry = &tscratch->entries[tscratch->nr_entries]; + + tscratch->nr_entries++; + + entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base; + strscpy(entry->mod_name, mod->name); + + return 0; +} +#else +static int save_mod(struct module *mod, void *data) +{ + return 0; +} +#endif + static void update_last_data(struct trace_array *tr) { - if (!tr->text_delta && !tr->data_delta) + struct trace_module_delta *module_delta; + struct trace_scratch *tscratch; + + if (!(tr->flags & TRACE_ARRAY_FL_BOOT)) + return; + + if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) return; + /* Only if the buffer has previous boot data clear and update it. */ + tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT; + + /* Reset the module list and reload them */ + if (tr->scratch) { + struct trace_scratch *tscratch = tr->scratch; + + memset(tscratch->entries, 0, + flex_array_size(tscratch, entries, tscratch->nr_entries)); + tscratch->nr_entries = 0; + + guard(mutex)(&scratch_mutex); + module_for_each_mod(save_mod, tr); + } + /* * Need to clear all CPU buffers as there cannot be events * from the previous boot mixed with events with this boot @@ -6003,7 +6136,17 @@ static void update_last_data(struct trace_array *tr) /* Using current data now */ tr->text_delta = 0; - tr->data_delta = 0; + + if (!tr->scratch) + return; + + tscratch = tr->scratch; + module_delta = READ_ONCE(tr->module_delta); + WRITE_ONCE(tr->module_delta, NULL); + kfree_rcu(module_delta, rcu); + + /* Set the persistent ring buffer meta data to this address */ + tscratch->text_addr = (unsigned long)_text; } /** @@ -6812,19 +6955,102 @@ tracing_total_entries_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static ssize_t -tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +#define LAST_BOOT_HEADER ((void *)1) + +static void *l_next(struct seq_file *m, void *v, loff_t *pos) { - struct trace_array *tr = filp->private_data; - struct seq_buf seq; - char buf[64]; + struct trace_array *tr = m->private; + struct trace_scratch *tscratch = tr->scratch; + unsigned int index = *pos; + + (*pos)++; + + if (*pos == 1) + return LAST_BOOT_HEADER; + + /* Only show offsets of the last boot data */ + if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) + return NULL; + + /* *pos 0 is for the header, 1 is for the first module */ + index--; + + if (index >= tscratch->nr_entries) + return NULL; + + return &tscratch->entries[index]; +} - seq_buf_init(&seq, buf, 64); +static void *l_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&scratch_mutex); + + return l_next(m, NULL, pos); +} + +static void l_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&scratch_mutex); +} - seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta); - seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta); +static void show_last_boot_header(struct seq_file *m, struct trace_array *tr) +{ + struct trace_scratch *tscratch = tr->scratch; - return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq)); + /* + * Do not leak KASLR address. This only shows the KASLR address of + * the last boot. When the ring buffer is started, the LAST_BOOT + * flag gets cleared, and this should only report "current". + * Otherwise it shows the KASLR address from the previous boot which + * should not be the same as the current boot. + */ + if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) + seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr); + else + seq_puts(m, "# Current\n"); +} + +static int l_show(struct seq_file *m, void *v) +{ + struct trace_array *tr = m->private; + struct trace_mod_entry *entry = v; + + if (v == LAST_BOOT_HEADER) { + show_last_boot_header(m, tr); + return 0; + } + + seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name); + return 0; +} + +static const struct seq_operations last_boot_seq_ops = { + .start = l_start, + .next = l_next, + .stop = l_stop, + .show = l_show, +}; + +static int tracing_last_boot_open(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + struct seq_file *m; + int ret; + + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; + + ret = seq_open(file, &last_boot_seq_ops); + if (ret) { + trace_array_put(tr); + return ret; + } + + m = file->private_data; + m->private = tr; + + return 0; } static int tracing_buffer_meta_open(struct inode *inode, struct file *filp) @@ -7453,10 +7679,10 @@ static const struct file_operations trace_time_stamp_mode_fops = { }; static const struct file_operations last_boot_fops = { - .open = tracing_open_generic_tr, - .read = tracing_last_boot_read, - .llseek = generic_file_llseek, - .release = tracing_release_generic_tr, + .open = tracing_last_boot_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tracing_seq_release, }; #ifdef CONFIG_TRACER_SNAPSHOT @@ -9196,22 +9422,125 @@ static struct dentry *trace_instance_dir; static void init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); +#ifdef CONFIG_MODULES +static int make_mod_delta(struct module *mod, void *data) +{ + struct trace_module_delta *module_delta; + struct trace_scratch *tscratch; + struct trace_mod_entry *entry; + struct trace_array *tr = data; + int i; + + tscratch = tr->scratch; + module_delta = READ_ONCE(tr->module_delta); + for (i = 0; i < tscratch->nr_entries; i++) { + entry = &tscratch->entries[i]; + if (strcmp(mod->name, entry->mod_name)) + continue; + if (mod->state == MODULE_STATE_GOING) + module_delta->delta[i] = 0; + else + module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base + - entry->mod_addr; + break; + } + return 0; +} +#else +static int make_mod_delta(struct module *mod, void *data) +{ + return 0; +} +#endif + +static int mod_addr_comp(const void *a, const void *b, const void *data) +{ + const struct trace_mod_entry *e1 = a; + const struct trace_mod_entry *e2 = b; + + return e1->mod_addr > e2->mod_addr ? 1 : -1; +} + +static void setup_trace_scratch(struct trace_array *tr, + struct trace_scratch *tscratch, unsigned int size) +{ + struct trace_module_delta *module_delta; + struct trace_mod_entry *entry; + int i, nr_entries; + + if (!tscratch) + return; + + tr->scratch = tscratch; + tr->scratch_size = size; + + if (tscratch->text_addr) + tr->text_delta = (unsigned long)_text - tscratch->text_addr; + + if (struct_size(tscratch, entries, tscratch->nr_entries) > size) + goto reset; + + /* Check if each module name is a valid string */ + for (i = 0; i < tscratch->nr_entries; i++) { + int n; + + entry = &tscratch->entries[i]; + + for (n = 0; n < MODULE_NAME_LEN; n++) { + if (entry->mod_name[n] == '\0') + break; + if (!isprint(entry->mod_name[n])) + goto reset; + } + if (n == MODULE_NAME_LEN) + goto reset; + } + + /* Sort the entries so that we can find appropriate module from address. */ + nr_entries = tscratch->nr_entries; + sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry), + mod_addr_comp, NULL, NULL); + + if (IS_ENABLED(CONFIG_MODULES)) { + module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL); + if (!module_delta) { + pr_info("module_delta allocation failed. Not able to decode module address."); + goto reset; + } + init_rcu_head(&module_delta->rcu); + } else + module_delta = NULL; + WRITE_ONCE(tr->module_delta, module_delta); + + /* Scan modules to make text delta for modules. */ + module_for_each_mod(make_mod_delta, tr); + return; + reset: + /* Invalid trace modules */ + memset(tscratch, 0, size); +} + static int allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) { enum ring_buffer_flags rb_flags; + struct trace_scratch *tscratch; + unsigned int scratch_size = 0; rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0; buf->tr = tr; if (tr->range_addr_start && tr->range_addr_size) { + /* Add scratch buffer to handle 128 modules */ buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0, tr->range_addr_start, - tr->range_addr_size); + tr->range_addr_size, + struct_size(tscratch, entries, 128)); + + tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size); + setup_trace_scratch(tr, tscratch, scratch_size); - ring_buffer_last_boot_delta(buf->buffer, - &tr->text_delta, &tr->data_delta); /* * This is basically the same as a mapped buffer, * with the same restrictions. @@ -9284,6 +9613,9 @@ static void free_trace_buffers(struct trace_array *tr) #ifdef CONFIG_TRACER_MAX_TRACE free_trace_buffer(&tr->max_buffer); #endif + + if (tr->range_addr_start) + vunmap((void *)tr->range_addr_start); } static void init_trace_flags_index(struct trace_array *tr) @@ -9445,6 +9777,7 @@ trace_array_create_systems(const char *name, const char *systems, free_cpumask_var(tr->pipe_cpumask); free_cpumask_var(tr->tracing_cpumask); kfree_const(tr->system_names); + kfree(tr->range_name); kfree(tr->name); kfree(tr); @@ -9571,6 +9904,11 @@ static int __remove_instance(struct trace_array *tr) free_trace_buffers(tr); clear_tracing_err_log(tr); + if (tr->range_name) { + reserve_mem_release_by_name(tr->range_name); + kfree(tr->range_name); + } + for (i = 0; i < tr->nr_topts; i++) { kfree(tr->topts[i].topts); } @@ -9892,6 +10230,24 @@ static void trace_module_remove_evals(struct module *mod) static inline void trace_module_remove_evals(struct module *mod) { } #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ +static void trace_module_record(struct module *mod, bool add) +{ + struct trace_array *tr; + unsigned long flags; + + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT); + /* Update any persistent trace array that has already been started */ + if (flags == TRACE_ARRAY_FL_BOOT && add) { + guard(mutex)(&scratch_mutex); + save_mod(mod, tr); + } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) { + /* Update delta if the module loaded in previous boot */ + make_mod_delta(mod, tr); + } + } +} + static int trace_module_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -9900,9 +10256,11 @@ static int trace_module_notify(struct notifier_block *self, switch (val) { case MODULE_STATE_COMING: trace_module_add_evals(mod); + trace_module_record(mod, true); break; case MODULE_STATE_GOING: trace_module_remove_evals(mod); + trace_module_record(mod, false); break; } @@ -10368,6 +10726,7 @@ __init static void enable_instances(void) bool traceoff = false; char *flag_delim; char *addr_delim; + char *rname __free(kfree) = NULL; tok = strsep(&curr_str, ","); @@ -10424,6 +10783,7 @@ __init static void enable_instances(void) pr_warn("Failed to map boot instance %s to %s\n", name, tok); continue; } + rname = kstrdup(tok, GFP_KERNEL); } if (start) { @@ -10459,8 +10819,8 @@ __init static void enable_instances(void) * to it. */ if (start) { - tr->flags |= TRACE_ARRAY_FL_BOOT; - tr->ref++; + tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT; + tr->range_name = no_free_ptr(rname); } while ((tok = strsep(&curr_str, ","))) { @@ -10704,6 +11064,9 @@ __init static int late_trace_init(void) tracepoint_printk = 0; } + if (traceoff_after_boot) + tracing_off(); + tracing_set_default_clock(); clear_boot_tracer(); return 0; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9c21ba45b7af..c20f6bcc200a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -21,6 +21,7 @@ #include <linux/workqueue.h> #include <linux/ctype.h> #include <linux/once_lite.h> +#include <linux/ftrace_regs.h> #include "pid_list.h" @@ -312,6 +313,11 @@ struct trace_func_repeats { u64 ts_last_call; }; +struct trace_module_delta { + struct rcu_head rcu; + long delta[]; +}; + /* * The trace array - an array of per-CPU trace arrays. This is the * highest level data structure that individual tracers deal with. @@ -348,8 +354,13 @@ struct trace_array { unsigned int mapped; unsigned long range_addr_start; unsigned long range_addr_size; + char *range_name; long text_delta; - long data_delta; + struct trace_module_delta *module_delta; + void *scratch; /* pointer in persistent memory */ + int scratch_size; + + int buffer_disabled; struct trace_pid_list __rcu *filtered_pids; struct trace_pid_list __rcu *filtered_no_pids; @@ -367,7 +378,6 @@ struct trace_array { * CONFIG_TRACER_MAX_TRACE. */ arch_spinlock_t max_lock; - int buffer_disabled; #ifdef CONFIG_FTRACE_SYSCALLS int sys_refcount_enter; int sys_refcount_exit; @@ -433,9 +443,10 @@ struct trace_array { }; enum { - TRACE_ARRAY_FL_GLOBAL = BIT(0), - TRACE_ARRAY_FL_BOOT = BIT(1), - TRACE_ARRAY_FL_MOD_INIT = BIT(2), + TRACE_ARRAY_FL_GLOBAL = BIT(0), + TRACE_ARRAY_FL_BOOT = BIT(1), + TRACE_ARRAY_FL_LAST_BOOT = BIT(2), + TRACE_ARRAY_FL_MOD_INIT = BIT(3), }; #ifdef CONFIG_MODULES @@ -462,6 +473,8 @@ extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); extern bool trace_clock_in_ns(struct trace_array *tr); +extern unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr); + /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway. @@ -697,7 +710,8 @@ unsigned long trace_total_entries(struct trace_array *tr); void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned int trace_ctx); + unsigned int trace_ctx, + struct ftrace_regs *regs); void trace_graph_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, @@ -783,6 +797,8 @@ extern void trace_find_cmdline(int pid, char comm[]); extern int trace_find_tgid(int pid); extern void trace_event_follow_fork(struct trace_array *tr, bool enable); +extern int trace_events_enabled(struct trace_array *tr, const char *system); + #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; extern unsigned long ftrace_number_of_pages; @@ -897,6 +913,7 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash) #define TRACE_GRAPH_PRINT_RETVAL 0x800 #define TRACE_GRAPH_PRINT_RETVAL_HEX 0x1000 #define TRACE_GRAPH_PRINT_RETADDR 0x2000 +#define TRACE_GRAPH_ARGS 0x4000 #define TRACE_GRAPH_PRINT_FILL_SHIFT 28 #define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT) @@ -1714,7 +1731,7 @@ struct event_trigger_data { unsigned long count; int ref; int flags; - struct event_trigger_ops *ops; + const struct event_trigger_ops *ops; struct event_command *cmd_ops; struct event_filter __rcu *filter; char *filter_str; @@ -1959,7 +1976,7 @@ struct event_command { int (*set_filter)(char *filter_str, struct event_trigger_data *data, struct trace_event_file *file); - struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param); + const struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param); }; /** diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index fbfb396905a6..ee40d4e6ad1c 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -61,8 +61,9 @@ FTRACE_ENTRY_REG(function, ftrace_entry, TRACE_FN, F_STRUCT( - __field_fn( unsigned long, ip ) - __field_fn( unsigned long, parent_ip ) + __field_fn( unsigned long, ip ) + __field_fn( unsigned long, parent_ip ) + __dynamic_array( unsigned long, args ) ), F_printk(" %ps <-- %ps", @@ -72,17 +73,18 @@ FTRACE_ENTRY_REG(function, ftrace_entry, ); /* Function call entry */ -FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry, +FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry, TRACE_GRAPH_ENT, F_STRUCT( __field_struct( struct ftrace_graph_ent, graph_ent ) __field_packed( unsigned long, graph_ent, func ) - __field_packed( int, graph_ent, depth ) + __field_packed( unsigned long, graph_ent, depth ) + __dynamic_array(unsigned long, args ) ), - F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth) + F_printk("--> %ps (%lu)", (void *)__entry->func, __entry->depth) ); #ifdef CONFIG_FUNCTION_GRAPH_RETADDR diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 82fd637cfc19..c08355c3ef32 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -478,7 +478,7 @@ static void eprobe_trigger_func(struct event_trigger_data *data, __eprobe_trace_func(edata, rec); } -static struct event_trigger_ops eprobe_trigger_ops = { +static const struct event_trigger_ops eprobe_trigger_ops = { .trigger = eprobe_trigger_func, .print = eprobe_trigger_print, .init = eprobe_trigger_init, @@ -507,8 +507,8 @@ static void eprobe_trigger_unreg_func(char *glob, } -static struct event_trigger_ops *eprobe_trigger_get_ops(char *cmd, - char *param) +static const struct event_trigger_ops *eprobe_trigger_get_ops(char *cmd, + char *param) { return &eprobe_trigger_ops; } @@ -913,6 +913,8 @@ static int __trace_eprobe_create(int argc, const char *argv[]) } if (argc - 2 > MAX_TRACE_ARGS) { + trace_probe_log_set_index(2); + trace_probe_log_err(0, TOO_MANY_ARGS); ret = -E2BIG; goto error; } diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 3ff9caa4a71b..a6bb7577e8c5 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -49,7 +49,7 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event, /* The ftrace function trace is allowed only for root. */ if (ftrace_event_is_function(tp_event)) { - ret = perf_allow_tracepoint(&p_event->attr); + ret = perf_allow_tracepoint(); if (ret) return ret; @@ -86,7 +86,7 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event, * ...otherwise raw tracepoint data can be a severe data leak, * only allow root to have these. */ - ret = perf_allow_tracepoint(&p_event->attr); + ret = perf_allow_tracepoint(); if (ret) return ret; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 513de9ceb80e..8638b7f7ff85 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -790,7 +790,9 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); } - call->class->reg(call, TRACE_REG_UNREGISTER, file); + ret = call->class->reg(call, TRACE_REG_UNREGISTER, file); + + WARN_ON_ONCE(ret); } /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */ if (file->flags & EVENT_FILE_FL_SOFT_MODE) @@ -1818,28 +1820,28 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } -static ssize_t -system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, - loff_t *ppos) +/* + * Returns: + * 0 : no events exist? + * 1 : all events are disabled + * 2 : all events are enabled + * 3 : some events are enabled and some are enabled + */ +int trace_events_enabled(struct trace_array *tr, const char *system) { - const char set_to_char[4] = { '?', '0', '1', 'X' }; - struct trace_subsystem_dir *dir = filp->private_data; - struct event_subsystem *system = dir->subsystem; struct trace_event_call *call; struct trace_event_file *file; - struct trace_array *tr = dir->tr; - char buf[2]; int set = 0; - int ret; - mutex_lock(&event_mutex); + guard(mutex)(&event_mutex); + list_for_each_entry(file, &tr->events, list) { call = file->event_call; if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || !trace_event_name(call) || !call->class || !call->class->reg) continue; - if (system && strcmp(call->class->system, system->name) != 0) + if (system && strcmp(call->class->system, system) != 0) continue; /* @@ -1855,7 +1857,23 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, if (set == 3) break; } - mutex_unlock(&event_mutex); + + return set; +} + +static ssize_t +system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + const char set_to_char[4] = { '?', '0', '1', 'X' }; + struct trace_subsystem_dir *dir = filp->private_data; + struct event_subsystem *system = dir->subsystem; + struct trace_array *tr = dir->tr; + char buf[2]; + int set; + int ret; + + set = trace_events_enabled(tr, system ? system->name : NULL); buf[0] = set_to_char[set]; buf[1] = '\n'; diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 53dc6719181e..1260c23cfa5f 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -6203,7 +6203,7 @@ static void event_hist_trigger_free(struct event_trigger_data *data) } } -static struct event_trigger_ops event_hist_trigger_ops = { +static const struct event_trigger_ops event_hist_trigger_ops = { .trigger = event_hist_trigger, .print = event_hist_trigger_print, .init = event_hist_trigger_init, @@ -6235,15 +6235,15 @@ static void event_hist_trigger_named_free(struct event_trigger_data *data) } } -static struct event_trigger_ops event_hist_trigger_named_ops = { +static const struct event_trigger_ops event_hist_trigger_named_ops = { .trigger = event_hist_trigger, .print = event_hist_trigger_print, .init = event_hist_trigger_named_init, .free = event_hist_trigger_named_free, }; -static struct event_trigger_ops *event_hist_get_trigger_ops(char *cmd, - char *param) +static const struct event_trigger_ops *event_hist_get_trigger_ops(char *cmd, + char *param) { return &event_hist_trigger_ops; } @@ -6838,38 +6838,38 @@ hist_enable_count_trigger(struct event_trigger_data *data, hist_enable_trigger(data, buffer, rec, event); } -static struct event_trigger_ops hist_enable_trigger_ops = { +static const struct event_trigger_ops hist_enable_trigger_ops = { .trigger = hist_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -static struct event_trigger_ops hist_enable_count_trigger_ops = { +static const struct event_trigger_ops hist_enable_count_trigger_ops = { .trigger = hist_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -static struct event_trigger_ops hist_disable_trigger_ops = { +static const struct event_trigger_ops hist_disable_trigger_ops = { .trigger = hist_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -static struct event_trigger_ops hist_disable_count_trigger_ops = { +static const struct event_trigger_ops hist_disable_count_trigger_ops = { .trigger = hist_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -static struct event_trigger_ops * +static const struct event_trigger_ops * hist_enable_get_trigger_ops(char *cmd, char *param) { - struct event_trigger_ops *ops; + const struct event_trigger_ops *ops; bool enable; enable = (strcmp(cmd, ENABLE_HIST_STR) == 0); diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index e3f7d09e5512..969f48742d72 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -207,7 +207,7 @@ static int synth_field_string_size(char *type) if (len == 0) return 0; /* variable-length string */ - strncpy(buf, start, len); + memcpy(buf, start, len); buf[len] = '\0'; err = kstrtouint(buf, 0, &size); @@ -305,7 +305,7 @@ static const char *synth_field_fmt(char *type) else if (strcmp(type, "gfp_t") == 0) fmt = "%x"; else if (synth_field_is_string(type)) - fmt = "%.*s"; + fmt = "%s"; else if (synth_field_is_stack(type)) fmt = "%s"; @@ -612,7 +612,7 @@ static int __set_synth_event_print_fmt(struct synth_event *event, fmt = synth_field_fmt(event->fields[i]->type); pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s%s", event->fields[i]->name, fmt, - i == event->n_fields - 1 ? "" : ", "); + i == event->n_fields - 1 ? "" : " "); } pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); @@ -852,6 +852,38 @@ static struct trace_event_fields synth_event_fields_array[] = { {} }; +static int synth_event_reg(struct trace_event_call *call, + enum trace_reg type, void *data) +{ + struct synth_event *event = container_of(call, struct synth_event, call); + + switch (type) { +#ifdef CONFIG_PERF_EVENTS + case TRACE_REG_PERF_REGISTER: +#endif + case TRACE_REG_REGISTER: + if (!try_module_get(event->mod)) + return -EBUSY; + break; + default: + break; + } + + int ret = trace_event_reg(call, type, data); + + switch (type) { +#ifdef CONFIG_PERF_EVENTS + case TRACE_REG_PERF_UNREGISTER: +#endif + case TRACE_REG_UNREGISTER: + module_put(event->mod); + break; + default: + break; + } + return ret; +} + static int register_synth_event(struct synth_event *event) { struct trace_event_call *call = &event->call; @@ -881,7 +913,7 @@ static int register_synth_event(struct synth_event *event) goto out; } call->flags = TRACE_EVENT_FL_TRACEPOINT; - call->class->reg = trace_event_reg; + call->class->reg = synth_event_reg; call->class->probe = trace_event_raw_event_synth; call->data = event; call->tp = event->tp; diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index d45448947094..b66b6d235d91 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -825,7 +825,7 @@ struct event_trigger_data *event_trigger_alloc(struct event_command *cmd_ops, void *private_data) { struct event_trigger_data *trigger_data; - struct event_trigger_ops *trigger_ops; + const struct event_trigger_ops *trigger_ops; trigger_ops = cmd_ops->get_trigger_ops(cmd, param); @@ -1367,38 +1367,38 @@ traceoff_trigger_print(struct seq_file *m, struct event_trigger_data *data) data->filter_str); } -static struct event_trigger_ops traceon_trigger_ops = { +static const struct event_trigger_ops traceon_trigger_ops = { .trigger = traceon_trigger, .print = traceon_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; -static struct event_trigger_ops traceon_count_trigger_ops = { +static const struct event_trigger_ops traceon_count_trigger_ops = { .trigger = traceon_count_trigger, .print = traceon_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; -static struct event_trigger_ops traceoff_trigger_ops = { +static const struct event_trigger_ops traceoff_trigger_ops = { .trigger = traceoff_trigger, .print = traceoff_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; -static struct event_trigger_ops traceoff_count_trigger_ops = { +static const struct event_trigger_ops traceoff_count_trigger_ops = { .trigger = traceoff_count_trigger, .print = traceoff_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; -static struct event_trigger_ops * +static const struct event_trigger_ops * onoff_get_trigger_ops(char *cmd, char *param) { - struct event_trigger_ops *ops; + const struct event_trigger_ops *ops; /* we register both traceon and traceoff to this callback */ if (strcmp(cmd, "traceon") == 0) @@ -1491,21 +1491,21 @@ snapshot_trigger_print(struct seq_file *m, struct event_trigger_data *data) data->filter_str); } -static struct event_trigger_ops snapshot_trigger_ops = { +static const struct event_trigger_ops snapshot_trigger_ops = { .trigger = snapshot_trigger, .print = snapshot_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; -static struct event_trigger_ops snapshot_count_trigger_ops = { +static const struct event_trigger_ops snapshot_count_trigger_ops = { .trigger = snapshot_count_trigger, .print = snapshot_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; -static struct event_trigger_ops * +static const struct event_trigger_ops * snapshot_get_trigger_ops(char *cmd, char *param) { return param ? &snapshot_count_trigger_ops : &snapshot_trigger_ops; @@ -1586,21 +1586,21 @@ stacktrace_trigger_print(struct seq_file *m, struct event_trigger_data *data) data->filter_str); } -static struct event_trigger_ops stacktrace_trigger_ops = { +static const struct event_trigger_ops stacktrace_trigger_ops = { .trigger = stacktrace_trigger, .print = stacktrace_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; -static struct event_trigger_ops stacktrace_count_trigger_ops = { +static const struct event_trigger_ops stacktrace_count_trigger_ops = { .trigger = stacktrace_count_trigger, .print = stacktrace_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; -static struct event_trigger_ops * +static const struct event_trigger_ops * stacktrace_get_trigger_ops(char *cmd, char *param) { return param ? &stacktrace_count_trigger_ops : &stacktrace_trigger_ops; @@ -1711,28 +1711,28 @@ void event_enable_trigger_free(struct event_trigger_data *data) } } -static struct event_trigger_ops event_enable_trigger_ops = { +static const struct event_trigger_ops event_enable_trigger_ops = { .trigger = event_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -static struct event_trigger_ops event_enable_count_trigger_ops = { +static const struct event_trigger_ops event_enable_count_trigger_ops = { .trigger = event_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -static struct event_trigger_ops event_disable_trigger_ops = { +static const struct event_trigger_ops event_disable_trigger_ops = { .trigger = event_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -static struct event_trigger_ops event_disable_count_trigger_ops = { +static const struct event_trigger_ops event_disable_count_trigger_ops = { .trigger = event_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, @@ -1916,10 +1916,10 @@ void event_enable_unregister_trigger(char *glob, data->ops->free(data); } -static struct event_trigger_ops * +static const struct event_trigger_ops * event_enable_get_trigger_ops(char *cmd, char *param) { - struct event_trigger_ops *ops; + const struct event_trigger_ops *ops; bool enable; #ifdef CONFIG_HIST_TRIGGERS diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 97325fbd6283..af42aaa3d172 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -455,7 +455,7 @@ static void user_event_enabler_fault_fixup(struct work_struct *work) if (ret && ret != -ENOENT) { struct user_event *user = enabler->event; - pr_warn("user_events: Fault for mm: 0x%pK @ 0x%llx event: %s\n", + pr_warn("user_events: Fault for mm: 0x%p @ 0x%llx event: %s\n", mm->mm, (unsigned long long)uaddr, EVENT_NAME(user)); } @@ -2793,11 +2793,8 @@ static int user_seq_show(struct seq_file *m, void *p) seq_printf(m, "%s", EVENT_TP_NAME(user)); - if (status != 0) - seq_puts(m, " #"); - if (status != 0) { - seq_puts(m, " Used by"); + seq_puts(m, " # Used by"); if (status & EVENT_STATUS_FTRACE) seq_puts(m, " ftrace"); if (status & EVENT_STATUS_PERF) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 985ff98272da..5d7ca80173ea 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -1199,8 +1199,11 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], argc = new_argc; argv = new_argv; } - if (argc > MAX_TRACE_ARGS) + if (argc > MAX_TRACE_ARGS) { + trace_probe_log_set_index(2); + trace_probe_log_err(0, TOO_MANY_ARGS); return -E2BIG; + } ret = traceprobe_expand_dentry_args(argc, argv, &dbuf); if (ret) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index df56f9b76010..98ccf3f00c51 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -25,6 +25,9 @@ static void function_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); static void +function_args_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +static void function_stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); static void @@ -42,9 +45,10 @@ enum { TRACE_FUNC_NO_OPTS = 0x0, /* No flags set. */ TRACE_FUNC_OPT_STACK = 0x1, TRACE_FUNC_OPT_NO_REPEATS = 0x2, + TRACE_FUNC_OPT_ARGS = 0x4, /* Update this to next highest bit. */ - TRACE_FUNC_OPT_HIGHEST_BIT = 0x4 + TRACE_FUNC_OPT_HIGHEST_BIT = 0x8 }; #define TRACE_FUNC_OPT_MASK (TRACE_FUNC_OPT_HIGHEST_BIT - 1) @@ -114,6 +118,8 @@ static ftrace_func_t select_trace_function(u32 flags_val) switch (flags_val & TRACE_FUNC_OPT_MASK) { case TRACE_FUNC_NO_OPTS: return function_trace_call; + case TRACE_FUNC_OPT_ARGS: + return function_args_trace_call; case TRACE_FUNC_OPT_STACK: return function_stack_trace_call; case TRACE_FUNC_OPT_NO_REPEATS: @@ -220,7 +226,34 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, data = this_cpu_ptr(tr->array_buffer.data); if (!atomic_read(&data->disabled)) - trace_function(tr, ip, parent_ip, trace_ctx); + trace_function(tr, ip, parent_ip, trace_ctx, NULL); + + ftrace_test_recursion_unlock(bit); +} + +static void +function_args_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) +{ + struct trace_array *tr = op->private; + struct trace_array_cpu *data; + unsigned int trace_ctx; + int bit; + int cpu; + + if (unlikely(!tr->function_enabled)) + return; + + bit = ftrace_test_recursion_trylock(ip, parent_ip); + if (bit < 0) + return; + + trace_ctx = tracing_gen_ctx(); + + cpu = smp_processor_id(); + data = per_cpu_ptr(tr->array_buffer.data, cpu); + if (!atomic_read(&data->disabled)) + trace_function(tr, ip, parent_ip, trace_ctx, fregs); ftrace_test_recursion_unlock(bit); } @@ -270,7 +303,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, if (likely(disabled == 1)) { trace_ctx = tracing_gen_ctx_flags(flags); - trace_function(tr, ip, parent_ip, trace_ctx); + trace_function(tr, ip, parent_ip, trace_ctx, NULL); #ifdef CONFIG_UNWINDER_FRAME_POINTER if (ftrace_pids_enabled(op)) skip++; @@ -349,7 +382,7 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, trace_ctx = tracing_gen_ctx_dec(); process_repeats(tr, ip, parent_ip, last_info, trace_ctx); - trace_function(tr, ip, parent_ip, trace_ctx); + trace_function(tr, ip, parent_ip, trace_ctx, NULL); out: ftrace_test_recursion_unlock(bit); @@ -389,7 +422,7 @@ function_stack_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, trace_ctx = tracing_gen_ctx_flags(flags); process_repeats(tr, ip, parent_ip, last_info, trace_ctx); - trace_function(tr, ip, parent_ip, trace_ctx); + trace_function(tr, ip, parent_ip, trace_ctx, NULL); __trace_stack(tr, trace_ctx, STACK_SKIP); } @@ -403,6 +436,9 @@ static struct tracer_opt func_opts[] = { { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) }, #endif { TRACER_OPT(func-no-repeats, TRACE_FUNC_OPT_NO_REPEATS) }, +#ifdef CONFIG_FUNCTION_TRACE_ARGS + { TRACER_OPT(func-args, TRACE_FUNC_OPT_ARGS) }, +#endif { } /* Always set a last empty entry */ }; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 136c750b0b4d..2f077d4158e5 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -71,6 +71,10 @@ static struct tracer_opt trace_opts[] = { /* Display function return address ? */ { TRACER_OPT(funcgraph-retaddr, TRACE_GRAPH_PRINT_RETADDR) }, #endif +#ifdef CONFIG_FUNCTION_TRACE_ARGS + /* Display function arguments ? */ + { TRACER_OPT(funcgraph-args, TRACE_GRAPH_ARGS) }, +#endif /* Include sleep time (scheduled out) between entry and return */ { TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) }, @@ -110,25 +114,43 @@ static void print_graph_duration(struct trace_array *tr, unsigned long long duration, struct trace_seq *s, u32 flags); -int __trace_graph_entry(struct trace_array *tr, - struct ftrace_graph_ent *trace, - unsigned int trace_ctx) +static int __graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, + unsigned int trace_ctx, struct ftrace_regs *fregs) { struct ring_buffer_event *event; struct trace_buffer *buffer = tr->array_buffer.buffer; struct ftrace_graph_ent_entry *entry; + int size; - event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, - sizeof(*entry), trace_ctx); + /* If fregs is defined, add FTRACE_REGS_MAX_ARGS long size words */ + size = sizeof(*entry) + (FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long)); + + event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, size, trace_ctx); if (!event) return 0; - entry = ring_buffer_event_data(event); - entry->graph_ent = *trace; + + entry = ring_buffer_event_data(event); + entry->graph_ent = *trace; + +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API + if (fregs) { + for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++) + entry->args[i] = ftrace_regs_get_argument(fregs, i); + } +#endif + trace_buffer_unlock_commit_nostack(buffer, event); return 1; } +int __trace_graph_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned int trace_ctx) +{ + return __graph_entry(tr, trace, trace_ctx, NULL); +} + #ifdef CONFIG_FUNCTION_GRAPH_RETADDR int __trace_graph_retaddr_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, @@ -174,9 +196,9 @@ struct fgraph_times { unsigned long long sleeptime; /* may be optional! */ }; -int trace_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct ftrace_regs *fregs) +static int graph_entry(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops, + struct ftrace_regs *fregs) { unsigned long *task_var = fgraph_get_task_var(gops); struct trace_array *tr = gops->private; @@ -246,7 +268,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, unsigned long retaddr = ftrace_graph_top_ret_addr(current); ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, retaddr); } else { - ret = __trace_graph_entry(tr, trace, trace_ctx); + ret = __graph_entry(tr, trace, trace_ctx, fregs); } } preempt_enable_notrace(); @@ -254,6 +276,20 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, return ret; } +int trace_graph_entry(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops, + struct ftrace_regs *fregs) +{ + return graph_entry(trace, gops, NULL); +} + +static int trace_graph_entry_args(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops, + struct ftrace_regs *fregs) +{ + return graph_entry(trace, gops, fregs); +} + static void __trace_graph_function(struct trace_array *tr, unsigned long ip, unsigned int trace_ctx) @@ -418,14 +454,17 @@ static int graph_trace_init(struct trace_array *tr) { int ret; - tr->gops->entryfunc = trace_graph_entry; + if (tracer_flags_is_set(TRACE_GRAPH_ARGS)) + tr->gops->entryfunc = trace_graph_entry_args; + else + tr->gops->entryfunc = trace_graph_entry; if (tracing_thresh) tr->gops->retfunc = trace_graph_thresh_return; else tr->gops->retfunc = trace_graph_return; - /* Make gops functions are visible before we start tracing */ + /* Make gops functions visible before we start tracing */ smp_mb(); ret = register_ftrace_graph(tr->gops); @@ -436,6 +475,28 @@ static int graph_trace_init(struct trace_array *tr) return 0; } +static int ftrace_graph_trace_args(struct trace_array *tr, int set) +{ + trace_func_graph_ent_t entry; + + if (set) + entry = trace_graph_entry_args; + else + entry = trace_graph_entry; + + /* See if there's any changes */ + if (tr->gops->entryfunc == entry) + return 0; + + unregister_ftrace_graph(tr->gops); + + tr->gops->entryfunc = entry; + + /* Make gops functions visible before we start tracing */ + smp_mb(); + return register_ftrace_graph(tr->gops); +} + static void graph_trace_reset(struct trace_array *tr) { tracing_stop_cmdline_record(); @@ -775,7 +836,7 @@ static void print_graph_retaddr(struct trace_seq *s, struct fgraph_retaddr_ent_e static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entry *entry, struct ftrace_graph_ret *graph_ret, void *func, - u32 opt_flags, u32 trace_flags) + u32 opt_flags, u32 trace_flags, int args_size) { unsigned long err_code = 0; unsigned long retval = 0; @@ -809,7 +870,14 @@ static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entr if (entry->ent.type != TRACE_GRAPH_RETADDR_ENT) print_retaddr = false; - trace_seq_printf(s, "%ps();", func); + trace_seq_printf(s, "%ps", func); + + if (args_size >= FTRACE_REGS_MAX_ARGS * sizeof(long)) { + print_function_args(s, entry->args, (unsigned long)func); + trace_seq_putc(s, ';'); + } else + trace_seq_puts(s, "();"); + if (print_retval || print_retaddr) trace_seq_puts(s, " /*"); else @@ -836,7 +904,8 @@ static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entr #else -#define print_graph_retval(_seq, _ent, _ret, _func, _opt_flags, _trace_flags) do {} while (0) +#define print_graph_retval(_seq, _ent, _ret, _func, _opt_flags, _trace_flags, args_size) \ + do {} while (0) #endif @@ -852,16 +921,17 @@ print_graph_entry_leaf(struct trace_iterator *iter, struct ftrace_graph_ret *graph_ret; struct ftrace_graph_ent *call; unsigned long long duration; - unsigned long func; + unsigned long ret_func; + int args_size; int cpu = iter->cpu; int i; + args_size = iter->ent_size - offsetof(struct ftrace_graph_ent_entry, args); + graph_ret = &ret_entry->ret; call = &entry->graph_ent; duration = ret_entry->rettime - ret_entry->calltime; - func = call->func + iter->tr->text_delta; - if (data) { struct fgraph_cpu_data *cpu_data; @@ -887,16 +957,25 @@ print_graph_entry_leaf(struct trace_iterator *iter, for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) trace_seq_putc(s, ' '); + ret_func = graph_ret->func + iter->tr->text_delta; + /* * Write out the function return value or return address */ if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR)) { print_graph_retval(s, entry, graph_ret, (void *)graph_ret->func + iter->tr->text_delta, - flags, tr->trace_flags); + flags, tr->trace_flags, args_size); } else { - trace_seq_printf(s, "%ps();\n", (void *)func); + trace_seq_printf(s, "%ps", (void *)ret_func); + + if (args_size >= FTRACE_REGS_MAX_ARGS * sizeof(long)) { + print_function_args(s, entry->args, ret_func); + trace_seq_putc(s, ';'); + } else + trace_seq_puts(s, "();"); } + trace_seq_printf(s, "\n"); print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET, cpu, iter->ent->pid, flags); @@ -913,6 +992,7 @@ print_graph_entry_nested(struct trace_iterator *iter, struct fgraph_data *data = iter->private; struct trace_array *tr = iter->tr; unsigned long func; + int args_size; int i; if (data) { @@ -937,7 +1017,17 @@ print_graph_entry_nested(struct trace_iterator *iter, func = call->func + iter->tr->text_delta; - trace_seq_printf(s, "%ps() {", (void *)func); + trace_seq_printf(s, "%ps", (void *)func); + + args_size = iter->ent_size - offsetof(struct ftrace_graph_ent_entry, args); + + if (args_size >= FTRACE_REGS_MAX_ARGS * sizeof(long)) + print_function_args(s, entry->args, func); + else + trace_seq_puts(s, "()"); + + trace_seq_puts(s, " {"); + if (flags & __TRACE_GRAPH_PRINT_RETADDR && entry->ent.type == TRACE_GRAPH_RETADDR_ENT) print_graph_retaddr(s, (struct fgraph_retaddr_ent_entry *)entry, @@ -1107,21 +1197,38 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, struct trace_iterator *iter, u32 flags) { struct fgraph_data *data = iter->private; - struct ftrace_graph_ent *call = &field->graph_ent; + struct ftrace_graph_ent *call; struct ftrace_graph_ret_entry *leaf_ret; static enum print_line_t ret; int cpu = iter->cpu; + /* + * print_graph_entry() may consume the current event, + * thus @field may become invalid, so we need to save it. + * sizeof(struct ftrace_graph_ent_entry) is very small, + * it can be safely saved at the stack. + */ + struct ftrace_graph_ent_entry *entry; + u8 save_buf[sizeof(*entry) + FTRACE_REGS_MAX_ARGS * sizeof(long)]; + + /* The ent_size is expected to be as big as the entry */ + if (iter->ent_size > sizeof(save_buf)) + iter->ent_size = sizeof(save_buf); + + entry = (void *)save_buf; + memcpy(entry, field, iter->ent_size); + + call = &entry->graph_ent; if (check_irq_entry(iter, flags, call->func, call->depth)) return TRACE_TYPE_HANDLED; print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags); - leaf_ret = get_return_for_leaf(iter, field); + leaf_ret = get_return_for_leaf(iter, entry); if (leaf_ret) - ret = print_graph_entry_leaf(iter, field, leaf_ret, s, flags); + ret = print_graph_entry_leaf(iter, entry, leaf_ret, s, flags); else - ret = print_graph_entry_nested(iter, field, s, cpu, flags); + ret = print_graph_entry_nested(iter, entry, s, cpu, flags); if (data) { /* @@ -1195,7 +1302,8 @@ print_graph_return(struct ftrace_graph_ret_entry *retentry, struct trace_seq *s, * funcgraph-retval option is enabled. */ if (flags & __TRACE_GRAPH_PRINT_RETVAL) { - print_graph_retval(s, NULL, trace, (void *)func, flags, tr->trace_flags); + print_graph_retval(s, NULL, trace, (void *)func, flags, + tr->trace_flags, 0); } else { /* * If the return function does not have a matching entry, @@ -1323,16 +1431,8 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) switch (entry->type) { case TRACE_GRAPH_ENT: { - /* - * print_graph_entry() may consume the current event, - * thus @field may become invalid, so we need to save it. - * sizeof(struct ftrace_graph_ent_entry) is very small, - * it can be safely saved at the stack. - */ - struct ftrace_graph_ent_entry saved; trace_assign_type(field, entry); - saved = *field; - return print_graph_entry(&saved, s, iter, flags); + return print_graph_entry(field, s, iter, flags); } #ifdef CONFIG_FUNCTION_GRAPH_RETADDR case TRACE_GRAPH_RETADDR_ENT: { @@ -1511,6 +1611,7 @@ void graph_trace_close(struct trace_iterator *iter) if (data) { free_percpu(data->cpu_data); kfree(data); + iter->private = NULL; } } @@ -1526,6 +1627,9 @@ func_graph_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) if (bit == TRACE_GRAPH_GRAPH_TIME) ftrace_graph_graph_time_control(set); + if (bit == TRACE_GRAPH_ARGS) + return ftrace_graph_trace_args(tr, set); + return 0; } diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 7294ad676379..40c39e946940 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -150,7 +150,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, trace_ctx = tracing_gen_ctx_flags(flags); - trace_function(tr, ip, parent_ip, trace_ctx); + trace_function(tr, ip, parent_ip, trace_ctx, fregs); atomic_dec(&data->disabled); } @@ -250,8 +250,6 @@ static void irqsoff_trace_open(struct trace_iterator *iter) { if (is_graph(iter->tr)) graph_trace_open(iter); - else - iter->private = NULL; } static void irqsoff_trace_close(struct trace_iterator *iter) @@ -295,11 +293,17 @@ __trace_function(struct trace_array *tr, if (is_graph(tr)) trace_graph_function(tr, ip, parent_ip, trace_ctx); else - trace_function(tr, ip, parent_ip, trace_ctx); + trace_function(tr, ip, parent_ip, trace_ctx, NULL); } #else -#define __trace_function trace_function +static inline void +__trace_function(struct trace_array *tr, + unsigned long ip, unsigned long parent_ip, + unsigned int trace_ctx) +{ + return trace_function(tr, ip, parent_ip, trace_ctx, NULL); +} static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) { diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d8d5f18a141a..2703b96d8990 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -124,9 +124,8 @@ static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk) if (!p) return true; *p = '\0'; - rcu_read_lock_sched(); - ret = !!find_module(tk->symbol); - rcu_read_unlock_sched(); + scoped_guard(rcu) + ret = !!find_module(tk->symbol); *p = ':'; return ret; @@ -796,12 +795,10 @@ static struct module *try_module_get_by_name(const char *name) { struct module *mod; - rcu_read_lock_sched(); + guard(rcu)(); mod = find_module(name); if (mod && !try_module_get(mod)) mod = NULL; - rcu_read_unlock_sched(); - return mod; } #else @@ -1007,8 +1004,11 @@ static int trace_kprobe_create_internal(int argc, const char *argv[], argc = new_argc; argv = new_argv; } - if (argc > MAX_TRACE_ARGS) + if (argc > MAX_TRACE_ARGS) { + trace_probe_log_set_index(2); + trace_probe_log_err(0, TOO_MANY_ARGS); return -E2BIG; + } ret = traceprobe_expand_dentry_args(argc, argv, &dbuf); if (ret) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index f3a2722ee4c0..e732c9e37e14 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -316,33 +316,6 @@ static inline void osn_var_reset_all(void) bool trace_osnoise_callback_enabled; /* - * osnoise sample structure definition. Used to store the statistics of a - * sample run. - */ -struct osnoise_sample { - u64 runtime; /* runtime */ - u64 noise; /* noise */ - u64 max_sample; /* max single noise sample */ - int hw_count; /* # HW (incl. hypervisor) interference */ - int nmi_count; /* # NMIs during this sample */ - int irq_count; /* # IRQs during this sample */ - int softirq_count; /* # softirqs during this sample */ - int thread_count; /* # threads during this sample */ -}; - -#ifdef CONFIG_TIMERLAT_TRACER -/* - * timerlat sample structure definition. Used to store the statistics of - * a sample run. - */ -struct timerlat_sample { - u64 timer_latency; /* timer_latency */ - unsigned int seqnum; /* unique sequence */ - int context; /* timer context */ -}; -#endif - -/* * Tracer data. */ static struct osnoise_data { @@ -497,7 +470,7 @@ static void print_osnoise_headers(struct seq_file *s) * Record an osnoise_sample into the tracer buffer. */ static void -__trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer) +__record_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer) { struct ring_buffer_event *event; struct osnoise_entry *entry; @@ -520,17 +493,19 @@ __trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffe } /* - * Record an osnoise_sample on all osnoise instances. + * Record an osnoise_sample on all osnoise instances and fire trace event. */ -static void trace_osnoise_sample(struct osnoise_sample *sample) +static void record_osnoise_sample(struct osnoise_sample *sample) { struct osnoise_instance *inst; struct trace_buffer *buffer; + trace_osnoise_sample(sample); + rcu_read_lock(); list_for_each_entry_rcu(inst, &osnoise_instances, list) { buffer = inst->tr->array_buffer.buffer; - __trace_osnoise_sample(sample, buffer); + __record_osnoise_sample(sample, buffer); } rcu_read_unlock(); } @@ -574,7 +549,7 @@ static void print_timerlat_headers(struct seq_file *s) #endif /* CONFIG_PREEMPT_RT */ static void -__trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer) +__record_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer) { struct ring_buffer_event *event; struct timerlat_entry *entry; @@ -594,15 +569,17 @@ __trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buf /* * Record an timerlat_sample into the tracer buffer. */ -static void trace_timerlat_sample(struct timerlat_sample *sample) +static void record_timerlat_sample(struct timerlat_sample *sample) { struct osnoise_instance *inst; struct trace_buffer *buffer; + trace_timerlat_sample(sample); + rcu_read_lock(); list_for_each_entry_rcu(inst, &osnoise_instances, list) { buffer = inst->tr->array_buffer.buffer; - __trace_timerlat_sample(sample, buffer); + __record_timerlat_sample(sample, buffer); } rcu_read_unlock(); } @@ -1542,27 +1519,25 @@ static int run_osnoise(void) /* * In some cases, notably when running on a nohz_full CPU with - * a stopped tick PREEMPT_RCU has no way to account for QSs. - * This will eventually cause unwarranted noise as PREEMPT_RCU - * will force preemption as the means of ending the current - * grace period. We avoid this problem by calling - * rcu_momentary_eqs(), which performs a zero duration - * EQS allowing PREEMPT_RCU to end the current grace period. - * This call shouldn't be wrapped inside an RCU critical - * section. + * a stopped tick PREEMPT_RCU or PREEMPT_LAZY have no way to + * account for QSs. This will eventually cause unwarranted + * noise as RCU forces preemption as the means of ending the + * current grace period. We avoid this by calling + * rcu_momentary_eqs(), which performs a zero duration EQS + * allowing RCU to end the current grace period. This call + * shouldn't be wrapped inside an RCU critical section. * - * Note that in non PREEMPT_RCU kernels QSs are handled through - * cond_resched() + * Normally QSs for other cases are handled through cond_resched(). + * For simplicity, however, we call rcu_momentary_eqs() for all + * configurations here. */ - if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { - if (!disable_irq) - local_irq_disable(); + if (!disable_irq) + local_irq_disable(); - rcu_momentary_eqs(); + rcu_momentary_eqs(); - if (!disable_irq) - local_irq_enable(); - } + if (!disable_irq) + local_irq_enable(); /* * For the non-preemptive kernel config: let threads runs, if @@ -1608,7 +1583,7 @@ static int run_osnoise(void) /* Save interference stats info */ diff_osn_sample_stats(osn_var, &s); - trace_osnoise_sample(&s); + record_osnoise_sample(&s); notify_new_max_latency(max_noise); @@ -1803,7 +1778,7 @@ static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) s.timer_latency = diff; s.context = IRQ_CONTEXT; - trace_timerlat_sample(&s); + record_timerlat_sample(&s); if (osnoise_data.stop_tracing) { if (time_to_us(diff) >= osnoise_data.stop_tracing) { @@ -1901,8 +1876,7 @@ static int timerlat_main(void *data) tlat->count = 0; tlat->tracing_thread = false; - hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); - tlat->timer.function = timerlat_irq; + hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); tlat->kthread = current; osn_var->pid = current->pid; /* @@ -1923,7 +1897,7 @@ static int timerlat_main(void *data) s.timer_latency = diff; s.context = THREAD_CONTEXT; - trace_timerlat_sample(&s); + record_timerlat_sample(&s); notify_new_max_latency(diff); @@ -2032,7 +2006,6 @@ static int start_kthread(unsigned int cpu) if (IS_ERR(kthread)) { pr_err(BANNER "could not start sampling thread\n"); - stop_per_cpu_kthreads(); return -ENOMEM; } @@ -2456,8 +2429,7 @@ static int timerlat_fd_open(struct inode *inode, struct file *file) tlat = this_cpu_tmr_var(); tlat->count = 0; - hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); - tlat->timer.function = timerlat_irq; + hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); migrate_enable(); return 0; @@ -2529,7 +2501,7 @@ timerlat_fd_read(struct file *file, char __user *ubuf, size_t count, s.timer_latency = diff; s.context = THREAD_URET; - trace_timerlat_sample(&s); + record_timerlat_sample(&s); notify_new_max_latency(diff); @@ -2564,7 +2536,7 @@ timerlat_fd_read(struct file *file, char __user *ubuf, size_t count, s.timer_latency = diff; s.context = THREAD_CONTEXT; - trace_timerlat_sample(&s); + record_timerlat_sample(&s); if (osnoise_data.stop_tracing_total) { if (time_to_us(diff) >= osnoise_data.stop_tracing_total) { diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 03d56f711ad1..fee40ffbd490 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -5,6 +5,7 @@ * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> * */ +#include "trace.h" #include <linux/module.h> #include <linux/mutex.h> #include <linux/ftrace.h> @@ -12,15 +13,19 @@ #include <linux/sched/clock.h> #include <linux/sched/mm.h> #include <linux/idr.h> +#include <linux/btf.h> +#include <linux/bpf.h> +#include <linux/hashtable.h> #include "trace_output.h" +#include "trace_btf.h" -/* must be a power of 2 */ -#define EVENT_HASHSIZE 128 +/* 2^7 = 128 */ +#define EVENT_HASH_BITS 7 DECLARE_RWSEM(trace_event_sem); -static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; +static DEFINE_HASHTABLE(event_hash, EVENT_HASH_BITS); enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter) { @@ -684,6 +689,88 @@ int trace_print_lat_context(struct trace_iterator *iter) return !trace_seq_has_overflowed(s); } +#ifdef CONFIG_FUNCTION_TRACE_ARGS +void print_function_args(struct trace_seq *s, unsigned long *args, + unsigned long func) +{ + const struct btf_param *param; + const struct btf_type *t; + const char *param_name; + char name[KSYM_NAME_LEN]; + unsigned long arg; + struct btf *btf; + s32 tid, nr = 0; + int a, p, x; + + trace_seq_printf(s, "("); + + if (!args) + goto out; + if (lookup_symbol_name(func, name)) + goto out; + + /* TODO: Pass module name here too */ + t = btf_find_func_proto(name, &btf); + if (IS_ERR_OR_NULL(t)) + goto out; + + param = btf_get_func_param(t, &nr); + if (!param) + goto out_put; + + for (a = 0, p = 0; p < nr; a++, p++) { + if (p) + trace_seq_puts(s, ", "); + + /* This only prints what the arch allows (6 args by default) */ + if (a == FTRACE_REGS_MAX_ARGS) { + trace_seq_puts(s, "..."); + break; + } + + arg = args[a]; + + param_name = btf_name_by_offset(btf, param[p].name_off); + if (param_name) + trace_seq_printf(s, "%s=", param_name); + t = btf_type_skip_modifiers(btf, param[p].type, &tid); + + switch (t ? BTF_INFO_KIND(t->info) : BTF_KIND_UNKN) { + case BTF_KIND_UNKN: + trace_seq_putc(s, '?'); + /* Still print unknown type values */ + fallthrough; + case BTF_KIND_PTR: + trace_seq_printf(s, "0x%lx", arg); + break; + case BTF_KIND_INT: + trace_seq_printf(s, "%ld", arg); + break; + case BTF_KIND_ENUM: + trace_seq_printf(s, "%ld", arg); + break; + default: + /* This does not handle complex arguments */ + trace_seq_printf(s, "(%s)[0x%lx", btf_type_str(t), arg); + for (x = sizeof(long); x < t->size; x += sizeof(long)) { + trace_seq_putc(s, ':'); + if (++a == FTRACE_REGS_MAX_ARGS) { + trace_seq_puts(s, "...]"); + goto out_put; + } + trace_seq_printf(s, "0x%lx", args[a]); + } + trace_seq_putc(s, ']'); + break; + } + } +out_put: + btf_put(btf); +out: + trace_seq_printf(s, ")"); +} +#endif + /** * ftrace_find_event - find a registered event * @type: the type of event to look for @@ -694,11 +781,8 @@ int trace_print_lat_context(struct trace_iterator *iter) struct trace_event *ftrace_find_event(int type) { struct trace_event *event; - unsigned key; - - key = type & (EVENT_HASHSIZE - 1); - hlist_for_each_entry(event, &event_hash[key], node) { + hash_for_each_possible(event_hash, event, node, type) { if (event->type == type) return event; } @@ -753,7 +837,6 @@ void trace_event_read_unlock(void) */ int register_trace_event(struct trace_event *event) { - unsigned key; int ret = 0; down_write(&trace_event_sem); @@ -786,9 +869,7 @@ int register_trace_event(struct trace_event *event) if (event->funcs->binary == NULL) event->funcs->binary = trace_nop_print; - key = event->type & (EVENT_HASHSIZE - 1); - - hlist_add_head(&event->node, &event_hash[key]); + hash_add(event_hash, &event->node, event->type); ret = event->type; out: @@ -803,7 +884,7 @@ EXPORT_SYMBOL_GPL(register_trace_event); */ int __unregister_trace_event(struct trace_event *event) { - hlist_del(&event->node); + hash_del(&event->node); free_trace_event_type(event->type); return 0; } @@ -1005,12 +1086,15 @@ enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags, } static void print_fn_trace(struct trace_seq *s, unsigned long ip, - unsigned long parent_ip, long delta, int flags) + unsigned long parent_ip, long delta, + unsigned long *args, int flags) { ip += delta; parent_ip += delta; seq_print_ip_sym(s, ip, flags); + if (args) + print_function_args(s, args, ip); if ((flags & TRACE_ITER_PRINT_PARENT) && parent_ip) { trace_seq_puts(s, " <-"); @@ -1024,10 +1108,19 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags, { struct ftrace_entry *field; struct trace_seq *s = &iter->seq; + unsigned long *args; + int args_size; trace_assign_type(field, iter->ent); - print_fn_trace(s, field->ip, field->parent_ip, iter->tr->text_delta, flags); + args_size = iter->ent_size - offsetof(struct ftrace_entry, args); + if (args_size >= FTRACE_REGS_MAX_ARGS * sizeof(long)) + args = field->args; + else + args = NULL; + + print_fn_trace(s, field->ip, field->parent_ip, iter->tr->text_delta, + args, flags); trace_seq_putc(s, '\n'); return trace_handle_return(s); @@ -1248,7 +1341,6 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, struct trace_seq *s = &iter->seq; unsigned long *p; unsigned long *end; - long delta = iter->tr->text_delta; trace_assign_type(field, iter->ent); end = (unsigned long *)((long)iter->ent + iter->ent_size); @@ -1265,7 +1357,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, trace_seq_puts(s, "[FTRACE TRAMPOLINE]\n"); continue; } - seq_print_ip_sym(s, (*p) + delta, flags); + seq_print_ip_sym(s, trace_adjust_address(iter->tr, *p), flags); trace_seq_putc(s, '\n'); } @@ -1700,7 +1792,7 @@ trace_func_repeats_print(struct trace_iterator *iter, int flags, trace_assign_type(field, iter->ent); - print_fn_trace(s, field->ip, field->parent_ip, iter->tr->text_delta, flags); + print_fn_trace(s, field->ip, field->parent_ip, iter->tr->text_delta, NULL, flags); trace_seq_printf(s, " (repeats: %u, last_ts:", field->count); trace_print_time(s, iter, iter->ts - FUNC_REPEATS_GET_DELTA_TS(field)); diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index dca40f1f1da4..2e305364f2a9 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -41,5 +41,14 @@ extern struct rw_semaphore trace_event_sem; #define SEQ_PUT_HEX_FIELD(s, x) \ trace_seq_putmem_hex(s, &(x), sizeof(x)) +#ifdef CONFIG_FUNCTION_TRACE_ARGS +void print_function_args(struct trace_seq *s, unsigned long *args, + unsigned long func); +#else +static inline void print_function_args(struct trace_seq *s, unsigned long *args, + unsigned long func) { + trace_seq_puts(s, "()"); +} +#endif #endif diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 8f58ee1e8858..2eeecb6c95ee 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -770,6 +770,10 @@ static int check_prepare_btf_string_fetch(char *typename, #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API +/* + * Add the entry code to store the 'argnum'th parameter and return the offset + * in the entry data buffer where the data will be stored. + */ static int __store_entry_arg(struct trace_probe *tp, int argnum) { struct probe_entry_arg *earg = tp->entry_arg; @@ -793,6 +797,20 @@ static int __store_entry_arg(struct trace_probe *tp, int argnum) tp->entry_arg = earg; } + /* + * The entry code array is repeating the pair of + * [FETCH_OP_ARG(argnum)][FETCH_OP_ST_EDATA(offset of entry data buffer)] + * and the rest of entries are filled with [FETCH_OP_END]. + * + * To reduce the redundant function parameter fetching, we scan the entry + * code array to find the FETCH_OP_ARG which already fetches the 'argnum' + * parameter. If it doesn't match, update 'offset' to find the last + * offset. + * If we find the FETCH_OP_END without matching FETCH_OP_ARG entry, we + * will save the entry with FETCH_OP_ARG and FETCH_OP_ST_EDATA, and + * return data offset so that caller can find the data offset in the entry + * data buffer. + */ offset = 0; for (i = 0; i < earg->size - 1; i++) { switch (earg->code[i].op) { @@ -826,6 +844,16 @@ int traceprobe_get_entry_data_size(struct trace_probe *tp) if (!earg) return 0; + /* + * earg->code[] array has an operation sequence which is run in + * the entry handler. + * The sequence stopped by FETCH_OP_END and each data stored in + * the entry data buffer by FETCH_OP_ST_EDATA. The FETCH_OP_ST_EDATA + * stores the data at the data buffer + its offset, and all data are + * "unsigned long" size. The offset must be increased when a data is + * stored. Thus we need to find the last FETCH_OP_ST_EDATA in the + * code array. + */ for (i = 0; i < earg->size; i++) { switch (earg->code[i].op) { case FETCH_OP_END: diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 96792bc4b092..854e5668f5ee 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -545,6 +545,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(BAD_BTF_TID, "Failed to get BTF type info."),\ C(BAD_TYPE4STR, "This type does not fit for string."),\ C(NEED_STRING_TYPE, "$comm and immediate-string only accepts string type"),\ + C(TOO_MANY_ARGS, "Too many arguments are specified"), \ C(TOO_MANY_EARGS, "Too many entry arguments specified"), #undef C diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index af30586f1aea..a0db3404f7f7 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -188,8 +188,6 @@ static void wakeup_trace_open(struct trace_iterator *iter) { if (is_graph(iter->tr)) graph_trace_open(iter); - else - iter->private = NULL; } static void wakeup_trace_close(struct trace_iterator *iter) @@ -242,7 +240,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, return; local_irq_save(flags); - trace_function(tr, ip, parent_ip, trace_ctx); + trace_function(tr, ip, parent_ip, trace_ctx, fregs); local_irq_restore(flags); atomic_dec(&data->disabled); @@ -327,7 +325,7 @@ __trace_function(struct trace_array *tr, if (is_graph(tr)) trace_graph_function(tr, ip, parent_ip, trace_ctx); else - trace_function(tr, ip, parent_ip, trace_ctx); + trace_function(tr, ip, parent_ip, trace_ctx, NULL); } static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index ccc762fbb69c..3386439ec9f6 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -562,8 +562,14 @@ static int __trace_uprobe_create(int argc, const char **argv) if (argc < 2) return -ECANCELED; - if (argc - 2 > MAX_TRACE_ARGS) + + trace_probe_log_init("trace_uprobe", argc, argv); + + if (argc - 2 > MAX_TRACE_ARGS) { + trace_probe_log_set_index(2); + trace_probe_log_err(0, TOO_MANY_ARGS); return -E2BIG; + } if (argv[0][1] == ':') event = &argv[0][2]; @@ -582,7 +588,6 @@ static int __trace_uprobe_create(int argc, const char **argv) return -ECANCELED; } - trace_probe_log_init("trace_uprobe", argc, argv); trace_probe_log_set_index(1); /* filename is the 2nd argument */ *arg++ = '\0'; |