diff options
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/Kconfig | 3 | ||||
-rw-r--r-- | kernel/trace/blktrace.c | 75 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 60 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 13 | ||||
-rw-r--r-- | kernel/trace/rethook.c | 9 | ||||
-rw-r--r-- | kernel/trace/trace.c | 13 | ||||
-rw-r--r-- | kernel/trace/trace_events_hist.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 11 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 1 |
9 files changed, 115 insertions, 72 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index debbbb083286..ccd6a5ade3e9 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -194,7 +194,8 @@ config FUNCTION_TRACER sequence is then dynamically patched into a tracer call when tracing is enabled by the administrator. If it's runtime disabled (the bootup default), then the overhead of the instructions is very - small and not measurable even in micro-benchmarks. + small and not measurable even in micro-benchmarks (at least on + x86, but may have impact on other architectures). config FUNCTION_GRAPH_TRACER bool "Kernel Function Graph Tracer" diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 10a32b0f2deb..7f5eb295fe19 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -205,7 +205,7 @@ static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), #define BLK_TC_PREFLUSH BLK_TC_FLUSH /* The ilog2() calls fall out because they're constant */ -#define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \ +#define MASK_TC_BIT(rw, __name) ((__force u32)(rw & REQ_ ## __name) << \ (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name)) /* @@ -213,8 +213,8 @@ static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), * blk_io_trace structure and places it in a per-cpu subbuffer. */ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, - int op, int op_flags, u32 what, int error, int pdu_len, - void *pdu_data, u64 cgid) + const blk_opf_t opf, u32 what, int error, + int pdu_len, void *pdu_data, u64 cgid) { struct task_struct *tsk = current; struct ring_buffer_event *event = NULL; @@ -227,16 +227,17 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, int cpu; bool blk_tracer = blk_tracer_enabled; ssize_t cgid_len = cgid ? sizeof(cgid) : 0; + const enum req_op op = opf & REQ_OP_MASK; if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer)) return; what |= ddir_act[op_is_write(op) ? WRITE : READ]; - what |= MASK_TC_BIT(op_flags, SYNC); - what |= MASK_TC_BIT(op_flags, RAHEAD); - what |= MASK_TC_BIT(op_flags, META); - what |= MASK_TC_BIT(op_flags, PREFLUSH); - what |= MASK_TC_BIT(op_flags, FUA); + what |= MASK_TC_BIT(opf, SYNC); + what |= MASK_TC_BIT(opf, RAHEAD); + what |= MASK_TC_BIT(opf, META); + what |= MASK_TC_BIT(opf, PREFLUSH); + what |= MASK_TC_BIT(opf, FUA); if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE) what |= BLK_TC_ACT(BLK_TC_DISCARD); if (op == REQ_OP_FLUSH) @@ -736,12 +737,12 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) switch (cmd) { case BLKTRACESETUP: - bdevname(bdev, b); + snprintf(b, sizeof(b), "%pg", bdev); ret = __blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); break; #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) case BLKTRACESETUP32: - bdevname(bdev, b); + snprintf(b, sizeof(b), "%pg", bdev); ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); break; #endif @@ -770,14 +771,11 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) **/ void blk_trace_shutdown(struct request_queue *q) { - mutex_lock(&q->debugfs_mutex); if (rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->debugfs_mutex))) { __blk_trace_startstop(q, 0); __blk_trace_remove(q); } - - mutex_unlock(&q->debugfs_mutex); } #ifdef CONFIG_BLK_CGROUP @@ -845,9 +843,8 @@ static void blk_add_trace_rq(struct request *rq, blk_status_t error, else what |= BLK_TC_ACT(BLK_TC_FS); - __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq), - rq->cmd_flags, what, blk_status_to_errno(error), 0, - NULL, cgid); + __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, rq->cmd_flags, + what, blk_status_to_errno(error), 0, NULL, cgid); rcu_read_unlock(); } @@ -906,7 +903,7 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, } __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, - bio_op(bio), bio->bi_opf, what, error, 0, NULL, + bio->bi_opf, what, error, 0, NULL, blk_trace_bio_get_cgid(q, bio)); rcu_read_unlock(); } @@ -952,7 +949,7 @@ static void blk_add_trace_plug(void *ignore, struct request_queue *q) rcu_read_lock(); bt = rcu_dereference(q->blk_trace); if (bt) - __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0); + __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0); rcu_read_unlock(); } @@ -972,7 +969,7 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q, else what = BLK_TA_UNPLUG_TIMER; - __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0); + __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0); } rcu_read_unlock(); } @@ -988,8 +985,7 @@ static void blk_add_trace_split(void *ignore, struct bio *bio, unsigned int pdu) __be64 rpdu = cpu_to_be64(pdu); __blk_add_trace(bt, bio->bi_iter.bi_sector, - bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, - BLK_TA_SPLIT, + bio->bi_iter.bi_size, bio->bi_opf, BLK_TA_SPLIT, blk_status_to_errno(bio->bi_status), sizeof(rpdu), &rpdu, blk_trace_bio_get_cgid(q, bio)); @@ -1025,7 +1021,7 @@ static void blk_add_trace_bio_remap(void *ignore, struct bio *bio, dev_t dev, r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, - bio_op(bio), bio->bi_opf, BLK_TA_REMAP, + bio->bi_opf, BLK_TA_REMAP, blk_status_to_errno(bio->bi_status), sizeof(r), &r, blk_trace_bio_get_cgid(q, bio)); rcu_read_unlock(); @@ -1061,7 +1057,7 @@ static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), - rq_data_dir(rq), 0, BLK_TA_REMAP, 0, + rq->cmd_flags, BLK_TA_REMAP, 0, sizeof(r), &r, blk_trace_request_get_cgid(rq)); rcu_read_unlock(); } @@ -1087,7 +1083,7 @@ void blk_add_driver_data(struct request *rq, void *data, size_t len) return; } - __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, + __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, BLK_TA_DRV_DATA, 0, len, data, blk_trace_request_get_cgid(rq)); rcu_read_unlock(); @@ -1870,17 +1866,6 @@ out_unlock_bdev: out: return ret ? ret : count; } - -int blk_trace_init_sysfs(struct device *dev) -{ - return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); -} - -void blk_trace_remove_sysfs(struct device *dev) -{ - sysfs_remove_group(&dev->kobj, &blk_trace_attr_group); -} - #endif /* CONFIG_BLK_DEV_IO_TRACE */ #ifdef CONFIG_EVENT_TRACING @@ -1888,21 +1873,21 @@ void blk_trace_remove_sysfs(struct device *dev) /** * blk_fill_rwbs - Fill the buffer rwbs by mapping op to character string. * @rwbs: buffer to be filled - * @op: REQ_OP_XXX for the tracepoint + * @opf: request operation type (REQ_OP_XXX) and flags for the tracepoint * * Description: - * Maps the REQ_OP_XXX to character and fills the buffer provided by the - * caller with resulting string. + * Maps each request operation and flag to a single character and fills the + * buffer provided by the caller with resulting string. * **/ -void blk_fill_rwbs(char *rwbs, unsigned int op) +void blk_fill_rwbs(char *rwbs, blk_opf_t opf) { int i = 0; - if (op & REQ_PREFLUSH) + if (opf & REQ_PREFLUSH) rwbs[i++] = 'F'; - switch (op & REQ_OP_MASK) { + switch (opf & REQ_OP_MASK) { case REQ_OP_WRITE: rwbs[i++] = 'W'; break; @@ -1923,13 +1908,13 @@ void blk_fill_rwbs(char *rwbs, unsigned int op) rwbs[i++] = 'N'; } - if (op & REQ_FUA) + if (opf & REQ_FUA) rwbs[i++] = 'F'; - if (op & REQ_RAHEAD) + if (opf & REQ_RAHEAD) rwbs[i++] = 'A'; - if (op & REQ_SYNC) + if (opf & REQ_SYNC) rwbs[i++] = 'S'; - if (op & REQ_META) + if (opf & REQ_META) rwbs[i++] = 'M'; rwbs[i] = '\0'; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 7a13e6ac6327..88589d74a892 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2423,7 +2423,7 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip, kprobe_multi_link_prog_run(link, entry_ip, regs); } -static int symbols_cmp(const void *a, const void *b) +static int symbols_cmp_r(const void *a, const void *b, const void *priv) { const char **str_a = (const char **) a; const char **str_b = (const char **) b; @@ -2431,6 +2431,28 @@ static int symbols_cmp(const void *a, const void *b) return strcmp(*str_a, *str_b); } +struct multi_symbols_sort { + const char **funcs; + u64 *cookies; +}; + +static void symbols_swap_r(void *a, void *b, int size, const void *priv) +{ + const struct multi_symbols_sort *data = priv; + const char **name_a = a, **name_b = b; + + swap(*name_a, *name_b); + + /* If defined, swap also related cookies. */ + if (data->cookies) { + u64 *cookie_a, *cookie_b; + + cookie_a = data->cookies + (name_a - data->funcs); + cookie_b = data->cookies + (name_b - data->funcs); + swap(*cookie_a, *cookie_b); + } +} + int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct bpf_kprobe_multi_link *link = NULL; @@ -2468,38 +2490,46 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (!addrs) return -ENOMEM; + ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies); + if (ucookies) { + cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL); + if (!cookies) { + err = -ENOMEM; + goto error; + } + if (copy_from_user(cookies, ucookies, size)) { + err = -EFAULT; + goto error; + } + } + if (uaddrs) { if (copy_from_user(addrs, uaddrs, size)) { err = -EFAULT; goto error; } } else { + struct multi_symbols_sort data = { + .cookies = cookies, + }; struct user_syms us; err = copy_user_syms(&us, usyms, cnt); if (err) goto error; - sort(us.syms, cnt, sizeof(*us.syms), symbols_cmp, NULL); + if (cookies) + data.funcs = us.syms; + + sort_r(us.syms, cnt, sizeof(*us.syms), symbols_cmp_r, + symbols_swap_r, &data); + err = ftrace_lookup_symbols(us.syms, cnt, addrs); free_user_syms(&us); if (err) goto error; } - ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies); - if (ucookies) { - cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL); - if (!cookies) { - err = -ENOMEM; - goto error; - } - if (copy_from_user(cookies, ucookies, size)) { - err = -EFAULT; - goto error; - } - } - link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) { err = -ENOMEM; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e750fe141a60..601ccf1b2f09 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -8029,15 +8029,23 @@ static int kallsyms_callback(void *data, const char *name, struct module *mod, unsigned long addr) { struct kallsyms_data *args = data; + const char **sym; + int idx; - if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp)) + sym = bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp); + if (!sym) + return 0; + + idx = sym - args->syms; + if (args->addrs[idx]) return 0; addr = ftrace_location(addr); if (!addr) return 0; - args->addrs[args->found++] = addr; + args->addrs[idx] = addr; + args->found++; return args->found == args->cnt ? 1 : 0; } @@ -8062,6 +8070,7 @@ int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *a struct kallsyms_data args; int err; + memset(addrs, 0, sizeof(*addrs) * cnt); args.addrs = addrs; args.syms = sorted_syms; args.cnt = cnt; diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c index b56833700d23..c69d82273ce7 100644 --- a/kernel/trace/rethook.c +++ b/kernel/trace/rethook.c @@ -154,6 +154,15 @@ struct rethook_node *rethook_try_get(struct rethook *rh) if (unlikely(!handler)) return NULL; + /* + * This expects the caller will set up a rethook on a function entry. + * When the function returns, the rethook will eventually be reclaimed + * or released in the rethook_recycle() with call_rcu(). + * This means the caller must be run in the RCU-availabe context. + */ + if (unlikely(!rcu_is_watching())) + return NULL; + fn = freelist_try_get(&rh->pool); if (!fn) return NULL; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5fc7f17f5ec7..0c517c8c8999 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6424,9 +6424,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) synchronize_rcu(); free_snapshot(tr); } -#endif -#ifdef CONFIG_TRACER_MAX_TRACE if (t->use_max_tr && !had_max_tr) { ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) @@ -9866,6 +9864,12 @@ void trace_init_global_iter(struct trace_iterator *iter) /* Output in nanoseconds only if we are using a clock in nanoseconds. */ if (trace_clocks[iter->tr->clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; + + /* Can not use kmalloc for iter.temp and iter.fmt */ + iter->temp = static_temp_buf; + iter->temp_size = STATIC_TEMP_BUF_SIZE; + iter->fmt = static_fmt_buf; + iter->fmt_size = STATIC_FMT_BUF_SIZE; } void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) @@ -9898,11 +9902,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) /* Simulate the iterator */ trace_init_global_iter(&iter); - /* Can not use kmalloc for iter.temp and iter.fmt */ - iter.temp = static_temp_buf; - iter.temp_size = STATIC_TEMP_BUF_SIZE; - iter.fmt = static_fmt_buf; - iter.fmt_size = STATIC_FMT_BUF_SIZE; for_each_tracing_cpu(cpu) { atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 48e82e141d54..e87a46794079 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -4430,6 +4430,8 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) s = kstrdup(field_str, GFP_KERNEL); if (!s) { + kfree(hist_data->attrs->var_defs.name[n_vars]); + hist_data->attrs->var_defs.name[n_vars] = NULL; ret = -ENOMEM; goto free; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 93507330462c..a245ea673715 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1718,8 +1718,17 @@ static int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) { struct kretprobe *rp = get_kretprobe(ri); - struct trace_kprobe *tk = container_of(rp, struct trace_kprobe, rp); + struct trace_kprobe *tk; + + /* + * There is a small chance that get_kretprobe(ri) returns NULL when + * the kretprobe is unregister on another CPU between kretprobe's + * trampoline_handler and this function. + */ + if (unlikely(!rp)) + return 0; + tk = container_of(rp, struct trace_kprobe, rp); raw_cpu_inc(*tk->nhit); if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE)) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 9711589273cd..c3dc4f859a6b 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -546,7 +546,6 @@ static int __trace_uprobe_create(int argc, const char **argv) bool is_return = false; int i, ret; - ret = 0; ref_ctr_offset = 0; switch (argv[0][0]) { |