diff options
Diffstat (limited to 'kernel/trace/bpf_trace.c')
| -rw-r--r-- | kernel/trace/bpf_trace.c | 345 |
1 files changed, 312 insertions, 33 deletions
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index a02bd258677e..75495a5c3507 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -23,6 +23,7 @@ #include <linux/sort.h> #include <linux/key.h> #include <linux/namei.h> +#include <linux/file.h> #include <net/bpf_sk_storage.h> @@ -42,6 +43,7 @@ #define MAX_UPROBE_MULTI_CNT (1U << 20) #define MAX_KPROBE_MULTI_CNT (1U << 20) +#define MAX_TRACING_MULTI_CNT (1U << 20) #ifdef CONFIG_MODULES struct bpf_trace_module { @@ -152,6 +154,34 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) return ret; } +/** + * trace_call_bpf_faultable - invoke BPF program in faultable context + * @call: tracepoint event + * @ctx: opaque context pointer + * + * Variant of trace_call_bpf() for faultable tracepoints (syscall + * tracepoints). Supports sleepable BPF programs by using rcu_tasks_trace + * for lifetime protection and bpf_prog_run_array_sleepable() for per-program + * RCU flavor selection, following the uprobe pattern. + * + * Per-program recursion protection is provided by + * bpf_prog_run_array_sleepable(). Global bpf_prog_active is not + * needed because syscall tracepoints cannot self-recurse. + * + * Must be called from a faultable/preemptible context. + */ +unsigned int trace_call_bpf_faultable(struct trace_event_call *call, void *ctx) +{ + struct bpf_prog_array *prog_array; + + might_fault(); + guard(rcu_tasks_trace)(); + + prog_array = rcu_dereference_check(call->prog_array, + rcu_read_lock_trace_held()); + return bpf_prog_run_array_sleepable(prog_array, ctx, bpf_prog_run); +} + #ifdef CONFIG_BPF_KPROBE_OVERRIDE BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) { @@ -1305,7 +1335,8 @@ static inline bool is_uprobe_session(const struct bpf_prog *prog) static inline bool is_trace_fsession(const struct bpf_prog *prog) { return prog->type == BPF_PROG_TYPE_TRACING && - prog->expected_attach_type == BPF_TRACE_FSESSION; + (prog->expected_attach_type == BPF_TRACE_FSESSION || + prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI); } static const struct bpf_func_proto * @@ -2072,11 +2103,19 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) static __always_inline void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args) { + struct srcu_ctr __percpu *scp = NULL; struct bpf_prog *prog = link->link.prog; + bool sleepable = prog->sleepable; struct bpf_run_ctx *old_run_ctx; struct bpf_trace_run_ctx run_ctx; - rcu_read_lock_dont_migrate(); + if (sleepable) { + scp = rcu_read_lock_tasks_trace(); + migrate_disable(); + } else { + rcu_read_lock_dont_migrate(); + } + if (unlikely(!bpf_prog_get_recursion_context(prog))) { bpf_prog_inc_misses_counter(prog); goto out; @@ -2085,12 +2124,18 @@ void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args) run_ctx.bpf_cookie = link->cookie; old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); - (void) bpf_prog_run(prog, args); + (void)bpf_prog_run(prog, args); bpf_reset_run_ctx(old_run_ctx); out: bpf_prog_put_recursion_context(prog); - rcu_read_unlock_migrate(); + + if (sleepable) { + migrate_enable(); + rcu_read_unlock_tasks_trace(scp); + } else { + rcu_read_unlock_migrate(); + } } #define UNPACK(...) __VA_ARGS__ @@ -2331,9 +2376,12 @@ static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 int err = -ENOMEM; unsigned int i; + if (!access_ok(usyms, cnt * sizeof(*usyms))) + return -EFAULT; + syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL); if (!syms) - goto error; + return -ENOMEM; buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL); if (!buf) @@ -2358,10 +2406,8 @@ static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 return 0; error: - if (err) { - kvfree(syms); - kvfree(buf); - } + kvfree(syms); + kvfree(buf); return err; } @@ -3170,6 +3216,38 @@ static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) return run_ctx->uprobe->cookie; } +static int bpf_uprobe_multi_get_path(const union bpf_attr *attr, struct path *path) +{ + void __user *upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path); + u32 path_fd = attr->link_create.uprobe_multi.path_fd; + u32 flags = attr->link_create.uprobe_multi.flags; + + if (flags & BPF_F_UPROBE_MULTI_PATH_FD) { + /* + * When BPF_F_UPROBE_MULTI_PATH_FD is set, the executable is + * identified by path_fd, upath must be NULL. + */ + if (upath) + return -EINVAL; + + CLASS(fd, f)(path_fd); + if (fd_empty(f)) + return -EBADF; + *path = fd_file(f)->f_path; + path_get(path); + return 0; + } + + /* + * When BPF_F_UPROBE_MULTI_PATH_FD is not set, the path is resolved + * relative to the cwd (AT_FDCWD) or absolute using the upath string. + */ + if (!upath || path_fd) + return -EINVAL; + + return user_path_at(AT_FDCWD, upath, LOOKUP_FOLLOW, path); +} + int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct bpf_uprobe_multi_link *link = NULL; @@ -3179,10 +3257,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr struct task_struct *task = NULL; unsigned long __user *uoffsets; u64 __user *ucookies; - void __user *upath; + unsigned long size; u32 flags, cnt, i; struct path path; - char *name; pid_t pid; int err; @@ -3197,19 +3274,18 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr return -EINVAL; flags = attr->link_create.uprobe_multi.flags; - if (flags & ~BPF_F_UPROBE_MULTI_RETURN) + if (flags & ~(BPF_F_UPROBE_MULTI_RETURN | BPF_F_UPROBE_MULTI_PATH_FD)) return -EINVAL; /* - * path, offsets and cnt are mandatory, + * offsets and cnt are mandatory, * ref_ctr_offsets and cookies are optional */ - upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path); uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets); cnt = attr->link_create.uprobe_multi.cnt; pid = attr->link_create.uprobe_multi.pid; - if (!upath || !uoffsets || !cnt || pid < 0) + if (!uoffsets || !cnt || pid < 0) return -EINVAL; if (cnt > MAX_UPROBE_MULTI_CNT) return -E2BIG; @@ -3217,14 +3293,17 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets); ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies); - name = strndup_user(upath, PATH_MAX); - if (IS_ERR(name)) { - err = PTR_ERR(name); - return err; - } + /* + * All uoffsets/uref_ctr_offsets/ucookies arrays have the same value + * size, we need to check their address range is safe for __get_user + * calls. + */ + size = sizeof(*uoffsets) * cnt; + if (!access_ok(uoffsets, size) || !access_ok(uref_ctr_offsets, size) || + !access_ok(ucookies, size)) + return -EFAULT; - err = kern_path(name, LOOKUP_FOLLOW, &path); - kfree(name); + err = bpf_uprobe_multi_get_path(attr, &path); if (err) return err; @@ -3398,12 +3477,12 @@ typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struc * direct calls into all the specific callback implementations * (copy_user_data_sleepable, copy_user_data_nofault, and so on) */ -static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u64 doff, u64 size, +static __always_inline int __bpf_dynptr_copy_str(const struct bpf_dynptr *dptr, u64 doff, u64 size, const void *unsafe_src, copy_fn_t str_copy_fn, struct task_struct *tsk) { - struct bpf_dynptr_kern *dst; + const struct bpf_dynptr_kern *dst; u64 chunk_sz, off; void *dst_slice; int cnt, err; @@ -3439,7 +3518,7 @@ static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 u64 size, const void *unsafe_src, copy_fn_t copy_fn, struct task_struct *tsk) { - struct bpf_dynptr_kern *dst; + const struct bpf_dynptr_kern *dst; void *dst_slice; char buf[256]; u64 off, chunk_sz; @@ -3540,49 +3619,49 @@ __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid return bpf_send_signal_common(sig, type, task, value); } -__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u64 off, +__bpf_kfunc int bpf_probe_read_user_dynptr(const struct bpf_dynptr *dptr, u64 off, u64 size, const void __user *unsafe_ptr__ign) { return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign, copy_user_data_nofault, NULL); } -__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u64 off, +__bpf_kfunc int bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dptr, u64 off, u64 size, const void *unsafe_ptr__ign) { return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign, copy_kernel_data_nofault, NULL); } -__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u64 off, +__bpf_kfunc int bpf_probe_read_user_str_dynptr(const struct bpf_dynptr *dptr, u64 off, u64 size, const void __user *unsafe_ptr__ign) { return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign, copy_user_str_nofault, NULL); } -__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u64 off, +__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(const struct bpf_dynptr *dptr, u64 off, u64 size, const void *unsafe_ptr__ign) { return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign, copy_kernel_str_nofault, NULL); } -__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u64 off, +__bpf_kfunc int bpf_copy_from_user_dynptr(const struct bpf_dynptr *dptr, u64 off, u64 size, const void __user *unsafe_ptr__ign) { return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign, copy_user_data_sleepable, NULL); } -__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u64 off, +__bpf_kfunc int bpf_copy_from_user_str_dynptr(const struct bpf_dynptr *dptr, u64 off, u64 size, const void __user *unsafe_ptr__ign) { return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign, copy_user_str_sleepable, NULL); } -__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u64 off, +__bpf_kfunc int bpf_copy_from_user_task_dynptr(const struct bpf_dynptr *dptr, u64 off, u64 size, const void __user *unsafe_ptr__ign, struct task_struct *tsk) { @@ -3590,7 +3669,7 @@ __bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u64 off, copy_user_data_sleepable, tsk); } -__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u64 off, +__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(const struct bpf_dynptr *dptr, u64 off, u64 size, const void __user *unsafe_ptr__ign, struct task_struct *tsk) { @@ -3599,3 +3678,203 @@ __bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u64 } __bpf_kfunc_end_defs(); + +#if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) && \ + defined(CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS) + +static void bpf_tracing_multi_link_release(struct bpf_link *link) +{ + struct bpf_tracing_multi_link *tr_link = + container_of(link, struct bpf_tracing_multi_link, link); + + WARN_ON_ONCE(bpf_trampoline_multi_detach(link->prog, tr_link)); +} + +static void bpf_tracing_multi_link_dealloc(struct bpf_link *link) +{ + struct bpf_tracing_multi_link *tr_link = + container_of(link, struct bpf_tracing_multi_link, link); + + kvfree(tr_link->fexits); + kvfree(tr_link->cookies); + kvfree(tr_link); +} + +#ifdef CONFIG_PROC_FS +static void bpf_tracing_multi_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_tracing_multi_link *tr_link = + container_of(link, struct bpf_tracing_multi_link, link); + bool has_cookies = !!tr_link->cookies; + + seq_printf(seq, "attach_type:\t%u\n", tr_link->link.attach_type); + seq_printf(seq, "cnt:\t%u\n", tr_link->nodes_cnt); + + seq_printf(seq, "%s\t %s\t %s\t %s\n", "obj-id", "btf-id", "cookie", "func"); + for (int i = 0; i < tr_link->nodes_cnt; i++) { + struct bpf_tracing_multi_node *mnode = &tr_link->nodes[i]; + u32 btf_id, obj_id; + + bpf_trampoline_unpack_key(mnode->trampoline->key, &obj_id, &btf_id); + seq_printf(seq, "%u\t %u\t %llu\t %pS\n", + obj_id, btf_id, + has_cookies ? tr_link->cookies[i] : 0, + (void *) mnode->trampoline->ip); + + cond_resched(); + } +} +#endif + +static const struct bpf_link_ops bpf_tracing_multi_link_lops = { + .release = bpf_tracing_multi_link_release, + .dealloc_deferred = bpf_tracing_multi_link_dealloc, +#ifdef CONFIG_PROC_FS + .show_fdinfo = bpf_tracing_multi_show_fdinfo, +#endif +}; + +static int ids_cmp_r(const void *pa, const void *pb, const void *priv __maybe_unused) +{ + u32 a = *(u32 *) pa; + u32 b = *(u32 *) pb; + + return (a > b) - (a < b); +} + +static void ids_swap_r(void *a, void *b, int size __maybe_unused, + const void *priv __maybe_unused) +{ + u64 *cookie_a, *cookie_b, *cookies; + u32 *id_a = a, *id_b = b, *ids; + void **data = (void **) priv; + + ids = data[0]; + cookies = data[1]; + + if (cookies) { + cookie_a = cookies + (id_a - ids); + cookie_b = cookies + (id_b - ids); + swap(*cookie_a, *cookie_b); + } + swap(*id_a, *id_b); +} + +static int check_dup_ids(u32 *ids, u64 *cookies, u32 cnt) +{ + void *data[2] = { ids, cookies }; + int err = 0; + + /* + * Sort ids array (together with cookies array if defined) + * and check it for duplicates. The ids and cookies arrays + * are left sorted. + */ + sort_r_nonatomic(ids, cnt, sizeof(ids[0]), ids_cmp_r, ids_swap_r, data); + + for (int i = 1; i < cnt; i++) { + if (ids[i] == ids[i - 1]) { + err = -EINVAL; + break; + } + } + return err; +} + +int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr) +{ + struct bpf_tracing_multi_link *link = NULL; + struct bpf_tramp_node *fexits = NULL; + struct bpf_link_primer link_primer; + u32 cnt, *ids = NULL; + u64 __user *ucookies; + u64 *cookies = NULL; + u32 __user *uids; + int err; + + uids = u64_to_user_ptr(attr->link_create.tracing_multi.ids); + cnt = attr->link_create.tracing_multi.cnt; + + if (!cnt || !uids) + return -EINVAL; + if (cnt > MAX_TRACING_MULTI_CNT) + return -E2BIG; + if (attr->link_create.flags || attr->link_create.target_fd) + return -EINVAL; + + ids = kvmalloc_objs(*ids, cnt); + if (!ids) + return -ENOMEM; + + if (copy_from_user(ids, uids, cnt * sizeof(*ids))) { + err = -EFAULT; + goto error; + } + + ucookies = u64_to_user_ptr(attr->link_create.tracing_multi.cookies); + if (ucookies) { + cookies = kvmalloc_objs(*cookies, cnt); + if (!cookies) { + err = -ENOMEM; + goto error; + } + if (copy_from_user(cookies, ucookies, cnt * sizeof(*cookies))) { + err = -EFAULT; + goto error; + } + } + + err = check_dup_ids(ids, cookies, cnt); + if (err) + goto error; + + if (prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) { + fexits = kvmalloc_objs(*fexits, cnt); + if (!fexits) { + err = -ENOMEM; + goto error; + } + } + + link = kvzalloc_flex(*link, nodes, cnt); + if (!link) { + err = -ENOMEM; + goto error; + } + + bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING_MULTI, + &bpf_tracing_multi_link_lops, prog, prog->expected_attach_type); + + err = bpf_link_prime(&link->link, &link_primer); + if (err) + goto error; + + link->nodes_cnt = cnt; + link->cookies = cookies; + link->fexits = fexits; + + err = bpf_trampoline_multi_attach(prog, ids, link); + kvfree(ids); + if (err) { + bpf_link_cleanup(&link_primer); + return err; + } + return bpf_link_settle(&link_primer); + +error: + kvfree(fexits); + kvfree(cookies); + kvfree(ids); + kvfree(link); + return err; +} + +#else + +int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr) +{ + return -EOPNOTSUPP; +} + +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS && CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */ |
