diff options
Diffstat (limited to 'kernel/trace/bpf_trace.c')
| -rw-r--r-- | kernel/trace/bpf_trace.c | 769 |
1 files changed, 393 insertions, 376 deletions
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index adc947587eb8..a02bd258677e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -22,7 +22,6 @@ #include <linux/bsearch.h> #include <linux/sort.h> #include <linux/key.h> -#include <linux/verification.h> #include <linux/namei.h> #include <net/bpf_sk_storage.h> @@ -392,7 +391,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = { .arg2_type = ARG_CONST_SIZE, }; -static void __set_printk_clr_event(void) +static void __set_printk_clr_event(struct work_struct *work) { /* * This program might be calling bpf_trace_printk, @@ -405,10 +404,11 @@ static void __set_printk_clr_event(void) if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1)) pr_warn_ratelimited("could not enable bpf_trace_printk events"); } +static DECLARE_WORK(set_printk_work, __set_printk_clr_event); const struct bpf_func_proto *bpf_get_trace_printk_proto(void) { - __set_printk_clr_event(); + schedule_work(&set_printk_work); return &bpf_trace_printk_proto; } @@ -451,7 +451,7 @@ static const struct bpf_func_proto bpf_trace_vprintk_proto = { const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void) { - __set_printk_clr_event(); + schedule_work(&set_printk_work); return &bpf_trace_vprintk_proto; } @@ -571,7 +571,7 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) return value; } -static const struct bpf_func_proto bpf_perf_event_read_proto = { +const struct bpf_func_proto bpf_perf_event_read_proto = { .func = bpf_perf_event_read, .gpl_only = true, .ret_type = RET_INTEGER, @@ -606,6 +606,11 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = { .arg4_type = ARG_CONST_SIZE, }; +const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void) +{ + return &bpf_perf_event_read_value_proto; +} + static __always_inline u64 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, u64 flags, struct perf_raw_record *raw, @@ -775,8 +780,7 @@ BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task) return (unsigned long) task_pt_regs(task); } -BTF_ID_LIST(bpf_task_pt_regs_ids) -BTF_ID(struct, pt_regs) +BTF_ID_LIST_SINGLE(bpf_task_pt_regs_ids, struct, pt_regs) const struct bpf_func_proto bpf_task_pt_regs_proto = { .func = bpf_task_pt_regs, @@ -826,7 +830,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struc info.si_code = SI_KERNEL; info.si_pid = 0; info.si_uid = 0; - info.si_value.sival_ptr = (void *)(unsigned long)value; + info.si_value.sival_ptr = (void __user __force *)(unsigned long)value; siginfo = &info; } @@ -843,7 +847,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struc if (unlikely(is_global_init(task))) return -EPERM; - if (!preemptible()) { + if (preempt_count() != 0 || irqs_disabled()) { /* Do an early check on signal validity. Otherwise, * the error is lost in deferred irq_work. */ @@ -876,7 +880,7 @@ BPF_CALL_1(bpf_send_signal, u32, sig) return bpf_send_signal_common(sig, PIDTYPE_TGID, NULL, 0); } -static const struct bpf_func_proto bpf_send_signal_proto = { +const struct bpf_func_proto bpf_send_signal_proto = { .func = bpf_send_signal, .gpl_only = false, .ret_type = RET_INTEGER, @@ -888,14 +892,14 @@ BPF_CALL_1(bpf_send_signal_thread, u32, sig) return bpf_send_signal_common(sig, PIDTYPE_PID, NULL, 0); } -static const struct bpf_func_proto bpf_send_signal_thread_proto = { +const struct bpf_func_proto bpf_send_signal_thread_proto = { .func = bpf_send_signal_thread, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, }; -BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz) +BPF_CALL_3(bpf_d_path, const struct path *, path, char *, buf, u32, sz) { struct path copy; long len; @@ -961,7 +965,7 @@ static const struct bpf_func_proto bpf_d_path_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID, .arg1_btf_id = &bpf_d_path_btf_ids[0], - .arg2_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .allowed = bpf_d_path_allowed, }; @@ -1018,7 +1022,7 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { .func = bpf_snprintf_btf, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_MEM, + .arg1_type = ARG_PTR_TO_MEM | MEM_WRITE, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg4_type = ARG_CONST_SIZE, @@ -1038,27 +1042,14 @@ static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = { .arg1_type = ARG_PTR_TO_CTX, }; -#ifdef CONFIG_X86_KERNEL_IBT -static unsigned long get_entry_ip(unsigned long fentry_ip) +static inline unsigned long get_entry_ip(unsigned long fentry_ip) { - u32 instr; - - /* We want to be extra safe in case entry ip is on the page edge, - * but otherwise we need to avoid get_kernel_nofault()'s overhead. - */ - if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) { - if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE))) - return fentry_ip; - } else { - instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE); - } - if (is_endbr(instr)) +#ifdef CONFIG_X86_KERNEL_IBT + if (is_endbr((void *)(fentry_ip - ENDBR_INSN_SIZE))) fentry_ip -= ENDBR_INSN_SIZE; +#endif return fentry_ip; } -#else -#define get_entry_ip(fentry_ip) fentry_ip -#endif BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs) { @@ -1192,7 +1183,7 @@ BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags) return entry_cnt * br_entry_size; } -static const struct bpf_func_proto bpf_get_branch_snapshot_proto = { +const struct bpf_func_proto bpf_get_branch_snapshot_proto = { .func = bpf_get_branch_snapshot, .gpl_only = true, .ret_type = RET_INTEGER, @@ -1203,7 +1194,7 @@ static const struct bpf_func_proto bpf_get_branch_snapshot_proto = { BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value) { /* This helper call is inlined by verifier. */ - u64 nr_args = ((u64 *)ctx)[-1]; + u64 nr_args = ((u64 *)ctx)[-1] & 0xFF; if ((u64) n >= nr_args) return -EINVAL; @@ -1223,7 +1214,7 @@ static const struct bpf_func_proto bpf_get_func_arg_proto = { BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value) { /* This helper call is inlined by verifier. */ - u64 nr_args = ((u64 *)ctx)[-1]; + u64 nr_args = ((u64 *)ctx)[-1] & 0xFF; *value = ((u64 *)ctx)[nr_args]; return 0; @@ -1240,7 +1231,7 @@ static const struct bpf_func_proto bpf_get_func_ret_proto = { BPF_CALL_1(get_func_arg_cnt, void *, ctx) { /* This helper call is inlined by verifier. */ - return ((u64 *)ctx)[-1]; + return ((u64 *)ctx)[-1] & 0xFF; } static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = { @@ -1249,244 +1240,14 @@ static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -#ifdef CONFIG_KEYS -__bpf_kfunc_start_defs(); - -/** - * bpf_lookup_user_key - lookup a key by its serial - * @serial: key handle serial number - * @flags: lookup-specific flags - * - * Search a key with a given *serial* and the provided *flags*. - * If found, increment the reference count of the key by one, and - * return it in the bpf_key structure. - * - * The bpf_key structure must be passed to bpf_key_put() when done - * with it, so that the key reference count is decremented and the - * bpf_key structure is freed. - * - * Permission checks are deferred to the time the key is used by - * one of the available key-specific kfuncs. - * - * Set *flags* with KEY_LOOKUP_CREATE, to attempt creating a requested - * special keyring (e.g. session keyring), if it doesn't yet exist. - * Set *flags* with KEY_LOOKUP_PARTIAL, to lookup a key without waiting - * for the key construction, and to retrieve uninstantiated keys (keys - * without data attached to them). - * - * Return: a bpf_key pointer with a valid key pointer if the key is found, a - * NULL pointer otherwise. - */ -__bpf_kfunc struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags) -{ - key_ref_t key_ref; - struct bpf_key *bkey; - - if (flags & ~KEY_LOOKUP_ALL) - return NULL; - - /* - * Permission check is deferred until the key is used, as the - * intent of the caller is unknown here. - */ - key_ref = lookup_user_key(serial, flags, KEY_DEFER_PERM_CHECK); - if (IS_ERR(key_ref)) - return NULL; - - bkey = kmalloc(sizeof(*bkey), GFP_KERNEL); - if (!bkey) { - key_put(key_ref_to_ptr(key_ref)); - return NULL; - } - - bkey->key = key_ref_to_ptr(key_ref); - bkey->has_ref = true; - - return bkey; -} - -/** - * bpf_lookup_system_key - lookup a key by a system-defined ID - * @id: key ID - * - * Obtain a bpf_key structure with a key pointer set to the passed key ID. - * The key pointer is marked as invalid, to prevent bpf_key_put() from - * attempting to decrement the key reference count on that pointer. The key - * pointer set in such way is currently understood only by - * verify_pkcs7_signature(). - * - * Set *id* to one of the values defined in include/linux/verification.h: - * 0 for the primary keyring (immutable keyring of system keys); - * VERIFY_USE_SECONDARY_KEYRING for both the primary and secondary keyring - * (where keys can be added only if they are vouched for by existing keys - * in those keyrings); VERIFY_USE_PLATFORM_KEYRING for the platform - * keyring (primarily used by the integrity subsystem to verify a kexec'ed - * kerned image and, possibly, the initramfs signature). - * - * Return: a bpf_key pointer with an invalid key pointer set from the - * pre-determined ID on success, a NULL pointer otherwise - */ -__bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id) -{ - struct bpf_key *bkey; - - if (system_keyring_id_check(id) < 0) - return NULL; - - bkey = kmalloc(sizeof(*bkey), GFP_ATOMIC); - if (!bkey) - return NULL; - - bkey->key = (struct key *)(unsigned long)id; - bkey->has_ref = false; - - return bkey; -} - -/** - * bpf_key_put - decrement key reference count if key is valid and free bpf_key - * @bkey: bpf_key structure - * - * Decrement the reference count of the key inside *bkey*, if the pointer - * is valid, and free *bkey*. - */ -__bpf_kfunc void bpf_key_put(struct bpf_key *bkey) -{ - if (bkey->has_ref) - key_put(bkey->key); - - kfree(bkey); -} - -#ifdef CONFIG_SYSTEM_DATA_VERIFICATION -/** - * bpf_verify_pkcs7_signature - verify a PKCS#7 signature - * @data_p: data to verify - * @sig_p: signature of the data - * @trusted_keyring: keyring with keys trusted for signature verification - * - * Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr* - * with keys in a keyring referenced by *trusted_keyring*. - * - * Return: 0 on success, a negative value on error. - */ -__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p, - struct bpf_dynptr *sig_p, - struct bpf_key *trusted_keyring) -{ - struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p; - struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p; - const void *data, *sig; - u32 data_len, sig_len; - int ret; - - if (trusted_keyring->has_ref) { - /* - * Do the permission check deferred in bpf_lookup_user_key(). - * See bpf_lookup_user_key() for more details. - * - * A call to key_task_permission() here would be redundant, as - * it is already done by keyring_search() called by - * find_asymmetric_key(). - */ - ret = key_validate(trusted_keyring->key); - if (ret < 0) - return ret; - } - - data_len = __bpf_dynptr_size(data_ptr); - data = __bpf_dynptr_data(data_ptr, data_len); - sig_len = __bpf_dynptr_size(sig_ptr); - sig = __bpf_dynptr_data(sig_ptr, sig_len); - - return verify_pkcs7_signature(data, data_len, sig, sig_len, - trusted_keyring->key, - VERIFYING_UNSPECIFIED_SIGNATURE, NULL, - NULL); -} -#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */ - -__bpf_kfunc_end_defs(); - -BTF_KFUNCS_START(key_sig_kfunc_set) -BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE) -BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE) -#ifdef CONFIG_SYSTEM_DATA_VERIFICATION -BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE) -#endif -BTF_KFUNCS_END(key_sig_kfunc_set) - -static const struct btf_kfunc_id_set bpf_key_sig_kfunc_set = { - .owner = THIS_MODULE, - .set = &key_sig_kfunc_set, -}; - -static int __init bpf_key_sig_kfuncs_init(void) -{ - return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, - &bpf_key_sig_kfunc_set); -} - -late_initcall(bpf_key_sig_kfuncs_init); -#endif /* CONFIG_KEYS */ - static const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { const struct bpf_func_proto *func_proto; switch (func_id) { - case BPF_FUNC_map_lookup_elem: - return &bpf_map_lookup_elem_proto; - case BPF_FUNC_map_update_elem: - return &bpf_map_update_elem_proto; - case BPF_FUNC_map_delete_elem: - return &bpf_map_delete_elem_proto; - case BPF_FUNC_map_push_elem: - return &bpf_map_push_elem_proto; - case BPF_FUNC_map_pop_elem: - return &bpf_map_pop_elem_proto; - case BPF_FUNC_map_peek_elem: - return &bpf_map_peek_elem_proto; - case BPF_FUNC_map_lookup_percpu_elem: - return &bpf_map_lookup_percpu_elem_proto; - case BPF_FUNC_ktime_get_ns: - return &bpf_ktime_get_ns_proto; - case BPF_FUNC_ktime_get_boot_ns: - return &bpf_ktime_get_boot_ns_proto; - case BPF_FUNC_tail_call: - return &bpf_tail_call_proto; - case BPF_FUNC_get_current_task: - return &bpf_get_current_task_proto; - case BPF_FUNC_get_current_task_btf: - return &bpf_get_current_task_btf_proto; - case BPF_FUNC_task_pt_regs: - return &bpf_task_pt_regs_proto; - case BPF_FUNC_get_current_uid_gid: - return &bpf_get_current_uid_gid_proto; - case BPF_FUNC_get_current_comm: - return &bpf_get_current_comm_proto; - case BPF_FUNC_trace_printk: - return bpf_get_trace_printk_proto(); case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; - case BPF_FUNC_get_numa_node_id: - return &bpf_get_numa_node_id_proto; - case BPF_FUNC_perf_event_read: - return &bpf_perf_event_read_proto; - case BPF_FUNC_get_prandom_u32: - return &bpf_get_prandom_u32_proto; - case BPF_FUNC_probe_read_user: - return &bpf_probe_read_user_proto; - case BPF_FUNC_probe_read_kernel: - return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? - NULL : &bpf_probe_read_kernel_proto; - case BPF_FUNC_probe_read_user_str: - return &bpf_probe_read_user_str_proto; - case BPF_FUNC_probe_read_kernel_str: - return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? - NULL : &bpf_probe_read_kernel_str_proto; #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE case BPF_FUNC_probe_read: return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? @@ -1495,65 +1256,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_compat_str_proto; #endif -#ifdef CONFIG_CGROUPS - case BPF_FUNC_cgrp_storage_get: - return &bpf_cgrp_storage_get_proto; - case BPF_FUNC_cgrp_storage_delete: - return &bpf_cgrp_storage_delete_proto; - case BPF_FUNC_current_task_under_cgroup: - return &bpf_current_task_under_cgroup_proto; -#endif - case BPF_FUNC_send_signal: - return &bpf_send_signal_proto; - case BPF_FUNC_send_signal_thread: - return &bpf_send_signal_thread_proto; - case BPF_FUNC_perf_event_read_value: - return &bpf_perf_event_read_value_proto; - case BPF_FUNC_ringbuf_output: - return &bpf_ringbuf_output_proto; - case BPF_FUNC_ringbuf_reserve: - return &bpf_ringbuf_reserve_proto; - case BPF_FUNC_ringbuf_submit: - return &bpf_ringbuf_submit_proto; - case BPF_FUNC_ringbuf_discard: - return &bpf_ringbuf_discard_proto; - case BPF_FUNC_ringbuf_query: - return &bpf_ringbuf_query_proto; - case BPF_FUNC_jiffies64: - return &bpf_jiffies64_proto; - case BPF_FUNC_get_task_stack: - return prog->sleepable ? &bpf_get_task_stack_sleepable_proto - : &bpf_get_task_stack_proto; - case BPF_FUNC_copy_from_user: - return &bpf_copy_from_user_proto; - case BPF_FUNC_copy_from_user_task: - return &bpf_copy_from_user_task_proto; - case BPF_FUNC_snprintf_btf: - return &bpf_snprintf_btf_proto; - case BPF_FUNC_per_cpu_ptr: - return &bpf_per_cpu_ptr_proto; - case BPF_FUNC_this_cpu_ptr: - return &bpf_this_cpu_ptr_proto; - case BPF_FUNC_task_storage_get: - if (bpf_prog_check_recur(prog)) - return &bpf_task_storage_get_recur_proto; - return &bpf_task_storage_get_proto; - case BPF_FUNC_task_storage_delete: - if (bpf_prog_check_recur(prog)) - return &bpf_task_storage_delete_recur_proto; - return &bpf_task_storage_delete_proto; - case BPF_FUNC_for_each_map_elem: - return &bpf_for_each_map_elem_proto; - case BPF_FUNC_snprintf: - return &bpf_snprintf_proto; case BPF_FUNC_get_func_ip: return &bpf_get_func_ip_proto_tracing; - case BPF_FUNC_get_branch_snapshot: - return &bpf_get_branch_snapshot_proto; - case BPF_FUNC_find_vma: - return &bpf_find_vma_proto; - case BPF_FUNC_trace_vprintk: - return bpf_get_trace_vprintk_proto(); default: break; } @@ -1582,7 +1286,8 @@ static bool is_kprobe_multi(const struct bpf_prog *prog) static inline bool is_kprobe_session(const struct bpf_prog *prog) { - return prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION; + return prog->type == BPF_PROG_TYPE_KPROBE && + prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION; } static inline bool is_uprobe_multi(const struct bpf_prog *prog) @@ -1593,7 +1298,14 @@ static inline bool is_uprobe_multi(const struct bpf_prog *prog) static inline bool is_uprobe_session(const struct bpf_prog *prog) { - return prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION; + return prog->type == BPF_PROG_TYPE_KPROBE && + prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION; +} + +static inline bool is_trace_fsession(const struct bpf_prog *prog) +{ + return prog->type == BPF_PROG_TYPE_TRACING && + prog->expected_attach_type == BPF_TRACE_FSESSION; } static const struct bpf_func_proto * @@ -1634,8 +1346,6 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type { if (off < 0 || off >= sizeof(struct pt_regs)) return false; - if (type != BPF_READ) - return false; if (off % size != 0) return false; /* @@ -1645,6 +1355,9 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type if (off + size > sizeof(struct pt_regs)) return false; + if (type == BPF_WRITE) + prog->aux->kprobe_write_ctx = true; + return true; } @@ -1821,7 +1534,7 @@ static const struct bpf_func_proto bpf_read_branch_records_proto = { .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM_OR_NULL, + .arg2_type = ARG_PTR_TO_MEM_OR_NULL | MEM_WRITE, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; @@ -1865,7 +1578,7 @@ static struct pt_regs *get_bpf_raw_tp_regs(void) struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs); int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level); - if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) { + if (nest_level > ARRAY_SIZE(tp_regs->regs)) { this_cpu_dec(bpf_raw_tp_nest_level); return ERR_PTR(-EBUSY); } @@ -1956,7 +1669,7 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = { .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_PTR_TO_UNINIT_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; @@ -2029,11 +1742,17 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_d_path: return &bpf_d_path_proto; case BPF_FUNC_get_func_arg: - return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_proto : NULL; + if (bpf_prog_has_trampoline(prog) || + prog->expected_attach_type == BPF_TRACE_RAW_TP) + return &bpf_get_func_arg_proto; + return NULL; case BPF_FUNC_get_func_ret: return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL; case BPF_FUNC_get_func_arg_cnt: - return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL; + if (bpf_prog_has_trampoline(prog) || + prog->expected_attach_type == BPF_TRACE_RAW_TP) + return &bpf_get_func_arg_cnt_proto; + return NULL; case BPF_FUNC_get_attach_cookie: if (prog->type == BPF_PROG_TYPE_TRACING && prog->expected_attach_type == BPF_TRACE_RAW_TP) @@ -2345,10 +2064,9 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) { struct module *mod; - preempt_disable(); + guard(rcu)(); mod = __module_address((unsigned long)btp); module_put(mod); - preempt_enable(); } static __always_inline @@ -2358,8 +2076,8 @@ void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args) struct bpf_run_ctx *old_run_ctx; struct bpf_trace_run_ctx run_ctx; - cant_sleep(); - if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) { + rcu_read_lock_dont_migrate(); + if (unlikely(!bpf_prog_get_recursion_context(prog))) { bpf_prog_inc_misses_counter(prog); goto out; } @@ -2367,13 +2085,12 @@ void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args) run_ctx.bpf_cookie = link->cookie; old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); - rcu_read_lock(); (void) bpf_prog_run(prog, args); - rcu_read_unlock(); bpf_reset_run_ctx(old_run_ctx); out: - this_cpu_dec(*(prog->active)); + bpf_prog_put_recursion_context(prog); + rcu_read_unlock_migrate(); } #define UNPACK(...) __VA_ARGS__ @@ -2526,7 +2243,7 @@ static int bpf_event_notify(struct notifier_block *nb, unsigned long op, switch (op) { case MODULE_STATE_COMING: - btm = kzalloc(sizeof(*btm), GFP_KERNEL); + btm = kzalloc_obj(*btm); if (btm) { btm->module = module; list_add(&btm->list, &bpf_trace_modules); @@ -2579,7 +2296,6 @@ struct bpf_kprobe_multi_link { u32 cnt; u32 mods_cnt; struct module **mods; - u32 flags; }; struct bpf_kprobe_multi_run_ctx { @@ -2668,7 +2384,8 @@ static void bpf_kprobe_multi_link_release(struct bpf_link *link) struct bpf_kprobe_multi_link *kmulti_link; kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); - unregister_fprobe(&kmulti_link->fp); + /* Don't wait for RCU GP here. */ + unregister_fprobe_async(&kmulti_link->fp); kprobe_multi_put_modules(kmulti_link->mods, kmulti_link->mods_cnt); } @@ -2699,7 +2416,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); info->kprobe_multi.count = kmulti_link->cnt; - info->kprobe_multi.flags = kmulti_link->flags; + info->kprobe_multi.flags = kmulti_link->link.flags; info->kprobe_multi.missed = kmulti_link->fp.nmissed; if (!uaddrs) @@ -2733,10 +2450,39 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, return err; } +#ifdef CONFIG_PROC_FS +static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_kprobe_multi_link *kmulti_link; + bool has_cookies; + + kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); + has_cookies = !!kmulti_link->cookies; + + seq_printf(seq, + "kprobe_cnt:\t%u\n" + "missed:\t%lu\n", + kmulti_link->cnt, + kmulti_link->fp.nmissed); + + seq_printf(seq, "%s\t %s\n", "cookie", "func"); + for (int i = 0; i < kmulti_link->cnt; i++) { + seq_printf(seq, + "%llu\t %pS\n", + has_cookies ? kmulti_link->cookies[i] : 0, + (void *)kmulti_link->addrs[i]); + } +} +#endif + static const struct bpf_link_ops bpf_kprobe_multi_link_lops = { .release = bpf_kprobe_multi_link_release, .dealloc_deferred = bpf_kprobe_multi_link_dealloc, .fill_link_info = bpf_kprobe_multi_link_fill_link_info, +#ifdef CONFIG_PROC_FS + .show_fdinfo = bpf_kprobe_multi_show_fdinfo, +#endif }; static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv) @@ -2799,7 +2545,7 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx) return run_ctx->entry_ip; } -static int +static __always_inline int kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, unsigned long entry_ip, struct ftrace_regs *fregs, bool is_return, void *data) @@ -2816,20 +2562,26 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, struct pt_regs *regs; int err; + /* + * graph tracer framework ensures we won't migrate, so there is no need + * to use migrate_disable for bpf_prog_run again. The check here just for + * __this_cpu_inc_return. + */ + cant_sleep(); + if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { bpf_prog_inc_misses_counter(link->link.prog); err = 1; goto out; } - migrate_disable(); rcu_read_lock(); regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr()); old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx); err = bpf_prog_run(link->link.prog, regs); bpf_reset_run_ctx(old_run_ctx); + ftrace_partial_regs_update(fregs, bpf_kprobe_multi_pt_regs_ptr()); rcu_read_unlock(); - migrate_enable(); out: __this_cpu_dec(bpf_prog_active); @@ -2932,18 +2684,21 @@ static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u3 u32 i, err = 0; for (i = 0; i < addrs_cnt; i++) { + bool skip_add = false; struct module *mod; - preempt_disable(); - mod = __module_address(addrs[i]); - /* Either no module or we it's already stored */ - if (!mod || has_module(&arr, mod)) { - preempt_enable(); - continue; + scoped_guard(rcu) { + mod = __module_address(addrs[i]); + /* Either no module or it's already stored */ + if (!mod || has_module(&arr, mod)) { + skip_add = true; + break; /* scoped_guard */ + } + if (!try_module_get(mod)) + err = -EINVAL; } - if (!try_module_get(mod)) - err = -EINVAL; - preempt_enable(); + if (skip_add) + continue; if (err) break; err = add_module(&arr, mod); @@ -2992,9 +2747,20 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (sizeof(u64) != sizeof(void *)) return -EOPNOTSUPP; + if (attr->link_create.flags) + return -EINVAL; + if (!is_kprobe_multi(prog)) return -EINVAL; + /* kprobe_multi is not allowed to be sleepable. */ + if (prog->sleepable) + return -EINVAL; + + /* Writing to context is not allowed for kprobes. */ + if (prog->aux->kprobe_write_ctx) + return -EINVAL; + flags = attr->link_create.kprobe_multi.flags; if (flags & ~BPF_F_KPROBE_MULTI_RETURN) return -EINVAL; @@ -3060,14 +2826,14 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr goto error; } - link = kzalloc(sizeof(*link), GFP_KERNEL); + link = kzalloc_obj(*link); if (!link) { err = -ENOMEM; goto error; } bpf_link_init(&link->link, BPF_LINK_TYPE_KPROBE_MULTI, - &bpf_kprobe_multi_link_lops, prog); + &bpf_kprobe_multi_link_lops, prog, attr->link_create.attach_type); err = bpf_link_prime(&link->link, &link_primer); if (err) @@ -3083,7 +2849,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr link->addrs = addrs; link->cookies = cookies; link->cnt = cnt; - link->flags = flags; + link->link.flags = flags; if (cookies) { /* @@ -3152,7 +2918,6 @@ struct bpf_uprobe_multi_link { struct path path; struct bpf_link link; u32 cnt; - u32 flags; struct bpf_uprobe *uprobes; struct task_struct *task; }; @@ -3216,7 +2981,7 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link, umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); info->uprobe_multi.count = umulti_link->cnt; - info->uprobe_multi.flags = umulti_link->flags; + info->uprobe_multi.flags = umulti_link->link.flags; info->uprobe_multi.pid = umulti_link->task ? task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0; @@ -3261,10 +3026,54 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link, return err; } +#ifdef CONFIG_PROC_FS +static void bpf_uprobe_multi_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_uprobe_multi_link *umulti_link; + char *p, *buf; + pid_t pid; + + umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); + + buf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!buf) + return; + + p = d_path(&umulti_link->path, buf, PATH_MAX); + if (IS_ERR(p)) { + kfree(buf); + return; + } + + pid = umulti_link->task ? + task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0; + seq_printf(seq, + "uprobe_cnt:\t%u\n" + "pid:\t%u\n" + "path:\t%s\n", + umulti_link->cnt, pid, p); + + seq_printf(seq, "%s\t %s\t %s\n", "cookie", "offset", "ref_ctr_offset"); + for (int i = 0; i < umulti_link->cnt; i++) { + seq_printf(seq, + "%llu\t %#llx\t %#lx\n", + umulti_link->uprobes[i].cookie, + umulti_link->uprobes[i].offset, + umulti_link->uprobes[i].ref_ctr_offset); + } + + kfree(buf); +} +#endif + static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { .release = bpf_uprobe_multi_link_release, .dealloc_deferred = bpf_uprobe_multi_link_dealloc, .fill_link_info = bpf_uprobe_multi_link_fill_link_info, +#ifdef CONFIG_PROC_FS + .show_fdinfo = bpf_uprobe_multi_show_fdinfo, +#endif }; static int uprobe_prog_run(struct bpf_uprobe *uprobe, @@ -3381,6 +3190,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (sizeof(u64) != sizeof(void *)) return -EOPNOTSUPP; + if (attr->link_create.flags) + return -EINVAL; + if (!is_uprobe_multi(prog)) return -EINVAL; @@ -3422,7 +3234,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr } if (pid) { + rcu_read_lock(); task = get_pid_task(find_vpid(pid), PIDTYPE_TGID); + rcu_read_unlock(); if (!task) { err = -ESRCH; goto error_path_put; @@ -3431,8 +3245,8 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr err = -ENOMEM; - link = kzalloc(sizeof(*link), GFP_KERNEL); - uprobes = kvcalloc(cnt, sizeof(*uprobes), GFP_KERNEL); + link = kzalloc_obj(*link); + uprobes = kvzalloc_objs(*uprobes, cnt); if (!uprobes || !link) goto error_free; @@ -3471,10 +3285,10 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr link->uprobes = uprobes; link->path = path; link->task = task; - link->flags = flags; + link->link.flags = flags; bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI, - &bpf_uprobe_multi_link_lops, prog); + &bpf_uprobe_multi_link_lops, prog, attr->link_create.attach_type); for (i = 0; i < cnt; i++) { uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry), @@ -3523,7 +3337,7 @@ static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) __bpf_kfunc_start_defs(); -__bpf_kfunc bool bpf_session_is_return(void) +__bpf_kfunc bool bpf_session_is_return(void *ctx) { struct bpf_session_run_ctx *session_ctx; @@ -3531,7 +3345,7 @@ __bpf_kfunc bool bpf_session_is_return(void) return session_ctx->is_return; } -__bpf_kfunc __u64 *bpf_session_cookie(void) +__bpf_kfunc __u64 *bpf_session_cookie(void *ctx) { struct bpf_session_run_ctx *session_ctx; @@ -3541,34 +3355,179 @@ __bpf_kfunc __u64 *bpf_session_cookie(void) __bpf_kfunc_end_defs(); -BTF_KFUNCS_START(kprobe_multi_kfunc_set_ids) +BTF_KFUNCS_START(session_kfunc_set_ids) BTF_ID_FLAGS(func, bpf_session_is_return) BTF_ID_FLAGS(func, bpf_session_cookie) -BTF_KFUNCS_END(kprobe_multi_kfunc_set_ids) +BTF_KFUNCS_END(session_kfunc_set_ids) -static int bpf_kprobe_multi_filter(const struct bpf_prog *prog, u32 kfunc_id) +static int bpf_session_filter(const struct bpf_prog *prog, u32 kfunc_id) { - if (!btf_id_set8_contains(&kprobe_multi_kfunc_set_ids, kfunc_id)) + if (!btf_id_set8_contains(&session_kfunc_set_ids, kfunc_id)) return 0; - if (!is_kprobe_session(prog) && !is_uprobe_session(prog)) + if (!is_kprobe_session(prog) && !is_uprobe_session(prog) && !is_trace_fsession(prog)) return -EACCES; return 0; } -static const struct btf_kfunc_id_set bpf_kprobe_multi_kfunc_set = { +static const struct btf_kfunc_id_set bpf_session_kfunc_set = { .owner = THIS_MODULE, - .set = &kprobe_multi_kfunc_set_ids, - .filter = bpf_kprobe_multi_filter, + .set = &session_kfunc_set_ids, + .filter = bpf_session_filter, }; -static int __init bpf_kprobe_multi_kfuncs_init(void) +static int __init bpf_trace_kfuncs_init(void) +{ + int err = 0; + + err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_KPROBE, &bpf_session_kfunc_set); + err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_session_kfunc_set); + + return err; +} + +late_initcall(bpf_trace_kfuncs_init); + +typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk); + +/* + * The __always_inline is to make sure the compiler doesn't + * generate indirect calls into callbacks, which is expensive, + * on some kernel configurations. This allows compiler to put + * direct calls into all the specific callback implementations + * (copy_user_data_sleepable, copy_user_data_nofault, and so on) + */ +static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u64 doff, u64 size, + const void *unsafe_src, + copy_fn_t str_copy_fn, + struct task_struct *tsk) +{ + struct bpf_dynptr_kern *dst; + u64 chunk_sz, off; + void *dst_slice; + int cnt, err; + char buf[256]; + + dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size); + if (likely(dst_slice)) + return str_copy_fn(dst_slice, unsafe_src, size, tsk); + + dst = (struct bpf_dynptr_kern *)dptr; + if (bpf_dynptr_check_off_len(dst, doff, size)) + return -E2BIG; + + for (off = 0; off < size; off += chunk_sz - 1) { + chunk_sz = min_t(u64, sizeof(buf), size - off); + /* Expect str_copy_fn to return count of copied bytes, including + * zero terminator. Next iteration increment off by chunk_sz - 1 to + * overwrite NUL. + */ + cnt = str_copy_fn(buf, unsafe_src + off, chunk_sz, tsk); + if (cnt < 0) + return cnt; + err = __bpf_dynptr_write(dst, doff + off, buf, cnt, 0); + if (err) + return err; + if (cnt < chunk_sz || chunk_sz == 1) /* we are done */ + return off + cnt; + } + return off; +} + +static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 doff, + u64 size, const void *unsafe_src, + copy_fn_t copy_fn, struct task_struct *tsk) +{ + struct bpf_dynptr_kern *dst; + void *dst_slice; + char buf[256]; + u64 off, chunk_sz; + int err; + + dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size); + if (likely(dst_slice)) + return copy_fn(dst_slice, unsafe_src, size, tsk); + + dst = (struct bpf_dynptr_kern *)dptr; + if (bpf_dynptr_check_off_len(dst, doff, size)) + return -E2BIG; + + for (off = 0; off < size; off += chunk_sz) { + chunk_sz = min_t(u64, sizeof(buf), size - off); + err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk); + if (err) + return err; + err = __bpf_dynptr_write(dst, doff + off, buf, chunk_sz, 0); + if (err) + return err; + } + return 0; +} + +static __always_inline int copy_user_data_nofault(void *dst, const void *unsafe_src, + u32 size, struct task_struct *tsk) +{ + return copy_from_user_nofault(dst, (const void __user *)unsafe_src, size); +} + +static __always_inline int copy_user_data_sleepable(void *dst, const void *unsafe_src, + u32 size, struct task_struct *tsk) +{ + int ret; + + if (!tsk) { /* Read from the current task */ + ret = copy_from_user(dst, (const void __user *)unsafe_src, size); + if (ret) + return -EFAULT; + return 0; + } + + ret = access_process_vm(tsk, (unsigned long)unsafe_src, dst, size, 0); + if (ret != size) + return -EFAULT; + return 0; +} + +static __always_inline int copy_kernel_data_nofault(void *dst, const void *unsafe_src, + u32 size, struct task_struct *tsk) +{ + return copy_from_kernel_nofault(dst, unsafe_src, size); +} + +static __always_inline int copy_user_str_nofault(void *dst, const void *unsafe_src, + u32 size, struct task_struct *tsk) { - return register_btf_kfunc_id_set(BPF_PROG_TYPE_KPROBE, &bpf_kprobe_multi_kfunc_set); + return strncpy_from_user_nofault(dst, (const void __user *)unsafe_src, size); } -late_initcall(bpf_kprobe_multi_kfuncs_init); +static __always_inline int copy_user_str_sleepable(void *dst, const void *unsafe_src, + u32 size, struct task_struct *tsk) +{ + int ret; + + if (unlikely(size == 0)) + return 0; + + if (tsk) { + ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_src, dst, size, 0); + } else { + ret = strncpy_from_user(dst, (const void __user *)unsafe_src, size - 1); + /* strncpy_from_user does not guarantee NUL termination */ + if (ret >= 0) + ((char *)dst)[ret] = '\0'; + } + + if (ret < 0) + return ret; + return ret + 1; +} + +static __always_inline int copy_kernel_str_nofault(void *dst, const void *unsafe_src, + u32 size, struct task_struct *tsk) +{ + return strncpy_from_kernel_nofault(dst, unsafe_src, size); +} __bpf_kfunc_start_defs(); @@ -3581,4 +3540,62 @@ __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid return bpf_send_signal_common(sig, type, task, value); } +__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign) +{ + return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign, + copy_user_data_nofault, NULL); +} + +__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void *unsafe_ptr__ign) +{ + return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign, + copy_kernel_data_nofault, NULL); +} + +__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign) +{ + return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign, + copy_user_str_nofault, NULL); +} + +__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void *unsafe_ptr__ign) +{ + return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign, + copy_kernel_str_nofault, NULL); +} + +__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign) +{ + return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign, + copy_user_data_sleepable, NULL); +} + +__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign) +{ + return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign, + copy_user_str_sleepable, NULL); +} + +__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign, + struct task_struct *tsk) +{ + return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign, + copy_user_data_sleepable, tsk); +} + +__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign, + struct task_struct *tsk) +{ + return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign, + copy_user_str_sleepable, tsk); +} + __bpf_kfunc_end_defs(); |
