From ad35d8018669fd2eea76e3f74eb050fd3d2fb690 Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Sat, 18 Apr 2026 23:09:44 -0400 Subject: libbpf: Report error when a negative kprobe offset is specified In attach_kprobe(), the parsing logic uses sscanf() to extract the target function name and offset from the section definition. Currently, if a user specifies a negative offset (e.g., SEC("kprobe/func+-100")), the input is not explicitly caught and reported as an error. This commit updates the logic to explicitly notify the user when a negative integer is provided. To facilitate this check, the offset variable is changed from unsigned long to long so that sscanf() can accurately capture a negative input for evaluation. If a negative offset is detected, the loader will now print an informative warning stating that the offset must be non-negative, and return -EINVAL. Additionally, free(func) is called in this new error path to prevent a memory leak, as the function name string is dynamically allocated by sscanf(). 
Fixes: e3f9bc35ea7e9 ("libbpf: Allow decimal offset for kprobes") Signed-off-by: Aaron Tomlin Acked-by: Mykyta Yatsenko Link: https://lore.kernel.org/bpf/20260419030944.1423642-1-atomlin@atomlin.com Signed-off-by: Kumar Kartikeya Dwivedi --- tools/lib/bpf/libbpf.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3a80a018fc7d..83aae7a39d36 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -12280,7 +12280,7 @@ error: static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) { DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); - unsigned long offset = 0; + long offset = 0; const char *func_name; char *func; int n; @@ -12302,6 +12302,13 @@ static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf pr_warn("kprobe name is invalid: %s\n", func_name); return -EINVAL; } + + if (offset < 0) { + free(func); + pr_warn("kprobe offset must be a non-negative integer: %li\n", offset); + return -EINVAL; + } + if (opts.retprobe && offset != 0) { free(func); pr_warn("kretprobes do not support offset specification\n"); -- cgit v1.2.3 From 7c528b364bd8b2e5629aab1d84898c52c2085187 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Fri, 17 Apr 2026 16:36:31 -0700 Subject: selftests/bpf: Trace bpf_local_storage_update to debug flaky local storage tests task_local_storage/sys_enter_exit and cgrp_local_storage/ cgroup_iter_sleepable occasionally fail in CI possibly because bpf_{task,cgrp}_storage_get() returns NULL. Add a fexit probe on bpf_local_storage_update() to capture the actual error code when this happens. It will allow us to tell if it is trylock failure in kmalloc_nolock(), timeout/deadlock in rqspinlock or something else. 
Signed-off-by: Amery Hung Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20260417233631.1443199-1-ameryhung@gmail.com Signed-off-by: Kumar Kartikeya Dwivedi --- .../selftests/bpf/prog_tests/cgrp_local_storage.c | 15 +++++++++++++-- .../selftests/bpf/prog_tests/task_local_storage.c | 1 + tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c | 18 ++++++++++++++++++ .../testing/selftests/bpf/progs/task_local_storage.c | 19 +++++++++++++++++++ 4 files changed, 51 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c index 478a77cb67e6..c4398ccf3493 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -176,7 +176,7 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); union bpf_iter_link_info linfo; struct cgrp_ls_sleepable *skel; - struct bpf_link *link; + struct bpf_link *link, *fexit_link; int err, iter_fd; char buf[16]; @@ -200,16 +200,27 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id) if (!ASSERT_OK_PTR(link, "attach_iter")) goto out; + fexit_link = bpf_program__attach(skel->progs.fexit_update); + if (!ASSERT_OK_PTR(fexit_link, "attach_fexit")) + goto out_link; + iter_fd = bpf_iter_create(bpf_link__fd(link)); if (!ASSERT_GE(iter_fd, 0, "iter_create")) - goto out_link; + goto out_fexit_link; + + skel->bss->target_pid = sys_gettid(); /* trigger the program run */ (void)read(iter_fd, buf, sizeof(buf)); + skel->bss->target_pid = 0; + + ASSERT_EQ(skel->bss->update_err, 0, "update_err"); ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id"); close(iter_fd); +out_fexit_link: + bpf_link__destroy(fexit_link); out_link: bpf_link__destroy(link); out: diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c 
b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index 1b26c12f255a..5b2b56cc3a4f 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -47,6 +47,7 @@ static void test_sys_enter_exit(void) skel->bss->target_pid = 0; /* 2x gettid syscalls */ + ASSERT_EQ(skel->bss->update_err, 0, "update_err"); ASSERT_EQ(skel->bss->enter_cnt, 2, "enter_cnt"); ASSERT_EQ(skel->bss->exit_cnt, 2, "exit_cnt"); ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt"); diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c index a2de95f85648..37bd6b03ba01 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -4,6 +4,7 @@ #include #include #include "bpf_misc.h" +#include "err.h" char _license[] SEC("license") = "GPL"; @@ -16,6 +17,7 @@ struct { __s32 target_pid; __u64 cgroup_id; +long update_err; int target_hid; bool is_cgroup1; @@ -123,3 +125,19 @@ int yes_rcu_lock(void *ctx) bpf_rcu_read_unlock(); return 0; } + +SEC("fexit/bpf_local_storage_update") +int BPF_PROG(fexit_update, void *owner, struct bpf_local_storage_map *smap, + void *value, u64 map_flags, bool swap_uptrs, + struct bpf_local_storage_data *ret) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + if (task->pid != target_pid) + return 0; + + if (IS_ERR_VALUE(ret)) + update_err = PTR_ERR(ret); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_local_storage.c b/tools/testing/selftests/bpf/progs/task_local_storage.c index 80a0a20db88d..34fa3d6451d2 100644 --- a/tools/testing/selftests/bpf/progs/task_local_storage.c +++ b/tools/testing/selftests/bpf/progs/task_local_storage.c @@ -14,12 +14,15 @@ struct { __type(value, long); } enter_id SEC(".maps"); +#include "err.h" + #define MAGIC_VALUE 0xabcd1234 pid_t target_pid = 0; int mismatch_cnt = 0; int enter_cnt = 0; 
int exit_cnt = 0; +long update_err = 0; SEC("tp_btf/sys_enter") int BPF_PROG(on_enter, struct pt_regs *regs, long id) @@ -62,3 +65,19 @@ int BPF_PROG(on_exit, struct pt_regs *regs, long id) __sync_fetch_and_add(&mismatch_cnt, 1); return 0; } + +SEC("fexit/bpf_local_storage_update") +int BPF_PROG(fexit_update, void *owner, struct bpf_local_storage_map *smap, + void *value, u64 map_flags, bool swap_uptrs, + struct bpf_local_storage_data *ret) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + if (task->pid != target_pid) + return 0; + + if (IS_ERR_VALUE(ret)) + update_err = PTR_ERR(ret); + + return 0; +} -- cgit v1.2.3 From 31f61ac33032ee87ea404d6d996ba2c386502a36 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Tue, 14 Apr 2026 12:10:14 -0700 Subject: bpf: Refactor dynptr mutability tracking Redefine dynptr mutability and fix inconsistency in the verifier and kfunc signatures. Dynptr mutability is at two levels. The first is the bpf_dynptr structure and the second is the memory the dynptr points to. The verifier currently tracks the mutability of the bpf_dynptr struct through helper and kfunc prototypes, where "const struct bpf_dynptr *" means the structure itself is immutable. The second level is tracked in the upper bit of bpf_dynptr->size at runtime and is not changed in this patch. There are two types of inconsistency in the verifier regarding the mutability of the bpf_dynptr struct. First, there are many existing kfuncs whose prototypes are wrong. For example, bpf_dynptr_adjust() mutates a dynptr's start and offset but marks the argument as a const pointer. At the same time, many other kfuncs do not mutate the dynptr but mark themselves as mutable. Second, the verifier currently does not honor the const qualifier in kfunc prototypes, as it determines whether to tag the arg_type with MEM_RDONLY based on the register state. 
Since all the verifier cares about is preventing CONST_PTR_TO_DYNPTR from being destroyed in callbacks and global subprograms, redefine the mutability at the bpf_dynptr level to just bpf_dynptr_kern->data. Then, explicitly prohibit passing CONST_PTR_TO_DYNPTR to an argument tagged with MEM_UNINIT or OBJ_RELEASE. The mutability of a dynptr's view is not really interesting, so drop the MEM_RDONLY annotation for dynptr from the helpers and kfuncs. Plus, if the mutability of the entire bpf_dynptr were to be done correctly, it would kill the bpf_dynptr_adjust() usage in callbacks and global subprograms. Implementation-wise: - First, make sure all kfunc args are correctly tagged: Tag the dynptr argument of bpf_dynptr_file_discard() with OBJ_RELEASE. - Then, in process_dynptr_func(), make sure CONST_PTR_TO_DYNPTR cannot be passed to an argument tagged with MEM_UNINIT or OBJ_RELEASE. For MEM_UNINIT, it is already checked by is_dynptr_reg_valid_uninit(). For OBJ_RELEASE, check against OBJ_RELEASE instead of MEM_RDONLY and drop a now-identical check in unmark_stack_slots_dynptr(). - Remove the mutually exclusive check between MEM_UNINIT and MEM_RDONLY, but don't add a MEM_UNINIT and OBJ_RELEASE version as it is obviously wrong. Note that while this patch stops following the C semantics for the mutability of bpf_dynptr, the prototypes of kfuncs are still fixed to maintain the correct C semantics in the implementation. Adding or removing the const qualifier does not break backward compatibility. In addition, fix kfuncs dropping the const qualifier when casting the opaque bpf_dynptr to bpf_dynptr_kern. In test_kfunc_dynptr_param.c, initialize dynptr to 0 to avoid the -Wuninitialized-const-pointer warning. 
Signed-off-by: Amery Hung Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20260414191014.1218567-1-ameryhung@gmail.com Signed-off-by: Kumar Kartikeya Dwivedi --- fs/bpf_fs_kfuncs.c | 2 +- fs/verity/measure.c | 4 +- include/linux/bpf.h | 8 +-- kernel/bpf/btf.c | 2 +- kernel/bpf/helpers.c | 36 +++++------ kernel/bpf/verifier.c | 70 ++++++---------------- kernel/trace/bpf_trace.c | 22 +++---- tools/testing/selftests/bpf/bpf_kfuncs.h | 8 +-- tools/testing/selftests/bpf/progs/dynptr_success.c | 6 +- .../selftests/bpf/progs/test_kfunc_dynptr_param.c | 9 +-- 10 files changed, 65 insertions(+), 102 deletions(-) (limited to 'tools') diff --git a/fs/bpf_fs_kfuncs.c b/fs/bpf_fs_kfuncs.c index e4e51a1d0de2..9d27be058494 100644 --- a/fs/bpf_fs_kfuncs.c +++ b/fs/bpf_fs_kfuncs.c @@ -200,7 +200,7 @@ int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__str, const struct bpf_dynptr *value_p, int flags) { - struct bpf_dynptr_kern *value_ptr = (struct bpf_dynptr_kern *)value_p; + const struct bpf_dynptr_kern *value_ptr = (struct bpf_dynptr_kern *)value_p; struct inode *inode = d_inode(dentry); const void *value; u32 value_len; diff --git a/fs/verity/measure.c b/fs/verity/measure.c index 6a35623ebdf0..265fa0253e3d 100644 --- a/fs/verity/measure.c +++ b/fs/verity/measure.c @@ -118,9 +118,9 @@ __bpf_kfunc_start_defs(); * * Return: 0 on success, a negative value on error. 
*/ -__bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_p) +__bpf_kfunc int bpf_get_fsverity_digest(struct file *file, const struct bpf_dynptr *digest_p) { - struct bpf_dynptr_kern *digest_ptr = (struct bpf_dynptr_kern *)digest_p; + const struct bpf_dynptr_kern *digest_ptr = (struct bpf_dynptr_kern *)digest_p; const struct inode *inode = file_inode(file); u32 dynptr_sz = __bpf_dynptr_size(digest_ptr); struct fsverity_digest *arg; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b4b703c90ca9..3cb6b9e70080 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3622,8 +3622,8 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags); struct bpf_key *bpf_lookup_system_key(u64 id); void bpf_key_put(struct bpf_key *bkey); -int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p, - struct bpf_dynptr *sig_p, +int bpf_verify_pkcs7_signature(const struct bpf_dynptr *data_p, + const struct bpf_dynptr *sig_p, struct bpf_key *trusted_keyring); #else @@ -3641,8 +3641,8 @@ static inline void bpf_key_put(struct bpf_key *bkey) { } -static inline int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p, - struct bpf_dynptr *sig_p, +static inline int bpf_verify_pkcs7_signature(const struct bpf_dynptr *data_p, + const struct bpf_dynptr *sig_p, struct bpf_key *trusted_keyring) { return -EOPNOTSUPP; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index a62d78581207..3c2aaa3c5004 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -7973,7 +7973,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog) bpf_log(log, "arg#%d has invalid combination of tags\n", i); return -EINVAL; } - sub->args[i].arg_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY; + sub->args[i].arg_type = ARG_PTR_TO_DYNPTR; continue; } if (tags & ARG_TAG_TRUSTED) { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 2bb60200c266..baa12b24bb64 100644 --- 
a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1944,7 +1944,7 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_UNINIT_MEM, .arg2_type = ARG_CONST_SIZE_OR_ZERO, - .arg3_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, + .arg3_type = ARG_PTR_TO_DYNPTR, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; @@ -2001,7 +2001,7 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = { .func = bpf_dynptr_write, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, + .arg1_type = ARG_PTR_TO_DYNPTR, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg4_type = ARG_CONST_SIZE_OR_ZERO, @@ -2044,7 +2044,7 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = { .func = bpf_dynptr_data, .gpl_only = false, .ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL, - .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, + .arg1_type = ARG_PTR_TO_DYNPTR, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO, }; @@ -3072,7 +3072,7 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset, return bpf_dynptr_slice(p, offset, buffer__nullable, buffer__szk); } -__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u64 start, u64 end) +__bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr *p, u64 start, u64 end) { struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; u64 size; @@ -3093,14 +3093,14 @@ __bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u64 start, u64 end __bpf_kfunc bool bpf_dynptr_is_null(const struct bpf_dynptr *p) { - struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; return !ptr->data; } __bpf_kfunc bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *p) { - struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; if (!ptr->data) return false; @@ -3110,7 
+3110,7 @@ __bpf_kfunc bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *p) __bpf_kfunc u64 bpf_dynptr_size(const struct bpf_dynptr *p) { - struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; if (!ptr->data) return -EINVAL; @@ -3122,7 +3122,7 @@ __bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p, struct bpf_dynptr *clone__uninit) { struct bpf_dynptr_kern *clone = (struct bpf_dynptr_kern *)clone__uninit; - struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; if (!ptr->data) { bpf_dynptr_set_null(clone); @@ -3145,11 +3145,11 @@ __bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p, * Copies data from source dynptr to destination dynptr. * Returns 0 on success; negative error, otherwise. */ -__bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u64 dst_off, - struct bpf_dynptr *src_ptr, u64 src_off, u64 size) +__bpf_kfunc int bpf_dynptr_copy(const struct bpf_dynptr *dst_ptr, u64 dst_off, + const struct bpf_dynptr *src_ptr, u64 src_off, u64 size) { - struct bpf_dynptr_kern *dst = (struct bpf_dynptr_kern *)dst_ptr; - struct bpf_dynptr_kern *src = (struct bpf_dynptr_kern *)src_ptr; + const struct bpf_dynptr_kern *dst = (struct bpf_dynptr_kern *)dst_ptr; + const struct bpf_dynptr_kern *src = (struct bpf_dynptr_kern *)src_ptr; void *src_slice, *dst_slice; char buf[256]; u64 off; @@ -3200,9 +3200,9 @@ __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u64 dst_off, * at @offset with the constant byte @val. * Returns 0 on success; negative error, otherwise. 
*/ -__bpf_kfunc int bpf_dynptr_memset(struct bpf_dynptr *p, u64 offset, u64 size, u8 val) +__bpf_kfunc int bpf_dynptr_memset(const struct bpf_dynptr *p, u64 offset, u64 size, u8 val) { - struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; u64 chunk_sz, write_off; char buf[256]; void* slice; @@ -4214,13 +4214,13 @@ __bpf_kfunc void bpf_key_put(struct bpf_key *bkey) * * Return: 0 on success, a negative value on error. */ -__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p, - struct bpf_dynptr *sig_p, +__bpf_kfunc int bpf_verify_pkcs7_signature(const struct bpf_dynptr *data_p, + const struct bpf_dynptr *sig_p, struct bpf_key *trusted_keyring) { #ifdef CONFIG_SYSTEM_DATA_VERIFICATION - struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p; - struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p; + const struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p; + const struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p; const void *data, *sig; u32 data_len, sig_len; int ret; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 69d75515ed3f..185210b73385 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -717,15 +717,6 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re struct bpf_func_state *state = bpf_func(env, reg); int spi, ref_obj_id, i; - /* - * This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot - * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr - * is safe to do directly. - */ - if (reg->type == CONST_PTR_TO_DYNPTR) { - verifier_bug(env, "CONST_PTR_TO_DYNPTR cannot be released"); - return -EFAULT; - } spi = dynptr_get_spi(env, reg); if (spi < 0) return spi; @@ -7434,23 +7425,12 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, * bytes as STACK_DYNPTR in case of PTR_TO_STACK. 
In case of * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object. * - * Mutability of bpf_dynptr is at two levels, one is at the level of struct - * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct - * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can - * mutate the view of the dynptr and also possibly destroy it. In the latter - * case, it cannot mutate the bpf_dynptr itself but it can still mutate the - * memory that dynptr points to. - * - * The verifier will keep track both levels of mutation (bpf_dynptr's in - * reg->type and the memory's in reg->dynptr.type), but there is no support for - * readonly dynptr view yet, hence only the first case is tracked and checked. - * - * This is consistent with how C applies the const modifier to a struct object, - * where the pointer itself inside bpf_dynptr becomes const but not what it - * points to. - * - * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument - * type, and declare it as 'const struct bpf_dynptr *' in their prototype. + * Mutability of bpf_dynptr is at two levels: the dynptr and the memory the + * dynptr points to. At the first level, the verifier will make sure a + * CONST_PTR_TO_DYNPTR cannot be reinitialized or destroyed. The mutability of + * a dynptr's view (i.e., start and offset) is not tracked as there is not such + * use case. The second level is tracked using the upper bit of bpf_dynptr->size + * and checked dynamically during runtime. 
*/ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx, enum bpf_arg_type arg_type, int clone_ref_obj_id) @@ -7465,14 +7445,6 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn return -EINVAL; } - /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an - * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*): - */ - if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) { - verifier_bug(env, "misconfigured dynptr helper type flags"); - return -EFAULT; - } - /* MEM_UNINIT - Points to memory that is an appropriate candidate for * constructing a mutable bpf_dynptr object. * @@ -7480,13 +7452,12 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn * pointing to a region of at least 16 bytes which doesn't * contain an existing bpf_dynptr. * - * MEM_RDONLY - Points to a initialized bpf_dynptr that will not be - * mutated or destroyed. However, the memory it points to - * may be mutated. + * OBJ_RELEASE - Points to a initialized bpf_dynptr that will be + * destroyed. * - * None - Points to a initialized dynptr that can be mutated and - * destroyed, including mutation of the memory it points - * to. + * None - Points to a initialized dynptr that cannot be + * reinitialized or destroyed. However, the view of the + * dynptr and the memory it points to may be mutated. 
*/ if (arg_type & MEM_UNINIT) { int i; @@ -7505,10 +7476,10 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn } err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, clone_ref_obj_id); - } else /* MEM_RDONLY and None case from above */ { + } else /* OBJ_RELEASE and None case from above */ { /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */ - if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) { - verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n"); + if (reg->type == CONST_PTR_TO_DYNPTR && (arg_type & OBJ_RELEASE)) { + verbose(env, "CONST_PTR_TO_DYNPTR cannot be released\n"); return -EINVAL; } @@ -7519,8 +7490,8 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn return -EINVAL; } - /* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */ - if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) { + /* Fold modifiers (in this case, OBJ_RELEASE) when checking expected type */ + if (!is_dynptr_type_expected(env, reg, arg_type & ~OBJ_RELEASE)) { verbose(env, "Expected a dynptr of type %s as arg #%d\n", dynptr_type_str(arg_to_dynptr_type(arg_type)), regno - 1); @@ -9366,7 +9337,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, bpf_log(log, "R%d is not a pointer to arena or scalar.\n", regno); return -EINVAL; } - } else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) { + } else if (arg->arg_type == ARG_PTR_TO_DYNPTR) { ret = check_func_arg_reg_off(env, reg, regno, ARG_PTR_TO_DYNPTR); if (ret) return ret; @@ -12273,9 +12244,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR; int clone_ref_obj_id = 0; - if (reg->type == CONST_PTR_TO_DYNPTR) - dynptr_arg_type |= MEM_RDONLY; - if (is_kfunc_arg_uninit(btf, &args[i])) dynptr_arg_type |= MEM_UNINIT; @@ -12288,7 +12256,7 @@ static int 
check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) { dynptr_arg_type |= DYNPTR_TYPE_FILE; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) { - dynptr_arg_type |= DYNPTR_TYPE_FILE; + dynptr_arg_type |= DYNPTR_TYPE_FILE | OBJ_RELEASE; meta->release_regno = regno; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] && (dynptr_arg_type & MEM_UNINIT)) { @@ -18745,7 +18713,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) } else if (arg->arg_type == ARG_ANYTHING) { reg->type = SCALAR_VALUE; mark_reg_unknown(env, regs, i); - } else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) { + } else if (arg->arg_type == ARG_PTR_TO_DYNPTR) { /* assume unspecial LOCAL dynptr type */ __mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen); } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index af7079aa0f36..e916f0ccbed9 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3397,12 +3397,12 @@ typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struc * direct calls into all the specific callback implementations * (copy_user_data_sleepable, copy_user_data_nofault, and so on) */ -static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u64 doff, u64 size, +static __always_inline int __bpf_dynptr_copy_str(const struct bpf_dynptr *dptr, u64 doff, u64 size, const void *unsafe_src, copy_fn_t str_copy_fn, struct task_struct *tsk) { - struct bpf_dynptr_kern *dst; + const struct bpf_dynptr_kern *dst; u64 chunk_sz, off; void *dst_slice; int cnt, err; @@ -3438,7 +3438,7 @@ static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 u64 size, const void *unsafe_src, copy_fn_t copy_fn, struct task_struct *tsk) { - struct bpf_dynptr_kern *dst; + const struct bpf_dynptr_kern 
*dst; void *dst_slice; char buf[256]; u64 off, chunk_sz; @@ -3539,49 +3539,49 @@ __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid return bpf_send_signal_common(sig, type, task, value); } -__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u64 off, +__bpf_kfunc int bpf_probe_read_user_dynptr(const struct bpf_dynptr *dptr, u64 off, u64 size, const void __user *unsafe_ptr__ign) { return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign, copy_user_data_nofault, NULL); } -__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u64 off, +__bpf_kfunc int bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dptr, u64 off, u64 size, const void *unsafe_ptr__ign) { return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign, copy_kernel_data_nofault, NULL); } -__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u64 off, +__bpf_kfunc int bpf_probe_read_user_str_dynptr(const struct bpf_dynptr *dptr, u64 off, u64 size, const void __user *unsafe_ptr__ign) { return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign, copy_user_str_nofault, NULL); } -__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u64 off, +__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(const struct bpf_dynptr *dptr, u64 off, u64 size, const void *unsafe_ptr__ign) { return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign, copy_kernel_str_nofault, NULL); } -__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u64 off, +__bpf_kfunc int bpf_copy_from_user_dynptr(const struct bpf_dynptr *dptr, u64 off, u64 size, const void __user *unsafe_ptr__ign) { return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign, copy_user_data_sleepable, NULL); } -__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u64 off, +__bpf_kfunc int bpf_copy_from_user_str_dynptr(const struct bpf_dynptr *dptr, u64 off, u64 size, const 
void __user *unsafe_ptr__ign) { return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign, copy_user_str_sleepable, NULL); } -__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u64 off, +__bpf_kfunc int bpf_copy_from_user_task_dynptr(const struct bpf_dynptr *dptr, u64 off, u64 size, const void __user *unsafe_ptr__ign, struct task_struct *tsk) { @@ -3589,7 +3589,7 @@ __bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u64 off, copy_user_data_sleepable, tsk); } -__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u64 off, +__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(const struct bpf_dynptr *dptr, u64 off, u64 size, const void __user *unsafe_ptr__ign, struct task_struct *tsk) { diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 7dad01439391..ae71e9b69051 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -40,7 +40,7 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u64 offset, extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u64 offset, void *buffer, __u64 buffer__szk) __ksym __weak; -extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u64 start, __u64 end) __ksym __weak; +extern int bpf_dynptr_adjust(struct bpf_dynptr *ptr, __u64 start, __u64 end) __ksym __weak; extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak; extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak; extern __u64 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak; @@ -70,13 +70,13 @@ extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak; extern int bpf_get_file_xattr(struct file *file, const char *name, struct bpf_dynptr *value_ptr) __ksym; -extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym; +extern int bpf_get_fsverity_digest(struct file 
*file, const struct bpf_dynptr *digest_ptr) __ksym; extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym; extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; extern void bpf_key_put(struct bpf_key *key) __ksym; -extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, - struct bpf_dynptr *sig_ptr, +extern int bpf_verify_pkcs7_signature(const struct bpf_dynptr *data_ptr, + const struct bpf_dynptr *sig_ptr, struct bpf_key *trusted_keyring) __ksym; struct dentry; diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index e0d672d93adf..e0745b6e467e 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -914,7 +914,7 @@ void *user_ptr; char expected_str[384]; __u32 test_len[7] = {0/* placeholder */, 0, 1, 2, 255, 256, 257}; -typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u64 off, +typedef int (*bpf_read_dynptr_fn_t)(const struct bpf_dynptr *dptr, u64 off, u64 size, const void *unsafe_ptr); /* Returns the offset just before the end of the maximum sized xdp fragment. 
@@ -1106,7 +1106,7 @@ int test_copy_from_user_str_dynptr(void *ctx) return 0; } -static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u64 off, +static int bpf_copy_data_from_user_task(const struct bpf_dynptr *dptr, u64 off, u64 size, const void *unsafe_ptr) { struct task_struct *task = bpf_get_current_task_btf(); @@ -1114,7 +1114,7 @@ static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u64 off, return bpf_copy_from_user_task_dynptr(dptr, off, size, unsafe_ptr, task); } -static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u64 off, +static int bpf_copy_data_from_user_task_str(const struct bpf_dynptr *dptr, u64 off, u64 size, const void *unsafe_ptr) { struct task_struct *task = bpf_get_current_task_btf(); diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c index d249113ed657..1c6cfd0888ba 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c @@ -11,12 +11,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include "bpf_misc.h" - -extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; -extern void bpf_key_put(struct bpf_key *key) __ksym; -extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, - struct bpf_dynptr *sig_ptr, - struct bpf_key *trusted_keyring) __ksym; +#include "bpf_kfuncs.h" struct { __uint(type, BPF_MAP_TYPE_RINGBUF); @@ -38,7 +33,7 @@ SEC("?lsm.s/bpf") __failure __msg("cannot pass in dynptr at an offset=-8") int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) { - unsigned long val; + unsigned long val = 0; return bpf_verify_pkcs7_signature((struct bpf_dynptr *)&val, (struct bpf_dynptr *)&val, NULL); -- cgit v1.2.3 From 0aa6378695b8c67146130812f635f07c4898f171 Mon Sep 17 00:00:00 2001 From: Matt Bobrowski Date: Mon, 20 Apr 2026 09:37:34 +0000 Subject: selftests/bpf: Fix off-by-one in bpf_cpumask_populate related 
selftest The test_populate test uses >= instead of > when checking if the runtime nr_cpus exceeds the bit capacity of a cpumask_t. On a system where the physical CPU core count perfectly matches the CONFIG_NR_CPUS upper bound (e.g. nr_cpus = 512 and CONFIG_NR_CPUS = 512), the condition nr_cpus >= CPUMASK_TEST_MASKLEN * 8 evaluates to true (512 >= 512). This incorrectly causes the test to fail with an error value of 3. A 512-bit cpumask_t provides enough bits (indices 0 through 511) to represent 512 CPUs. The subsequent bpf_for(i, 0, nr_cpus) loop iterates up to nr_cpus - 1 (511), which perfectly aligns with the maximum valid index of the bitmask. Change the condition to nr_cpus > CPUMASK_TEST_MASKLEN * 8 to fix the false positive failure on these systems. Fixes: 918ba2636d4e ("selftests: bpf: add bpf_cpumask_populate selftests") Signed-off-by: Matt Bobrowski Acked-by: Paul Chaignon Link: https://lore.kernel.org/bpf/20260420093734.2400330-1-mattbobrowski@google.com Signed-off-by: Kumar Kartikeya Dwivedi --- tools/testing/selftests/bpf/progs/cpumask_success.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index 0e04c31b91c0..774706e7b058 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -866,7 +866,7 @@ int BPF_PROG(test_populate, struct task_struct *task, u64 clone_flags) * access NR_CPUS, the upper bound for nr_cpus, so we infer * it from the size of cpumask_t. 
*/ - if (nr_cpus < 0 || nr_cpus >= CPUMASK_TEST_MASKLEN * 8) { + if (nr_cpus < 0 || nr_cpus > CPUMASK_TEST_MASKLEN * 8) { err = 3; goto out; } -- cgit v1.2.3 From f7a6b9eaff3e6693ba3b19c5812e28538049bbf2 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Fri, 17 Apr 2026 15:30:18 +0100 Subject: bpf: Extend BTF UAPI vlen, kinds to use unused bits BTF maximum vlen is encoded using 16 bits with a maximum vlen of 65535. This has sufficed for structs, function parameters and enumerated type values. However, with upcoming BTF location information - in particular information about inline sites - this limit is surpassed. Use bits 16-23 - currently unused in BTF info - to extend to 24 bits, giving a max vlen of (2^24 - 1), or 16 million. Also extend BTF kind encoding from 5 to 7 bits, giving a maximum available number of kinds of 128. Since with the BTF location work we use another 3 kinds, we are fast approaching the current limit of 32. Convert BTF_MAX_* values to enums to allow them to be encoded in kernel BTF; this will allow us to detect if the running kernel supports a 24-bit vlen or not. Add one for max _possible_ (not used) kind. Fix up a few places in the kernel where a 16-bit vlen is assumed; remove BTF_INFO_MASK as now all bits are used. The vlen expansion was suggested by Andrii in [1]; the kind expansion is tackled here too as it may be needed also to support new kinds in BTF. 
[1] https://lore.kernel.org/bpf/CAEf4BzZx=X6vGqcA8SPU6D+v6k+TR=ZewebXMuXtpmML058piw@mail.gmail.com/ Suggested-by: Andrii Nakryiko Signed-off-by: Alan Maguire Acked-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20260417143023.1551481-2-alan.maguire@oracle.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 4 ++-- include/uapi/linux/btf.h | 26 ++++++++++++++------------ kernel/bpf/btf.c | 27 ++++++++++----------------- tools/include/uapi/linux/btf.h | 26 ++++++++++++++------------ 4 files changed, 40 insertions(+), 43 deletions(-) (limited to 'tools') diff --git a/include/linux/btf.h b/include/linux/btf.h index 48108471c5b1..c82d0d689059 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -415,12 +415,12 @@ static inline bool btf_type_is_array(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; } -static inline u16 btf_type_vlen(const struct btf_type *t) +static inline u32 btf_type_vlen(const struct btf_type *t) { return BTF_INFO_VLEN(t->info); } -static inline u16 btf_vlen(const struct btf_type *t) +static inline u32 btf_vlen(const struct btf_type *t) { return btf_type_vlen(t); } diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 638615ebddc2..618167cab4e6 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -33,20 +33,22 @@ struct btf_header { __u32 layout_len; /* length of layout section */ }; -/* Max # of type identifier */ -#define BTF_MAX_TYPE 0x000fffff -/* Max offset into the string section */ -#define BTF_MAX_NAME_OFFSET 0x00ffffff -/* Max # of struct/union/enum members or func args */ -#define BTF_MAX_VLEN 0xffff +enum btf_max { + /* Max possible kind */ + BTF_MAX_KIND = 0x0000007f, + /* Max # of type identifier */ + BTF_MAX_TYPE = 0x000fffff, + /* Max offset into the string section */ + BTF_MAX_NAME_OFFSET = 0x00ffffff, + /* Max # of struct/union/enum members or func args */ + BTF_MAX_VLEN = 0x00ffffff, +}; struct btf_type { __u32 name_off; /* "info" bits arrangement 
- * bits 0-15: vlen (e.g. # of struct's members) - * bits 16-23: unused - * bits 24-28: kind (e.g. int, ptr, array...etc) - * bits 29-30: unused + * bits 0-23: vlen (e.g. # of struct's members) + * bits 24-30: kind (e.g. int, ptr, array...etc) * bit 31: kind_flag, currently used by * struct, union, enum, fwd, enum64, * decl_tag and type_tag @@ -65,8 +67,8 @@ struct btf_type { }; }; -#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f) -#define BTF_INFO_VLEN(info) ((info) & 0xffff) +#define BTF_INFO_KIND(info) (((info) >> 24) & 0x7f) +#define BTF_INFO_VLEN(info) ((info) & 0xffffff) #define BTF_INFO_KFLAG(info) ((info) >> 31) enum { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 3c2aaa3c5004..77af44d8a3ad 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -182,7 +182,6 @@ #define BITS_ROUNDUP_BYTES(bits) \ (BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits)) -#define BTF_INFO_MASK 0x9f00ffff #define BTF_INT_MASK 0x0fffffff #define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE) #define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET) @@ -289,7 +288,7 @@ enum verifier_phase { struct resolve_vertex { const struct btf_type *t; u32 type_id; - u16 next_member; + u32 next_member; }; enum visit_state { @@ -2031,7 +2030,7 @@ static int env_stack_push(struct btf_verifier_env *env, } static void env_stack_set_next_member(struct btf_verifier_env *env, - u16 next_member) + u32 next_member) { env->stack[env->top_stack - 1].next_member = next_member; } @@ -3293,7 +3292,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env, struct btf *btf = env->btf; u32 struct_size = t->size; u32 offset; - u16 i; + u32 i; meta_needed = btf_type_vlen(t) * sizeof(*member); if (meta_left < meta_needed) { @@ -3369,7 +3368,7 @@ static int btf_struct_resolve(struct btf_verifier_env *env, { const struct btf_member *member; int err; - u16 i; + u32 i; /* Before continue resolving the next_member, * ensure the last member is indeed resolved to a @@ 
-4447,7 +4446,7 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env, const struct btf_enum *enums = btf_type_enum(t); struct btf *btf = env->btf; const char *fmt_str; - u16 i, nr_enums; + u32 i, nr_enums; u32 meta_needed; nr_enums = btf_type_vlen(t); @@ -4555,7 +4554,7 @@ static s32 btf_enum64_check_meta(struct btf_verifier_env *env, const struct btf_enum64 *enums = btf_type_enum64(t); struct btf *btf = env->btf; const char *fmt_str; - u16 i, nr_enums; + u32 i, nr_enums; u32 meta_needed; nr_enums = btf_type_vlen(t); @@ -4683,7 +4682,7 @@ static void btf_func_proto_log(struct btf_verifier_env *env, const struct btf_type *t) { const struct btf_param *args = (const struct btf_param *)(t + 1); - u16 nr_args = btf_type_vlen(t), i; + u32 nr_args = btf_type_vlen(t), i; btf_verifier_log(env, "return=%u args=(", t->type); if (!nr_args) { @@ -4929,7 +4928,7 @@ static int btf_datasec_resolve(struct btf_verifier_env *env, { const struct btf_var_secinfo *vsi; struct btf *btf = env->btf; - u16 i; + u32 i; env->resolve_mode = RESOLVE_TBD; for_each_vsi_from(i, v->next_member, v->t, vsi) { @@ -5183,7 +5182,7 @@ static int btf_func_proto_check(struct btf_verifier_env *env, const struct btf_type *ret_type; const struct btf_param *args; const struct btf *btf; - u16 nr_args, i; + u32 nr_args, i; int err; btf = env->btf; @@ -5278,7 +5277,7 @@ static int btf_func_check(struct btf_verifier_env *env, const struct btf_type *proto_type; const struct btf_param *args; const struct btf *btf; - u16 nr_args, i; + u32 nr_args, i; btf = env->btf; proto_type = btf_type_by_id(btf, t->type); @@ -5336,12 +5335,6 @@ static s32 btf_check_meta(struct btf_verifier_env *env, } meta_left -= sizeof(*t); - if (t->info & ~BTF_INFO_MASK) { - btf_verifier_log(env, "[%u] Invalid btf_info:%x", - env->log_type_id, t->info); - return -EINVAL; - } - if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX || BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) { btf_verifier_log(env, "[%u] Invalid kind:%u", diff --git 
a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 638615ebddc2..618167cab4e6 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -33,20 +33,22 @@ struct btf_header { __u32 layout_len; /* length of layout section */ }; -/* Max # of type identifier */ -#define BTF_MAX_TYPE 0x000fffff -/* Max offset into the string section */ -#define BTF_MAX_NAME_OFFSET 0x00ffffff -/* Max # of struct/union/enum members or func args */ -#define BTF_MAX_VLEN 0xffff +enum btf_max { + /* Max possible kind */ + BTF_MAX_KIND = 0x0000007f, + /* Max # of type identifier */ + BTF_MAX_TYPE = 0x000fffff, + /* Max offset into the string section */ + BTF_MAX_NAME_OFFSET = 0x00ffffff, + /* Max # of struct/union/enum members or func args */ + BTF_MAX_VLEN = 0x00ffffff, +}; struct btf_type { __u32 name_off; /* "info" bits arrangement - * bits 0-15: vlen (e.g. # of struct's members) - * bits 16-23: unused - * bits 24-28: kind (e.g. int, ptr, array...etc) - * bits 29-30: unused + * bits 0-23: vlen (e.g. # of struct's members) + * bits 24-30: kind (e.g. int, ptr, array...etc) * bit 31: kind_flag, currently used by * struct, union, enum, fwd, enum64, * decl_tag and type_tag @@ -65,8 +67,8 @@ struct btf_type { }; }; -#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f) -#define BTF_INFO_VLEN(info) ((info) & 0xffff) +#define BTF_INFO_KIND(info) (((info) >> 24) & 0x7f) +#define BTF_INFO_VLEN(info) ((info) & 0xffffff) #define BTF_INFO_KFLAG(info) ((info) >> 31) enum { -- cgit v1.2.3 From cacd6729c09236245d921464eb28e69a6d573412 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Fri, 17 Apr 2026 15:30:19 +0100 Subject: libbpf: Adjust btf_vlen() to return a __u32 Now that vlen is 24 bits, btf_vlen() must return a __u32. Adjust use cases in libbpf accordingly. Also add error handling to avoid vlen overflow in btf_type_inc_vlen(). 
Signed-off-by: Alan Maguire Acked-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20260417143023.1551481-3-alan.maguire@oracle.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 50 ++++++++++++++++++++++++++++++----------------- tools/lib/bpf/btf.h | 2 +- tools/lib/bpf/btf_dump.c | 24 +++++++++++------------ tools/lib/bpf/relo_core.c | 16 +++++++-------- 4 files changed, 53 insertions(+), 39 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index ceb57b46a878..267904939098 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -421,7 +421,7 @@ static int btf_type_size_unknown(const struct btf *btf, const struct btf_type *t { __u32 l_cnt = btf->hdr.layout_len / sizeof(struct btf_layout); struct btf_layout *l = btf->layout; - __u16 vlen = btf_vlen(t); + __u32 vlen = btf_vlen(t); __u32 kind = btf_kind(t); /* Fall back to base BTF if needed as they share layout information */ @@ -454,7 +454,7 @@ static int btf_type_size_unknown(const struct btf *btf, const struct btf_type *t static int btf_type_size(const struct btf *btf, const struct btf_type *t) { const int base_size = sizeof(struct btf_type); - __u16 vlen = btf_vlen(t); + __u32 vlen = btf_vlen(t); switch (btf_kind(t)) { case BTF_KIND_FWD: @@ -506,7 +506,7 @@ static int btf_bswap_type_rest(struct btf_type *t) struct btf_array *a; struct btf_param *p; struct btf_enum *e; - __u16 vlen = btf_vlen(t); + __u32 vlen = btf_vlen(t); int i; switch (btf_kind(t)) { @@ -1007,7 +1007,7 @@ int btf__align_of(const struct btf *btf, __u32 id) case BTF_KIND_STRUCT: case BTF_KIND_UNION: { const struct btf_member *m = btf_members(t); - __u16 vlen = btf_vlen(t); + __u32 vlen = btf_vlen(t); int i, max_align = 1, align; for (i = 0; i < vlen; i++, m++) { @@ -2121,9 +2121,12 @@ static void *btf_add_type_mem(struct btf *btf, size_t add_sz) btf->hdr.type_len, UINT_MAX, add_sz); } -static void btf_type_inc_vlen(struct btf_type *t) +static int btf_type_inc_vlen(struct btf_type *t) { 
+ if (btf_vlen(t) == BTF_MAX_VLEN) + return -ENOSPC; t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t)); + return 0; } static void btf_hdr_update_type_len(struct btf *btf, int new_len) @@ -2652,6 +2655,8 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, t = btf_last_type(btf); if (!btf_is_composite(t)) return libbpf_err(-EINVAL); + if (btf_vlen(t) == BTF_MAX_VLEN) + return libbpf_err(-ENOSPC); if (validate_type_id(type_id)) return libbpf_err(-EINVAL); @@ -2686,6 +2691,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, /* btf_add_type_mem can invalidate t pointer */ t = btf_last_type(btf); + /* update parent type's vlen and kflag */ t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t)); @@ -2796,7 +2802,9 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) /* update parent type's vlen */ t = btf_last_type(btf); - btf_type_inc_vlen(t); + err = btf_type_inc_vlen(t); + if (err) + return libbpf_err(err); /* if negative value, set signedness to signed */ if (value < 0) @@ -2873,7 +2881,9 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value) /* update parent type's vlen */ t = btf_last_type(btf); - btf_type_inc_vlen(t); + err = btf_type_inc_vlen(t); + if (err) + return libbpf_err(err); btf_hdr_update_type_len(btf, btf->hdr.type_len + sz); return 0; @@ -3115,7 +3125,9 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id) /* update parent type's vlen */ t = btf_last_type(btf); - btf_type_inc_vlen(t); + err = btf_type_inc_vlen(t); + if (err) + return libbpf_err(err); btf_hdr_update_type_len(btf, btf->hdr.type_len + sz); return 0; @@ -3257,7 +3269,9 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ /* update parent type's vlen */ t = btf_last_type(btf); - btf_type_inc_vlen(t); + err = btf_type_inc_vlen(t); + if (err) + return libbpf_err(err); btf_hdr_update_type_len(btf, btf->hdr.type_len + 
sz); return 0; @@ -4311,7 +4325,7 @@ static long btf_hash_enum(struct btf_type *t) static bool btf_equal_enum_members(struct btf_type *t1, struct btf_type *t2) { const struct btf_enum *m1, *m2; - __u16 vlen; + __u32 vlen; int i; vlen = btf_vlen(t1); @@ -4329,7 +4343,7 @@ static bool btf_equal_enum_members(struct btf_type *t1, struct btf_type *t2) static bool btf_equal_enum64_members(struct btf_type *t1, struct btf_type *t2) { const struct btf_enum64 *m1, *m2; - __u16 vlen; + __u32 vlen; int i; vlen = btf_vlen(t1); @@ -4406,7 +4420,7 @@ static long btf_hash_struct(struct btf_type *t) static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2) { const struct btf_member *m1, *m2; - __u16 vlen; + __u32 vlen; int i; if (!btf_equal_common(t1, t2)) @@ -4482,7 +4496,7 @@ static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2) static long btf_hash_fnproto(struct btf_type *t) { const struct btf_param *member = btf_params(t); - __u16 vlen = btf_vlen(t); + __u32 vlen = btf_vlen(t); long h = btf_hash_common(t); int i; @@ -4504,7 +4518,7 @@ static long btf_hash_fnproto(struct btf_type *t) static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2) { const struct btf_param *m1, *m2; - __u16 vlen; + __u32 vlen; int i; if (!btf_equal_common(t1, t2)) @@ -4530,7 +4544,7 @@ static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2) static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) { const struct btf_param *m1, *m2; - __u16 vlen; + __u32 vlen; int i; /* skip return type ID */ @@ -5077,7 +5091,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, case BTF_KIND_STRUCT: case BTF_KIND_UNION: { const struct btf_member *cand_m, *canon_m; - __u16 vlen; + __u32 vlen; if (!btf_shallow_equal_struct(cand_type, canon_type)) return 0; @@ -5105,7 +5119,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, case BTF_KIND_FUNC_PROTO: { const struct btf_param *cand_p, *canon_p; - __u16 
vlen; + __u32 vlen; if (!btf_compat_fnproto(cand_type, canon_type)) return 0; @@ -5439,7 +5453,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_FUNC_PROTO: { struct btf_param *param; - __u16 vlen; + __u32 vlen; int i; ref_type_id = btf_dedup_ref_type(d, t->type); diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index a1f8deca2603..1a31f2da947f 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -435,7 +435,7 @@ static inline __u16 btf_kind(const struct btf_type *t) return BTF_INFO_KIND(t->info); } -static inline __u16 btf_vlen(const struct btf_type *t) +static inline __u32 btf_vlen(const struct btf_type *t) { return BTF_INFO_VLEN(t->info); } diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 53c6624161d7..cc1ba65bb6c5 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -316,7 +316,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d) { int i, j, n = btf__type_cnt(d->btf); const struct btf_type *t; - __u16 vlen; + __u32 vlen; for (i = d->last_id + 1; i < n; i++) { t = btf__type_by_id(d->btf, i); @@ -485,7 +485,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) */ struct btf_dump_type_aux_state *tstate = &d->type_states[id]; const struct btf_type *t; - __u16 vlen; + __u32 vlen; int err, i; /* return true, letting typedefs know that it's ok to be emitted */ @@ -798,7 +798,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) */ if (top_level_def || t->name_off == 0) { const struct btf_member *m = btf_members(t); - __u16 vlen = btf_vlen(t); + __u32 vlen = btf_vlen(t); int i, new_cont_id; new_cont_id = t->name_off == 0 ? 
cont_id : id; @@ -820,7 +820,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) break; case BTF_KIND_FUNC_PROTO: { const struct btf_param *p = btf_params(t); - __u16 n = btf_vlen(t); + __u32 n = btf_vlen(t); int i; btf_dump_emit_type(d, t->type, cont_id); @@ -839,7 +839,7 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id, { const struct btf_member *m; int max_align = 1, align, i, bit_sz; - __u16 vlen; + __u32 vlen; m = btf_members(t); vlen = btf_vlen(t); @@ -973,7 +973,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, bool is_struct = btf_is_struct(t); bool packed, prev_bitfield = false; int align, i, off = 0; - __u16 vlen = btf_vlen(t); + __u32 vlen = btf_vlen(t); align = btf__align_of(d->btf, id); packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0; @@ -1064,7 +1064,7 @@ static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id, static void btf_dump_emit_enum32_val(struct btf_dump *d, const struct btf_type *t, - int lvl, __u16 vlen) + int lvl, __u32 vlen) { const struct btf_enum *v = btf_enum(t); bool is_signed = btf_kflag(t); @@ -1089,7 +1089,7 @@ static void btf_dump_emit_enum32_val(struct btf_dump *d, static void btf_dump_emit_enum64_val(struct btf_dump *d, const struct btf_type *t, - int lvl, __u16 vlen) + int lvl, __u32 vlen) { const struct btf_enum64 *v = btf_enum64(t); bool is_signed = btf_kflag(t); @@ -1122,7 +1122,7 @@ static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, const struct btf_type *t, int lvl) { - __u16 vlen = btf_vlen(t); + __u32 vlen = btf_vlen(t); btf_dump_printf(d, "enum%s%s", t->name_off ? 
" " : "", @@ -1542,7 +1542,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, } case BTF_KIND_FUNC_PROTO: { const struct btf_param *p = btf_params(t); - __u16 vlen = btf_vlen(t); + __u32 vlen = btf_vlen(t); int i; /* @@ -2159,7 +2159,7 @@ static int btf_dump_struct_data(struct btf_dump *d, const void *data) { const struct btf_member *m = btf_members(t); - __u16 n = btf_vlen(t); + __u32 n = btf_vlen(t); int i, err = 0; /* note that we increment depth before calling btf_dump_print() below; @@ -2449,7 +2449,7 @@ static int btf_dump_type_data_check_zero(struct btf_dump *d, case BTF_KIND_STRUCT: case BTF_KIND_UNION: { const struct btf_member *m = btf_members(t); - __u16 n = btf_vlen(t); + __u32 n = btf_vlen(t); /* if any struct/union member is non-zero, the struct/union * is considered non-zero and dumped. diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 0ccc8f548cba..6ae3f2a15ad0 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -191,8 +191,8 @@ recur: case BTF_KIND_FUNC_PROTO: { struct btf_param *local_p = btf_params(local_type); struct btf_param *targ_p = btf_params(targ_type); - __u16 local_vlen = btf_vlen(local_type); - __u16 targ_vlen = btf_vlen(targ_type); + __u32 local_vlen = btf_vlen(local_type); + __u32 targ_vlen = btf_vlen(targ_type); int i, err; if (local_vlen != targ_vlen) @@ -1457,8 +1457,8 @@ static bool bpf_core_names_match(const struct btf *local_btf, size_t local_name_ static int bpf_core_enums_match(const struct btf *local_btf, const struct btf_type *local_t, const struct btf *targ_btf, const struct btf_type *targ_t) { - __u16 local_vlen = btf_vlen(local_t); - __u16 targ_vlen = btf_vlen(targ_t); + __u32 local_vlen = btf_vlen(local_t); + __u32 targ_vlen = btf_vlen(targ_t); int i, j; if (local_t->size != targ_t->size) @@ -1498,8 +1498,8 @@ static int bpf_core_composites_match(const struct btf *local_btf, const struct b bool behind_ptr, int level) { const struct btf_member *local_m = 
btf_members(local_t); - __u16 local_vlen = btf_vlen(local_t); - __u16 targ_vlen = btf_vlen(targ_t); + __u32 local_vlen = btf_vlen(local_t); + __u32 targ_vlen = btf_vlen(targ_t); int i, j, err; if (local_vlen > targ_vlen) @@ -1674,8 +1674,8 @@ recur: case BTF_KIND_FUNC_PROTO: { struct btf_param *local_p = btf_params(local_t); struct btf_param *targ_p = btf_params(targ_t); - __u16 local_vlen = btf_vlen(local_t); - __u16 targ_vlen = btf_vlen(targ_t); + __u32 local_vlen = btf_vlen(local_t); + __u32 targ_vlen = btf_vlen(targ_t); int i, err; if (local_k != targ_k) -- cgit v1.2.3 From 22b402457ee40f64ea220f4b60776a612f084636 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Fri, 17 Apr 2026 15:30:20 +0100 Subject: bpftool: Support 24-bit vlen Adjust btf_vlen() usage to handle 24-bit vlen. Signed-off-by: Alan Maguire Link: https://lore.kernel.org/r/20260417143023.1551481-4-alan.maguire@oracle.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/btf.c | 17 ++++++----------- tools/bpf/bpftool/btf_dumper.c | 4 ++-- tools/bpf/bpftool/gen.c | 16 +++++++++------- 3 files changed, 17 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 2e899e940034..6ef908adf3a4 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -179,8 +179,7 @@ static int dump_btf_type(const struct btf *btf, __u32 id, case BTF_KIND_STRUCT: case BTF_KIND_UNION: { const struct btf_member *m = (const void *)(t + 1); - __u16 vlen = BTF_INFO_VLEN(t->info); - int i; + __u32 i, vlen = BTF_INFO_VLEN(t->info); if (json_output) { jsonw_uint_field(w, "size", t->size); @@ -225,9 +224,8 @@ static int dump_btf_type(const struct btf *btf, __u32 id, } case BTF_KIND_ENUM: { const struct btf_enum *v = (const void *)(t + 1); - __u16 vlen = BTF_INFO_VLEN(t->info); + __u32 i, vlen = BTF_INFO_VLEN(t->info); const char *encoding; - int i; encoding = btf_kflag(t) ? 
"SIGNED" : "UNSIGNED"; if (json_output) { @@ -263,9 +261,8 @@ static int dump_btf_type(const struct btf *btf, __u32 id, } case BTF_KIND_ENUM64: { const struct btf_enum64 *v = btf_enum64(t); - __u16 vlen = btf_vlen(t); + __u32 i, vlen = btf_vlen(t); const char *encoding; - int i; encoding = btf_kflag(t) ? "SIGNED" : "UNSIGNED"; if (json_output) { @@ -325,8 +322,7 @@ static int dump_btf_type(const struct btf *btf, __u32 id, } case BTF_KIND_FUNC_PROTO: { const struct btf_param *p = (const void *)(t + 1); - __u16 vlen = BTF_INFO_VLEN(t->info); - int i; + __u32 i, vlen = BTF_INFO_VLEN(t->info); if (json_output) { jsonw_uint_field(w, "ret_type_id", t->type); @@ -369,8 +365,7 @@ static int dump_btf_type(const struct btf *btf, __u32 id, case BTF_KIND_DATASEC: { const struct btf_var_secinfo *v = (const void *)(t + 1); const struct btf_type *vt; - __u16 vlen = BTF_INFO_VLEN(t->info); - int i; + __u32 i, vlen = BTF_INFO_VLEN(t->info); if (json_output) { jsonw_uint_field(w, "size", t->size); @@ -675,7 +670,7 @@ static __u64 btf_name_hasher(__u64 hash, const struct btf *btf, __u32 name_off) static __u64 btf_type_disambig_hash(const struct btf *btf, __u32 id, bool include_members) { const struct btf_type *t = btf__type_by_id(btf, id); - int i; + __u32 i; size_t hash = 0; hash = btf_name_hasher(hash, btf, t->name_off); diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index def297e879f4..9dc8425b1789 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -150,7 +150,7 @@ static int btf_dumper_enum(const struct btf_dumper *d, { const struct btf_enum *enums = btf_enum(t); __s64 value; - __u16 i; + __u32 i; switch (t->size) { case 8: @@ -189,7 +189,7 @@ static int btf_dumper_enum64(const struct btf_dumper *d, const struct btf_enum64 *enums = btf_enum64(t); __u32 val_lo32, val_hi32; __u64 value; - __u16 i; + __u32 i; value = *(__u64 *)data; val_lo32 = (__u32)value; diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c 
index 2f9e10752e28..37159e02f418 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -2094,7 +2094,8 @@ btfgen_mark_type(struct btfgen_info *info, unsigned int type_id, bool follow_poi struct btf_type *cloned_type; struct btf_param *param; struct btf_array *array; - int err, i; + __u32 i; + int err; if (type_id == 0) return 0; @@ -2229,7 +2230,8 @@ static int btfgen_mark_type_match(struct btfgen_info *info, __u32 type_id, bool const struct btf_type *btf_type; struct btf *btf = info->src_btf; struct btf_type *cloned_type; - int i, err; + int err; + __u32 i; if (type_id == 0) return 0; @@ -2249,7 +2251,7 @@ static int btfgen_mark_type_match(struct btfgen_info *info, __u32 type_id, bool case BTF_KIND_STRUCT: case BTF_KIND_UNION: { struct btf_member *m = btf_members(btf_type); - __u16 vlen = btf_vlen(btf_type); + __u32 vlen = btf_vlen(btf_type); if (behind_ptr) break; @@ -2286,7 +2288,7 @@ static int btfgen_mark_type_match(struct btfgen_info *info, __u32 type_id, bool break; } case BTF_KIND_FUNC_PROTO: { - __u16 vlen = btf_vlen(btf_type); + __u32 vlen = btf_vlen(btf_type); struct btf_param *param; /* mark ret type */ @@ -2492,8 +2494,9 @@ static struct btf *btfgen_get_btf(struct btfgen_info *info) { struct btf *btf_new = NULL; unsigned int *ids = NULL; - unsigned int i, n = btf__type_cnt(info->marked_btf); + unsigned int n = btf__type_cnt(info->marked_btf); int err = 0; + __u32 i; btf_new = btf__new_empty(); if (!btf_new) { @@ -2523,8 +2526,7 @@ static struct btf *btfgen_get_btf(struct btfgen_info *info) /* add members for struct and union */ if (btf_is_composite(type)) { struct btf_member *cloned_m, *m; - unsigned short vlen; - int idx_src; + __u32 vlen, idx_src; name = btf__str_by_offset(info->src_btf, type->name_off); -- cgit v1.2.3 From 855af3e775670fa0a2493f3e61f4da38f956ef47 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Fri, 17 Apr 2026 15:30:21 +0100 Subject: selftests/bpf: Fix up btf/invalid test for extended kind With extended kinds, 32 
becomes a valid (but not used) BTF info kind value; fix up the test to check for the "Invalid kind" rather than "Invalid btf_info" message. Since all bits are used in BTF info, it is no longer possible to craft an invalid BTF info value. Use 127 (new maximum possible kind value). Signed-off-by: Alan Maguire Acked-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20260417143023.1551481-5-alan.maguire@oracle.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/btf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 054ecb6b1e9f..0cc347e32db3 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -1924,11 +1924,11 @@ static struct btf_raw_test raw_tests[] = { }, { - .descr = "invalid BTF_INFO", + .descr = "invalid BTF kind", .raw_types = { /* int */ /* [1] */ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - BTF_TYPE_ENC(0, 0x20000000, 4), + BTF_TYPE_ENC(0, 0x7f000000, 4), BTF_END_RAW, }, .str_sec = "", @@ -1941,7 +1941,7 @@ static struct btf_raw_test raw_tests[] = { .value_type_id = 1, .max_entries = 4, .btf_load_err = true, - .err_str = "Invalid btf_info", + .err_str = "Invalid kind", }, { -- cgit v1.2.3 From ad256554f1065feb17c094f7aab16d75ad41f60c Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Fri, 17 Apr 2026 15:30:22 +0100 Subject: selftests/bpf: Fix up __u16 vlen assumptions Fix up a few cases where we assume vlen is 16 bits. 
Signed-off-by: Alan Maguire Acked-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20260417143023.1551481-6-alan.maguire@oracle.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/btf.c | 2 +- tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c | 3 +-- tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c | 3 +-- tools/testing/selftests/bpf/test_progs.c | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 0cc347e32db3..a9de328a8697 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -8092,7 +8092,7 @@ static struct btf_dedup_test dedup_tests[] = { static int btf_type_size(const struct btf_type *t) { int base_size = sizeof(struct btf_type); - __u16 vlen = BTF_INFO_VLEN(t->info); + __u32 vlen = BTF_INFO_VLEN(t->info); __u16 kind = BTF_INFO_KIND(t->info); switch (kind) { diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c index 5bc15bb6b7ce..6bc31236805c 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c @@ -487,9 +487,8 @@ static void test_split_module(void) for (i = 0; i < ARRAY_SIZE(mod_funcs); i++) { const struct btf_param *p; const struct btf_type *t; - __u16 vlen; + __u32 vlen, j; __u32 id; - int j; id = btf__find_by_name_kind(btf1, mod_funcs[i], BTF_KIND_FUNC); if (!ASSERT_GE(id, nr_base_types, "func_id")) diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c index 469e92869523..5064aeb8fe67 100644 --- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c @@ -253,8 +253,7 @@ static int find_field_offset_aux(struct btf *btf, int btf_id, char *field_name, 
{ const struct btf_type *type = btf__type_by_id(btf, btf_id); const struct btf_member *m; - __u16 mnum; - int i; + __u32 mnum, i; if (!type) { PRINT_FAIL("Can't find btf_type for id %d\n", btf_id); diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 7fe16b5131b1..cc14b13e23fe 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1257,7 +1257,7 @@ int get_bpf_max_tramp_links_from(struct btf *btf) const struct btf_type *t; __u32 i, type_cnt; const char *name; - __u16 j, vlen; + __u32 j, vlen; for (i = 1, type_cnt = btf__type_cnt(btf); i < type_cnt; i++) { t = btf__type_by_id(btf, i); -- cgit v1.2.3 From 0cd420a6f40c7ee4e58c5277df6bf66efcfcdf1a Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Wed, 22 Apr 2026 12:41:10 -0700 Subject: libbpf: Add section handlers for sleepable tracepoints Add SEC_DEF entries for sleepable tracepoint variants: - "tp_btf.s+" for sleepable BTF-based raw tracepoints - "raw_tp.s+" for sleepable raw tracepoints - "raw_tracepoint.s+" (alias) - "tp.s+" for sleepable classic tracepoints - "tracepoint.s+" (alias) Extract sec_name_match_prefix() to share the prefix matching logic between attach_tp() and attach_raw_tp(), eliminating duplicated loops and hardcoded strcmp() checks for bare section names. 
Signed-off-by: Mykyta Yatsenko Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20260422-sleepable_tracepoints-v13-5-99005dff21ef@meta.com Signed-off-by: Kumar Kartikeya Dwivedi --- tools/lib/bpf/libbpf.c | 88 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 57 insertions(+), 31 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 83aae7a39d36..ab2071fdd3e8 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10018,11 +10018,16 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE), SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("tracepoint.s+", TRACEPOINT, 0, SEC_SLEEPABLE, attach_tp), + SEC_DEF("tp.s+", TRACEPOINT, 0, SEC_SLEEPABLE, attach_tp), SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tracepoint.s+", RAW_TRACEPOINT, 0, SEC_SLEEPABLE, attach_raw_tp), + SEC_DEF("raw_tp.s+", RAW_TRACEPOINT, 0, SEC_SLEEPABLE, attach_raw_tp), SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("tp_btf.s+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), @@ -13152,25 +13157,61 @@ struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog, return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL); } +/* + * Match section name against a prefix array. 
Returns pointer past + * "prefix/" on match, empty string for bare sections (exact prefix + * match), or NULL if no prefix matches. + */ +static const char *sec_name_match_prefix(const char *sec_name, + const char *const *prefixes, + size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) { + size_t pfx_len; + + if (!str_has_pfx(sec_name, prefixes[i])) + continue; + + pfx_len = strlen(prefixes[i]); + if (sec_name[pfx_len] == '\0') + return sec_name + pfx_len; + + if (sec_name[pfx_len] != '/' || sec_name[pfx_len + 1] == '\0') + continue; + + return sec_name + pfx_len + 1; + } + return NULL; +} + static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) { + static const char *const prefixes[] = { + "tp.s", + "tp", + "tracepoint.s", + "tracepoint", + }; char *sec_name, *tp_cat, *tp_name; + const char *match; *link = NULL; - /* no auto-attach for SEC("tp") or SEC("tracepoint") */ - if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0) + match = sec_name_match_prefix(prog->sec_name, prefixes, ARRAY_SIZE(prefixes)); + if (!match) { + pr_warn("prog '%s': invalid section name '%s'\n", prog->name, prog->sec_name); + return -EINVAL; + } + if (!match[0]) /* bare section name no autoattach */ return 0; sec_name = strdup(prog->sec_name); if (!sec_name) return -ENOMEM; - /* extract "tp//" or "tracepoint//" */ - if (str_has_pfx(prog->sec_name, "tp/")) - tp_cat = sec_name + sizeof("tp/") - 1; - else - tp_cat = sec_name + sizeof("tracepoint/") - 1; + tp_cat = sec_name + (match - prog->sec_name); tp_name = strchr(tp_cat, '/'); if (!tp_name) { free(sec_name); @@ -13234,37 +13275,22 @@ static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf "raw_tracepoint", "raw_tp.w", "raw_tracepoint.w", + "raw_tp.s", + "raw_tracepoint.s", }; - size_t i; - const char *tp_name = NULL; + const char *match; *link = NULL; - for (i = 0; i < ARRAY_SIZE(prefixes); i++) { - size_t pfx_len; - - if 
(!str_has_pfx(prog->sec_name, prefixes[i])) - continue; - - pfx_len = strlen(prefixes[i]); - /* no auto-attach case of, e.g., SEC("raw_tp") */ - if (prog->sec_name[pfx_len] == '\0') - return 0; - - if (prog->sec_name[pfx_len] != '/') - continue; - - tp_name = prog->sec_name + pfx_len + 1; - break; - } - - if (!tp_name) { - pr_warn("prog '%s': invalid section name '%s'\n", - prog->name, prog->sec_name); + match = sec_name_match_prefix(prog->sec_name, prefixes, ARRAY_SIZE(prefixes)); + if (!match) { + pr_warn("prog '%s': invalid section name '%s'\n", prog->name, prog->sec_name); return -EINVAL; } + if (!match[0]) + return 0; - *link = bpf_program__attach_raw_tracepoint(prog, tp_name); + *link = bpf_program__attach_raw_tracepoint(prog, match); return libbpf_get_error(*link); } -- cgit v1.2.3 From 8a20655749c625dcc4debdfdeeaa0cf8bb85c203 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Wed, 22 Apr 2026 12:41:11 -0700 Subject: selftests/bpf: Add tests for sleepable tracepoint programs Cover all three sleepable tracepoint types (tp_btf.s, raw_tp.s, tp.s) and sys_exit (via bpf_task_pt_regs) with functional tests using bpf_copy_from_user() on getcwd. Verify alias and bare SEC variants, bpf_prog_test_run_raw_tp() with BPF_F_TEST_RUN_ON_CPU rejection, attach-time rejection on non-faultable tracepoints, and load-time rejection for sleepable tp_btf on non-faultable tracepoints. 
Signed-off-by: Mykyta Yatsenko Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20260422-sleepable_tracepoints-v13-6-99005dff21ef@meta.com Signed-off-by: Kumar Kartikeya Dwivedi --- .../bpf/prog_tests/sleepable_tracepoints.c | 142 +++++++++++++++++++++ .../bpf/progs/test_sleepable_tracepoints.c | 112 ++++++++++++++++ .../bpf/progs/test_sleepable_tracepoints_fail.c | 18 +++ tools/testing/selftests/bpf/verifier/sleepable.c | 17 ++- 4 files changed, 287 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/sleepable_tracepoints.c create mode 100644 tools/testing/selftests/bpf/progs/test_sleepable_tracepoints.c create mode 100644 tools/testing/selftests/bpf/progs/test_sleepable_tracepoints_fail.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sleepable_tracepoints.c b/tools/testing/selftests/bpf/prog_tests/sleepable_tracepoints.c new file mode 100644 index 000000000000..19500b785ee3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sleepable_tracepoints.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include "test_sleepable_tracepoints.skel.h" +#include "test_sleepable_tracepoints_fail.skel.h" + +static void run_test(struct test_sleepable_tracepoints *skel) +{ + char buf[PATH_MAX] = "/"; + + skel->bss->target_pid = getpid(); + skel->bss->prog_triggered = 0; + skel->bss->err = 0; + skel->bss->copied_byte = 0; + + syscall(__NR_getcwd, buf, sizeof(buf)); + + ASSERT_EQ(skel->bss->prog_triggered, 1, "prog_triggered"); + ASSERT_EQ(skel->bss->err, 0, "err"); + ASSERT_EQ(skel->bss->copied_byte, '/', "copied_byte"); +} + +static void run_auto_attach_test(struct bpf_program *prog, + struct test_sleepable_tracepoints *skel) +{ + struct bpf_link *link; + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "prog_attach")) + return; + + run_test(skel); + bpf_link__destroy(link); +} + +static void test_attach_only(struct bpf_program *prog) +{ + struct bpf_link *link; + + link = bpf_program__attach(prog); + if (ASSERT_OK_PTR(link, "attach")) + bpf_link__destroy(link); +} + +static void test_attach_reject(struct bpf_program *prog) +{ + struct bpf_link *link; + + link = bpf_program__attach(prog); + if (!ASSERT_ERR_PTR(link, "attach_should_fail")) + bpf_link__destroy(link); +} + +static void test_raw_tp_bare(struct test_sleepable_tracepoints *skel) +{ + struct bpf_link *link; + + link = bpf_program__attach_raw_tracepoint(skel->progs.handle_raw_tp_bare, + "sys_enter"); + if (ASSERT_OK_PTR(link, "attach")) + bpf_link__destroy(link); +} + +static void test_tp_bare(struct test_sleepable_tracepoints *skel) +{ + struct bpf_link *link; + + link = bpf_program__attach_tracepoint(skel->progs.handle_tp_bare, + "syscalls", "sys_enter_getcwd"); + if (ASSERT_OK_PTR(link, "attach")) + bpf_link__destroy(link); +} + +static void test_test_run(struct test_sleepable_tracepoints *skel) +{ + __u64 args[2] = {0x1234ULL, 0x5678ULL}; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .ctx_in = args, + .ctx_size_in = sizeof(args), + ); + int fd, err; + + fd = 
bpf_program__fd(skel->progs.handle_test_run); + err = bpf_prog_test_run_opts(fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, args[0] + args[1], "test_run_retval"); +} + +static void test_test_run_on_cpu_reject(struct test_sleepable_tracepoints *skel) +{ + __u64 args[2] = {}; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .ctx_in = args, + .ctx_size_in = sizeof(args), + .flags = BPF_F_TEST_RUN_ON_CPU, + ); + int fd, err; + + fd = bpf_program__fd(skel->progs.handle_test_run); + err = bpf_prog_test_run_opts(fd, &topts); + ASSERT_ERR(err, "test_run_on_cpu_reject"); +} + +void test_sleepable_tracepoints(void) +{ + struct test_sleepable_tracepoints *skel; + + skel = test_sleepable_tracepoints__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + if (test__start_subtest("tp_btf")) + run_auto_attach_test(skel->progs.handle_sys_enter_tp_btf, skel); + if (test__start_subtest("raw_tp")) + run_auto_attach_test(skel->progs.handle_sys_enter_raw_tp, skel); + if (test__start_subtest("tracepoint")) + run_auto_attach_test(skel->progs.handle_sys_enter_tp, skel); + if (test__start_subtest("sys_exit")) + run_auto_attach_test(skel->progs.handle_sys_exit_tp, skel); + if (test__start_subtest("tracepoint_alias")) + test_attach_only(skel->progs.handle_sys_enter_tp_alias); + if (test__start_subtest("raw_tracepoint_alias")) + test_attach_only(skel->progs.handle_sys_enter_raw_tp_alias); + if (test__start_subtest("raw_tp_bare")) + test_raw_tp_bare(skel); + if (test__start_subtest("tp_bare")) + test_tp_bare(skel); + if (test__start_subtest("test_run")) + test_test_run(skel); + if (test__start_subtest("test_run_on_cpu_reject")) + test_test_run_on_cpu_reject(skel); + if (test__start_subtest("raw_tp_non_faultable")) + test_attach_reject(skel->progs.handle_raw_tp_non_faultable); + if (test__start_subtest("tp_non_syscall")) + test_attach_reject(skel->progs.handle_tp_non_syscall); + if (test__start_subtest("tp_btf_non_faultable_reject")) + 
RUN_TESTS(test_sleepable_tracepoints_fail); + + test_sleepable_tracepoints__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_sleepable_tracepoints.c b/tools/testing/selftests/bpf/progs/test_sleepable_tracepoints.c new file mode 100644 index 000000000000..254f7fd895d9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sleepable_tracepoints.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +int target_pid; +int prog_triggered; +long err; +char copied_byte; + +static int copy_getcwd_arg(char *ubuf) +{ + err = bpf_copy_from_user(&copied_byte, sizeof(copied_byte), ubuf); + if (err) + return err; + + prog_triggered = 1; + return 0; +} + +SEC("tp_btf.s/sys_enter") +int BPF_PROG(handle_sys_enter_tp_btf, struct pt_regs *regs, long id) +{ + if ((bpf_get_current_pid_tgid() >> 32) != target_pid || + id != __NR_getcwd) + return 0; + + return copy_getcwd_arg((void *)PT_REGS_PARM1_SYSCALL(regs)); +} + +SEC("raw_tp.s/sys_enter") +int BPF_PROG(handle_sys_enter_raw_tp, struct pt_regs *regs, long id) +{ + if ((bpf_get_current_pid_tgid() >> 32) != target_pid || + id != __NR_getcwd) + return 0; + + return copy_getcwd_arg((void *)PT_REGS_PARM1_CORE_SYSCALL(regs)); +} + +SEC("tp.s/syscalls/sys_enter_getcwd") +int handle_sys_enter_tp(struct syscall_trace_enter *args) +{ + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) + return 0; + + return copy_getcwd_arg((void *)args->args[0]); +} + +SEC("tp.s/syscalls/sys_exit_getcwd") +int handle_sys_exit_tp(struct syscall_trace_exit *args) +{ + struct pt_regs *regs; + + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) + return 0; + + regs = (struct pt_regs *)bpf_task_pt_regs(bpf_get_current_task_btf()); + return copy_getcwd_arg((void *)PT_REGS_PARM1_CORE_SYSCALL(regs)); +} + +SEC("raw_tp.s") +int BPF_PROG(handle_raw_tp_bare, struct pt_regs *regs, 
long id) +{ + return 0; +} + +SEC("tp.s") +int handle_tp_bare(void *ctx) +{ + return 0; +} + +SEC("tracepoint.s/syscalls/sys_enter_getcwd") +int handle_sys_enter_tp_alias(struct syscall_trace_enter *args) +{ + return 0; +} + +SEC("raw_tracepoint.s/sys_enter") +int BPF_PROG(handle_sys_enter_raw_tp_alias, struct pt_regs *regs, long id) +{ + return 0; +} + +SEC("raw_tp.s/sys_enter") +int BPF_PROG(handle_test_run, struct pt_regs *regs, long id) +{ + if ((__u64)regs == 0x1234ULL && (__u64)id == 0x5678ULL) + return (__u64)regs + (__u64)id; + + return 0; +} + +SEC("raw_tp.s/sched_switch") +int BPF_PROG(handle_raw_tp_non_faultable, bool preempt, + struct task_struct *prev, struct task_struct *next) +{ + return 0; +} + +SEC("tp.s/sched/sched_switch") +int handle_tp_non_syscall(void *ctx) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_sleepable_tracepoints_fail.c b/tools/testing/selftests/bpf/progs/test_sleepable_tracepoints_fail.c new file mode 100644 index 000000000000..1a0748a9520b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sleepable_tracepoints_fail.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +/* Sleepable program on a non-faultable tracepoint should fail to load */ +SEC("tp_btf.s/sched_switch") +__failure __msg("Sleepable program cannot attach to non-faultable tracepoint") +int BPF_PROG(handle_sched_switch, bool preempt, + struct task_struct *prev, struct task_struct *next) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/verifier/sleepable.c b/tools/testing/selftests/bpf/verifier/sleepable.c index c2b7f5ebf168..6dabc5522945 100644 --- a/tools/testing/selftests/bpf/verifier/sleepable.c +++ b/tools/testing/selftests/bpf/verifier/sleepable.c @@ -76,7 +76,20 @@ .runs = -1, }, { - "sleepable raw tracepoint reject", + "sleepable raw tracepoint accept", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_RAW_TP, + .kfunc = "sys_enter", + .result = ACCEPT, + .flags = BPF_F_SLEEPABLE, + .runs = -1, +}, +{ + "sleepable raw tracepoint reject non-faultable", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), @@ -85,7 +98,7 @@ .expected_attach_type = BPF_TRACE_RAW_TP, .kfunc = "sched_switch", .result = REJECT, - .errstr = "Only fentry/fexit/fsession/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable", + .errstr = "Sleepable program cannot attach to non-faultable tracepoint", .flags = BPF_F_SLEEPABLE, .runs = -1, }, -- cgit v1.2.3 From a20f97791a786203821570e84941ee7a67fd53e9 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Mon, 20 Apr 2026 15:46:37 +0200 Subject: selftests/bpf: Page out as late as possible in file_reader The file_reader/on_open_expect_fault fails consistently on my system. It expects a page fault on first dynptr read of some range the exe file of the current process because it has paged out that page range earlier. 
However, a lot can happen to that range (which, depending on the actual memory layout, could contain the text section, data section, sections related to dynamic linking...) between the moment it was paged out and the moment the bpf program expected to hit a page fault actually runs. A bit of instrumentation with mincore() shows that pages from that range were accessed several times before the program is run. In particular the call of file_reader__load() seems to fault all the range in. Move the call to madvise(MADV_PAGEOUT) to just before attaching the program to minimize the risk of having those pages pulled back in from under our feet. Signed-off-by: Jerome Marchand Acked-by: Mykyta Yatsenko Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20260420134637.2513867-1-jmarchan@redhat.com Signed-off-by: Kumar Kartikeya Dwivedi --- .../testing/selftests/bpf/prog_tests/file_reader.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/file_reader.c b/tools/testing/selftests/bpf/prog_tests/file_reader.c index 5cde32b35da4..48aae7ea0e4b 100644 --- a/tools/testing/selftests/bpf/prog_tests/file_reader.c +++ b/tools/testing/selftests/bpf/prog_tests/file_reader.c @@ -10,6 +10,7 @@ const char *user_ptr = "hello world"; char file_contents[256000]; +void *addr; void *get_executable_base_addr(void) { @@ -26,8 +27,7 @@ void *get_executable_base_addr(void) static int initialize_file_contents(void) { int fd, page_sz = sysconf(_SC_PAGESIZE); - ssize_t n = 0, cur, off; - void *addr; + ssize_t n = 0, cur; fd = open("/proc/self/exe", O_RDONLY); if (!ASSERT_OK_FD(fd, "Open /proc/self/exe\n")) @@ -52,16 +52,6 @@ static int initialize_file_contents(void) /* page-align base file address */ addr = (void *)((unsigned long)addr & ~(page_sz - 1)); - /* - * Page out range 0..512K, use 0..256K for positive tests and - * 256K..512K for negative tests expecting page faults - */ - for (off = 0; off < 
sizeof(file_contents) * 2; off += page_sz) { - if (!ASSERT_OK(madvise(addr + off, page_sz, MADV_PAGEOUT), - "madvise pageout")) - return errno; - } - return 0; } @@ -90,6 +80,14 @@ static void run_test(const char *prog_name) if (!ASSERT_OK(err, "file_reader__load")) goto cleanup; + /* + * Page out range 0..512K, use 0..256K for positive tests and + * 256K..512K for negative tests expecting page faults + */ + if (!ASSERT_OK(madvise(addr, sizeof(file_contents) * 2, MADV_PAGEOUT), + "madvise pageout")) + goto cleanup; + err = file_reader__attach(skel); if (!ASSERT_OK(err, "file_reader__attach")) goto cleanup; -- cgit v1.2.3 From 0831b110eb4591e4ad8c5fd0d8f0f3f9979a5ff5 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Fri, 17 Apr 2026 10:33:17 +0200 Subject: libbpf: Fix deduplication of typedef with base definitions When deduplicating definitions for a module, typedefs defined in the base are not removed. This is because the hash used for base types differs from the one used in the deduplication logic in btf_dedup_struct_type. This was introduced by the referenced commit when moving the typedef deduplication logic handling from btf_dedup_ref_type to btf_dedup_struct_type, as this also changed the hash logic (btf_hash_common to btf_hash_typedef). This also impacts other types referencing those typedefs (e.g. const). In my test, the BTF section size of the openvswitch module went from 31KB to 45KB. Fixes: 3781413465df ("libbpf: Fix BTF dedup to support recursive typedef definitions"). 
Signed-off-by: Antoine Tenart Signed-off-by: Andrii Nakryiko Tested-by: Alan Maguire Reviewed-by: Alan Maguire Link: https://lore.kernel.org/bpf/20260417083319.32716-1-atenart@kernel.org --- tools/lib/bpf/btf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 267904939098..823bce895178 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -4592,12 +4592,14 @@ static int btf_dedup_prep(struct btf_dedup *d) case BTF_KIND_RESTRICT: case BTF_KIND_PTR: case BTF_KIND_FWD: - case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_FLOAT: case BTF_KIND_TYPE_TAG: h = btf_hash_common(t); break; + case BTF_KIND_TYPEDEF: + h = btf_hash_typedef(t); + break; case BTF_KIND_INT: case BTF_KIND_DECL_TAG: h = btf_hash_int_decl_tag(t); -- cgit v1.2.3 From 1980023d759decc4b5647718d72c94385925fe9c Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Fri, 17 Apr 2026 10:33:18 +0200 Subject: selftests/bpf: Ensure typedef are deduplicated in split BTF If a typedef is defined both in a base and in a split BTF, after deduplication a single instance should be found in the base BTF. 
Suggested-by: Alan Maguire Signed-off-by: Antoine Tenart Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20260417083319.32716-2-atenart@kernel.org --- .../selftests/bpf/prog_tests/btf_dedup_split.c | 48 ++++++++++++++-------- 1 file changed, 32 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c index 6bc31236805c..9d6161151593 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c @@ -20,18 +20,22 @@ static void test_split_simple() { btf__add_struct(btf1, "s1", 4); /* [3] struct s1 { */ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ /* } */ + btf__add_typedef(btf1, "t1", 1); /* [4] typedef int */ VALIDATE_RAW_BTF( btf1, "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", "[2] PTR '(anon)' type_id=1", "[3] STRUCT 's1' size=4 vlen=1\n" - "\t'f1' type_id=1 bits_offset=0"); + "\t'f1' type_id=1 bits_offset=0", + "[4] TYPEDEF 't1' type_id=1"); ASSERT_STREQ(btf_type_c_dump(btf1), "\ struct s1 {\n\ int f1;\n\ -};\n\n", "c_dump"); +};\n\ +\n\ +typedef int t1;\n\n", "c_dump"); btf2 = btf__new_empty_split(btf1); if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) @@ -49,39 +53,46 @@ struct s1 {\n\ ASSERT_EQ(btf_is_int(t), true, "int_kind"); ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "int", "int_name"); - btf__add_struct(btf2, "s2", 16); /* [4] struct s2 { */ - btf__add_field(btf2, "f1", 6, 0, 0); /* struct s1 f1; */ - btf__add_field(btf2, "f2", 5, 32, 0); /* int f2; */ + btf__add_struct(btf2, "s2", 16); /* [5] struct s2 { */ + btf__add_field(btf2, "f1", 7, 0, 0); /* struct s1 f1; */ + btf__add_field(btf2, "f2", 6, 32, 0); /* int f2; */ btf__add_field(btf2, "f3", 2, 64, 0); /* int *f3; */ /* } */ /* duplicated int */ - btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [5] int */ + btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [6] int 
*/ /* duplicated struct s1 */ - btf__add_struct(btf2, "s1", 4); /* [6] struct s1 { */ - btf__add_field(btf2, "f1", 5, 0, 0); /* int f1; */ + btf__add_struct(btf2, "s1", 4); /* [7] struct s1 { */ + btf__add_field(btf2, "f1", 6, 0, 0); /* int f1; */ /* } */ + /* duplicated typedef t1 */ + btf__add_typedef(btf2, "t1", 6); /* [8] typedef int */ + VALIDATE_RAW_BTF( btf2, "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", "[2] PTR '(anon)' type_id=1", "[3] STRUCT 's1' size=4 vlen=1\n" "\t'f1' type_id=1 bits_offset=0", - "[4] STRUCT 's2' size=16 vlen=3\n" - "\t'f1' type_id=6 bits_offset=0\n" - "\t'f2' type_id=5 bits_offset=32\n" + "[4] TYPEDEF 't1' type_id=1", + "[5] STRUCT 's2' size=16 vlen=3\n" + "\t'f1' type_id=7 bits_offset=0\n" + "\t'f2' type_id=6 bits_offset=32\n" "\t'f3' type_id=2 bits_offset=64", - "[5] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[6] STRUCT 's1' size=4 vlen=1\n" - "\t'f1' type_id=5 bits_offset=0"); + "[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[7] STRUCT 's1' size=4 vlen=1\n" + "\t'f1' type_id=6 bits_offset=0", + "[8] TYPEDEF 't1' type_id=6"); ASSERT_STREQ(btf_type_c_dump(btf2), "\ struct s1 {\n\ int f1;\n\ };\n\ \n\ +typedef int t1;\n\ +\n\ struct s1___2 {\n\ int f1;\n\ };\n\ @@ -90,7 +101,9 @@ struct s2 {\n\ struct s1___2 f1;\n\ int f2;\n\ int *f3;\n\ -};\n\n", "c_dump"); +};\n\ +\n\ +typedef int t1___2;\n\n", "c_dump"); err = btf__dedup(btf2, NULL); if (!ASSERT_OK(err, "btf_dedup")) @@ -102,7 +115,8 @@ struct s2 {\n\ "[2] PTR '(anon)' type_id=1", "[3] STRUCT 's1' size=4 vlen=1\n" "\t'f1' type_id=1 bits_offset=0", - "[4] STRUCT 's2' size=16 vlen=3\n" + "[4] TYPEDEF 't1' type_id=1", + "[5] STRUCT 's2' size=16 vlen=3\n" "\t'f1' type_id=3 bits_offset=0\n" "\t'f2' type_id=1 bits_offset=32\n" "\t'f3' type_id=2 bits_offset=64"); @@ -112,6 +126,8 @@ struct s1 {\n\ int f1;\n\ };\n\ \n\ +typedef int t1;\n\ +\n\ struct s2 {\n\ struct s1 f1;\n\ int f2;\n\ -- cgit v1.2.3 From 
7f843c0584f438c1cc8cbe798ca8ab4207e67509 Mon Sep 17 00:00:00 2001 From: "Alexis Lothoré (eBPF Foundation)" Date: Tue, 21 Apr 2026 16:33:29 +0200 Subject: selftests/bpf: Fix uprobe_multi usage message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The uprobe_multi usage message is not in sync with the list of subtests it actually supports. Add the missing subtests to the help message. Signed-off-by: Alexis Lothoré (eBPF Foundation) Link: https://lore.kernel.org/bpf/20260421-uprobe_multi_usage-v1-1-4c51675955e6@bootlin.com Signed-off-by: Kumar Kartikeya Dwivedi --- tools/testing/selftests/bpf/uprobe_multi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/uprobe_multi.c b/tools/testing/selftests/bpf/uprobe_multi.c index 3e58a86b8e25..0af330b6c364 100644 --- a/tools/testing/selftests/bpf/uprobe_multi.c +++ b/tools/testing/selftests/bpf/uprobe_multi.c @@ -144,6 +144,8 @@ int main(int argc, char **argv) return trigger_uprobe(true /* page-in build ID */); error: - fprintf(stderr, "usage: %s \n", argv[0]); + fprintf(stderr, + "usage: %s \n", + argv[0]); return -1; } -- cgit v1.2.3 From 9b9f0b42703ceb88332bcb19453c4288c2683e34 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 22 Apr 2026 20:35:01 -0700 Subject: bpf: Prepare verifier logs for upcoming kfunc stack arguments This change prepares verifier log reporting for upcoming kfunc stack argument support. Currently verifier log code mostly assumes that an argument can be described directly by a register number. That works for arguments passed in `R1` to `R5`, but it does not work once kfunc arguments can also be passed on the stack. Introduce an opaque `argno_t` type that encodes both register-based and arg-based references. 
Four helpers form the interface: - argno_from_reg(regno): create from a register number - argno_from_arg(arg): create from a 1-based arg number - reg_from_argno(a): extract register number, or -1 - arg_from_argno(a): extract arg number, or -1 reg_arg_name() converts an argno_t to a human-readable string for verifier logs: "R%d" for register arguments, or "*(R11-off)" for stack arguments beyond R5. Update selftests accordingly. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20260423033501.2539667-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 694 ++++++++++++--------- tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 22 +- tools/testing/selftests/bpf/prog_tests/cb_refs.c | 2 +- .../testing/selftests/bpf/prog_tests/kfunc_call.c | 2 +- .../testing/selftests/bpf/prog_tests/linked_list.c | 4 +- .../selftests/bpf/progs/cgrp_kfunc_failure.c | 14 +- .../testing/selftests/bpf/progs/cpumask_failure.c | 10 +- tools/testing/selftests/bpf/progs/dynptr_fail.c | 22 +- .../testing/selftests/bpf/progs/file_reader_fail.c | 4 +- tools/testing/selftests/bpf/progs/irq.c | 4 +- tools/testing/selftests/bpf/progs/iters.c | 6 +- .../selftests/bpf/progs/iters_state_safety.c | 14 +- tools/testing/selftests/bpf/progs/iters_testmod.c | 4 +- .../selftests/bpf/progs/iters_testmod_seq.c | 4 +- tools/testing/selftests/bpf/progs/map_kptr_fail.c | 2 +- .../selftests/bpf/progs/percpu_alloc_fail.c | 4 +- tools/testing/selftests/bpf/progs/rbtree_fail.c | 6 +- .../selftests/bpf/progs/refcounted_kptr_fail.c | 2 +- tools/testing/selftests/bpf/progs/stream_fail.c | 2 +- .../selftests/bpf/progs/task_kfunc_failure.c | 18 +- tools/testing/selftests/bpf/progs/task_work_fail.c | 6 +- .../testing/selftests/bpf/progs/test_bpf_nf_fail.c | 8 +- .../selftests/bpf/progs/test_kfunc_dynptr_param.c | 2 +- .../bpf/progs/test_kfunc_param_nullable.c | 2 +- .../selftests/bpf/progs/verifier_bits_iter.c | 4 +- 
.../selftests/bpf/progs/verifier_ref_tracking.c | 6 +- .../selftests/bpf/progs/verifier_vfs_reject.c | 8 +- tools/testing/selftests/bpf/progs/wq_failures.c | 2 +- tools/testing/selftests/bpf/verifier/calls.c | 14 +- 30 files changed, 497 insertions(+), 396 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b148f816f25b..d5b4303315dd 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -913,6 +913,7 @@ struct bpf_verifier_env { * e.g., in reg_type_str() to generate reg_type string */ char tmp_str_buf[TMP_STR_BUF_LEN]; + char tmp_arg_name[32]; struct bpf_insn insn_buf[INSN_BUF_SIZE]; struct bpf_insn epilogue_buf[INSN_BUF_SIZE]; struct bpf_scc_callchain callchain_buf; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 81d77dfaaaf6..ff6ff1c27517 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -261,6 +261,36 @@ struct bpf_kfunc_meta { struct btf *btf_vmlinux; +typedef struct argno { + int argno; +} argno_t; + +static argno_t argno_from_reg(u32 regno) +{ + return (argno_t){ .argno = regno }; +} + +static argno_t argno_from_arg(u32 arg) +{ + return (argno_t){ .argno = -arg }; +} + +static int reg_from_argno(argno_t a) +{ + if (a.argno >= 0) + return a.argno; + if (a.argno >= -MAX_BPF_FUNC_REG_ARGS) + return -a.argno; + return -1; +} + +static int arg_from_argno(argno_t a) +{ + if (a.argno < 0) + return -a.argno; + return -1; +} + static const char *btf_type_name(const struct btf *btf, u32 id) { return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); @@ -1742,6 +1772,22 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, return &elem->st; } +static const char *reg_arg_name(struct bpf_verifier_env *env, argno_t argno) +{ + char *buf = env->tmp_arg_name; + int len = sizeof(env->tmp_arg_name); + int arg, regno = reg_from_argno(argno); + + if (regno >= 0) { + snprintf(buf, len, "R%d", regno); + } else { + arg = 
arg_from_argno(argno); + snprintf(buf, len, "*(R11-%u)", (arg - MAX_BPF_FUNC_REG_ARGS) * BPF_REG_SIZE); + } + + return buf; +} + static const int caller_saved[CALLER_SAVED_REGS] = { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 }; @@ -4241,7 +4287,7 @@ enum bpf_access_src { }; static int check_stack_range_initialized(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - int regno, int off, int access_size, + argno_t argno, int off, int access_size, bool zero_size_allowed, enum bpf_access_type type, struct bpf_call_arg_meta *meta); @@ -4265,7 +4311,7 @@ static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) * instead. */ static int check_stack_read_var_off(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - int ptr_regno, int off, int size, int dst_regno) + argno_t ptr_argno, int off, int size, int dst_regno) { struct bpf_func_state *ptr_state = bpf_func(env, reg); int err; @@ -4273,7 +4319,7 @@ static int check_stack_read_var_off(struct bpf_verifier_env *env, struct bpf_reg /* Note that we pass a NULL meta, so raw access will not be permitted. */ - err = check_stack_range_initialized(env, reg, ptr_regno, off, size, + err = check_stack_range_initialized(env, reg, ptr_argno, off, size, false, BPF_READ, NULL); if (err) return err; @@ -4295,7 +4341,7 @@ static int check_stack_read_var_off(struct bpf_verifier_env *env, struct bpf_reg * can be -1, meaning that the read value is not going to a register. */ static int check_stack_read(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, int ptr_regno, int off, int size, + struct bpf_reg_state *reg, argno_t ptr_argno, int off, int size, int dst_regno) { struct bpf_func_state *state = bpf_func(env, reg); @@ -4333,7 +4379,7 @@ static int check_stack_read(struct bpf_verifier_env *env, * than fixed offset ones. Note that dst_regno >= 0 on this * branch. 
*/ - err = check_stack_read_var_off(env, reg, ptr_regno, off, size, + err = check_stack_read_var_off(env, reg, ptr_argno, off, size, dst_regno); } return err; @@ -4393,7 +4439,7 @@ static int check_map_access_type(struct bpf_verifier_env *env, struct bpf_reg_st } /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */ -static int __check_mem_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int regno, +static int __check_mem_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int off, int size, u32 mem_size, bool zero_size_allowed) { @@ -4414,8 +4460,8 @@ static int __check_mem_access(struct bpf_verifier_env *env, struct bpf_reg_state case PTR_TO_PACKET: case PTR_TO_PACKET_META: case PTR_TO_PACKET_END: - verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", - off, size, regno, reg->id, off, mem_size); + verbose(env, "invalid access to packet, off=%d size=%d, %s(id=%d,off=%d,r=%d)\n", + off, size, reg_arg_name(env, argno), reg->id, off, mem_size); break; case PTR_TO_CTX: verbose(env, "invalid access to context, ctx_size=%d off=%d size=%d\n", @@ -4431,7 +4477,7 @@ static int __check_mem_access(struct bpf_verifier_env *env, struct bpf_reg_state } /* check read/write into a memory region with possible variable offset */ -static int check_mem_region_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, +static int check_mem_region_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int off, int size, u32 mem_size, bool zero_size_allowed) { @@ -4451,15 +4497,15 @@ static int check_mem_region_access(struct bpf_verifier_env *env, struct bpf_reg_ (reg->smin_value == S64_MIN || (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) || reg->smin_value + off < 0)) { - verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", - regno); + verbose(env, "%s min value is negative, either use 
unsigned index or do a if (index >=0) check.\n", + reg_arg_name(env, argno)); return -EACCES; } - err = __check_mem_access(env, reg, regno, reg->smin_value + off, size, + err = __check_mem_access(env, reg, argno, reg->smin_value + off, size, mem_size, zero_size_allowed); if (err) { - verbose(env, "R%d min value is outside of the allowed memory range\n", - regno); + verbose(env, "%s min value is outside of the allowed memory range\n", + reg_arg_name(env, argno)); return err; } @@ -4468,15 +4514,15 @@ static int check_mem_region_access(struct bpf_verifier_env *env, struct bpf_reg_ * If reg->umax_value + off could overflow, treat that as unbounded too. */ if (reg->umax_value >= BPF_MAX_VAR_OFF) { - verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n", - regno); + verbose(env, "%s unbounded memory access, make sure to bounds check any such access\n", + reg_arg_name(env, argno)); return -EACCES; } - err = __check_mem_access(env, reg, regno, reg->umax_value + off, size, + err = __check_mem_access(env, reg, argno, reg->umax_value + off, size, mem_size, zero_size_allowed); if (err) { - verbose(env, "R%d max value is outside of the allowed memory range\n", - regno); + verbose(env, "%s max value is outside of the allowed memory range\n", + reg_arg_name(env, argno)); return err; } @@ -4484,7 +4530,7 @@ static int check_mem_region_access(struct bpf_verifier_env *env, struct bpf_reg_ } static int __check_ptr_off_reg(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno, + const struct bpf_reg_state *reg, argno_t argno, bool fixed_off_ok) { /* Access to this pointer-typed register or passing it to a helper @@ -4501,14 +4547,14 @@ static int __check_ptr_off_reg(struct bpf_verifier_env *env, } if (reg->smin_value < 0) { - verbose(env, "negative offset %s ptr R%d off=%lld disallowed\n", - reg_type_str(env, reg->type), regno, reg->var_off.value); + verbose(env, "negative offset %s ptr %s off=%lld disallowed\n", + 
reg_type_str(env, reg->type), reg_arg_name(env, argno), reg->var_off.value); return -EACCES; } if (!fixed_off_ok && reg->var_off.value != 0) { - verbose(env, "dereference of modified %s ptr R%d off=%lld disallowed\n", - reg_type_str(env, reg->type), regno, reg->var_off.value); + verbose(env, "dereference of modified %s ptr %s off=%lld disallowed\n", + reg_type_str(env, reg->type), reg_arg_name(env, argno), reg->var_off.value); return -EACCES; } @@ -4518,7 +4564,7 @@ static int __check_ptr_off_reg(struct bpf_verifier_env *env, static int check_ptr_off_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno) { - return __check_ptr_off_reg(env, reg, regno, false); + return __check_ptr_off_reg(env, reg, argno_from_reg(regno), false); } static int map_kptr_match_type(struct bpf_verifier_env *env, @@ -4556,7 +4602,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env, * Since ref_ptr cannot be accessed directly by BPF insns, check for * reg->ref_obj_id is not needed here. 
*/ - if (__check_ptr_off_reg(env, reg, regno, true)) + if (__check_ptr_off_reg(env, reg, argno_from_reg(regno), true)) return -EACCES; /* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and @@ -4776,7 +4822,7 @@ static u32 map_mem_size(const struct bpf_map *map) } /* check read/write into a map element with possible variable offset */ -static int check_map_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, +static int check_map_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int off, int size, bool zero_size_allowed, enum bpf_access_src src) { @@ -4785,7 +4831,7 @@ static int check_map_access(struct bpf_verifier_env *env, struct bpf_reg_state * struct btf_record *rec; int err, i; - err = check_mem_region_access(env, reg, regno, off, size, mem_size, zero_size_allowed); + err = check_mem_region_access(env, reg, argno, off, size, mem_size, zero_size_allowed); if (err) return err; @@ -4881,17 +4927,17 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, } } -static int check_packet_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, int off, +static int check_packet_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int off, int size, bool zero_size_allowed) { int err; if (reg->range < 0) { - verbose(env, "R%d offset is outside of the packet\n", regno); + verbose(env, "%s offset is outside of the packet\n", reg_arg_name(env, argno)); return -EINVAL; } - err = check_mem_region_access(env, reg, regno, off, size, reg->range, zero_size_allowed); + err = check_mem_region_access(env, reg, argno, off, size, reg->range, zero_size_allowed); if (err) return err; @@ -4946,7 +4992,7 @@ static int __check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int of return -EACCES; } -static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, struct bpf_reg_state *reg, u32 regno, +static int check_ctx_access(struct 
bpf_verifier_env *env, int insn_idx, struct bpf_reg_state *reg, argno_t argno, int off, int access_size, enum bpf_access_type t, struct bpf_insn_access_aux *info) { @@ -4959,9 +5005,9 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, struct b int err; if (var_off_ok) - err = check_mem_region_access(env, reg, regno, off, access_size, U16_MAX, false); + err = check_mem_region_access(env, reg, argno, off, access_size, U16_MAX, false); else - err = __check_ptr_off_reg(env, reg, regno, fixed_off_ok); + err = __check_ptr_off_reg(env, reg, argno, fixed_off_ok); if (err) return err; off += reg->umax_value; @@ -4985,15 +5031,15 @@ static int check_flow_keys_access(struct bpf_verifier_env *env, int off, } static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, - struct bpf_reg_state *reg, u32 regno, int off, int size, + struct bpf_reg_state *reg, argno_t argno, int off, int size, enum bpf_access_type t) { struct bpf_insn_access_aux info = {}; bool valid; if (reg->smin_value < 0) { - verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", - regno); + verbose(env, "%s min value is negative, either use unsigned index or do a if (index >=0) check.\n", + reg_arg_name(env, argno)); return -EACCES; } @@ -5021,8 +5067,8 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, return 0; } - verbose(env, "R%d invalid %s access off=%d size=%d\n", - regno, reg_type_str(env, reg->type), off, size); + verbose(env, "%s invalid %s access off=%d size=%d\n", + reg_arg_name(env, argno), reg_type_str(env, reg->type), off, size); return -EACCES; } @@ -5535,12 +5581,12 @@ static int check_max_stack_depth(struct bpf_verifier_env *env) static int __check_buffer_access(struct bpf_verifier_env *env, const char *buf_info, const struct bpf_reg_state *reg, - int regno, int off, int size) + argno_t argno, int off, int size) { if (off < 0) { verbose(env, - "R%d invalid %s buffer access: off=%d, 
size=%d\n", - regno, buf_info, off, size); + "%s invalid %s buffer access: off=%d, size=%d\n", + reg_arg_name(env, argno), buf_info, off, size); return -EACCES; } if (!tnum_is_const(reg->var_off)) { @@ -5548,8 +5594,8 @@ static int __check_buffer_access(struct bpf_verifier_env *env, tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); verbose(env, - "R%d invalid variable buffer offset: off=%d, var_off=%s\n", - regno, off, tn_buf); + "%s invalid variable buffer offset: off=%d, var_off=%s\n", + reg_arg_name(env, argno), off, tn_buf); return -EACCES; } @@ -5558,11 +5604,11 @@ static int __check_buffer_access(struct bpf_verifier_env *env, static int check_tp_buffer_access(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, - int regno, int off, int size) + argno_t argno, int off, int size) { int err; - err = __check_buffer_access(env, "tracepoint", reg, regno, off, size); + err = __check_buffer_access(env, "tracepoint", reg, argno, off, size); if (err) return err; @@ -5574,14 +5620,14 @@ static int check_tp_buffer_access(struct bpf_verifier_env *env, static int check_buffer_access(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, - int regno, int off, int size, + argno_t argno, int off, int size, bool zero_size_allowed, u32 *max_access) { const char *buf_info = type_is_rdonly_mem(reg->type) ? 
"rdonly" : "rdwr"; int err; - err = __check_buffer_access(env, buf_info, reg, regno, off, size); + err = __check_buffer_access(env, buf_info, reg, argno, off, size); if (err) return err; @@ -5954,7 +6000,7 @@ static bool type_is_trusted_or_null(struct bpf_verifier_env *env, static int check_ptr_to_btf_access(struct bpf_verifier_env *env, struct bpf_reg_state *regs, struct bpf_reg_state *reg, - int regno, int off, int size, + argno_t argno, int off, int size, enum bpf_access_type atype, int value_regno) { @@ -5983,8 +6029,8 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); verbose(env, - "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n", - regno, tname, off, tn_buf); + "%s is ptr_%s invalid variable offset: off=%d, var_off=%s\n", + reg_arg_name(env, argno), tname, off, tn_buf); return -EACCES; } @@ -5992,22 +6038,22 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, if (off < 0) { verbose(env, - "R%d is ptr_%s invalid negative access: off=%d\n", - regno, tname, off); + "%s is ptr_%s invalid negative access: off=%d\n", + reg_arg_name(env, argno), tname, off); return -EACCES; } if (reg->type & MEM_USER) { verbose(env, - "R%d is ptr_%s access user memory: off=%d\n", - regno, tname, off); + "%s is ptr_%s access user memory: off=%d\n", + reg_arg_name(env, argno), tname, off); return -EACCES; } if (reg->type & MEM_PERCPU) { verbose(env, - "R%d is ptr_%s access percpu memory: off=%d\n", - regno, tname, off); + "%s is ptr_%s access percpu memory: off=%d\n", + reg_arg_name(env, argno), tname, off); return -EACCES; } @@ -6110,7 +6156,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, static int check_ptr_to_map_access(struct bpf_verifier_env *env, struct bpf_reg_state *regs, struct bpf_reg_state *reg, - int regno, int off, int size, + argno_t argno, int off, int size, enum bpf_access_type atype, int value_regno) { @@ -6144,8 +6190,8 @@ static int 
check_ptr_to_map_access(struct bpf_verifier_env *env, } if (off < 0) { - verbose(env, "R%d is %s invalid negative access: off=%d\n", - regno, tname, off); + verbose(env, "%s is %s invalid negative access: off=%d\n", + reg_arg_name(env, argno), tname, off); return -EACCES; } @@ -6203,7 +6249,7 @@ static int check_stack_slot_within_bounds(struct bpf_verifier_env *env, */ static int check_stack_access_within_bounds( struct bpf_verifier_env *env, struct bpf_reg_state *reg, - int regno, int off, int access_size, + argno_t argno, int off, int access_size, enum bpf_access_type type) { struct bpf_func_state *state = bpf_func(env, reg); @@ -6222,8 +6268,8 @@ static int check_stack_access_within_bounds( } else { if (reg->smax_value >= BPF_MAX_VAR_OFF || reg->smin_value <= -BPF_MAX_VAR_OFF) { - verbose(env, "invalid unbounded variable-offset%s stack R%d\n", - err_extra, regno); + verbose(env, "invalid unbounded variable-offset%s stack %s\n", + err_extra, reg_arg_name(env, argno)); return -EACCES; } min_off = reg->smin_value + off; @@ -6241,14 +6287,14 @@ static int check_stack_access_within_bounds( if (err) { if (tnum_is_const(reg->var_off)) { - verbose(env, "invalid%s stack R%d off=%lld size=%d\n", - err_extra, regno, min_off, access_size); + verbose(env, "invalid%s stack %s off=%lld size=%d\n", + err_extra, reg_arg_name(env, argno), min_off, access_size); } else { char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "invalid variable-offset%s stack R%d var_off=%s off=%d size=%d\n", - err_extra, regno, tn_buf, off, access_size); + verbose(env, "invalid variable-offset%s stack %s var_off=%s off=%d size=%d\n", + err_extra, reg_arg_name(env, argno), tn_buf, off, access_size); } return err; } @@ -6293,7 +6339,7 @@ static void add_scalar_to_reg(struct bpf_reg_state *dst_reg, s64 val) * if t==write && value_regno==-1, some unknown value is stored into memory * if t==read && value_regno==-1, don't care what we read from memory */ -static int 
check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct bpf_reg_state *reg, u32 regno, +static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct bpf_reg_state *reg, argno_t argno, int off, int bpf_size, enum bpf_access_type t, int value_regno, bool strict_alignment_once, bool is_ldsx) { @@ -6310,11 +6356,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct b if (reg->type == PTR_TO_MAP_KEY) { if (t == BPF_WRITE) { - verbose(env, "write to change key R%d not allowed\n", regno); + verbose(env, "write to change key %s not allowed\n", + reg_arg_name(env, argno)); return -EACCES; } - err = check_mem_region_access(env, reg, regno, off, size, + err = check_mem_region_access(env, reg, argno, off, size, reg->map_ptr->key_size, false); if (err) return err; @@ -6331,7 +6378,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct b err = check_map_access_type(env, reg, off, size, t); if (err) return err; - err = check_map_access(env, reg, regno, off, size, false, ACCESS_DIRECT); + err = check_map_access(env, reg, argno, off, size, false, ACCESS_DIRECT); if (err) return err; if (tnum_is_const(reg->var_off)) @@ -6378,14 +6425,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct b bool rdonly_untrusted = rdonly_mem && (reg->type & PTR_UNTRUSTED); if (type_may_be_null(reg->type)) { - verbose(env, "R%d invalid mem access '%s'\n", regno, + verbose(env, "%s invalid mem access '%s'\n", reg_arg_name(env, argno), reg_type_str(env, reg->type)); return -EACCES; } if (t == BPF_WRITE && rdonly_mem) { - verbose(env, "R%d cannot write into %s\n", - regno, reg_type_str(env, reg->type)); + verbose(env, "%s cannot write into %s\n", + reg_arg_name(env, argno), reg_type_str(env, reg->type)); return -EACCES; } @@ -6400,7 +6447,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct b * instructions, hence no need to check bounds in that case. 
*/ if (!rdonly_untrusted) - err = check_mem_region_access(env, reg, regno, off, size, + err = check_mem_region_access(env, reg, argno, off, size, reg->mem_size, false); if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem)) mark_reg_unknown(env, regs, value_regno); @@ -6418,7 +6465,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct b return -EACCES; } - err = check_ctx_access(env, insn_idx, reg, regno, off, size, t, &info); + err = check_ctx_access(env, insn_idx, reg, argno, off, size, t, &info); if (!err && t == BPF_READ && value_regno >= 0) { /* ctx access returns either a scalar, or a * PTR_TO_PACKET[_META,_END]. In the latter @@ -6455,12 +6502,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct b } else if (reg->type == PTR_TO_STACK) { /* Basic bounds checks. */ - err = check_stack_access_within_bounds(env, reg, regno, off, size, t); + err = check_stack_access_within_bounds(env, reg, argno, off, size, t); if (err) return err; if (t == BPF_READ) - err = check_stack_read(env, reg, regno, off, size, + err = check_stack_read(env, reg, argno, off, size, value_regno); else err = check_stack_write(env, reg, off, size, @@ -6476,7 +6523,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct b value_regno); return -EACCES; } - err = check_packet_access(env, reg, regno, off, size, false); + err = check_packet_access(env, reg, argno, off, size, false); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); } else if (reg->type == PTR_TO_FLOW_KEYS) { @@ -6492,23 +6539,23 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct b mark_reg_unknown(env, regs, value_regno); } else if (type_is_sk_pointer(reg->type)) { if (t == BPF_WRITE) { - verbose(env, "R%d cannot write into %s\n", - regno, reg_type_str(env, reg->type)); + verbose(env, "%s cannot write into %s\n", + reg_arg_name(env, argno), reg_type_str(env, 
reg->type)); return -EACCES; } - err = check_sock_access(env, insn_idx, reg, regno, off, size, t); + err = check_sock_access(env, insn_idx, reg, argno, off, size, t); if (!err && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); } else if (reg->type == PTR_TO_TP_BUFFER) { - err = check_tp_buffer_access(env, reg, regno, off, size); + err = check_tp_buffer_access(env, reg, argno, off, size); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); } else if (base_type(reg->type) == PTR_TO_BTF_ID && !type_may_be_null(reg->type)) { - err = check_ptr_to_btf_access(env, regs, reg, regno, off, size, t, + err = check_ptr_to_btf_access(env, regs, reg, argno, off, size, t, value_regno); } else if (reg->type == CONST_PTR_TO_MAP) { - err = check_ptr_to_map_access(env, regs, reg, regno, off, size, t, + err = check_ptr_to_map_access(env, regs, reg, argno, off, size, t, value_regno); } else if (base_type(reg->type) == PTR_TO_BUF && !type_may_be_null(reg->type)) { @@ -6517,8 +6564,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct b if (rdonly_mem) { if (t == BPF_WRITE) { - verbose(env, "R%d cannot write into %s\n", - regno, reg_type_str(env, reg->type)); + verbose(env, "%s cannot write into %s\n", + reg_arg_name(env, argno), reg_type_str(env, reg->type)); return -EACCES; } max_access = &env->prog->aux->max_rdonly_access; @@ -6526,7 +6573,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct b max_access = &env->prog->aux->max_rdwr_access; } - err = check_buffer_access(env, reg, regno, off, size, false, + err = check_buffer_access(env, reg, argno, off, size, false, max_access); if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ)) @@ -6535,7 +6582,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct b if (t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); } else { - verbose(env, "R%d invalid mem access '%s'\n", 
regno, + verbose(env, "%s invalid mem access '%s'\n", reg_arg_name(env, argno), reg_type_str(env, reg->type)); return -EACCES; } @@ -6577,7 +6624,7 @@ static int check_load_mem(struct bpf_verifier_env *env, struct bpf_insn *insn, /* Check if (src_reg + off) is readable. The state of dst_reg will be * updated by this call. */ - err = check_mem_access(env, env->insn_idx, regs + insn->src_reg, insn->src_reg, insn->off, + err = check_mem_access(env, env->insn_idx, regs + insn->src_reg, argno_from_reg(insn->src_reg), insn->off, BPF_SIZE(insn->code), BPF_READ, insn->dst_reg, strict_alignment_once, is_ldsx); err = err ?: save_aux_ptr_type(env, src_reg_type, @@ -6607,7 +6654,7 @@ static int check_store_reg(struct bpf_verifier_env *env, struct bpf_insn *insn, dst_reg_type = regs[insn->dst_reg].type; /* Check if (dst_reg + off) is writeable. */ - err = check_mem_access(env, env->insn_idx, regs + insn->dst_reg, insn->dst_reg, insn->off, + err = check_mem_access(env, env->insn_idx, regs + insn->dst_reg, argno_from_reg(insn->dst_reg), insn->off, BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg, strict_alignment_once, false); err = err ?: save_aux_ptr_type(env, dst_reg_type, false); @@ -6685,10 +6732,10 @@ static int check_atomic_rmw(struct bpf_verifier_env *env, /* Check whether we can read the memory, with second call for fetch * case to simulate the register fill. 
*/ - err = check_mem_access(env, env->insn_idx, dst_reg, insn->dst_reg, insn->off, + err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg), insn->off, BPF_SIZE(insn->code), BPF_READ, -1, true, false); if (!err && load_reg >= 0) - err = check_mem_access(env, env->insn_idx, dst_reg, insn->dst_reg, + err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg), insn->off, BPF_SIZE(insn->code), BPF_READ, load_reg, true, false); if (err) @@ -6700,7 +6747,7 @@ static int check_atomic_rmw(struct bpf_verifier_env *env, return err; } /* Check whether we can write into the same memory. */ - err = check_mem_access(env, env->insn_idx, dst_reg, insn->dst_reg, insn->off, + err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg), insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1, true, false); if (err) return err; @@ -6789,7 +6836,7 @@ static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn) * read offsets are marked as read. 
*/ static int check_stack_range_initialized( - struct bpf_verifier_env *env, struct bpf_reg_state *reg, int regno, int off, + struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int off, int access_size, bool zero_size_allowed, enum bpf_access_type type, struct bpf_call_arg_meta *meta) { @@ -6814,7 +6861,7 @@ static int check_stack_range_initialized( return -EACCES; } - err = check_stack_access_within_bounds(env, reg, regno, off, access_size, type); + err = check_stack_access_within_bounds(env, reg, argno, off, access_size, type); if (err) return err; @@ -6831,8 +6878,8 @@ static int check_stack_range_initialized( char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n", - regno, tn_buf); + verbose(env, "%s variable offset stack access prohibited for !root, var_off=%s\n", + reg_arg_name(env, argno), tn_buf); return -EACCES; } /* Only initialized buffer on stack is allowed to be accessed @@ -6875,7 +6922,7 @@ static int check_stack_range_initialized( } } meta->access_size = access_size; - meta->regno = regno; + meta->regno = reg_from_argno(argno); return 0; } @@ -6915,17 +6962,17 @@ static int check_stack_range_initialized( if (*stype == STACK_POISON) { if (allow_poison) goto mark; - verbose(env, "reading from stack R%d off %d+%d size %d, slot poisoned by dead code elimination\n", - regno, min_off, i - min_off, access_size); + verbose(env, "reading from stack %s off %d+%d size %d, slot poisoned by dead code elimination\n", + reg_arg_name(env, argno), min_off, i - min_off, access_size); } else if (tnum_is_const(reg->var_off)) { - verbose(env, "invalid read from stack R%d off %d+%d size %d\n", - regno, min_off, i - min_off, access_size); + verbose(env, "invalid read from stack %s off %d+%d size %d\n", + reg_arg_name(env, argno), min_off, i - min_off, access_size); } else { char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, 
"invalid read from stack R%d var_off %s+%d size %d\n", - regno, tn_buf, i - min_off, access_size); + verbose(env, "invalid read from stack %s var_off %s+%d size %d\n", + reg_arg_name(env, argno), tn_buf, i - min_off, access_size); } return -EACCES; mark: @@ -6934,7 +6981,7 @@ mark: return 0; } -static int check_helper_mem_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int regno, +static int check_helper_mem_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int access_size, enum bpf_access_type access_type, bool zero_size_allowed, struct bpf_call_arg_meta *meta) @@ -6945,37 +6992,37 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, struct bpf_reg_ switch (base_type(reg->type)) { case PTR_TO_PACKET: case PTR_TO_PACKET_META: - return check_packet_access(env, reg, regno, 0, access_size, + return check_packet_access(env, reg, argno, 0, access_size, zero_size_allowed); case PTR_TO_MAP_KEY: if (access_type == BPF_WRITE) { - verbose(env, "R%d cannot write into %s\n", regno, - reg_type_str(env, reg->type)); + verbose(env, "%s cannot write into %s\n", + reg_arg_name(env, argno), reg_type_str(env, reg->type)); return -EACCES; } - return check_mem_region_access(env, reg, regno, 0, access_size, + return check_mem_region_access(env, reg, argno, 0, access_size, reg->map_ptr->key_size, false); case PTR_TO_MAP_VALUE: if (check_map_access_type(env, reg, 0, access_size, access_type)) return -EACCES; - return check_map_access(env, reg, regno, 0, access_size, + return check_map_access(env, reg, argno, 0, access_size, zero_size_allowed, ACCESS_HELPER); case PTR_TO_MEM: if (type_is_rdonly_mem(reg->type)) { if (access_type == BPF_WRITE) { - verbose(env, "R%d cannot write into %s\n", regno, - reg_type_str(env, reg->type)); + verbose(env, "%s cannot write into %s\n", + reg_arg_name(env, argno), reg_type_str(env, reg->type)); return -EACCES; } } - return check_mem_region_access(env, reg, regno, 0, + return 
check_mem_region_access(env, reg, argno, 0, access_size, reg->mem_size, zero_size_allowed); case PTR_TO_BUF: if (type_is_rdonly_mem(reg->type)) { if (access_type == BPF_WRITE) { - verbose(env, "R%d cannot write into %s\n", regno, - reg_type_str(env, reg->type)); + verbose(env, "%s cannot write into %s\n", + reg_arg_name(env, argno), reg_type_str(env, reg->type)); return -EACCES; } @@ -6983,21 +7030,21 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, struct bpf_reg_ } else { max_access = &env->prog->aux->max_rdwr_access; } - return check_buffer_access(env, reg, regno, 0, + return check_buffer_access(env, reg, argno, 0, access_size, zero_size_allowed, max_access); case PTR_TO_STACK: return check_stack_range_initialized( env, reg, - regno, 0, access_size, + argno, 0, access_size, zero_size_allowed, access_type, meta); case PTR_TO_BTF_ID: - return check_ptr_to_btf_access(env, regs, reg, regno, 0, + return check_ptr_to_btf_access(env, regs, reg, argno, 0, access_size, BPF_READ, -1); case PTR_TO_CTX: /* Only permit reading or writing syscall context using helper calls. 
*/ if (is_var_ctx_off_allowed(env->prog)) { - int err = check_mem_region_access(env, reg, regno, 0, access_size, U16_MAX, + int err = check_mem_region_access(env, reg, argno, 0, access_size, U16_MAX, zero_size_allowed); if (err) return err; @@ -7012,7 +7059,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, struct bpf_reg_ bpf_register_is_null(reg)) return 0; - verbose(env, "R%d type=%s ", regno, + verbose(env, "%s type=%s ", reg_arg_name(env, argno), reg_type_str(env, reg->type)); verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK)); return -EACCES; @@ -7026,8 +7073,8 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, struct bpf_reg_ */ static int check_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *mem_reg, - struct bpf_reg_state *size_reg, u32 mem_regno, - u32 size_regno, enum bpf_access_type access_type, + struct bpf_reg_state *size_reg, argno_t mem_argno, + argno_t size_argno, enum bpf_access_type access_type, bool zero_size_allowed, struct bpf_call_arg_meta *meta) { @@ -7052,31 +7099,31 @@ static int check_mem_size_reg(struct bpf_verifier_env *env, meta = NULL; if (size_reg->smin_value < 0) { - verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", - size_regno); + verbose(env, "%s min value is negative, either use unsigned or 'var &= const'\n", + reg_arg_name(env, size_argno)); return -EACCES; } if (size_reg->umin_value == 0 && !zero_size_allowed) { - verbose(env, "R%d invalid zero-sized read: u64=[%lld,%lld]\n", - size_regno, size_reg->umin_value, size_reg->umax_value); + verbose(env, "%s invalid zero-sized read: u64=[%lld,%lld]\n", + reg_arg_name(env, size_argno), size_reg->umin_value, size_reg->umax_value); return -EACCES; } if (size_reg->umax_value >= BPF_MAX_VAR_SIZ) { - verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", - size_regno); + verbose(env, "%s unbounded memory access, use 'var &= const' or 'if (var < const)'\n", + 
reg_arg_name(env, size_argno)); return -EACCES; } - err = check_helper_mem_access(env, mem_reg, mem_regno, size_reg->umax_value, + err = check_helper_mem_access(env, mem_reg, mem_argno, size_reg->umax_value, access_type, zero_size_allowed, meta); if (!err) - err = mark_chain_precision(env, size_regno); + err = mark_chain_precision(env, reg_from_argno(size_argno)); return err; } static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - u32 regno, u32 mem_size) + argno_t argno, u32 mem_size) { bool may_be_null = type_may_be_null(reg->type); struct bpf_reg_state saved_reg; @@ -7096,8 +7143,8 @@ static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg int size = base_type(reg->type) == PTR_TO_STACK ? -(int)mem_size : mem_size; - err = check_helper_mem_access(env, reg, regno, size, BPF_READ, true, NULL); - err = err ?: check_helper_mem_access(env, reg, regno, size, BPF_WRITE, true, NULL); + err = check_helper_mem_access(env, reg, argno, size, BPF_READ, true, NULL); + err = err ?: check_helper_mem_access(env, reg, argno, size, BPF_WRITE, true, NULL); if (may_be_null) *reg = saved_reg; @@ -7106,7 +7153,7 @@ static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg } static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *mem_reg, - struct bpf_reg_state *size_reg, u32 mem_regno, u32 size_regno) + struct bpf_reg_state *size_reg, argno_t mem_argno, argno_t size_argno) { bool may_be_null = type_may_be_null(mem_reg->type); struct bpf_reg_state saved_reg; @@ -7120,8 +7167,8 @@ static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg mark_ptr_not_null_reg(mem_reg); } - err = check_mem_size_reg(env, mem_reg, size_reg, mem_regno, size_regno, BPF_READ, true, &meta); - err = err ?: check_mem_size_reg(env, mem_reg, size_reg, mem_regno, size_regno, BPF_WRITE, true, &meta); + err = check_mem_size_reg(env, mem_reg, size_reg, mem_argno, size_argno, BPF_READ, 
true, &meta); + err = err ?: check_mem_size_reg(env, mem_reg, size_reg, mem_argno, size_argno, BPF_WRITE, true, &meta); if (may_be_null) *mem_reg = saved_reg; @@ -7157,7 +7204,7 @@ enum { * env->cur_state->active_locks remembers which map value element or allocated * object got locked and clears it after bpf_spin_unlock. */ -static int process_spin_lock(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int regno, int flags) +static int process_spin_lock(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int flags) { bool is_lock = flags & PROCESS_SPIN_LOCK, is_res_lock = flags & PROCESS_RES_LOCK; const char *lock_str = is_res_lock ? "bpf_res_spin" : "bpf_spin"; @@ -7173,8 +7220,8 @@ static int process_spin_lock(struct bpf_verifier_env *env, struct bpf_reg_state if (!is_const) { verbose(env, - "R%d doesn't have constant offset. %s_lock has to be at the constant offset\n", - regno, lock_str); + "%s doesn't have constant offset. %s_lock has to be at the constant offset\n", + reg_arg_name(env, argno), lock_str); return -EINVAL; } if (reg->type == PTR_TO_MAP_VALUE) { @@ -7273,7 +7320,7 @@ static int process_spin_lock(struct bpf_verifier_env *env, struct bpf_reg_state } /* Check if @regno is a pointer to a specific field in a map value */ -static int check_map_field_pointer(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, +static int check_map_field_pointer(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, enum btf_field_type field_type, struct bpf_map_desc *map_desc) { @@ -7285,8 +7332,8 @@ static int check_map_field_pointer(struct bpf_verifier_env *env, struct bpf_reg_ if (!is_const) { verbose(env, - "R%d doesn't have constant offset. %s has to be at the constant offset\n", - regno, struct_name); + "%s doesn't have constant offset. 
%s has to be at the constant offset\n", + reg_arg_name(env, argno), struct_name); return -EINVAL; } if (!map->btf) { @@ -7326,26 +7373,26 @@ static int check_map_field_pointer(struct bpf_verifier_env *env, struct bpf_reg_ return 0; } -static int process_timer_func(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int regno, +static int process_timer_func(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, struct bpf_map_desc *map) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) { verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n"); return -EOPNOTSUPP; } - return check_map_field_pointer(env, reg, regno, BPF_TIMER, map); + return check_map_field_pointer(env, reg, argno, BPF_TIMER, map); } -static int process_timer_helper(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int regno, +static int process_timer_helper(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, struct bpf_call_arg_meta *meta) { - return process_timer_func(env, reg, regno, &meta->map); + return process_timer_func(env, reg, argno, &meta->map); } -static int process_timer_kfunc(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int regno, +static int process_timer_kfunc(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, struct bpf_kfunc_call_arg_meta *meta) { - return process_timer_func(env, reg, regno, &meta->map); + return process_timer_func(env, reg, argno, &meta->map); } static int process_kptr_func(struct bpf_verifier_env *env, int regno, @@ -7410,15 +7457,15 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, * use case. The second level is tracked using the upper bit of bpf_dynptr->size * and checked dynamically during runtime. 
*/ -static int process_dynptr_func(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int regno, int insn_idx, +static int process_dynptr_func(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int insn_idx, enum bpf_arg_type arg_type, int clone_ref_obj_id) { int err; if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) { verbose(env, - "arg#%d expected pointer to stack or const struct bpf_dynptr\n", - regno - 1); + "%s expected pointer to stack or const struct bpf_dynptr\n", + reg_arg_name(env, argno)); return -EINVAL; } @@ -7446,7 +7493,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, struct bpf_reg_stat /* we write BPF_DW bits (8 bytes) at a time */ for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) { - err = check_mem_access(env, insn_idx, reg, regno, + err = check_mem_access(env, insn_idx, reg, argno, i, BPF_DW, BPF_WRITE, -1, false, false); if (err) return err; @@ -7461,17 +7508,17 @@ static int process_dynptr_func(struct bpf_verifier_env *env, struct bpf_reg_stat } if (!is_dynptr_reg_valid_init(env, reg)) { - verbose(env, - "Expected an initialized dynptr as arg #%d\n", - regno - 1); + verbose(env, "Expected an initialized dynptr as %s\n", + reg_arg_name(env, argno)); return -EINVAL; } /* Fold modifiers (in this case, OBJ_RELEASE) when checking expected type */ if (!is_dynptr_type_expected(env, reg, arg_type & ~OBJ_RELEASE)) { verbose(env, - "Expected a dynptr of type %s as arg #%d\n", - dynptr_type_str(arg_to_dynptr_type(arg_type)), regno - 1); + "Expected a dynptr of type %s as %s\n", + dynptr_type_str(arg_to_dynptr_type(arg_type)), + reg_arg_name(env, argno)); return -EINVAL; } @@ -7516,14 +7563,16 @@ static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx, return btf_param_match_suffix(meta->btf, arg, "__iter"); } -static int process_iter_arg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int regno, int insn_idx, +static int process_iter_arg(struct bpf_verifier_env *env, 
struct bpf_reg_state *reg, argno_t argno, int insn_idx, struct bpf_kfunc_call_arg_meta *meta) { const struct btf_type *t; + u32 arg_idx = arg_from_argno(argno) - 1; int spi, err, i, nr_slots, btf_id; if (reg->type != PTR_TO_STACK) { - verbose(env, "arg#%d expected pointer to an iterator on stack\n", regno - 1); + verbose(env, "%s expected pointer to an iterator on stack\n", + reg_arg_name(env, argno)); return -EINVAL; } @@ -7533,9 +7582,10 @@ static int process_iter_arg(struct bpf_verifier_env *env, struct bpf_reg_state * * to any kfunc, if arg has "__iter" suffix, we need to be a bit more * conservative here. */ - btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, regno - 1); + btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, arg_idx); if (btf_id < 0) { - verbose(env, "expected valid iter pointer as arg #%d\n", regno - 1); + verbose(env, "expected valid iter pointer as %s\n", + reg_arg_name(env, argno)); return -EINVAL; } t = btf_type_by_id(meta->btf, btf_id); @@ -7544,13 +7594,13 @@ static int process_iter_arg(struct bpf_verifier_env *env, struct bpf_reg_state * if (is_iter_new_kfunc(meta)) { /* bpf_iter__new() expects pointer to uninit iter state */ if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) { - verbose(env, "expected uninitialized iter_%s as arg #%d\n", - iter_type_str(meta->btf, btf_id), regno - 1); + verbose(env, "expected uninitialized iter_%s as %s\n", + iter_type_str(meta->btf, btf_id), reg_arg_name(env, argno)); return -EINVAL; } for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) { - err = check_mem_access(env, insn_idx, reg, regno, + err = check_mem_access(env, insn_idx, reg, argno, i, BPF_DW, BPF_WRITE, -1, false, false); if (err) return err; @@ -7568,8 +7618,8 @@ static int process_iter_arg(struct bpf_verifier_env *env, struct bpf_reg_state * case 0: break; case -EINVAL: - verbose(env, "expected an initialized iter_%s as arg #%d\n", - iter_type_str(meta->btf, btf_id), regno - 1); + verbose(env, "expected an initialized iter_%s as 
%s\n", + iter_type_str(meta->btf, btf_id), reg_arg_name(env, argno)); return err; case -EPROTO: verbose(env, "expected an RCU CS when using %s\n", meta->func_name); @@ -7989,7 +8039,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_DYNPTR] = &dynptr_types, }; -static int check_reg_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, +static int check_reg_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, enum bpf_arg_type arg_type, const u32 *arg_btf_id, struct bpf_call_arg_meta *meta) @@ -8024,7 +8074,7 @@ static int check_reg_type(struct bpf_verifier_env *env, struct bpf_reg_state *re type &= ~DYNPTR_TYPE_FLAG_MASK; /* Local kptr types are allowed as the source argument of bpf_kptr_xchg */ - if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && regno == BPF_REG_2) { + if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && reg_from_argno(argno) == BPF_REG_2) { type &= ~MEM_ALLOC; type &= ~MEM_PERCPU; } @@ -8038,7 +8088,7 @@ static int check_reg_type(struct bpf_verifier_env *env, struct bpf_reg_state *re goto found; } - verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type)); + verbose(env, "%s type=%s expected=", reg_arg_name(env, argno), reg_type_str(env, reg->type)); for (j = 0; j + 1 < i; j++) verbose(env, "%s, ", reg_type_str(env, compatible->types[j])); verbose(env, "%s\n", reg_type_str(env, compatible->types[j])); @@ -8051,9 +8101,9 @@ found: if (compatible == &mem_types) { if (!(arg_type & MEM_RDONLY)) { verbose(env, - "%s() may write into memory pointed by R%d type=%s\n", + "%s() may write into memory pointed by %s type=%s\n", func_id_name(meta->func_id), - regno, reg_type_str(env, reg->type)); + reg_arg_name(env, argno), reg_type_str(env, reg->type)); return -EACCES; } return 0; @@ -8076,7 +8126,8 @@ found: if (type_may_be_null(reg->type) && (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) { - verbose(env, 
"Possibly NULL pointer passed to helper arg%d\n", regno); + verbose(env, "Possibly NULL pointer passed to helper %s\n", + reg_arg_name(env, argno)); return -EACCES; } @@ -8089,25 +8140,26 @@ found: } if (meta->func_id == BPF_FUNC_kptr_xchg) { - if (map_kptr_match_type(env, meta->kptr_field, reg, regno)) + if (map_kptr_match_type(env, meta->kptr_field, reg, reg_from_argno(argno))) return -EACCES; } else { if (arg_btf_id == BPF_PTR_POISON) { verbose(env, "verifier internal error:"); - verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n", - regno); + verbose(env, "%s has non-overwritten BPF_PTR_POISON type\n", + reg_arg_name(env, argno)); return -EACCES; } - err = __check_ptr_off_reg(env, reg, regno, true); + err = __check_ptr_off_reg(env, reg, argno, true); if (err) return err; if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->var_off.value, btf_vmlinux, *arg_btf_id, strict_type_match)) { - verbose(env, "R%d is of type %s but %s is expected\n", - regno, btf_type_name(reg->btf, reg->btf_id), + verbose(env, "%s is of type %s but %s is expected\n", + reg_arg_name(env, argno), + btf_type_name(reg->btf, reg->btf_id), btf_type_name(btf_vmlinux, *arg_btf_id)); return -EACCES; } @@ -8124,8 +8176,11 @@ found: return -EFAULT; } /* Check if local kptr in src arg matches kptr in dst arg */ - if (meta->func_id == BPF_FUNC_kptr_xchg && regno == BPF_REG_2) { - if (map_kptr_match_type(env, meta->kptr_field, reg, regno)) + if (meta->func_id == BPF_FUNC_kptr_xchg) { + int regno = reg_from_argno(argno); + + if (regno == BPF_REG_2 && + map_kptr_match_type(env, meta->kptr_field, reg, regno)) return -EACCES; } break; @@ -8159,7 +8214,7 @@ reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields) } static int check_func_arg_reg_off(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno, + const struct bpf_reg_state *reg, argno_t argno, enum bpf_arg_type arg_type) { u32 type = reg->type; @@ -8185,8 +8240,8 @@ static int 
check_func_arg_reg_off(struct bpf_verifier_env *env, * to give the user a better error message. */ if (!tnum_is_const(reg->var_off) || reg->var_off.value != 0) { - verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n", - regno); + verbose(env, "%s must have zero offset when passed to release func or trusted arg to kfunc\n", + reg_arg_name(env, argno)); return -EINVAL; } } @@ -8222,7 +8277,7 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env, * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we * still need to do checks instead of returning. */ - return __check_ptr_off_reg(env, reg, regno, true); + return __check_ptr_off_reg(env, reg, argno, true); case PTR_TO_CTX: /* * Allow fixed and variable offsets for syscall context, but @@ -8234,7 +8289,7 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env, return 0; fallthrough; default: - return __check_ptr_off_reg(env, reg, regno, false); + return __check_ptr_off_reg(env, reg, argno, false); } } @@ -8304,8 +8359,8 @@ static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env, return state->stack[spi].spilled_ptr.dynptr.type; } -static int check_reg_const_str(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, u32 regno) +static int check_arg_const_str(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, argno_t argno) { struct bpf_map *map = reg->map_ptr; int err; @@ -8317,17 +8372,18 @@ static int check_reg_const_str(struct bpf_verifier_env *env, return -EINVAL; if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) { - verbose(env, "R%d points to insn_array map which cannot be used as const string\n", regno); + verbose(env, "%s points to insn_array map which cannot be used as const string\n", + reg_arg_name(env, argno)); return -EACCES; } if (!bpf_map_is_rdonly(map)) { - verbose(env, "R%d does not point to a readonly map'\n", regno); + verbose(env, "%s does not point to a readonly map'\n", reg_arg_name(env, argno)); return 
-EACCES; } if (!tnum_is_const(reg->var_off)) { - verbose(env, "R%d is not a constant address'\n", regno); + verbose(env, "%s is not a constant address'\n", reg_arg_name(env, argno)); return -EACCES; } @@ -8336,7 +8392,7 @@ static int check_reg_const_str(struct bpf_verifier_env *env, return -EACCES; } - err = check_map_access(env, reg, regno, 0, + err = check_map_access(env, reg, argno, 0, map->value_size - reg->var_off.value, false, ACCESS_HELPER); if (err) @@ -8472,11 +8528,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK) arg_btf_id = fn->arg_btf_id[arg]; - err = check_reg_type(env, reg, regno, arg_type, arg_btf_id, meta); + err = check_reg_type(env, reg, argno_from_reg(regno), arg_type, arg_btf_id, meta); if (err) return err; - err = check_func_arg_reg_off(env, reg, regno, arg_type); + err = check_func_arg_reg_off(env, reg, argno_from_reg(regno), arg_type); if (err) return err; @@ -8565,7 +8621,7 @@ skip_type_check: return -EFAULT; } key_size = meta->map.ptr->key_size; - err = check_helper_mem_access(env, reg, regno, key_size, BPF_READ, false, NULL); + err = check_helper_mem_access(env, reg, argno_from_reg(regno), key_size, BPF_READ, false, NULL); if (err) return err; if (can_elide_value_nullness(meta->map.ptr->map_type)) { @@ -8592,7 +8648,7 @@ skip_type_check: return -EFAULT; } meta->raw_mode = arg_type & MEM_UNINIT; - err = check_helper_mem_access(env, reg, regno, meta->map.ptr->value_size, + err = check_helper_mem_access(env, reg, argno_from_reg(regno), meta->map.ptr->value_size, arg_type & MEM_WRITE ? 
BPF_WRITE : BPF_READ, false, meta); break; @@ -8610,11 +8666,11 @@ skip_type_check: return -EACCES; } if (meta->func_id == BPF_FUNC_spin_lock) { - err = process_spin_lock(env, reg, regno, PROCESS_SPIN_LOCK); + err = process_spin_lock(env, reg, argno_from_reg(regno), PROCESS_SPIN_LOCK); if (err) return err; } else if (meta->func_id == BPF_FUNC_spin_unlock) { - err = process_spin_lock(env, reg, regno, 0); + err = process_spin_lock(env, reg, argno_from_reg(regno), 0); if (err) return err; } else { @@ -8623,7 +8679,7 @@ skip_type_check: } break; case ARG_PTR_TO_TIMER: - err = process_timer_helper(env, reg, regno, meta); + err = process_timer_helper(env, reg, argno_from_reg(regno), meta); if (err) return err; break; @@ -8636,7 +8692,7 @@ skip_type_check: */ meta->raw_mode = arg_type & MEM_UNINIT; if (arg_type & MEM_FIXED_SIZE) { - err = check_helper_mem_access(env, reg, regno, fn->arg_size[arg], + err = check_helper_mem_access(env, reg, argno_from_reg(regno), fn->arg_size[arg], arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ, false, meta); if (err) @@ -8646,19 +8702,21 @@ skip_type_check: } break; case ARG_CONST_SIZE: - err = check_mem_size_reg(env, reg_state(env, regno - 1), reg, regno - 1, regno, + err = check_mem_size_reg(env, reg_state(env, regno - 1), reg, argno_from_reg(regno - 1), + argno_from_reg(regno), fn->arg_type[arg - 1] & MEM_WRITE ? BPF_WRITE : BPF_READ, false, meta); break; case ARG_CONST_SIZE_OR_ZERO: - err = check_mem_size_reg(env, reg_state(env, regno - 1), reg, regno - 1, regno, + err = check_mem_size_reg(env, reg_state(env, regno - 1), reg, argno_from_reg(regno - 1), + argno_from_reg(regno), fn->arg_type[arg - 1] & MEM_WRITE ? 
BPF_WRITE : BPF_READ, true, meta); break; case ARG_PTR_TO_DYNPTR: - err = process_dynptr_func(env, reg, regno, insn_idx, arg_type, 0); + err = process_dynptr_func(env, reg, argno_from_reg(regno), insn_idx, arg_type, 0); if (err) return err; break; @@ -8675,7 +8733,7 @@ skip_type_check: break; case ARG_PTR_TO_CONST_STR: { - err = check_reg_const_str(env, reg, regno); + err = check_arg_const_str(env, reg, argno_from_reg(regno)); if (err) return err; break; @@ -9264,13 +9322,14 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, * verifier sees. */ for (i = 0; i < sub->arg_cnt; i++) { + argno_t argno = argno_from_arg(i + 1); u32 regno = i + 1; struct bpf_reg_state *reg = &regs[regno]; struct bpf_subprog_arg_info *arg = &sub->args[i]; if (arg->arg_type == ARG_ANYTHING) { if (reg->type != SCALAR_VALUE) { - bpf_log(log, "R%d is not a scalar\n", regno); + bpf_log(log, "%s is not a scalar\n", reg_arg_name(env, argno)); return -EINVAL; } } else if (arg->arg_type & PTR_UNTRUSTED) { @@ -9280,24 +9339,26 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, * invalid memory access. */ } else if (arg->arg_type == ARG_PTR_TO_CTX) { - ret = check_func_arg_reg_off(env, reg, regno, ARG_PTR_TO_CTX); + ret = check_func_arg_reg_off(env, reg, argno, ARG_PTR_TO_CTX); if (ret < 0) return ret; /* If function expects ctx type in BTF check that caller * is passing PTR_TO_CTX.
*/ if (reg->type != PTR_TO_CTX) { - bpf_log(log, "arg#%d expects pointer to ctx\n", i); + bpf_log(log, "%s expects pointer to ctx\n", + reg_arg_name(env, argno)); return -EINVAL; } } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) { - ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE); + ret = check_func_arg_reg_off(env, reg, argno, ARG_DONTCARE); if (ret < 0) return ret; - if (check_mem_reg(env, reg, regno, arg->mem_size)) + if (check_mem_reg(env, reg, argno, arg->mem_size)) return -EINVAL; if (!(arg->arg_type & PTR_MAYBE_NULL) && (reg->type & PTR_MAYBE_NULL)) { - bpf_log(log, "arg#%d is expected to be non-NULL\n", i); + bpf_log(log, "%s is expected to be non-NULL\n", + reg_arg_name(env, argno)); return -EINVAL; } } else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) { @@ -9309,15 +9370,16 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, * run-time debug nightmare. */ if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) { - bpf_log(log, "R%d is not a pointer to arena or scalar.\n", regno); + bpf_log(log, "%s is not a pointer to arena or scalar.\n", + reg_arg_name(env, argno)); return -EINVAL; } } else if (arg->arg_type == ARG_PTR_TO_DYNPTR) { - ret = check_func_arg_reg_off(env, reg, regno, ARG_PTR_TO_DYNPTR); + ret = check_func_arg_reg_off(env, reg, argno, ARG_PTR_TO_DYNPTR); if (ret) return ret; - ret = process_dynptr_func(env, reg, regno, -1, arg->arg_type, 0); + ret = process_dynptr_func(env, reg, argno, -1, arg->arg_type, 0); if (ret) return ret; } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) { @@ -9328,12 +9390,13 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, continue; memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */ - err = check_reg_type(env, reg, regno, arg->arg_type, &arg->btf_id, &meta); - err = err ?: check_func_arg_reg_off(env, reg, regno, arg->arg_type); + err = check_reg_type(env, reg, argno, arg->arg_type, &arg->btf_id, &meta); + err = 
err ?: check_func_arg_reg_off(env, reg, argno, arg->arg_type); if (err) return err; } else { - verifier_bug(env, "unrecognized arg#%d type %d", i, arg->arg_type); + verifier_bug(env, "unrecognized %s type %d", + reg_arg_name(env, argno), arg->arg_type); return -EFAULT; } } @@ -10292,7 +10355,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn * is inferred from register state. */ for (i = 0; i < meta.access_size; i++) { - err = check_mem_access(env, insn_idx, regs + meta.regno, meta.regno, i, BPF_B, + err = check_mem_access(env, insn_idx, regs + meta.regno, argno_from_reg(meta.regno), i, BPF_B, BPF_WRITE, -1, false, false); if (err) return err; @@ -11301,7 +11364,7 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta, const struct btf_type *t, const struct btf_type *ref_t, const char *ref_tname, const struct btf_param *args, - int arg, int nargs, struct bpf_reg_state *reg) + int arg, int nargs, argno_t argno, struct bpf_reg_state *reg) { u32 regno = arg + 1; struct bpf_reg_state *regs = cur_regs(env); @@ -11376,8 +11439,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) { if (!btf_type_is_struct(ref_t)) { - verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n", - meta->func_name, arg, btf_type_str(ref_t), ref_tname); + verbose(env, "kernel function %s %s pointer type %s %s is not supported\n", + meta->func_name, reg_arg_name(env, argno), + btf_type_str(ref_t), ref_tname); return -EINVAL; } return KF_ARG_PTR_TO_BTF_ID; @@ -11393,8 +11457,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, */ if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) && (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) { - verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n", - arg, btf_type_str(ref_t), ref_tname, arg_mem_size ? 
"void, " : ""); + verbose(env, "%s pointer type %s %s must point to %sscalar, or struct with scalar\n", + reg_arg_name(env, argno), + btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : ""); return -EINVAL; } return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM; @@ -11405,7 +11470,7 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, const struct btf_type *ref_t, const char *ref_tname, u32 ref_id, struct bpf_kfunc_call_arg_meta *meta, - int arg) + int arg, argno_t argno) { const struct btf_type *reg_ref_t; bool strict_type_match = false; @@ -11463,15 +11528,16 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, */ taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname); if (!taking_projection && !struct_same) { - verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", - meta->func_name, arg, btf_type_str(ref_t), ref_tname, arg + 1, + verbose(env, "kernel function %s %s expected pointer to %s %s but %s has a pointer to %s %s\n", + meta->func_name, reg_arg_name(env, argno), + btf_type_str(ref_t), ref_tname, reg_arg_name(env, argno), btf_type_str(reg_ref_t), reg_ref_tname); return -EINVAL; } return 0; } -static int process_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int regno, +static int process_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, struct bpf_kfunc_call_arg_meta *meta) { int err, kfunc_class = IRQ_NATIVE_KFUNC; @@ -11494,11 +11560,13 @@ static int process_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state * if (irq_save) { if (!is_irq_flag_reg_valid_uninit(env, reg)) { - verbose(env, "expected uninitialized irq flag as arg#%d\n", regno - 1); + verbose(env, "expected uninitialized irq flag as %s\n", + reg_arg_name(env, argno)); return -EINVAL; } - err = check_mem_access(env, env->insn_idx, reg, regno, 0, BPF_DW, BPF_WRITE, -1, false, false); + err = check_mem_access(env, 
env->insn_idx, reg, argno, 0, BPF_DW, + BPF_WRITE, -1, false, false); if (err) return err; @@ -11508,7 +11576,8 @@ static int process_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state * } else { err = is_irq_flag_reg_valid_init(env, reg); if (err) { - verbose(env, "expected an initialized irq flag as arg#%d\n", regno - 1); + verbose(env, "expected an initialized irq flag as %s\n", + reg_arg_name(env, argno)); return err; } @@ -11799,7 +11868,7 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env, static int __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, u32 regno, + struct bpf_reg_state *reg, argno_t argno, struct bpf_kfunc_call_arg_meta *meta, enum btf_field_type head_field_type, struct btf_field **head_field) @@ -11820,8 +11889,8 @@ __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env, head_type_name = btf_field_type_name(head_field_type); if (!tnum_is_const(reg->var_off)) { verbose(env, - "R%d doesn't have constant offset. %s has to be at the constant offset\n", - regno, head_type_name); + "%s doesn't have constant offset. 
%s has to be at the constant offset\n", + reg_arg_name(env, argno), head_type_name); return -EINVAL; } @@ -11849,24 +11918,24 @@ __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env, } static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, u32 regno, + struct bpf_reg_state *reg, argno_t argno, struct bpf_kfunc_call_arg_meta *meta) { - return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_LIST_HEAD, + return __process_kf_arg_ptr_to_graph_root(env, reg, argno, meta, BPF_LIST_HEAD, &meta->arg_list_head.field); } static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, u32 regno, + struct bpf_reg_state *reg, argno_t argno, struct bpf_kfunc_call_arg_meta *meta) { - return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_RB_ROOT, + return __process_kf_arg_ptr_to_graph_root(env, reg, argno, meta, BPF_RB_ROOT, &meta->arg_rbtree_root.field); } static int __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, u32 regno, + struct bpf_reg_state *reg, argno_t argno, struct bpf_kfunc_call_arg_meta *meta, enum btf_field_type head_field_type, enum btf_field_type node_field_type, @@ -11888,8 +11957,8 @@ __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env, node_type_name = btf_field_type_name(node_field_type); if (!tnum_is_const(reg->var_off)) { verbose(env, - "R%d doesn't have constant offset. %s has to be at the constant offset\n", - regno, node_type_name); + "%s doesn't have constant offset. 
%s has to be at the constant offset\n", + reg_arg_name(env, argno), node_type_name); return -EINVAL; } @@ -11930,19 +11999,19 @@ __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env, } static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, u32 regno, + struct bpf_reg_state *reg, argno_t argno, struct bpf_kfunc_call_arg_meta *meta) { - return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta, + return __process_kf_arg_ptr_to_graph_node(env, reg, argno, meta, BPF_LIST_HEAD, BPF_LIST_NODE, &meta->arg_list_head.field); } static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, u32 regno, + struct bpf_reg_state *reg, argno_t argno, struct bpf_kfunc_call_arg_meta *meta) { - return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta, + return __process_kf_arg_ptr_to_graph_node(env, reg, argno, meta, BPF_RB_ROOT, BPF_RB_NODE, &meta->arg_rbtree_root.field); } @@ -11994,6 +12063,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[i + 1]; const struct btf_type *t, *ref_t, *resolve_ret; enum bpf_arg_type arg_type = ARG_DONTCARE; + argno_t argno = argno_from_arg(i + 1); u32 regno = i + 1, ref_id, type_size; bool is_ret_buf_sz = false; int kf_arg_type; @@ -12016,7 +12086,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (btf_type_is_scalar(t)) { if (reg->type != SCALAR_VALUE) { - verbose(env, "R%d is not a scalar\n", regno); + verbose(env, "%s is not a scalar\n", reg_arg_name(env, argno)); return -EINVAL; } @@ -12026,7 +12096,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return -EFAULT; } if (!tnum_is_const(reg->var_off)) { - verbose(env, "R%d must be a known constant\n", regno); + verbose(env, "%s must be a known constant\n", + reg_arg_name(env, argno)); return -EINVAL; } ret =
mark_chain_precision(env, regno); @@ -12048,7 +12119,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } if (!tnum_is_const(reg->var_off)) { - verbose(env, "R%d is not a const\n", regno); + verbose(env, "%s is not a const\n", + reg_arg_name(env, argno)); return -EINVAL; } @@ -12061,20 +12133,22 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } if (!btf_type_is_ptr(t)) { - verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t)); + verbose(env, "Unrecognized %s type %s\n", + reg_arg_name(env, argno), btf_type_str(t)); return -EINVAL; } if ((bpf_register_is_null(reg) || type_may_be_null(reg->type)) && !is_kfunc_arg_nullable(meta->btf, &args[i])) { - verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i); + verbose(env, "Possibly NULL pointer passed to trusted %s\n", + reg_arg_name(env, argno)); return -EACCES; } if (reg->ref_obj_id) { if (is_kfunc_release(meta) && meta->ref_obj_id) { - verifier_bug(env, "more than one arg with ref_obj_id R%d %u %u", - regno, reg->ref_obj_id, + verifier_bug(env, "more than one arg with ref_obj_id %s %u %u", + reg_arg_name(env, argno), reg->ref_obj_id, meta->ref_obj_id); return -EFAULT; } @@ -12086,7 +12160,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); ref_tname = btf_name_by_offset(btf, ref_t->name_off); - kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs, reg); + kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs, argno, reg); if (kf_arg_type < 0) return kf_arg_type; @@ -12095,7 +12169,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ continue; case KF_ARG_PTR_TO_MAP: if (!reg->map_ptr) { - verbose(env, "pointer in R%d isn't map pointer\n", regno); + verbose(env, "pointer in %s isn't map pointer\n", + reg_arg_name(env, argno)); return -EINVAL; } 
if (meta->map.ptr && (reg->map_ptr->record->wq_off >= 0 || @@ -12133,11 +12208,13 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_BTF_ID: if (!is_trusted_reg(reg)) { if (!is_kfunc_rcu(meta)) { - verbose(env, "R%d must be referenced or trusted\n", regno); + verbose(env, "%s must be referenced or trusted\n", + reg_arg_name(env, argno)); return -EINVAL; } if (!is_rcu_reg(reg)) { - verbose(env, "R%d must be a rcu pointer\n", regno); + verbose(env, "%s must be a rcu pointer\n", + reg_arg_name(env, argno)); return -EINVAL; } } @@ -12169,15 +12246,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (is_kfunc_release(meta) && reg->ref_obj_id) arg_type |= OBJ_RELEASE; - ret = check_func_arg_reg_off(env, reg, regno, arg_type); + ret = check_func_arg_reg_off(env, reg, argno, arg_type); if (ret < 0) return ret; switch (kf_arg_type) { case KF_ARG_PTR_TO_CTX: if (reg->type != PTR_TO_CTX) { - verbose(env, "arg#%d expected pointer to ctx, but got %s\n", - i, reg_type_str(env, reg->type)); + verbose(env, "%s expected pointer to ctx, but got %s\n", + reg_arg_name(env, argno), reg_type_str(env, reg->type)); return -EINVAL; } @@ -12191,16 +12268,19 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_ALLOC_BTF_ID: if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) { if (!is_bpf_obj_drop_kfunc(meta->func_id)) { - verbose(env, "arg#%d expected for bpf_obj_drop()\n", i); + verbose(env, "%s expected for bpf_obj_drop()\n", + reg_arg_name(env, argno)); return -EINVAL; } } else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) { if (!is_bpf_percpu_obj_drop_kfunc(meta->func_id)) { - verbose(env, "arg#%d expected for bpf_percpu_obj_drop()\n", i); + verbose(env, "%s expected for bpf_percpu_obj_drop()\n", + reg_arg_name(env, argno)); return -EINVAL; } } else { - verbose(env, "arg#%d expected pointer to allocated object\n", i); + verbose(env, "%s 
expected pointer to allocated object\n", + reg_arg_name(env, argno)); return -EINVAL; } if (!reg->ref_obj_id) { @@ -12248,7 +12328,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } } - ret = process_dynptr_func(env, reg, regno, insn_idx, dynptr_arg_type, clone_ref_obj_id); + ret = process_dynptr_func(env, reg, argno, insn_idx, + dynptr_arg_type, clone_ref_obj_id); if (ret < 0) return ret; @@ -12273,55 +12354,59 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return -EINVAL; } } - ret = process_iter_arg(env, reg, regno, insn_idx, meta); + ret = process_iter_arg(env, reg, argno, insn_idx, meta); if (ret < 0) return ret; break; case KF_ARG_PTR_TO_LIST_HEAD: if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { - verbose(env, "arg#%d expected pointer to map value or allocated object\n", i); + verbose(env, "%s expected pointer to map value or allocated object\n", + reg_arg_name(env, argno)); return -EINVAL; } if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) { verbose(env, "allocated object must be referenced\n"); return -EINVAL; } - ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta); + ret = process_kf_arg_ptr_to_list_head(env, reg, argno, meta); if (ret < 0) return ret; break; case KF_ARG_PTR_TO_RB_ROOT: if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { - verbose(env, "arg#%d expected pointer to map value or allocated object\n", i); + verbose(env, "%s expected pointer to map value or allocated object\n", + reg_arg_name(env, argno)); return -EINVAL; } if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) { verbose(env, "allocated object must be referenced\n"); return -EINVAL; } - ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta); + ret = process_kf_arg_ptr_to_rbtree_root(env, reg, argno, meta); if (ret < 0) return ret; break; case KF_ARG_PTR_TO_LIST_NODE: if (reg->type != (PTR_TO_BTF_ID | 
MEM_ALLOC)) { - verbose(env, "arg#%d expected pointer to allocated object\n", i); + verbose(env, "%s expected pointer to allocated object\n", + reg_arg_name(env, argno)); return -EINVAL; } if (!reg->ref_obj_id) { verbose(env, "allocated object must be referenced\n"); return -EINVAL; } - ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta); + ret = process_kf_arg_ptr_to_list_node(env, reg, argno, meta); if (ret < 0) return ret; break; case KF_ARG_PTR_TO_RB_NODE: if (is_bpf_rbtree_add_kfunc(meta->func_id)) { if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { - verbose(env, "arg#%d expected pointer to allocated object\n", i); + verbose(env, "%s expected pointer to allocated object\n", + reg_arg_name(env, argno)); return -EINVAL; } if (!reg->ref_obj_id) { @@ -12339,7 +12424,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } } - ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta); + ret = process_kf_arg_ptr_to_rbtree_node(env, reg, argno, meta); if (ret < 0) return ret; break; @@ -12354,24 +12439,26 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if ((base_type(reg->type) != PTR_TO_BTF_ID || (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) && !reg2btf_ids[base_type(reg->type)]) { - verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type)); + verbose(env, "%s is %s ", reg_arg_name(env, argno), + reg_type_str(env, reg->type)); verbose(env, "expected %s or socket\n", reg_type_str(env, base_type(reg->type) | (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS))); return -EINVAL; } - ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i); + ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i, argno); if (ret < 0) return ret; break; case KF_ARG_PTR_TO_MEM: resolve_ret = btf_resolve_size(btf, ref_t, &type_size); if (IS_ERR(resolve_ret)) { - verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n", - 
i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret)); + verbose(env, "%s reference type('%s %s') size cannot be determined: %ld\n", + reg_arg_name(env, argno), btf_type_str(ref_t), + ref_tname, PTR_ERR(resolve_ret)); return -EINVAL; } - ret = check_mem_reg(env, reg, regno, type_size); + ret = check_mem_reg(env, reg, argno, type_size); if (ret < 0) return ret; break; @@ -12381,11 +12468,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ const struct btf_param *buff_arg = &args[i]; struct bpf_reg_state *size_reg = &regs[regno + 1]; const struct btf_param *size_arg = &args[i + 1]; + argno_t next_argno = argno_from_arg(i + 2); if (!bpf_register_is_null(buff_reg) || !is_kfunc_arg_nullable(meta->btf, buff_arg)) { - ret = check_kfunc_mem_size_reg(env, buff_reg, size_reg, regno, regno + 1); + ret = check_kfunc_mem_size_reg(env, buff_reg, size_reg, + argno, next_argno); if (ret < 0) { - verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1); + verbose(env, "%s and ", reg_arg_name(env, argno)); + verbose(env, "%s memory, len pair leads to invalid memory access\n", + reg_arg_name(env, next_argno)); return ret; } } @@ -12396,7 +12487,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return -EFAULT; } if (!tnum_is_const(size_reg->var_off)) { - verbose(env, "R%d must be a known constant\n", regno + 1); + verbose(env, "%s must be a known constant\n", + reg_arg_name(env, next_argno)); return -EINVAL; } meta->arg_constant.found = true; @@ -12409,14 +12501,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } case KF_ARG_PTR_TO_CALLBACK: if (reg->type != PTR_TO_FUNC) { - verbose(env, "arg%d expected pointer to func\n", i); + verbose(env, "%s expected pointer to func\n", reg_arg_name(env, argno)); return -EINVAL; } meta->subprogno = reg->subprogno; break; case KF_ARG_PTR_TO_REFCOUNTED_KPTR: if (!type_is_ptr_alloc_obj(reg->type)) { -
verbose(env, "arg#%d is neither owning or non-owning ref\n", i); + verbose(env, "%s is neither owning or non-owning ref\n", + reg_arg_name(env, argno)); return -EINVAL; } if (!type_is_non_owning_ref(reg->type)) @@ -12429,7 +12522,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } if (rec->refcount_off < 0) { - verbose(env, "arg#%d doesn't point to a type with bpf_refcount field\n", i); + verbose(env, "%s doesn't point to a type with bpf_refcount field\n", + reg_arg_name(env, argno)); return -EINVAL; } @@ -12438,46 +12532,51 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ break; case KF_ARG_PTR_TO_CONST_STR: if (reg->type != PTR_TO_MAP_VALUE) { - verbose(env, "arg#%d doesn't point to a const string\n", i); + verbose(env, "%s doesn't point to a const string\n", + reg_arg_name(env, argno)); return -EINVAL; } - ret = check_reg_const_str(env, reg, regno); + ret = check_arg_const_str(env, reg, argno); if (ret) return ret; break; case KF_ARG_PTR_TO_WORKQUEUE: if (reg->type != PTR_TO_MAP_VALUE) { - verbose(env, "arg#%d doesn't point to a map value\n", i); + verbose(env, "%s doesn't point to a map value\n", + reg_arg_name(env, argno)); return -EINVAL; } - ret = check_map_field_pointer(env, reg, regno, BPF_WORKQUEUE, &meta->map); + ret = check_map_field_pointer(env, reg, argno, BPF_WORKQUEUE, &meta->map); if (ret < 0) return ret; break; case KF_ARG_PTR_TO_TIMER: if (reg->type != PTR_TO_MAP_VALUE) { - verbose(env, "arg#%d doesn't point to a map value\n", i); + verbose(env, "%s doesn't point to a map value\n", + reg_arg_name(env, argno)); return -EINVAL; } - ret = process_timer_kfunc(env, reg, regno, meta); + ret = process_timer_kfunc(env, reg, argno, meta); if (ret < 0) return ret; break; case KF_ARG_PTR_TO_TASK_WORK: if (reg->type != PTR_TO_MAP_VALUE) { - verbose(env, "arg#%d doesn't point to a map value\n", i); + verbose(env, "%s doesn't point to a map value\n", + reg_arg_name(env, argno)); return 
-EINVAL; } - ret = check_map_field_pointer(env, reg, regno, BPF_TASK_WORK, &meta->map); + ret = check_map_field_pointer(env, reg, argno, BPF_TASK_WORK, &meta->map); if (ret < 0) return ret; break; case KF_ARG_PTR_TO_IRQ_FLAG: if (reg->type != PTR_TO_STACK) { - verbose(env, "arg#%d doesn't point to an irq flag on stack\n", i); + verbose(env, "%s doesn't point to an irq flag on stack\n", + reg_arg_name(env, argno)); return -EINVAL; } - ret = process_irq_flag(env, reg, regno, meta); + ret = process_irq_flag(env, reg, argno, meta); if (ret < 0) return ret; break; @@ -12486,7 +12585,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ int flags = PROCESS_RES_LOCK; if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { - verbose(env, "arg#%d doesn't point to map value or allocated object\n", i); + verbose(env, "%s doesn't point to map value or allocated object\n", + reg_arg_name(env, argno)); return -EINVAL; } @@ -12498,7 +12598,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] || meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) flags |= PROCESS_LOCK_IRQ; - ret = process_spin_lock(env, reg, regno, flags); + ret = process_spin_lock(env, reg, argno, flags); if (ret < 0) return ret; break; @@ -13649,7 +13749,7 @@ static int sanitize_check_bounds(struct bpf_verifier_env *env, return -EACCES; break; case PTR_TO_MAP_VALUE: - if (check_map_access(env, dst_reg, dst, 0, 1, false, ACCESS_HELPER)) { + if (check_map_access(env, dst_reg, argno_from_reg(dst), 0, 1, false, ACCESS_HELPER)) { verbose(env, "R%d pointer arithmetic of map value goes out of range, " "prohibited for !root\n", dst); return -EACCES; @@ -16831,7 +16931,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char prog->aux->attach_func_proto->type, NULL); if (ret_type && ret_type == reg_type && 
reg->ref_obj_id) - return __check_ptr_off_reg(env, reg, regno, false); + return __check_ptr_off_reg(env, reg, argno_from_reg(regno), false); } /* eBPF calling convention is such that R0 is used @@ -17535,7 +17635,7 @@ static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state) dst_reg_type = cur_regs(env)[insn->dst_reg].type; - err = check_mem_access(env, env->insn_idx, cur_regs(env) + insn->dst_reg, insn->dst_reg, + err = check_mem_access(env, env->insn_idx, cur_regs(env) + insn->dst_reg, argno_from_reg(insn->dst_reg), insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1, false, false); if (err) @@ -18714,7 +18814,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) mark_reg_unknown(env, regs, i); } else { verifier_bug(env, "unhandled arg#%d type %d", - i - BPF_REG_1, arg->arg_type); + i - BPF_REG_1 + 1, arg->arg_type); ret = -EFAULT; goto out; } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index 215878ea04de..b33dba4b126e 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -11,18 +11,18 @@ struct { const char *prog_name; const char *err_msg; } test_bpf_nf_fail_tests[] = { - { "alloc_release", "kernel function bpf_ct_release args#0 expected pointer to STRUCT nf_conn but" }, - { "insert_insert", "kernel function bpf_ct_insert_entry args#0 expected pointer to STRUCT nf_conn___init but" }, - { "lookup_insert", "kernel function bpf_ct_insert_entry args#0 expected pointer to STRUCT nf_conn___init but" }, - { "set_timeout_after_insert", "kernel function bpf_ct_set_timeout args#0 expected pointer to STRUCT nf_conn___init but" }, - { "set_status_after_insert", "kernel function bpf_ct_set_status args#0 expected pointer to STRUCT nf_conn___init but" }, - { "change_timeout_after_alloc", "kernel function bpf_ct_change_timeout args#0 expected pointer to STRUCT nf_conn but" }, - { "change_status_after_alloc", 
"kernel function bpf_ct_change_status args#0 expected pointer to STRUCT nf_conn but" }, + { "alloc_release", "kernel function bpf_ct_release R1 expected pointer to STRUCT nf_conn but" }, + { "insert_insert", "kernel function bpf_ct_insert_entry R1 expected pointer to STRUCT nf_conn___init but" }, + { "lookup_insert", "kernel function bpf_ct_insert_entry R1 expected pointer to STRUCT nf_conn___init but" }, + { "set_timeout_after_insert", "kernel function bpf_ct_set_timeout R1 expected pointer to STRUCT nf_conn___init but" }, + { "set_status_after_insert", "kernel function bpf_ct_set_status R1 expected pointer to STRUCT nf_conn___init but" }, + { "change_timeout_after_alloc", "kernel function bpf_ct_change_timeout R1 expected pointer to STRUCT nf_conn but" }, + { "change_status_after_alloc", "kernel function bpf_ct_change_status R1 expected pointer to STRUCT nf_conn but" }, { "write_not_allowlisted_field", "no write support to nf_conn at off" }, - { "lookup_null_bpf_tuple", "Possibly NULL pointer passed to trusted arg1" }, - { "lookup_null_bpf_opts", "Possibly NULL pointer passed to trusted arg3" }, - { "xdp_lookup_null_bpf_tuple", "Possibly NULL pointer passed to trusted arg1" }, - { "xdp_lookup_null_bpf_opts", "Possibly NULL pointer passed to trusted arg3" }, + { "lookup_null_bpf_tuple", "Possibly NULL pointer passed to trusted R2" }, + { "lookup_null_bpf_opts", "Possibly NULL pointer passed to trusted R4" }, + { "xdp_lookup_null_bpf_tuple", "Possibly NULL pointer passed to trusted R2" }, + { "xdp_lookup_null_bpf_opts", "Possibly NULL pointer passed to trusted R4" }, }; enum { diff --git a/tools/testing/selftests/bpf/prog_tests/cb_refs.c b/tools/testing/selftests/bpf/prog_tests/cb_refs.c index c40df623a8f7..6300b67a3a84 100644 --- a/tools/testing/selftests/bpf/prog_tests/cb_refs.c +++ b/tools/testing/selftests/bpf/prog_tests/cb_refs.c @@ -12,7 +12,7 @@ struct { const char *err_msg; } cb_refs_tests[] = { { "underflow_prog", "must point to scalar, or struct with 
scalar" }, - { "leak_prog", "Possibly NULL pointer passed to helper arg2" }, + { "leak_prog", "Possibly NULL pointer passed to helper R2" }, { "nested_cb", "Unreleased reference id=4 alloc_insn=2" }, /* alloc_insn=2{4,5} */ { "non_cb_transfer_ref", "Unreleased reference id=4 alloc_insn=1" }, /* alloc_insn=1{1,2} */ }; diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index 62f3fb79f5d1..3df07680f9e0 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -68,7 +68,7 @@ static struct kfunc_test_params kfunc_tests[] = { TC_FAIL(kfunc_call_test_get_mem_fail_oob, 0, "min value is outside of the allowed memory range"), TC_FAIL(kfunc_call_test_get_mem_fail_not_const, 0, "is not a const"), TC_FAIL(kfunc_call_test_mem_acquire_fail, 0, "acquire kernel function does not return PTR_TO_BTF_ID"), - TC_FAIL(kfunc_call_test_pointer_arg_type_mismatch, 0, "arg#0 expected pointer to ctx, but got scalar"), + TC_FAIL(kfunc_call_test_pointer_arg_type_mismatch, 0, "R1 expected pointer to ctx, but got scalar"), /* success cases */ TC_TEST(kfunc_call_test1, 12), diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index 6f25b5f39a79..dbff099860ba 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -81,8 +81,8 @@ static struct { { "direct_write_node", "direct access to bpf_list_node is disallowed" }, { "use_after_unlock_push_front", "invalid mem access 'scalar'" }, { "use_after_unlock_push_back", "invalid mem access 'scalar'" }, - { "double_push_front", "arg#1 expected pointer to allocated object" }, - { "double_push_back", "arg#1 expected pointer to allocated object" }, + { "double_push_front", "R2 expected pointer to allocated object" }, + { "double_push_back", "R2 expected pointer to allocated object" }, { 
"no_node_value_type", "bpf_list_node not found at offset=0" }, { "incorrect_value_type", "operation on bpf_list_head expects arg#1 bpf_list_node at offset=48 in struct foo, " diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c index 9fe9c4a4e8f6..a875ba8e5007 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -29,7 +29,7 @@ static struct __cgrps_kfunc_map_value *insert_lookup_cgrp(struct cgroup *cgrp) } SEC("tp_btf/cgroup_mkdir") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") int BPF_PROG(cgrp_kfunc_acquire_untrusted, struct cgroup *cgrp, const char *path) { struct cgroup *acquired; @@ -48,7 +48,7 @@ int BPF_PROG(cgrp_kfunc_acquire_untrusted, struct cgroup *cgrp, const char *path } SEC("tp_btf/cgroup_mkdir") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") int BPF_PROG(cgrp_kfunc_acquire_no_null_check, struct cgroup *cgrp, const char *path) { struct cgroup *acquired; @@ -64,7 +64,7 @@ int BPF_PROG(cgrp_kfunc_acquire_no_null_check, struct cgroup *cgrp, const char * } SEC("tp_btf/cgroup_mkdir") -__failure __msg("arg#0 pointer type STRUCT cgroup must point") +__failure __msg("R1 pointer type STRUCT cgroup must point") int BPF_PROG(cgrp_kfunc_acquire_fp, struct cgroup *cgrp, const char *path) { struct cgroup *acquired, *stack_cgrp = (struct cgroup *)&path; @@ -106,7 +106,7 @@ int BPF_PROG(cgrp_kfunc_acquire_trusted_walked, struct cgroup *cgrp, const char } SEC("tp_btf/cgroup_mkdir") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") int BPF_PROG(cgrp_kfunc_acquire_null, struct cgroup *cgrp, const char *path) { struct cgroup *acquired; @@ -175,7 +175,7 @@ int 
BPF_PROG(cgrp_kfunc_rcu_get_release, struct cgroup *cgrp, const char *path) } SEC("tp_btf/cgroup_mkdir") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path) { struct __cgrps_kfunc_map_value *v; @@ -191,7 +191,7 @@ int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path } SEC("tp_btf/cgroup_mkdir") -__failure __msg("arg#0 pointer type STRUCT cgroup must point") +__failure __msg("R1 pointer type STRUCT cgroup must point") int BPF_PROG(cgrp_kfunc_release_fp, struct cgroup *cgrp, const char *path) { struct cgroup *acquired = (struct cgroup *)&path; @@ -203,7 +203,7 @@ int BPF_PROG(cgrp_kfunc_release_fp, struct cgroup *cgrp, const char *path) } SEC("tp_btf/cgroup_mkdir") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") int BPF_PROG(cgrp_kfunc_release_null, struct cgroup *cgrp, const char *path) { struct __cgrps_kfunc_map_value local, *v; diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c index 61c32e91e8c3..4c45346fe6f7 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_failure.c +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -45,7 +45,7 @@ int BPF_PROG(test_alloc_no_release, struct task_struct *task, u64 clone_flags) } SEC("tp_btf/task_newtask") -__failure __msg("NULL pointer passed to trusted arg0") +__failure __msg("NULL pointer passed to trusted R1") int BPF_PROG(test_alloc_double_release, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *cpumask; @@ -73,7 +73,7 @@ int BPF_PROG(test_acquire_wrong_cpumask, struct task_struct *task, u64 clone_fla } SEC("tp_btf/task_newtask") -__failure __msg("bpf_cpumask_set_cpu args#1 expected pointer to STRUCT bpf_cpumask") +__failure __msg("bpf_cpumask_set_cpu R2 expected 
pointer to STRUCT bpf_cpumask") int BPF_PROG(test_mutate_cpumask, struct task_struct *task, u64 clone_flags) { /* Can't set the CPU of a non-struct bpf_cpumask. */ @@ -107,7 +107,7 @@ int BPF_PROG(test_insert_remove_no_release, struct task_struct *task, u64 clone_ } SEC("tp_btf/task_newtask") -__failure __msg("NULL pointer passed to trusted arg0") +__failure __msg("NULL pointer passed to trusted R1") int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags) { /* NULL passed to kfunc. */ @@ -151,7 +151,7 @@ int BPF_PROG(test_global_mask_out_of_rcu, struct task_struct *task, u64 clone_fl } SEC("tp_btf/task_newtask") -__failure __msg("NULL pointer passed to trusted arg1") +__failure __msg("NULL pointer passed to trusted R2") int BPF_PROG(test_global_mask_no_null_check, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *local, *prev; @@ -179,7 +179,7 @@ int BPF_PROG(test_global_mask_no_null_check, struct task_struct *task, u64 clone } SEC("tp_btf/task_newtask") -__failure __msg("Possibly NULL pointer passed to helper arg2") +__failure __msg("Possibly NULL pointer passed to helper R2") int BPF_PROG(test_global_mask_rcu_no_null_check, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *prev, *curr; diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index b62773ce5219..dbd97add5a5a 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -149,7 +149,7 @@ int ringbuf_release_uninit_dynptr(void *ctx) /* A dynptr can't be used after it has been invalidated */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #2") +__failure __msg("Expected an initialized dynptr as R3") int use_after_invalid(void *ctx) { struct bpf_dynptr ptr; @@ -448,7 +448,7 @@ int invalid_helper2(void *ctx) /* A bpf_dynptr is invalidated if it's been written into */ SEC("?raw_tp") -__failure __msg("Expected an initialized 
dynptr as arg #0") +__failure __msg("Expected an initialized dynptr as R1") int invalid_write1(void *ctx) { struct bpf_dynptr ptr; @@ -1642,7 +1642,7 @@ int invalid_slice_rdwr_rdonly(struct __sk_buff *skb) /* bpf_dynptr_adjust can only be called on initialized dynptrs */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #0") +__failure __msg("Expected an initialized dynptr as R1") int dynptr_adjust_invalid(void *ctx) { struct bpf_dynptr ptr = {}; @@ -1655,7 +1655,7 @@ int dynptr_adjust_invalid(void *ctx) /* bpf_dynptr_is_null can only be called on initialized dynptrs */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #0") +__failure __msg("Expected an initialized dynptr as R1") int dynptr_is_null_invalid(void *ctx) { struct bpf_dynptr ptr = {}; @@ -1668,7 +1668,7 @@ int dynptr_is_null_invalid(void *ctx) /* bpf_dynptr_is_rdonly can only be called on initialized dynptrs */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #0") +__failure __msg("Expected an initialized dynptr as R1") int dynptr_is_rdonly_invalid(void *ctx) { struct bpf_dynptr ptr = {}; @@ -1681,7 +1681,7 @@ int dynptr_is_rdonly_invalid(void *ctx) /* bpf_dynptr_size can only be called on initialized dynptrs */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #0") +__failure __msg("Expected an initialized dynptr as R1") int dynptr_size_invalid(void *ctx) { struct bpf_dynptr ptr = {}; @@ -1694,7 +1694,7 @@ int dynptr_size_invalid(void *ctx) /* Only initialized dynptrs can be cloned */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #0") +__failure __msg("Expected an initialized dynptr as R1") int clone_invalid1(void *ctx) { struct bpf_dynptr ptr1 = {}; @@ -1728,7 +1728,7 @@ int clone_invalid2(struct xdp_md *xdp) /* Invalidating a dynptr should invalidate its clones */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #2") +__failure __msg("Expected an initialized dynptr as 
R3") int clone_invalidate1(void *ctx) { struct bpf_dynptr clone; @@ -1749,7 +1749,7 @@ int clone_invalidate1(void *ctx) /* Invalidating a dynptr should invalidate its parent */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #2") +__failure __msg("Expected an initialized dynptr as R3") int clone_invalidate2(void *ctx) { struct bpf_dynptr ptr; @@ -1770,7 +1770,7 @@ int clone_invalidate2(void *ctx) /* Invalidating a dynptr should invalidate its siblings */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #2") +__failure __msg("Expected an initialized dynptr as R3") int clone_invalidate3(void *ctx) { struct bpf_dynptr ptr; @@ -1981,7 +1981,7 @@ __noinline long global_call_bpf_dynptr(const struct bpf_dynptr *dynptr) } SEC("?raw_tp") -__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr") +__failure __msg("R1 expected pointer to stack or const struct bpf_dynptr") int test_dynptr_reg_type(void *ctx) { struct task_struct *current = NULL; diff --git a/tools/testing/selftests/bpf/progs/file_reader_fail.c b/tools/testing/selftests/bpf/progs/file_reader_fail.c index 32fe28ed2439..0739620dea8a 100644 --- a/tools/testing/selftests/bpf/progs/file_reader_fail.c +++ b/tools/testing/selftests/bpf/progs/file_reader_fail.c @@ -30,7 +30,7 @@ int on_nanosleep_unreleased_ref(void *ctx) SEC("xdp") __failure -__msg("Expected a dynptr of type file as arg #0") +__msg("Expected a dynptr of type file as R1") int xdp_wrong_dynptr_type(struct xdp_md *xdp) { struct bpf_dynptr dynptr; @@ -42,7 +42,7 @@ int xdp_wrong_dynptr_type(struct xdp_md *xdp) SEC("xdp") __failure -__msg("Expected an initialized dynptr as arg #0") +__msg("Expected an initialized dynptr as R1") int xdp_no_dynptr_type(struct xdp_md *xdp) { struct bpf_dynptr dynptr; diff --git a/tools/testing/selftests/bpf/progs/irq.c b/tools/testing/selftests/bpf/progs/irq.c index e11e82d98904..a4a007866a33 100644 --- a/tools/testing/selftests/bpf/progs/irq.c +++ 
b/tools/testing/selftests/bpf/progs/irq.c @@ -15,7 +15,7 @@ struct bpf_res_spin_lock lockA __hidden SEC(".data.A"); struct bpf_res_spin_lock lockB __hidden SEC(".data.B"); SEC("?tc") -__failure __msg("arg#0 doesn't point to an irq flag on stack") +__failure __msg("R1 doesn't point to an irq flag on stack") int irq_save_bad_arg(struct __sk_buff *ctx) { bpf_local_irq_save(&global_flags); @@ -23,7 +23,7 @@ int irq_save_bad_arg(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("arg#0 doesn't point to an irq flag on stack") +__failure __msg("R1 doesn't point to an irq flag on stack") int irq_restore_bad_arg(struct __sk_buff *ctx) { bpf_local_irq_restore(&global_flags); diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 86b74e3579d9..0fa70b133d93 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1605,7 +1605,7 @@ int iter_subprog_check_stacksafe(const void *ctx) struct bpf_iter_num global_it; SEC("raw_tp") -__failure __msg("arg#0 expected pointer to an iterator on stack") +__failure __msg("R1 expected pointer to an iterator on stack") int iter_new_bad_arg(const void *ctx) { bpf_iter_num_new(&global_it, 0, 1); @@ -1613,7 +1613,7 @@ int iter_new_bad_arg(const void *ctx) } SEC("raw_tp") -__failure __msg("arg#0 expected pointer to an iterator on stack") +__failure __msg("R1 expected pointer to an iterator on stack") int iter_next_bad_arg(const void *ctx) { bpf_iter_num_next(&global_it); @@ -1621,7 +1621,7 @@ int iter_next_bad_arg(const void *ctx) } SEC("raw_tp") -__failure __msg("arg#0 expected pointer to an iterator on stack") +__failure __msg("R1 expected pointer to an iterator on stack") int iter_destroy_bad_arg(const void *ctx) { bpf_iter_num_destroy(&global_it); diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c index d273b46dfc7c..af8f9ec1ea98 100644 --- 
a/tools/testing/selftests/bpf/progs/iters_state_safety.c +++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c @@ -73,7 +73,7 @@ int create_and_forget_to_destroy_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #0") +__failure __msg("expected an initialized iter_num as R1") int destroy_without_creating_fail(void *ctx) { /* init with zeros to stop verifier complaining about uninit stack */ @@ -91,7 +91,7 @@ int destroy_without_creating_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #0") +__failure __msg("expected an initialized iter_num as R1") int compromise_iter_w_direct_write_fail(void *ctx) { struct bpf_iter_num iter; @@ -143,7 +143,7 @@ int compromise_iter_w_direct_write_and_skip_destroy_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #0") +__failure __msg("expected an initialized iter_num as R1") int compromise_iter_w_helper_write_fail(void *ctx) { struct bpf_iter_num iter; @@ -230,7 +230,7 @@ int valid_stack_reuse(void *ctx) } SEC("?raw_tp") -__failure __msg("expected uninitialized iter_num as arg #0") +__failure __msg("expected uninitialized iter_num as R1") int double_create_fail(void *ctx) { struct bpf_iter_num iter; @@ -258,7 +258,7 @@ int double_create_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #0") +__failure __msg("expected an initialized iter_num as R1") int double_destroy_fail(void *ctx) { struct bpf_iter_num iter; @@ -284,7 +284,7 @@ int double_destroy_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #0") +__failure __msg("expected an initialized iter_num as R1") int next_without_new_fail(void *ctx) { struct bpf_iter_num iter; @@ -305,7 +305,7 @@ int next_without_new_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #0") +__failure __msg("expected an initialized iter_num as R1") int 
next_after_destroy_fail(void *ctx) { struct bpf_iter_num iter; diff --git a/tools/testing/selftests/bpf/progs/iters_testmod.c b/tools/testing/selftests/bpf/progs/iters_testmod.c index 5379e9960ffd..76012dbbdb41 100644 --- a/tools/testing/selftests/bpf/progs/iters_testmod.c +++ b/tools/testing/selftests/bpf/progs/iters_testmod.c @@ -29,7 +29,7 @@ out: } SEC("raw_tp/sys_enter") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") int iter_next_trusted_or_null(const void *ctx) { struct task_struct *cur_task = bpf_get_current_task_btf(); @@ -67,7 +67,7 @@ out: } SEC("raw_tp/sys_enter") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") int iter_next_rcu_or_null(const void *ctx) { struct task_struct *cur_task = bpf_get_current_task_btf(); diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c index 83791348bed5..9b760dac333e 100644 --- a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c +++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c @@ -79,7 +79,7 @@ int testmod_seq_truncated(const void *ctx) SEC("?raw_tp") __failure -__msg("expected an initialized iter_testmod_seq as arg #1") +__msg("expected an initialized iter_testmod_seq as R2") int testmod_seq_getter_before_bad(const void *ctx) { struct bpf_iter_testmod_seq it; @@ -89,7 +89,7 @@ int testmod_seq_getter_before_bad(const void *ctx) SEC("?raw_tp") __failure -__msg("expected an initialized iter_testmod_seq as arg #1") +__msg("expected an initialized iter_testmod_seq as R2") int testmod_seq_getter_after_bad(const void *ctx) { struct bpf_iter_testmod_seq it; diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c index ee053b24e6ca..8f36e74fd8f9 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c +++ 
b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -364,7 +364,7 @@ int kptr_xchg_ref_state(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("Possibly NULL pointer passed to helper arg2") +__failure __msg("Possibly NULL pointer passed to helper R2") int kptr_xchg_possibly_null(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c index 81813c724fa9..08379c3b6a03 100644 --- a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c +++ b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c @@ -110,7 +110,7 @@ int BPF_PROG(test_array_map_3) } SEC("?fentry.s/bpf_fentry_test1") -__failure __msg("arg#0 expected for bpf_percpu_obj_drop()") +__failure __msg("R1 expected for bpf_percpu_obj_drop()") int BPF_PROG(test_array_map_4) { struct val_t __percpu_kptr *p; @@ -124,7 +124,7 @@ int BPF_PROG(test_array_map_4) } SEC("?fentry.s/bpf_fentry_test1") -__failure __msg("arg#0 expected for bpf_obj_drop()") +__failure __msg("R1 expected for bpf_obj_drop()") int BPF_PROG(test_array_map_5) { struct val_t *p; diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c index 70b7baf9304b..555379952dcc 100644 --- a/tools/testing/selftests/bpf/progs/rbtree_fail.c +++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c @@ -134,7 +134,7 @@ unlock_err: } SEC("?tc") -__failure __msg("arg#1 expected pointer to allocated object") +__failure __msg("R2 expected pointer to allocated object") long rbtree_api_add_to_multiple_trees(void *ctx) { struct node_data *n; @@ -153,7 +153,7 @@ long rbtree_api_add_to_multiple_trees(void *ctx) } SEC("?tc") -__failure __msg("Possibly NULL pointer passed to trusted arg1") +__failure __msg("Possibly NULL pointer passed to trusted R2") long rbtree_api_use_unchecked_remove_retval(void *ctx) { struct bpf_rb_node *res; @@ -281,7 +281,7 @@ long add_with_cb(bool (cb)(struct bpf_rb_node *a, 
const struct bpf_rb_node *b)) } SEC("?tc") -__failure __msg("arg#1 expected pointer to allocated object") +__failure __msg("R2 expected pointer to allocated object") long rbtree_api_add_bad_cb_bad_fn_call_add(void *ctx) { return add_with_cb(less__bad_fn_call_add); diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c index b2808bfcec29..7247a20c0a3b 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -54,7 +54,7 @@ long rbtree_refcounted_node_ref_escapes(void *ctx) } SEC("?tc") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") long refcount_acquire_maybe_null(void *ctx) { struct node_acquire *n, *m; diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c index 8e8249f3521c..21428bb1ee59 100644 --- a/tools/testing/selftests/bpf/progs/stream_fail.c +++ b/tools/testing/selftests/bpf/progs/stream_fail.c @@ -23,7 +23,7 @@ int stream_vprintk_scalar_arg(void *ctx) } SEC("syscall") -__failure __msg("arg#1 doesn't point to a const string") +__failure __msg("R2 doesn't point to a const string") int stream_vprintk_string_arg(void *ctx) { bpf_stream_vprintk(BPF_STDOUT, ctx, NULL, 0); diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index 4c07ea193f72..41047d81ec42 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -28,7 +28,7 @@ static struct __tasks_kfunc_map_value *insert_lookup_task(struct task_struct *ta } SEC("tp_btf/task_newtask") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 
clone_flags) { struct task_struct *acquired; @@ -49,7 +49,7 @@ int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_f } SEC("tp_btf/task_newtask") -__failure __msg("arg#0 pointer type STRUCT task_struct must point") +__failure __msg("R1 pointer type STRUCT task_struct must point") int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags) { struct task_struct *acquired, *stack_task = (struct task_struct *)&clone_flags; @@ -100,7 +100,7 @@ int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe_rcu, struct task_struct *task, } SEC("tp_btf/task_newtask") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags) { struct task_struct *acquired; @@ -149,7 +149,7 @@ int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_fla } SEC("tp_btf/task_newtask") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") int BPF_PROG(task_kfunc_acquire_release_no_null_check, struct task_struct *task, u64 clone_flags) { struct task_struct *acquired; @@ -162,7 +162,7 @@ int BPF_PROG(task_kfunc_acquire_release_no_null_check, struct task_struct *task, } SEC("tp_btf/task_newtask") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_flags) { struct __tasks_kfunc_map_value *v; @@ -178,7 +178,7 @@ int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_f } SEC("tp_btf/task_newtask") -__failure __msg("arg#0 pointer type STRUCT task_struct must point") +__failure __msg("R1 pointer type STRUCT task_struct must point") int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags) { struct task_struct *acquired = (struct 
task_struct *)&clone_flags; @@ -190,7 +190,7 @@ int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags) } SEC("tp_btf/task_newtask") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags) { struct __tasks_kfunc_map_value local, *v; @@ -234,7 +234,7 @@ int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_ } SEC("tp_btf/task_newtask") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags) { struct task_struct *acquired; @@ -248,7 +248,7 @@ int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 cl } SEC("tp_btf/task_newtask") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") int BPF_PROG(task_kfunc_from_vpid_no_null_check, struct task_struct *task, u64 clone_flags) { struct task_struct *acquired; diff --git a/tools/testing/selftests/bpf/progs/task_work_fail.c b/tools/testing/selftests/bpf/progs/task_work_fail.c index 82e4b8913333..3186e7b4b24e 100644 --- a/tools/testing/selftests/bpf/progs/task_work_fail.c +++ b/tools/testing/selftests/bpf/progs/task_work_fail.c @@ -58,7 +58,7 @@ int mismatch_map(struct pt_regs *args) } SEC("perf_event") -__failure __msg("arg#1 doesn't point to a map value") +__failure __msg("R2 doesn't point to a map value") int no_map_task_work(struct pt_regs *args) { struct task_struct *task; @@ -70,7 +70,7 @@ int no_map_task_work(struct pt_regs *args) } SEC("perf_event") -__failure __msg("Possibly NULL pointer passed to trusted arg1") +__failure __msg("Possibly NULL pointer passed to trusted R2") int task_work_null(struct pt_regs *args) { struct task_struct *task; @@ -81,7 +81,7 @@ int 
task_work_null(struct pt_regs *args) } SEC("perf_event") -__failure __msg("Possibly NULL pointer passed to trusted arg2") +__failure __msg("Possibly NULL pointer passed to trusted R3") int map_null(struct pt_regs *args) { struct elem *work; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c index 2c156cd166af..332cda89caba 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c @@ -152,7 +152,7 @@ int change_status_after_alloc(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("Possibly NULL pointer passed to trusted arg1") +__failure __msg("Possibly NULL pointer passed to trusted R2") int lookup_null_bpf_tuple(struct __sk_buff *ctx) { struct bpf_ct_opts___local opts = {}; @@ -165,7 +165,7 @@ int lookup_null_bpf_tuple(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("Possibly NULL pointer passed to trusted arg3") +__failure __msg("Possibly NULL pointer passed to trusted R4") int lookup_null_bpf_opts(struct __sk_buff *ctx) { struct bpf_sock_tuple tup = {}; @@ -178,7 +178,7 @@ int lookup_null_bpf_opts(struct __sk_buff *ctx) } SEC("?xdp") -__failure __msg("Possibly NULL pointer passed to trusted arg1") +__failure __msg("Possibly NULL pointer passed to trusted R2") int xdp_lookup_null_bpf_tuple(struct xdp_md *ctx) { struct bpf_ct_opts___local opts = {}; @@ -191,7 +191,7 @@ int xdp_lookup_null_bpf_tuple(struct xdp_md *ctx) } SEC("?xdp") -__failure __msg("Possibly NULL pointer passed to trusted arg3") +__failure __msg("Possibly NULL pointer passed to trusted R4") int xdp_lookup_null_bpf_opts(struct xdp_md *ctx) { struct bpf_sock_tuple tup = {}; diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c index 1c6cfd0888ba..bf48fc43c7ab 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c +++ 
b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c @@ -40,7 +40,7 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size, } SEC("?lsm.s/bpf") -__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr") +__failure __msg("R1 expected pointer to stack or const struct bpf_dynptr") int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) { static struct bpf_dynptr val; diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c index 967081bbcfe1..ca35b92ea095 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c +++ b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c @@ -29,7 +29,7 @@ int kfunc_dynptr_nullable_test2(struct __sk_buff *skb) } SEC("tc") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") int kfunc_dynptr_nullable_test3(struct __sk_buff *skb) { struct bpf_dynptr data; diff --git a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c index 8bcddadfc4da..dd97f2027505 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c +++ b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c @@ -32,7 +32,7 @@ int BPF_PROG(no_destroy, struct bpf_iter_meta *meta, struct cgroup *cgrp) SEC("iter/cgroup") __description("uninitialized iter in ->next()") -__failure __msg("expected an initialized iter_bits as arg #0") +__failure __msg("expected an initialized iter_bits as R1") int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) { struct bpf_iter_bits it = {}; @@ -43,7 +43,7 @@ int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) SEC("iter/cgroup") __description("uninitialized iter in ->destroy()") -__failure __msg("expected an initialized iter_bits as arg #0") +__failure 
__msg("expected an initialized iter_bits as R1") int BPF_PROG(destroy_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) { struct bpf_iter_bits it = {}; diff --git a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c index 910365201f68..139f70bb3595 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c +++ b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c @@ -263,7 +263,7 @@ l0_%=: r0 = 0; \ SEC("lsm.s/bpf") __description("reference tracking: release user key reference without check") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") __naked void user_key_reference_without_check(void) { asm volatile (" \ @@ -282,7 +282,7 @@ __naked void user_key_reference_without_check(void) SEC("lsm.s/bpf") __description("reference tracking: release system key reference without check") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") __naked void system_key_reference_without_check(void) { asm volatile (" \ @@ -300,7 +300,7 @@ __naked void system_key_reference_without_check(void) SEC("lsm.s/bpf") __description("reference tracking: release with NULL key pointer") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") __naked void release_with_null_key_pointer(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c index 4b392c6c8fc4..0990de076844 100644 --- a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c +++ b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c @@ -13,7 +13,7 @@ static char buf[PATH_MAX]; SEC("lsm.s/file_open") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted 
R1") int BPF_PROG(get_task_exe_file_kfunc_null) { struct file *acquired; @@ -28,7 +28,7 @@ int BPF_PROG(get_task_exe_file_kfunc_null) } SEC("lsm.s/inode_getxattr") -__failure __msg("arg#0 pointer type STRUCT task_struct must point to scalar, or struct with scalar") +__failure __msg("R1 pointer type STRUCT task_struct must point to scalar, or struct with scalar") int BPF_PROG(get_task_exe_file_kfunc_fp) { u64 x; @@ -89,7 +89,7 @@ int BPF_PROG(put_file_kfunc_unacquired, struct file *file) } SEC("lsm.s/file_open") -__failure __msg("Possibly NULL pointer passed to trusted arg0") +__failure __msg("Possibly NULL pointer passed to trusted R1") int BPF_PROG(path_d_path_kfunc_null) { /* Can't pass NULL value to bpf_path_d_path() kfunc. */ @@ -128,7 +128,7 @@ int BPF_PROG(path_d_path_kfunc_untrusted_from_current) } SEC("lsm.s/file_open") -__failure __msg("kernel function bpf_path_d_path args#0 expected pointer to STRUCT path but R1 has a pointer to STRUCT file") +__failure __msg("kernel function bpf_path_d_path R1 expected pointer to STRUCT path but R1 has a pointer to STRUCT file") int BPF_PROG(path_d_path_kfunc_type_mismatch, struct file *file) { bpf_path_d_path((struct path *)&file->f_task_work, buf, sizeof(buf)); diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c index 3767f5595bbc..32dc8827e128 100644 --- a/tools/testing/selftests/bpf/progs/wq_failures.c +++ b/tools/testing/selftests/bpf/progs/wq_failures.c @@ -98,7 +98,7 @@ __failure * is a correct bpf_wq pointer. 
*/ __msg(": (85) call bpf_wq_set_callback#") /* anchor message */ -__msg("arg#0 doesn't point to a map value") +__msg("R1 doesn't point to a map value") long test_wrong_wq_pointer(void *ctx) { int key = 0; diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index c3164b9b2be5..0bb4337552c8 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -31,7 +31,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "arg#0 pointer type STRUCT prog_test_fail1 must point to scalar", + .errstr = "R1 pointer type STRUCT prog_test_fail1 must point to scalar", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_fail1", 2 }, }, @@ -46,7 +46,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "max struct nesting depth exceeded\narg#0 pointer type STRUCT prog_test_fail2", + .errstr = "max struct nesting depth exceeded\nR1 pointer type STRUCT prog_test_fail2", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_fail2", 2 }, }, @@ -61,7 +61,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "arg#0 pointer type STRUCT prog_test_fail3 must point to scalar", + .errstr = "R1 pointer type STRUCT prog_test_fail3 must point to scalar", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_fail3", 2 }, }, @@ -76,7 +76,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "arg#0 expected pointer to ctx, but got fp", + .errstr = "R1 expected pointer to ctx, but got fp", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_pass_ctx", 2 }, }, @@ -91,7 +91,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "arg#0 pointer type UNKNOWN must point to scalar", + .errstr = "R1 pointer type UNKNOWN must point to scalar", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_mem_len_fail1", 2 }, }, @@ -109,7 +109,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "Possibly NULL pointer 
passed to trusted arg0", + .errstr = "Possibly NULL pointer passed to trusted R1", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_acquire", 3 }, { "bpf_kfunc_call_test_release", 5 }, @@ -152,7 +152,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "kernel function bpf_kfunc_call_memb1_release args#0 expected pointer", + .errstr = "kernel function bpf_kfunc_call_memb1_release R1 expected pointer", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_memb_acquire", 1 }, { "bpf_kfunc_call_memb1_release", 5 }, -- cgit v1.2.3 From 246ad6e5ee259669692bdb7fb353e8c5d5bba628 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 22 Apr 2026 20:35:06 -0700 Subject: bpf: Introduce bpf register BPF_REG_PARAMS Introduce BPF_REG_PARAMS as a dedicated BPF register for stack argument accesses. It occupies the BPF register number 11 (R11), which is used as the base pointer for the stack argument area, keeping it separate from the R10-based (BPF_REG_FP) program stack. The kernel-internal hidden register BPF_REG_AX previously occupied slot 11 (MAX_BPF_REG). With BPF_REG_PARAMS taking that slot, BPF_REG_AX moves to slot 12 and MAX_BPF_EXT_REG increases accordingly. 
Acked-by: Puranjay Mohan Acked-by: Kumar Kartikeya Dwivedi Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20260423033506.2542005-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 5 +- kernel/bpf/core.c | 4 +- .../testing/selftests/bpf/prog_tests/ctx_rewrite.c | 14 ++--- .../selftests/bpf/progs/verifier_bpf_fastcall.c | 24 ++++---- .../selftests/bpf/progs/verifier_may_goto_1.c | 12 ++-- tools/testing/selftests/bpf/progs/verifier_sdiv.c | 64 +++++++++++----------- 6 files changed, 62 insertions(+), 61 deletions(-) (limited to 'tools') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1ec6d5ba64cc..b77d0b06db6e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -58,8 +58,9 @@ struct ctl_table_header; #define BPF_REG_H BPF_REG_9 /* hlen, callee-saved */ /* Kernel hidden auxiliary/helper register. */ -#define BPF_REG_AX MAX_BPF_REG -#define MAX_BPF_EXT_REG (MAX_BPF_REG + 1) +#define BPF_REG_PARAMS MAX_BPF_REG +#define BPF_REG_AX (MAX_BPF_REG + 1) +#define MAX_BPF_EXT_REG (MAX_BPF_REG + 2) #define MAX_BPF_JIT_REG MAX_BPF_EXT_REG /* unused opcode to mark special call to bpf_tail_call() helper */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8b018ff48875..ae10b9ca018d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1299,8 +1299,8 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, u32 imm_rnd = get_random_u32(); s16 off; - BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); - BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG); + BUILD_BUG_ON(BPF_REG_PARAMS + 2 != MAX_BPF_JIT_REG); + BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); /* Constraints on AX register: * diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c index 5064aeb8fe67..2c3124092b73 100644 --- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c @@ -69,19 +69,19 @@ 
static struct test_case test_cases[] = { #if defined(__x86_64__) || defined(__aarch64__) { N(SCHED_CLS, struct __sk_buff, tstamp), - .read = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);" - "if w11 & 0x4 goto pc+1;" + .read = "r12 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);" + "if w12 & 0x4 goto pc+1;" "goto pc+4;" - "if w11 & 0x3 goto pc+1;" + "if w12 & 0x3 goto pc+1;" "goto pc+2;" "$dst = 0;" "goto pc+1;" "$dst = *(u64 *)($ctx + sk_buff::tstamp);", - .write = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);" - "if w11 & 0x4 goto pc+1;" + .write = "r12 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);" + "if w12 & 0x4 goto pc+1;" "goto pc+2;" - "w11 &= -4;" - "*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r11;" + "w12 &= -4;" + "*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r12;" "*(u64 *)($ctx + sk_buff::tstamp) = $src;", }, #endif diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c index fb4fa465d67c..0d9e167555b5 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c +++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c @@ -630,13 +630,13 @@ __xlated("...") __xlated("4: r0 = &(void __percpu *)(r0)") __xlated("...") /* may_goto expansion starts */ -__xlated("6: r11 = *(u64 *)(r10 -24)") -__xlated("7: if r11 == 0x0 goto pc+6") -__xlated("8: r11 -= 1") -__xlated("9: if r11 != 0x0 goto pc+2") -__xlated("10: r11 = -24") +__xlated("6: r12 = *(u64 *)(r10 -24)") +__xlated("7: if r12 == 0x0 goto pc+6") +__xlated("8: r12 -= 1") +__xlated("9: if r12 != 0x0 goto pc+2") +__xlated("10: r12 = -24") __xlated("11: call unknown") -__xlated("12: *(u64 *)(r10 -24) = r11") +__xlated("12: *(u64 *)(r10 -24) = r12") /* may_goto expansion ends */ __xlated("13: *(u64 *)(r10 -8) = r1") __xlated("14: exit") @@ -668,13 +668,13 @@ __xlated("1: *(u64 *)(r10 -16) =") __xlated("2: r1 = 1") __xlated("3: call bpf_get_smp_processor_id") /* may_goto expansion starts */ -__xlated("4: r11 = *(u64 
*)(r10 -24)") -__xlated("5: if r11 == 0x0 goto pc+6") -__xlated("6: r11 -= 1") -__xlated("7: if r11 != 0x0 goto pc+2") -__xlated("8: r11 = -24") +__xlated("4: r12 = *(u64 *)(r10 -24)") +__xlated("5: if r12 == 0x0 goto pc+6") +__xlated("6: r12 -= 1") +__xlated("7: if r12 != 0x0 goto pc+2") +__xlated("8: r12 = -24") __xlated("9: call unknown") -__xlated("10: *(u64 *)(r10 -24) = r11") +__xlated("10: *(u64 *)(r10 -24) = r12") /* may_goto expansion ends */ __xlated("11: *(u64 *)(r10 -8) = r1") __xlated("12: exit") diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c index 6d1edaef9213..4bdf4256a41e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c +++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c @@ -81,13 +81,13 @@ __arch_s390x __arch_arm64 __xlated("0: *(u64 *)(r10 -16) = 65535") __xlated("1: *(u64 *)(r10 -8) = 0") -__xlated("2: r11 = *(u64 *)(r10 -16)") -__xlated("3: if r11 == 0x0 goto pc+6") -__xlated("4: r11 -= 1") -__xlated("5: if r11 != 0x0 goto pc+2") -__xlated("6: r11 = -16") +__xlated("2: r12 = *(u64 *)(r10 -16)") +__xlated("3: if r12 == 0x0 goto pc+6") +__xlated("4: r12 -= 1") +__xlated("5: if r12 != 0x0 goto pc+2") +__xlated("6: r12 = -16") __xlated("7: call unknown") -__xlated("8: *(u64 *)(r10 -16) = r11") +__xlated("8: *(u64 *)(r10 -16) = r12") __xlated("9: r0 = 1") __xlated("10: r0 = 2") __xlated("11: exit") diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c index fd59d57e8e37..95f3239ce228 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sdiv.c +++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c @@ -778,10 +778,10 @@ __arch_x86_64 __xlated("0: r2 = 0x8000000000000000") __xlated("2: r3 = -1") __xlated("3: r4 = r2") -__xlated("4: r11 = r3") -__xlated("5: r11 += 1") -__xlated("6: if r11 > 0x1 goto pc+4") -__xlated("7: if r11 == 0x0 goto pc+1") +__xlated("4: r12 = r3") 
+__xlated("5: r12 += 1") +__xlated("6: if r12 > 0x1 goto pc+4") +__xlated("7: if r12 == 0x0 goto pc+1") __xlated("8: r2 = 0") __xlated("9: r2 = -r2") __xlated("10: goto pc+1") @@ -812,10 +812,10 @@ __success __retval(-5) __arch_x86_64 __xlated("0: r2 = 5") __xlated("1: r3 = -1") -__xlated("2: r11 = r3") -__xlated("3: r11 += 1") -__xlated("4: if r11 > 0x1 goto pc+4") -__xlated("5: if r11 == 0x0 goto pc+1") +__xlated("2: r12 = r3") +__xlated("3: r12 += 1") +__xlated("4: if r12 > 0x1 goto pc+4") +__xlated("5: if r12 == 0x0 goto pc+1") __xlated("6: r2 = 0") __xlated("7: r2 = -r2") __xlated("8: goto pc+1") @@ -890,10 +890,10 @@ __arch_x86_64 __xlated("0: w2 = -2147483648") __xlated("1: w3 = -1") __xlated("2: w4 = w2") -__xlated("3: r11 = r3") -__xlated("4: w11 += 1") -__xlated("5: if w11 > 0x1 goto pc+4") -__xlated("6: if w11 == 0x0 goto pc+1") +__xlated("3: r12 = r3") +__xlated("4: w12 += 1") +__xlated("5: if w12 > 0x1 goto pc+4") +__xlated("6: if w12 == 0x0 goto pc+1") __xlated("7: w2 = 0") __xlated("8: w2 = -w2") __xlated("9: goto pc+1") @@ -925,10 +925,10 @@ __arch_x86_64 __xlated("0: w2 = -5") __xlated("1: w3 = -1") __xlated("2: w4 = w2") -__xlated("3: r11 = r3") -__xlated("4: w11 += 1") -__xlated("5: if w11 > 0x1 goto pc+4") -__xlated("6: if w11 == 0x0 goto pc+1") +__xlated("3: r12 = r3") +__xlated("4: w12 += 1") +__xlated("5: if w12 > 0x1 goto pc+4") +__xlated("6: if w12 == 0x0 goto pc+1") __xlated("7: w2 = 0") __xlated("8: w2 = -w2") __xlated("9: goto pc+1") @@ -1004,10 +1004,10 @@ __arch_x86_64 __xlated("0: r2 = 0x8000000000000000") __xlated("2: r3 = -1") __xlated("3: r4 = r2") -__xlated("4: r11 = r3") -__xlated("5: r11 += 1") -__xlated("6: if r11 > 0x1 goto pc+3") -__xlated("7: if r11 == 0x1 goto pc+3") +__xlated("4: r12 = r3") +__xlated("5: r12 += 1") +__xlated("6: if r12 > 0x1 goto pc+3") +__xlated("7: if r12 == 0x1 goto pc+3") __xlated("8: w2 = 0") __xlated("9: goto pc+1") __xlated("10: r2 s%= r3") @@ -1034,10 +1034,10 @@ __arch_x86_64 __xlated("0: r2 = 5") 
__xlated("1: r3 = -1") __xlated("2: r4 = r2") -__xlated("3: r11 = r3") -__xlated("4: r11 += 1") -__xlated("5: if r11 > 0x1 goto pc+3") -__xlated("6: if r11 == 0x1 goto pc+3") +__xlated("3: r12 = r3") +__xlated("4: r12 += 1") +__xlated("5: if r12 > 0x1 goto pc+3") +__xlated("6: if r12 == 0x1 goto pc+3") __xlated("7: w2 = 0") __xlated("8: goto pc+1") __xlated("9: r2 s%= r3") @@ -1108,10 +1108,10 @@ __arch_x86_64 __xlated("0: w2 = -2147483648") __xlated("1: w3 = -1") __xlated("2: w4 = w2") -__xlated("3: r11 = r3") -__xlated("4: w11 += 1") -__xlated("5: if w11 > 0x1 goto pc+3") -__xlated("6: if w11 == 0x1 goto pc+4") +__xlated("3: r12 = r3") +__xlated("4: w12 += 1") +__xlated("5: if w12 > 0x1 goto pc+3") +__xlated("6: if w12 == 0x1 goto pc+4") __xlated("7: w2 = 0") __xlated("8: goto pc+1") __xlated("9: w2 s%= w3") @@ -1140,10 +1140,10 @@ __arch_x86_64 __xlated("0: w2 = -5") __xlated("1: w3 = -1") __xlated("2: w4 = w2") -__xlated("3: r11 = r3") -__xlated("4: w11 += 1") -__xlated("5: if w11 > 0x1 goto pc+3") -__xlated("6: if w11 == 0x1 goto pc+4") +__xlated("3: r12 = r3") +__xlated("4: w12 += 1") +__xlated("5: if w12 > 0x1 goto pc+3") +__xlated("6: if w12 == 0x1 goto pc+4") __xlated("7: w2 = 0") __xlated("8: goto pc+1") __xlated("9: w2 s%= w3") -- cgit v1.2.3 From d4a2eeb2ac7813ac9374568c71662c630689cc54 Mon Sep 17 00:00:00 2001 From: "Alexis Lothoré (eBPF Foundation)" Date: Wed, 22 Apr 2026 18:20:24 +0200 Subject: selftests/bpf: Make btf_dump use xdp_dummy rather than xdping_kern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to prepare xdping tool removal from the BPF selftests directory, make the btf_dump test use another BPF program for the btf datasec dump test. Use xdp_dummy.bpf.o, as it is already used by various other tests. 
Signed-off-by: Alexis Lothoré (eBPF Foundation) Signed-off-by: Martin KaFai Lau Acked-by: Paul Chaignon Link: https://patch.msgid.link/20260422-xdping-v2-1-c0f8ccedcf91@bootlin.com --- tools/testing/selftests/bpf/prog_tests/btf_dump.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index f1642794f70e..9f1b50e07a29 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -1027,8 +1027,8 @@ static void test_btf_dump_datasec_data(char *str) char license[4] = "GPL"; struct btf_dump *d; - btf = btf__parse("xdping_kern.bpf.o", NULL); - if (!ASSERT_OK_PTR(btf, "xdping_kern.bpf.o BTF not found")) + btf = btf__parse("xdp_dummy.bpf.o", NULL); + if (!ASSERT_OK_PTR(btf, "xdp_dummy.bpf.o BTF not found")) return; d = btf_dump__new(btf, btf_dump_snprintf, str, NULL); -- cgit v1.2.3 From feb13b19f3fa7202eba1ab9cc47535e092ef7968 Mon Sep 17 00:00:00 2001 From: "Alexis Lothoré (eBPF Foundation)" Date: Wed, 22 Apr 2026 18:20:25 +0200 Subject: selftests/bpf: Drop xdping tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As part of a larger cleanup effort in the bpf selftests directory, tests and scripts are either being converted to the test_progs framework (so they are executed automatically in bpf CI), or removed if not relevant for such integration. The test_xdping.sh script (with the associated xdping.c) acts as an RTT measurement tool, by attaching two small xdp programs to two interfaces. Converting this test to test_progs may not make much sense: - RTT measurement does not really fit in the scope of a functional test, this is rather about measuring some performance level.
- there are other existing tests in test_progs that actively validate XDP features like program attachment, return value processing, packet modification, etc. Drop test_xdping.sh, the corresponding xdping.c userspace part, the xdping_kern.c program, and the shared header, xdping.h. Signed-off-by: Alexis Lothoré (eBPF Foundation) Signed-off-by: Martin KaFai Lau Reviewed-by: Alan Maguire Acked-by: Paul Chaignon Link: https://patch.msgid.link/20260422-xdping-v2-2-c0f8ccedcf91@bootlin.com --- tools/testing/selftests/bpf/.gitignore | 1 - tools/testing/selftests/bpf/Makefile | 3 - tools/testing/selftests/bpf/progs/xdping_kern.c | 183 ----------------- tools/testing/selftests/bpf/test_xdping.sh | 103 ---------- tools/testing/selftests/bpf/xdping.c | 254 ------------------------ tools/testing/selftests/bpf/xdping.h | 13 -- 6 files changed, 557 deletions(-) delete mode 100644 tools/testing/selftests/bpf/progs/xdping_kern.c delete mode 100755 tools/testing/selftests/bpf/test_xdping.sh delete mode 100644 tools/testing/selftests/bpf/xdping.c delete mode 100644 tools/testing/selftests/bpf/xdping.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index bfdc5518ecc8..986a6389186b 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -21,7 +21,6 @@ test_lirc_mode2_user flow_dissector_load test_tcpnotify_user test_libbpf -xdping test_cpp *.d *.subskel.h diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6ef6872adbc3..ac676d2a4a29 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -111,7 +111,6 @@ TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c) # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ test_lirc_mode2.sh \ - test_xdping.sh \ test_bpftool_build.sh \ test_doc_build.sh \ test_xsk.sh \ @@ -134,7 +133,6 @@ TEST_GEN_PROGS_EXTENDED = \
xdp_features \ xdp_hw_metadata \ xdp_synproxy \ - xdping \ xskxceiver TEST_GEN_FILES += $(TEST_KMODS) liburandom_read.so urandom_read sign-file uprobe_multi @@ -320,7 +318,6 @@ $(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELP $(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_tag: $(TESTING_HELPERS) $(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS) -$(OUTPUT)/xdping: $(TESTING_HELPERS) $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS) $(OUTPUT)/test_maps: $(TESTING_HELPERS) $(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) $(UNPRIV_HELPERS) diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c deleted file mode 100644 index 44e2b0ef23ae..000000000000 --- a/tools/testing/selftests/bpf/progs/xdping_kern.c +++ /dev/null @@ -1,183 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. */ - -#define KBUILD_MODNAME "foo" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "bpf_compiler.h" -#include "xdping.h" - -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __uint(max_entries, 256); - __type(key, __u32); - __type(value, struct pinginfo); -} ping_map SEC(".maps"); - -static __always_inline void swap_src_dst_mac(void *data) -{ - unsigned short *p = data; - unsigned short dst[3]; - - dst[0] = p[0]; - dst[1] = p[1]; - dst[2] = p[2]; - p[0] = p[3]; - p[1] = p[4]; - p[2] = p[5]; - p[3] = dst[0]; - p[4] = dst[1]; - p[5] = dst[2]; -} - -static __always_inline __u16 csum_fold_helper(__wsum sum) -{ - sum = (sum & 0xffff) + (sum >> 16); - return ~((sum & 0xffff) + (sum >> 16)); -} - -static __always_inline __u16 ipv4_csum(void *data_start, int data_size) -{ - __wsum sum; - - sum = bpf_csum_diff(0, 0, data_start, data_size, 0); - return csum_fold_helper(sum); -} - -#define ICMP_ECHO_LEN 64 - -static __always_inline int 
icmp_check(struct xdp_md *ctx, int type) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct ethhdr *eth = data; - struct icmphdr *icmph; - struct iphdr *iph; - - if (data + sizeof(*eth) + sizeof(*iph) + ICMP_ECHO_LEN > data_end) - return XDP_PASS; - - if (eth->h_proto != bpf_htons(ETH_P_IP)) - return XDP_PASS; - - iph = data + sizeof(*eth); - - if (iph->protocol != IPPROTO_ICMP) - return XDP_PASS; - - if (bpf_ntohs(iph->tot_len) - sizeof(*iph) != ICMP_ECHO_LEN) - return XDP_PASS; - - icmph = data + sizeof(*eth) + sizeof(*iph); - - if (icmph->type != type) - return XDP_PASS; - - return XDP_TX; -} - -SEC("xdp") -int xdping_client(struct xdp_md *ctx) -{ - void *data = (void *)(long)ctx->data; - struct pinginfo *pinginfo = NULL; - struct ethhdr *eth = data; - struct icmphdr *icmph; - struct iphdr *iph; - __u64 recvtime; - __be32 raddr; - __be16 seq; - int ret; - __u8 i; - - ret = icmp_check(ctx, ICMP_ECHOREPLY); - - if (ret != XDP_TX) - return ret; - - iph = data + sizeof(*eth); - icmph = data + sizeof(*eth) + sizeof(*iph); - raddr = iph->saddr; - - /* Record time reply received. */ - recvtime = bpf_ktime_get_ns(); - pinginfo = bpf_map_lookup_elem(&ping_map, &raddr); - if (!pinginfo || pinginfo->seq != icmph->un.echo.sequence) - return XDP_PASS; - - if (pinginfo->start) { - __pragma_loop_unroll_full - for (i = 0; i < XDPING_MAX_COUNT; i++) { - if (pinginfo->times[i] == 0) - break; - } - /* verifier is fussy here... */ - if (i < XDPING_MAX_COUNT) { - pinginfo->times[i] = recvtime - - pinginfo->start; - pinginfo->start = 0; - i++; - } - /* No more space for values? */ - if (i == pinginfo->count || i == XDPING_MAX_COUNT) - return XDP_PASS; - } - - /* Now convert reply back into echo request. 
*/ - swap_src_dst_mac(data); - iph->saddr = iph->daddr; - iph->daddr = raddr; - icmph->type = ICMP_ECHO; - seq = bpf_htons(bpf_ntohs(icmph->un.echo.sequence) + 1); - icmph->un.echo.sequence = seq; - icmph->checksum = 0; - icmph->checksum = ipv4_csum(icmph, ICMP_ECHO_LEN); - - pinginfo->seq = seq; - pinginfo->start = bpf_ktime_get_ns(); - - return XDP_TX; -} - -SEC("xdp") -int xdping_server(struct xdp_md *ctx) -{ - void *data = (void *)(long)ctx->data; - struct ethhdr *eth = data; - struct icmphdr *icmph; - struct iphdr *iph; - __be32 raddr; - int ret; - - ret = icmp_check(ctx, ICMP_ECHO); - - if (ret != XDP_TX) - return ret; - - iph = data + sizeof(*eth); - icmph = data + sizeof(*eth) + sizeof(*iph); - raddr = iph->saddr; - - /* Now convert request into echo reply. */ - swap_src_dst_mac(data); - iph->saddr = iph->daddr; - iph->daddr = raddr; - icmph->type = ICMP_ECHOREPLY; - icmph->checksum = 0; - icmph->checksum = ipv4_csum(icmph, ICMP_ECHO_LEN); - - return XDP_TX; -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_xdping.sh b/tools/testing/selftests/bpf/test_xdping.sh deleted file mode 100755 index c3d82e0a7378..000000000000 --- a/tools/testing/selftests/bpf/test_xdping.sh +++ /dev/null @@ -1,103 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -# xdping tests -# Here we setup and teardown configuration required to run -# xdping, exercising its options. -# -# Setup is similar to test_tunnel tests but without the tunnel. -# -# Topology: -# --------- -# root namespace | tc_ns0 namespace -# | -# ---------- | ---------- -# | veth1 | --------- | veth0 | -# ---------- peer ---------- -# -# Device Configuration -# -------------------- -# Root namespace with BPF -# Device names and addresses: -# veth1 IP: 10.1.1.200 -# xdp added to veth1, xdpings originate from here. -# -# Namespace tc_ns0 with BPF -# Device names and addresses: -# veth0 IPv4: 10.1.1.100 -# For some tests xdping run in server mode here. 
-# - -readonly TARGET_IP="10.1.1.100" -readonly TARGET_NS="xdp_ns0" - -readonly LOCAL_IP="10.1.1.200" - -setup() -{ - ip netns add $TARGET_NS - ip link add veth0 type veth peer name veth1 - ip link set veth0 netns $TARGET_NS - ip netns exec $TARGET_NS ip addr add ${TARGET_IP}/24 dev veth0 - ip addr add ${LOCAL_IP}/24 dev veth1 - ip netns exec $TARGET_NS ip link set veth0 up - ip link set veth1 up -} - -cleanup() -{ - set +e - ip netns delete $TARGET_NS 2>/dev/null - ip link del veth1 2>/dev/null - if [[ $server_pid -ne 0 ]]; then - kill -TERM $server_pid - fi -} - -test() -{ - client_args="$1" - server_args="$2" - - echo "Test client args '$client_args'; server args '$server_args'" - - server_pid=0 - if [[ -n "$server_args" ]]; then - ip netns exec $TARGET_NS ./xdping $server_args & - server_pid=$! - sleep 10 - fi - ./xdping $client_args $TARGET_IP - - if [[ $server_pid -ne 0 ]]; then - kill -TERM $server_pid - server_pid=0 - fi - - echo "Test client args '$client_args'; server args '$server_args': PASS" -} - -set -e - -server_pid=0 - -trap cleanup EXIT - -setup - -for server_args in "" "-I veth0 -s -S" ; do - # client in skb mode - client_args="-I veth1 -S" - test "$client_args" "$server_args" - - # client with count of 10 RTT measurements. - client_args="-I veth1 -S -c 10" - test "$client_args" "$server_args" -done - -# Test drv mode -test "-I veth1 -N" "-I veth0 -s -N" -test "-I veth1 -N -c 10" "-I veth0 -s -N" - -echo "OK. All tests passed" -exit 0 diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c deleted file mode 100644 index 9ed8c796645d..000000000000 --- a/tools/testing/selftests/bpf/xdping.c +++ /dev/null @@ -1,254 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. 
*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "bpf/bpf.h" -#include "bpf/libbpf.h" - -#include "xdping.h" -#include "testing_helpers.h" - -static int ifindex; -static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; - -static void cleanup(int sig) -{ - bpf_xdp_detach(ifindex, xdp_flags, NULL); - if (sig) - exit(1); -} - -static int get_stats(int fd, __u16 count, __u32 raddr) -{ - struct pinginfo pinginfo = { 0 }; - char inaddrbuf[INET_ADDRSTRLEN]; - struct in_addr inaddr; - __u16 i; - - inaddr.s_addr = raddr; - - printf("\nXDP RTT data:\n"); - - if (bpf_map_lookup_elem(fd, &raddr, &pinginfo)) { - perror("bpf_map_lookup elem"); - return 1; - } - - for (i = 0; i < count; i++) { - if (pinginfo.times[i] == 0) - break; - - printf("64 bytes from %s: icmp_seq=%d ttl=64 time=%#.5f ms\n", - inet_ntop(AF_INET, &inaddr, inaddrbuf, - sizeof(inaddrbuf)), - count + i + 1, - (double)pinginfo.times[i]/1000000); - } - - if (i < count) { - fprintf(stderr, "Expected %d samples, got %d.\n", count, i); - return 1; - } - - bpf_map_delete_elem(fd, &raddr); - - return 0; -} - -static void show_usage(const char *prog) -{ - fprintf(stderr, - "usage: %s [OPTS] -I interface destination\n\n" - "OPTS:\n" - " -c count Stop after sending count requests\n" - " (default %d, max %d)\n" - " -I interface interface name\n" - " -N Run in driver mode\n" - " -s Server mode\n" - " -S Run in skb mode\n", - prog, XDPING_DEFAULT_COUNT, XDPING_MAX_COUNT); -} - -int main(int argc, char **argv) -{ - __u32 mode_flags = XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE; - struct addrinfo *a, hints = { .ai_family = AF_INET }; - __u16 count = XDPING_DEFAULT_COUNT; - struct pinginfo pinginfo = { 0 }; - const char *optstr = "c:I:NsS"; - struct bpf_program *main_prog; - int prog_fd = -1, map_fd = -1; - struct sockaddr_in rin; - struct bpf_object *obj; - struct bpf_map *map; - char *ifname = NULL; - char 
filename[256]; - int opt, ret = 1; - __u32 raddr = 0; - int server = 0; - char cmd[256]; - - while ((opt = getopt(argc, argv, optstr)) != -1) { - switch (opt) { - case 'c': - count = atoi(optarg); - if (count < 1 || count > XDPING_MAX_COUNT) { - fprintf(stderr, - "min count is 1, max count is %d\n", - XDPING_MAX_COUNT); - return 1; - } - break; - case 'I': - ifname = optarg; - ifindex = if_nametoindex(ifname); - if (!ifindex) { - fprintf(stderr, "Could not get interface %s\n", - ifname); - return 1; - } - break; - case 'N': - xdp_flags |= XDP_FLAGS_DRV_MODE; - break; - case 's': - /* use server program */ - server = 1; - break; - case 'S': - xdp_flags |= XDP_FLAGS_SKB_MODE; - break; - default: - show_usage(basename(argv[0])); - return 1; - } - } - - if (!ifname) { - show_usage(basename(argv[0])); - return 1; - } - if (!server && optind == argc) { - show_usage(basename(argv[0])); - return 1; - } - - if ((xdp_flags & mode_flags) == mode_flags) { - fprintf(stderr, "-N or -S can be specified, not both.\n"); - show_usage(basename(argv[0])); - return 1; - } - - if (!server) { - /* Only supports IPv4; see hints initialization above. */ - if (getaddrinfo(argv[optind], NULL, &hints, &a) || !a) { - fprintf(stderr, "Could not resolve %s\n", argv[optind]); - return 1; - } - memcpy(&rin, a->ai_addr, sizeof(rin)); - raddr = rin.sin_addr.s_addr; - freeaddrinfo(a); - } - - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - - snprintf(filename, sizeof(filename), "%s_kern.bpf.o", argv[0]); - - if (bpf_prog_test_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) { - fprintf(stderr, "load of %s failed\n", filename); - return 1; - } - - main_prog = bpf_object__find_program_by_name(obj, - server ? 
"xdping_server" : "xdping_client"); - if (main_prog) - prog_fd = bpf_program__fd(main_prog); - if (!main_prog || prog_fd < 0) { - fprintf(stderr, "could not find xdping program"); - return 1; - } - - map = bpf_object__next_map(obj, NULL); - if (map) - map_fd = bpf_map__fd(map); - if (!map || map_fd < 0) { - fprintf(stderr, "Could not find ping map"); - goto done; - } - - signal(SIGINT, cleanup); - signal(SIGTERM, cleanup); - - printf("Setting up XDP for %s, please wait...\n", ifname); - - printf("XDP setup disrupts network connectivity, hit Ctrl+C to quit\n"); - - if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { - fprintf(stderr, "Link set xdp fd failed for %s\n", ifname); - goto done; - } - - if (server) { - close(prog_fd); - close(map_fd); - printf("Running server on %s; press Ctrl+C to exit...\n", - ifname); - do { } while (1); - } - - /* Start xdping-ing from last regular ping reply, e.g. for a count - * of 10 ICMP requests, we start xdping-ing using reply with seq number - * 10. The reason the last "real" ping RTT is much higher is that - * the ping program sees the ICMP reply associated with the last - * XDP-generated packet, so ping doesn't get a reply until XDP is done. - */ - pinginfo.seq = htons(count); - pinginfo.count = count; - - if (bpf_map_update_elem(map_fd, &raddr, &pinginfo, BPF_ANY)) { - fprintf(stderr, "could not communicate with BPF map: %s\n", - strerror(errno)); - cleanup(0); - goto done; - } - - /* We need to wait for XDP setup to complete. 
*/ - sleep(10); - - snprintf(cmd, sizeof(cmd), "ping -c %d -I %s %s", - count, ifname, argv[optind]); - - printf("\nNormal ping RTT data\n"); - printf("[Ignore final RTT; it is distorted by XDP using the reply]\n"); - - ret = system(cmd); - - if (!ret) - ret = get_stats(map_fd, count, raddr); - - cleanup(0); - -done: - if (prog_fd > 0) - close(prog_fd); - if (map_fd > 0) - close(map_fd); - - return ret; -} diff --git a/tools/testing/selftests/bpf/xdping.h b/tools/testing/selftests/bpf/xdping.h deleted file mode 100644 index afc578df77be..000000000000 --- a/tools/testing/selftests/bpf/xdping.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. */ - -#define XDPING_MAX_COUNT 10 -#define XDPING_DEFAULT_COUNT 4 - -struct pinginfo { - __u64 start; - __be16 seq; - __u16 count; - __u32 pad; - __u64 times[XDPING_MAX_COUNT]; -}; -- cgit v1.2.3 From bbc631085503a7fde9617be18b0657cc9a83910a Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 24 Apr 2026 15:52:44 -0700 Subject: bpf: replace min/max fields with struct cnum{32,64} Replace eight independent s64, u64, s32, u32 min/max fields in bpf_reg_state with two circular number fields: - cnum64 for a unified signed/unsigned 64-bit range tracking; - cnum32 for a unified signed/unsigned 32-bit range tracking. Each cnum represents a range as a single arc on the circular number line (base + size), from which signed and unsigned bounds are derived on demand via accessor functions introduced in the preceding commit. Notable changes: - Signed<->unsigned deductions in __reg_deduce_bounds() are removed. - 64<->32 bit deductions are replaced with: - reg->r32 = cnum32_intersect(reg->r32, cnum32_from_cnum64(reg->r64)); this is functionally equivalent to the old code. 
- reg->r64 = cnum64_cnum32_intersect(reg->r64, reg->r32); this handles a few additional cases, see commit message for "bpf: representation and basic operations on circular numbers". - regs_refine_cond_op() now computes results in terms of operations on sets, e.g. for JNE: /* Complement of the range [val, val] as cnum64. */ lo = (struct cnum64){ val + 1, U64_MAX - 1 }; reg1->r64 = cnum64_intersect(reg1->r64, lo); - For add, sub operations on scalars replace explicit bounds computations with cnum{32,64}_{add,negate}. - For add, sub operations on pointers deduplicate with arithmetic operations on scalars and use cnum{32,64}_{add,negate}. - For and, or, xor operations on scalars remove explicit signed bounds computations. - range_bounds_violation() reduces to checking cnum_is_empty(). - const_tnum_range_mismatch() reduces to checking cnum_is_const(). Selftest adjustments: a few existing tests are updated because a single cnum arc cannot always represent what the old system expressed as the intersection of independent signed and unsigned ranges. For example, if the old system tracked u64=[0, U64_MAX-U32_MAX+2] and s64=[S64_MIN+2, 2] independently, their intersection is a tight two-point set. A single cnum must pick the shorter arc, losing the other constraint. These cases are documented with comments in the adjusted tests. reg_bounds.c is updated with logic similar to cnum64_cnum32_intersect(). Instead of using cnums it inspects intersection between 'b' and first / last / next-after-first / previous-before-last sub-ranges of 'a'. reg_bounds.c is also updated to skip test cases that rely on signed and unsigned ranges intersecting in two intervals, as such cases are not representable by a single cnum. 
The following "crafted" test cases are affected: - reg_bounds_crafted/(s64)[0xffffffffffff8000; 0x7fff] (u32) [0; 0x1f] - reg_bounds_crafted/(s64)[0; 0x1f] (u32) [0xffffffffffffff80; 0x7f] - reg_bounds_crafted/(s64)[0xffffffffffffff80; 0x7f] (u32) [0; 0x1f] - reg_bounds_crafted/(u64)[0; 1] (s32) [1; 2147483648] - reg_bounds_crafted/(u64)[1; 2147483648] (s32) [0; 1] - reg_bounds_crafted/(u64)[0; 0xffffffff00000000] (s64) 0 - reg_bounds_crafted/(u64)0 (s64) [0; 0xffffffff00000000] - reg_bounds_crafted/(u64)[0; 0xffffffff00000000] (s32) 0 - reg_bounds_crafted/(u64)0 (s32) [0; 0xffffffff00000000] - reg_bounds_crafted/(s64)[S64_MIN; 0] (u64) S64_MIN - reg_bounds_crafted/(s64)S64_MIN (u64) [S64_MIN; 0] - reg_bounds_crafted/(s32)[S32_MIN; 0] (u32) S32_MIN - reg_bounds_crafted/(s32)S32_MIN (u32) [S32_MIN; 0] - reg_bounds_crafted/(s64)[0; 0x1f] (u32) [0xffffffff80000000; 0x7fffffff] - reg_bounds_crafted/(s64)[0xffffffff80000000; 0x7fffffff] (u32) [0; 0x1f] - reg_bounds_crafted/(s64)[0; 0x1f] (u32) [0xffffffffffff8000; 0x7fff] As well as some reg_bounds_rand_{consts,ranges}_A_B, where A and B differ in sign domain. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20260424-cnums-everywhere-rfc-v1-v3-3-ca434b39a486@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 39 +- kernel/bpf/verifier.c | 843 +++------------------ .../testing/selftests/bpf/prog_tests/reg_bounds.c | 90 ++- .../testing/selftests/bpf/progs/verifier_bounds.c | 9 +- .../testing/selftests/bpf/progs/verifier_subreg.c | 6 +- 5 files changed, 218 insertions(+), 769 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index bf3ffa56bbe5..101ca6cc5424 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -8,6 +8,7 @@ #include /* for struct btf and btf_id() */ #include /* for MAX_BPF_STACK */ #include +#include /* Maximum variable offset umax_value permitted when resolving memory accesses. 
* In practice this is far bigger than any realistic pointer offset; this limit @@ -120,14 +121,8 @@ struct bpf_reg_state { * These refer to the same value as var_off, not necessarily the actual * contents of the register. */ - s64 smin_value; /* minimum possible (s64)value */ - s64 smax_value; /* maximum possible (s64)value */ - u64 umin_value; /* minimum possible (u64)value */ - u64 umax_value; /* maximum possible (u64)value */ - s32 s32_min_value; /* minimum possible (s32)value */ - s32 s32_max_value; /* maximum possible (s32)value */ - u32 u32_min_value; /* minimum possible (u32)value */ - u32 u32_max_value; /* maximum possible (u32)value */ + struct cnum64 r64; /* 64-bit range as circular number */ + struct cnum32 r32; /* 32-bit range as circular number */ /* For PTR_TO_PACKET, used to find other pointers with the same variable * offset, so they can share range knowledge. * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we @@ -211,66 +206,62 @@ struct bpf_reg_state { static inline s64 reg_smin(const struct bpf_reg_state *reg) { - return reg->smin_value; + return cnum64_smin(reg->r64); } static inline s64 reg_smax(const struct bpf_reg_state *reg) { - return reg->smax_value; + return cnum64_smax(reg->r64); } static inline u64 reg_umin(const struct bpf_reg_state *reg) { - return reg->umin_value; + return cnum64_umin(reg->r64); } static inline u64 reg_umax(const struct bpf_reg_state *reg) { - return reg->umax_value; + return cnum64_umax(reg->r64); } static inline s32 reg_s32_min(const struct bpf_reg_state *reg) { - return reg->s32_min_value; + return cnum32_smin(reg->r32); } static inline s32 reg_s32_max(const struct bpf_reg_state *reg) { - return reg->s32_max_value; + return cnum32_smax(reg->r32); } static inline u32 reg_u32_min(const struct bpf_reg_state *reg) { - return reg->u32_min_value; + return cnum32_umin(reg->r32); } static inline u32 reg_u32_max(const struct bpf_reg_state *reg) { - return reg->u32_max_value; + return 
cnum32_umax(reg->r32); } static inline void reg_set_srange32(struct bpf_reg_state *reg, s32 smin, s32 smax) { - reg->s32_min_value = smin; - reg->s32_max_value = smax; + reg->r32 = cnum32_from_srange(smin, smax); } static inline void reg_set_urange32(struct bpf_reg_state *reg, u32 umin, u32 umax) { - reg->u32_min_value = umin; - reg->u32_max_value = umax; + reg->r32 = cnum32_from_urange(umin, umax); } static inline void reg_set_srange64(struct bpf_reg_state *reg, s64 smin, s64 smax) { - reg->smin_value = smin; - reg->smax_value = smax; + reg->r64 = cnum64_from_srange(smin, smax); } static inline void reg_set_urange64(struct bpf_reg_state *reg, u64 umin, u64 umax) { - reg->umin_value = umin; - reg->umax_value = umax; + reg->r64 = cnum64_from_urange(umin, umax); } enum bpf_stack_slot_type { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b91d2789e7b9..03f9e16c2abe 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -1796,10 +1797,8 @@ static const int caller_saved[CALLER_SAVED_REGS] = { static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm) { reg->var_off = tnum_const(imm); - reg_set_srange64(reg, (s64)imm, (s64)imm); - reg_set_urange64(reg, imm, imm); - reg_set_srange32(reg, (s32)imm, (s32)imm); - reg_set_urange32(reg, (u32)imm, (u32)imm); + reg->r64 = cnum64_from_urange(imm, imm); + reg->r32 = cnum32_from_urange((u32)imm, (u32)imm); } /* Mark the unknown part of a register (variable offset or scalar value) as @@ -1818,8 +1817,7 @@ static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm) { reg->var_off = tnum_const_subreg(reg->var_off, imm); - reg_set_srange32(reg, (s32)imm, (s32)imm); - reg_set_urange32(reg, (u32)imm, (u32)imm); + reg->r32 = cnum32_from_urange((u32)imm, (u32)imm); } /* Mark the 'variable offset' part of a register as zero. 
This should be @@ -1932,23 +1930,19 @@ static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg, static void __mark_reg32_unbounded(struct bpf_reg_state *reg) { - reg_set_srange32(reg, S32_MIN, S32_MAX); - reg_set_urange32(reg, 0, U32_MAX); + reg->r32 = CNUM32_UNBOUNDED; } -/* Reset the min/max bounds of a register */ -static void __mark_reg_unbounded(struct bpf_reg_state *reg) +static void __mark_reg64_unbounded(struct bpf_reg_state *reg) { - reg_set_srange64(reg, S64_MIN, S64_MAX); - reg_set_urange64(reg, 0, U64_MAX); - - __mark_reg32_unbounded(reg); + reg->r64 = CNUM64_UNBOUNDED; } -static void __mark_reg64_unbounded(struct bpf_reg_state *reg) +/* Reset the min/max bounds of a register */ +static void __mark_reg_unbounded(struct bpf_reg_state *reg) { - reg_set_srange64(reg, S64_MIN, S64_MAX); - reg_set_urange64(reg, 0, U64_MAX); + __mark_reg64_unbounded(reg); + __mark_reg32_unbounded(reg); } static void reset_reg64_and_tnum(struct bpf_reg_state *reg) @@ -1963,18 +1957,32 @@ static void reset_reg32_and_tnum(struct bpf_reg_state *reg) reg->var_off = tnum_unknown; } -static void __update_reg32_bounds(struct bpf_reg_state *reg) +static struct cnum32 cnum32_from_tnum(struct tnum tnum) { - struct tnum var32_off = tnum_subreg(reg->var_off); + tnum = tnum_subreg(tnum); + if ((tnum.mask & S32_MIN) || (tnum.value & S32_MIN)) + /* min signed is max(sign bit) | min(other bits) */ + /* max signed is min(sign bit) | max(other bits) */ + return cnum32_from_srange(tnum.value | (tnum.mask & S32_MIN), + tnum.value | (tnum.mask & S32_MAX)); + else + return cnum32_from_urange(tnum.value, (tnum.value | tnum.mask)); +} - reg_set_srange32(reg, - /* min signed is max(sign bit) | min(other bits) */ - max_t(s32, reg_s32_min(reg), var32_off.value | (var32_off.mask & S32_MIN)), - /* max signed is min(sign bit) | max(other bits) */ - min_t(s32, reg_s32_max(reg), var32_off.value | (var32_off.mask & S32_MAX))); - reg_set_urange32(reg, - max_t(u32, reg_u32_min(reg), 
(u32)var32_off.value), - min(reg_u32_max(reg), (u32)(var32_off.value | var32_off.mask))); +static struct cnum64 cnum64_from_tnum(struct tnum tnum) +{ + if ((tnum.mask & S64_MIN) || (tnum.value & S64_MIN)) + /* min signed is max(sign bit) | min(other bits) */ + /* max signed is min(sign bit) | max(other bits) */ + return cnum64_from_srange(tnum.value | (tnum.mask & S64_MIN), + tnum.value | (tnum.mask & S64_MAX)); + else + return cnum64_from_urange(tnum.value, (tnum.value | tnum.mask)); +} + +static void __update_reg32_bounds(struct bpf_reg_state *reg) +{ + cnum32_intersect_with(&reg->r32, cnum32_from_tnum(reg->var_off)); } static void __update_reg64_bounds(struct bpf_reg_state *reg) @@ -1982,17 +1990,7 @@ static void __update_reg64_bounds(struct bpf_reg_state *reg) u64 tnum_next, tmax; bool umin_in_tnum; - /* min signed is max(sign bit) | min(other bits) */ - /* max signed is min(sign bit) | max(other bits) */ - reg_set_srange64(reg, - max_t(s64, reg_smin(reg), - reg->var_off.value | (reg->var_off.mask & S64_MIN)), - min_t(s64, reg_smax(reg), - reg->var_off.value | (reg->var_off.mask & S64_MAX))); - reg_set_urange64(reg, - max(reg_umin(reg), reg->var_off.value), - min(reg_umax(reg), - reg->var_off.value | reg->var_off.mask)); + cnum64_intersect_with(&reg->r64, cnum64_from_tnum(reg->var_off)); /* Check if u64 and tnum overlap in a single value */ tnum_next = tnum_step(reg->var_off, reg_umin(reg)); @@ -2028,343 +2026,19 @@ static void __update_reg_bounds(struct bpf_reg_state *reg) __update_reg64_bounds(reg); } -/* Uses signed min/max values to inform unsigned, and vice-versa */ static void deduce_bounds_32_from_64(struct bpf_reg_state *reg) { - /* If upper 32 bits of u64/s64 range don't change, we can use lower 32 - * bits to improve our u32/s32 boundaries. - * - * E.g., the case where we have upper 32 bits as zero ([10, 20] in - * u64) is pretty trivial, it's obvious that in u32 we'll also have - * [10, 20] range. 
But this property holds for any 64-bit range as - * long as upper 32 bits in that entire range of values stay the same. - * - * E.g., u64 range [0x10000000A, 0x10000000F] ([4294967306, 4294967311] - * in decimal) has the same upper 32 bits throughout all the values in - * that range. As such, lower 32 bits form a valid [0xA, 0xF] ([10, 15]) - * range. - * - * Note also, that [0xA, 0xF] is a valid range both in u32 and in s32, - * following the rules outlined below about u64/s64 correspondence - * (which equally applies to u32 vs s32 correspondence). In general it - * depends on actual hexadecimal values of 32-bit range. They can form - * only valid u32, or only valid s32 ranges in some cases. - * - * So we use all these insights to derive bounds for subregisters here. - */ - if ((reg_umin(reg) >> 32) == (reg_umax(reg) >> 32)) { - /* u64 to u32 casting preserves validity of low 32 bits as - * a range, if upper 32 bits are the same - */ - reg_set_urange32(reg, - max_t(u32, reg_u32_min(reg), (u32)reg_umin(reg)), - min_t(u32, reg_u32_max(reg), (u32)reg_umax(reg))); - - if ((s32)reg_umin(reg) <= (s32)reg_umax(reg)) { - reg_set_srange32(reg, - max_t(s32, reg_s32_min(reg), (s32)reg_umin(reg)), - min_t(s32, reg_s32_max(reg), (s32)reg_umax(reg))); - } - } - if ((reg_smin(reg) >> 32) == (reg_smax(reg) >> 32)) { - /* low 32 bits should form a proper u32 range */ - if ((u32)reg_smin(reg) <= (u32)reg_smax(reg)) { - reg_set_urange32(reg, - max_t(u32, reg_u32_min(reg), (u32)reg_smin(reg)), - min_t(u32, reg_u32_max(reg), (u32)reg_smax(reg))); - } - /* low 32 bits should form a proper s32 range */ - if ((s32)reg_smin(reg) <= (s32)reg_smax(reg)) { - reg_set_srange32(reg, - max_t(s32, reg_s32_min(reg), (s32)reg_smin(reg)), - min_t(s32, reg_s32_max(reg), (s32)reg_smax(reg))); - } - } - /* Special case where upper bits form a small sequence of two - * sequential numbers (in 32-bit unsigned space, so 0xffffffff to - * 0x00000000 is also valid), while lower bits form a proper s32 range - 
* going from negative numbers to positive numbers. E.g., let's say we - * have s64 range [-1, 1] ([0xffffffffffffffff, 0x0000000000000001]). - * Possible s64 values are {-1, 0, 1} ({0xffffffffffffffff, - * 0x0000000000000000, 0x00000000000001}). Ignoring upper 32 bits, - * we still get a valid s32 range [-1, 1] ([0xffffffff, 0x00000001]). - * Note that it doesn't have to be 0xffffffff going to 0x00000000 in - * upper 32 bits. As a random example, s64 range - * [0xfffffff0fffffff0; 0xfffffff100000010], forms a valid s32 range - * [-16, 16] ([0xfffffff0; 0x00000010]) in its 32 bit subregister. - */ - if ((u32)(reg_umin(reg) >> 32) + 1 == (u32)(reg_umax(reg) >> 32) && - (s32)reg_umin(reg) < 0 && (s32)reg_umax(reg) >= 0) { - reg_set_srange32(reg, - max_t(s32, reg_s32_min(reg), (s32)reg_umin(reg)), - min_t(s32, reg_s32_max(reg), (s32)reg_umax(reg))); - } - if ((u32)(reg_smin(reg) >> 32) + 1 == (u32)(reg_smax(reg) >> 32) && - (s32)reg_smin(reg) < 0 && (s32)reg_smax(reg) >= 0) { - reg_set_srange32(reg, - max_t(s32, reg_s32_min(reg), (s32)reg_smin(reg)), - min_t(s32, reg_s32_max(reg), (s32)reg_smax(reg))); - } -} - -static void deduce_bounds_32_from_32(struct bpf_reg_state *reg) -{ - /* if u32 range forms a valid s32 range (due to matching sign bit), - * try to learn from that - */ - if ((s32)reg_u32_min(reg) <= (s32)reg_u32_max(reg)) { - reg_set_srange32(reg, - max_t(s32, reg_s32_min(reg), reg_u32_min(reg)), - min_t(s32, reg_s32_max(reg), reg_u32_max(reg))); - } - /* If we cannot cross the sign boundary, then signed and unsigned bounds - * are the same, so combine. This works even in the negative case, e.g. - * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff. - */ - if ((u32)reg_s32_min(reg) <= (u32)reg_s32_max(reg)) { - reg_set_urange32(reg, - max_t(u32, reg_s32_min(reg), reg_u32_min(reg)), - min_t(u32, reg_s32_max(reg), reg_u32_max(reg))); - } else { - if (reg_u32_max(reg) < (u32)reg_s32_min(reg)) { - /* See __reg64_deduce_bounds() for detailed explanation. 
- * Refine ranges in the following situation: - * - * 0 U32_MAX - * | [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx] | - * |----------------------------|----------------------------| - * |xxxxx s32 range xxxxxxxxx] [xxxxxxx| - * 0 S32_MAX S32_MIN -1 - */ - reg_set_srange32(reg, (s32)reg_u32_min(reg), reg_s32_max(reg)); - reg_set_urange32(reg, - reg_u32_min(reg), - min_t(u32, reg_u32_max(reg), reg_s32_max(reg))); - } else if ((u32)reg_s32_max(reg) < reg_u32_min(reg)) { - /* - * 0 U32_MAX - * | [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx] | - * |----------------------------|----------------------------| - * |xxxxxxxxx] [xxxxxxxxxxxx s32 range | - * 0 S32_MAX S32_MIN -1 - */ - reg_set_srange32(reg, reg_s32_min(reg), (s32)reg_u32_max(reg)); - reg_set_urange32(reg, - max_t(u32, reg_u32_min(reg), reg_s32_min(reg)), - reg_u32_max(reg)); - } - } -} - -static void deduce_bounds_64_from_64(struct bpf_reg_state *reg) -{ - /* If u64 range forms a valid s64 range (due to matching sign bit), - * try to learn from that. Let's do a bit of ASCII art to see when - * this is happening. Let's take u64 range first: - * - * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX - * |-------------------------------|--------------------------------| - * - * Valid u64 range is formed when umin and umax are anywhere in the - * range [0, U64_MAX], and umin <= umax. u64 case is simple and - * straightforward. Let's see how s64 range maps onto the same range - * of values, annotated below the line for comparison: - * - * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX - * |-------------------------------|--------------------------------| - * 0 S64_MAX S64_MIN -1 - * - * So s64 values basically start in the middle and they are logically - * contiguous to the right of it, wrapping around from -1 to 0, and - * then finishing as S64_MAX (0x7fffffffffffffff) right before - * S64_MIN. We can try drawing the continuity of u64 vs s64 values - * more visually as mapped to sign-agnostic range of hex values. 
- * - * u64 start u64 end - * _______________________________________________________________ - * / \ - * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX - * |-------------------------------|--------------------------------| - * 0 S64_MAX S64_MIN -1 - * / \ - * >------------------------------ -------------------------------> - * s64 continues... s64 end s64 start s64 "midpoint" - * - * What this means is that, in general, we can't always derive - * something new about u64 from any random s64 range, and vice versa. - * - * But we can do that in two particular cases. One is when entire - * u64/s64 range is *entirely* contained within left half of the above - * diagram or when it is *entirely* contained in the right half. I.e.: - * - * |-------------------------------|--------------------------------| - * ^ ^ ^ ^ - * A B C D - * - * [A, B] and [C, D] are contained entirely in their respective halves - * and form valid contiguous ranges as both u64 and s64 values. [A, B] - * will be non-negative both as u64 and s64 (and in fact it will be - * identical ranges no matter the signedness). [C, D] treated as s64 - * will be a range of negative values, while in u64 it will be - * non-negative range of values larger than 0x8000000000000000. - * - * Now, any other range here can't be represented in both u64 and s64 - * simultaneously. E.g., [A, C], [A, D], [B, C], [B, D] are valid - * contiguous u64 ranges, but they are discontinuous in s64. [B, C] - * in s64 would be properly presented as [S64_MIN, C] and [B, S64_MAX], - * for example. Similarly, valid s64 range [D, A] (going from negative - * to positive values), would be two separate [D, U64_MAX] and [0, A] - * ranges as u64. Currently reg_state can't represent two segments per - * numeric domain, so in such situations we can only derive maximal - * possible range ([0, U64_MAX] for u64, and [S64_MIN, S64_MAX] for s64). 
- * - * So we use these facts to derive umin/umax from smin/smax and vice - * versa only if they stay within the same "half". This is equivalent - * to checking sign bit: lower half will have sign bit as zero, upper - * half have sign bit 1. Below in code we simplify this by just - * casting umin/umax as smin/smax and checking if they form valid - * range, and vice versa. Those are equivalent checks. - */ - if ((s64)reg_umin(reg) <= (s64)reg_umax(reg)) { - reg_set_srange64(reg, - max_t(s64, reg_smin(reg), reg_umin(reg)), - min_t(s64, reg_smax(reg), reg_umax(reg))); - } - /* If we cannot cross the sign boundary, then signed and unsigned bounds - * are the same, so combine. This works even in the negative case, e.g. - * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff. - */ - if ((u64)reg_smin(reg) <= (u64)reg_smax(reg)) { - reg_set_urange64(reg, - max_t(u64, reg_smin(reg), reg_umin(reg)), - min_t(u64, reg_smax(reg), reg_umax(reg))); - } else { - /* If the s64 range crosses the sign boundary, then it's split - * between the beginning and end of the U64 domain. In that - * case, we can derive new bounds if the u64 range overlaps - * with only one end of the s64 range. - * - * In the following example, the u64 range overlaps only with - * positive portion of the s64 range. - * - * 0 U64_MAX - * | [xxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxx] | - * |----------------------------|----------------------------| - * |xxxxx s64 range xxxxxxxxx] [xxxxxxx| - * 0 S64_MAX S64_MIN -1 - * - * We can thus derive the following new s64 and u64 ranges. - * - * 0 U64_MAX - * | [xxxxxx u64 range xxxxx] | - * |----------------------------|----------------------------| - * | [xxxxxx s64 range xxxxx] | - * 0 S64_MAX S64_MIN -1 - * - * If they overlap in two places, we can't derive anything - * because reg_state can't represent two ranges per numeric - * domain. 
- * - * 0 U64_MAX - * | [xxxxxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxxxxx] | - * |----------------------------|----------------------------| - * |xxxxx s64 range xxxxxxxxx] [xxxxxxxxxx| - * 0 S64_MAX S64_MIN -1 - * - * The first condition below corresponds to the first diagram - * above. - */ - if (reg_umax(reg) < (u64)reg_smin(reg)) { - reg_set_srange64(reg, (s64)reg_umin(reg), reg_smax(reg)); - reg_set_urange64(reg, reg_umin(reg), min_t(u64, reg_umax(reg), reg_smax(reg))); - } else if ((u64)reg_smax(reg) < reg_umin(reg)) { - /* This second condition considers the case where the u64 range - * overlaps with the negative portion of the s64 range: - * - * 0 U64_MAX - * | [xxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxx] | - * |----------------------------|----------------------------| - * |xxxxxxxxx] [xxxxxxxxxxxx s64 range | - * 0 S64_MAX S64_MIN -1 - */ - reg_set_srange64(reg, reg_smin(reg), (s64)reg_umax(reg)); - reg_set_urange64(reg, max_t(u64, reg_umin(reg), reg_smin(reg)), reg_umax(reg)); - } - } + cnum32_intersect_with(&reg->r32, cnum32_from_cnum64(reg->r64)); } static void deduce_bounds_64_from_32(struct bpf_reg_state *reg) { - /* Try to tighten 64-bit bounds from 32-bit knowledge, using 32-bit - * values on both sides of 64-bit range in hope to have tighter range. - * E.g., if r1 is [0x1'00000000, 0x3'80000000], and we learn from - * 32-bit signed > 0 operation that s32 bounds are now [1; 0x7fffffff]. - * With this, we can substitute 1 as low 32-bits of _low_ 64-bit bound - * (0x100000000 -> 0x100000001) and 0x7fffffff as low 32-bits of - * _high_ 64-bit bound (0x380000000 -> 0x37fffffff) and arrive at a - * better overall bounds for r1 as [0x1'000000001; 0x3'7fffffff]. - * We just need to make sure that derived bounds we are intersecting - * with are well-formed ranges in respective s64 or u64 domain, just - * like we do with similar kinds of 32-to-64 or 64-to-32 adjustments. 
- */ - __u64 new_umin, new_umax; - __s64 new_smin, new_smax; - - /* u32 -> u64 tightening, it's always well-formed */ - new_umin = (reg_umin(reg) & ~0xffffffffULL) | reg_u32_min(reg); - new_umax = (reg_umax(reg) & ~0xffffffffULL) | reg_u32_max(reg); - reg_set_urange64(reg, - max_t(u64, reg_umin(reg), new_umin), - min_t(u64, reg_umax(reg), new_umax)); - /* u32 -> s64 tightening, u32 range embedded into s64 preserves range validity */ - new_smin = (reg_smin(reg) & ~0xffffffffULL) | reg_u32_min(reg); - new_smax = (reg_smax(reg) & ~0xffffffffULL) | reg_u32_max(reg); - reg_set_srange64(reg, - max_t(s64, reg_smin(reg), new_smin), - min_t(s64, reg_smax(reg), new_smax)); - - /* Here we would like to handle a special case after sign extending load, - * when upper bits for a 64-bit range are all 1s or all 0s. - * - * Upper bits are all 1s when register is in a range: - * [0xffff_ffff_0000_0000, 0xffff_ffff_ffff_ffff] - * Upper bits are all 0s when register is in a range: - * [0x0000_0000_0000_0000, 0x0000_0000_ffff_ffff] - * Together this forms are continuous range: - * [0xffff_ffff_0000_0000, 0x0000_0000_ffff_ffff] - * - * Now, suppose that register range is in fact tighter: - * [0xffff_ffff_8000_0000, 0x0000_0000_ffff_ffff] (R) - * Also suppose that it's 32-bit range is positive, - * meaning that lower 32-bits of the full 64-bit register - * are in the range: - * [0x0000_0000, 0x7fff_ffff] (W) - * - * If this happens, then any value in a range: - * [0xffff_ffff_0000_0000, 0xffff_ffff_7fff_ffff] - * is smaller than a lowest bound of the range (R): - * 0xffff_ffff_8000_0000 - * which means that upper bits of the full 64-bit register - * can't be all 1s, when lower bits are in range (W). - * - * Note that: - * - 0xffff_ffff_8000_0000 == (s64)S32_MIN - * - 0x0000_0000_7fff_ffff == (s64)S32_MAX - * These relations are used in the conditions below. 
- */ - if (reg_s32_min(reg) >= 0 && reg_smin(reg) >= S32_MIN && reg_smax(reg) <= S32_MAX) { - reg_set_srange64(reg, reg_s32_min(reg), reg_s32_max(reg)); - reg_set_urange64(reg, reg_s32_min(reg), reg_s32_max(reg)); - reg->var_off = tnum_intersect(reg->var_off, - tnum_range(reg_smin(reg), reg_smax(reg))); - } + reg->r64 = cnum64_cnum32_intersect(reg->r64, reg->r32); } static void __reg_deduce_bounds(struct bpf_reg_state *reg) { - deduce_bounds_64_from_64(reg); deduce_bounds_32_from_64(reg); - deduce_bounds_32_from_32(reg); deduce_bounds_64_from_32(reg); } @@ -2402,35 +2076,25 @@ static void reg_bounds_sync(struct bpf_reg_state *reg) __update_reg_bounds(reg); } -static bool range_bounds_violation(struct bpf_reg_state *reg) -{ - return (reg_umin(reg) > reg_umax(reg) || reg_smin(reg) > reg_smax(reg) || - reg_u32_min(reg) > reg_u32_max(reg) || - reg_s32_min(reg) > reg_s32_max(reg)); -} - static bool const_tnum_range_mismatch(struct bpf_reg_state *reg) { - u64 uval = reg->var_off.value; - s64 sval = (s64)uval; - if (!tnum_is_const(reg->var_off)) return false; - return reg_umin(reg) != uval || reg_umax(reg) != uval || - reg_smin(reg) != sval || reg_smax(reg) != sval; + return !cnum64_is_const(reg->r64) || reg->r64.base != reg->var_off.value; } static bool const_tnum_range_mismatch_32(struct bpf_reg_state *reg) { - u32 uval32 = tnum_subreg(reg->var_off).value; - s32 sval32 = (s32)uval32; - if (!tnum_subreg_is_const(reg->var_off)) return false; - return reg_u32_min(reg) != uval32 || reg_u32_max(reg) != uval32 || - reg_s32_min(reg) != sval32 || reg_s32_max(reg) != sval32; + return !cnum32_is_const(reg->r32) || reg->r32.base != tnum_subreg(reg->var_off).value; +} + +static bool range_bounds_violation(struct bpf_reg_state *reg) +{ + return cnum32_is_empty(reg->r32) || cnum64_is_empty(reg->r64); } static int reg_bounds_sanity_check(struct bpf_verifier_env *env, @@ -2455,12 +2119,11 @@ static int reg_bounds_sanity_check(struct bpf_verifier_env *env, return 0; out: - 
verifier_bug(env, "REG INVARIANTS VIOLATION (%s): %s u64=[%#llx, %#llx] " - "s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)", - ctx, msg, reg_umin(reg), reg_umax(reg), - reg_smin(reg), reg_smax(reg), - reg_u32_min(reg), reg_u32_max(reg), - reg_s32_min(reg), reg_s32_max(reg), + verifier_bug(env, "REG INVARIANTS VIOLATION (%s): %s r64={.base=%#llx, .size=%#llx} " + "r32={.base=%#x, .size=%#x} var_off=(%#llx, %#llx)", + ctx, msg, + reg->r64.base, reg->r64.size, + reg->r32.base, reg->r32.size, reg->var_off.value, reg->var_off.mask); if (env->test_reg_invariants) return -EFAULT; @@ -2468,26 +2131,6 @@ out: return 0; } -static bool __reg32_bound_s64(s32 a) -{ - return a >= 0 && a <= S32_MAX; -} - -static void __reg_assign_32_into_64(struct bpf_reg_state *reg) -{ - reg_set_urange64(reg, reg_u32_min(reg), reg_u32_max(reg)); - - /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must - * be positive otherwise set to worse case bounds and refine later - * from tnum. - */ - if (__reg32_bound_s64(reg_s32_min(reg)) && - __reg32_bound_s64(reg_s32_max(reg))) - reg_set_srange64(reg, reg_s32_min(reg), reg_s32_max(reg)); - else - reg_set_srange64(reg, 0, U32_MAX); -} - /* Mark a register as having a completely unknown (scalar) value. 
*/ void bpf_mark_reg_unknown_imprecise(struct bpf_reg_state *reg) { @@ -5636,7 +5279,7 @@ static int check_buffer_access(struct bpf_verifier_env *env, static void zext_32_to_64(struct bpf_reg_state *reg) { reg->var_off = tnum_subreg(reg->var_off); - __reg_assign_32_into_64(reg); + reg_set_urange64(reg, reg_u32_min(reg), reg_u32_max(reg)); } /* truncate register to smaller size (in bytes) @@ -5651,12 +5294,10 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) /* fix arithmetic bounds */ mask = ((u64)1 << (size * 8)) - 1; - if ((reg_umin(reg) & ~mask) == (reg_umax(reg) & ~mask)) { + if ((reg_umin(reg) & ~mask) == (reg_umax(reg) & ~mask)) reg_set_urange64(reg, reg_umin(reg) & mask, reg_umax(reg) & mask); - } else { + else reg_set_urange64(reg, 0, mask); - } - reg_set_srange64(reg, reg_umin(reg), reg_umax(reg)); /* If size is smaller than 32bit register the 32bit register * values are also truncated so we push 64-bit bounds into @@ -5681,8 +5322,6 @@ static void set_sext64_default_val(struct bpf_reg_state *reg, int size) reg_set_srange64(reg, S32_MIN, S32_MAX); reg_set_srange32(reg, S32_MIN, S32_MAX); } - reg_set_urange64(reg, 0, U64_MAX); - reg_set_urange32(reg, 0, U32_MAX); reg->var_off = tnum_unknown; } @@ -5703,10 +5342,8 @@ static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size) reg->var_off = tnum_const((s32)u64_cval); u64_cval = reg->var_off.value; - reg_set_srange64(reg, u64_cval, u64_cval); - reg_set_urange64(reg, u64_cval, u64_cval); - reg_set_srange32(reg, u64_cval, u64_cval); - reg_set_urange32(reg, u64_cval, u64_cval); + reg->r64 = cnum64_from_urange(u64_cval, u64_cval); + reg->r32 = cnum32_from_urange((u32)u64_cval, (u32)u64_cval); return; } @@ -5734,9 +5371,7 @@ static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size) /* both of s64_max/s64_min positive or negative */ if ((s64_max >= 0) == (s64_min >= 0)) { reg_set_srange64(reg, s64_min, s64_max); - reg_set_urange64(reg, s64_min, s64_max); 
reg_set_srange32(reg, s64_min, s64_max); - reg_set_urange32(reg, s64_min, s64_max); reg->var_off = tnum_range(s64_min, s64_max); return; } @@ -5752,7 +5387,6 @@ static void set_sext32_default_val(struct bpf_reg_state *reg, int size) else /* size == 2 */ reg_set_srange32(reg, S16_MIN, S16_MAX); - reg_set_urange32(reg, 0, U32_MAX); reg->var_off = tnum_subreg(tnum_unknown); } @@ -5771,7 +5405,6 @@ static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size) u32_val = reg->var_off.value; reg_set_srange32(reg, u32_val, u32_val); - reg_set_urange32(reg, u32_val, u32_val); return; } @@ -5795,7 +5428,6 @@ static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size) if ((s32_min >= 0) == (s32_max >= 0)) { reg_set_srange32(reg, s32_min, s32_max); - reg_set_urange32(reg, (u32)s32_min, (u32)s32_max); reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max)); return; } @@ -9952,8 +9584,6 @@ static int do_refine_retval_range(struct bpf_verifier_env *env, case BPF_FUNC_get_smp_processor_id: reg_set_urange64(ret_reg, 0, nr_cpu_ids - 1); reg_set_urange32(ret_reg, 0, nr_cpu_ids - 1); - reg_set_srange64(ret_reg, 0, nr_cpu_ids - 1); - reg_set_srange32(ret_reg, 0, nr_cpu_ids - 1); reg_bounds_sync(ret_reg); break; } @@ -13756,10 +13386,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_reg_state *regs = state->regs, *dst_reg; bool known = tnum_is_const(off_reg->var_off); - s64 smin_val = reg_smin(off_reg), smax_val = reg_smax(off_reg), - smin_ptr = reg_smin(ptr_reg), smax_ptr = reg_smax(ptr_reg); - u64 umin_val = reg_umin(off_reg), umax_val = reg_umax(off_reg), - umin_ptr = reg_umin(ptr_reg), umax_ptr = reg_umax(ptr_reg); + s64 smin_val = reg_smin(off_reg), smax_val = reg_smax(off_reg); + u64 umin_val = reg_umin(off_reg), umax_val = reg_umax(off_reg); struct bpf_sanitize_info info = {}; u8 opcode = BPF_OP(insn->code); u32 dst = insn->dst_reg; @@ -13861,23 +13489,7 @@ 
static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, * added into the variable offset, and we copy the fixed offset * from ptr_reg. */ - { - s64 smin_res, smax_res; - u64 umin_res, umax_res; - - if (check_add_overflow(smin_ptr, smin_val, &smin_res) || - check_add_overflow(smax_ptr, smax_val, &smax_res)) { - reg_set_srange64(dst_reg, S64_MIN, S64_MAX); - } else { - reg_set_srange64(dst_reg, smin_res, smax_res); - } - if (check_add_overflow(umin_ptr, umin_val, &umin_res) || - check_add_overflow(umax_ptr, umax_val, &umax_res)) { - reg_set_urange64(dst_reg, 0, U64_MAX); - } else { - reg_set_urange64(dst_reg, umin_res, umax_res); - } - } + dst_reg->r64 = cnum64_add(ptr_reg->r64, off_reg->r64); dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off); dst_reg->raw = ptr_reg->raw; if (reg_is_pkt_pointer(ptr_reg)) { @@ -13909,27 +13521,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst); return -EACCES; } - /* A new variable offset is created. If the subtrahend is known - * nonnegative, then any reg->range we had before is still good. 
- */ - { - s64 smin_res, smax_res; - - if (check_sub_overflow(smin_ptr, smax_val, &smin_res) || - check_sub_overflow(smax_ptr, smin_val, &smax_res)) { - /* Overflow possible, we know nothing */ - reg_set_srange64(dst_reg, S64_MIN, S64_MAX); - } else { - reg_set_srange64(dst_reg, smin_res, smax_res); - } - } - if (umin_ptr < umax_val) { - /* Overflow possible, we know nothing */ - reg_set_urange64(dst_reg, 0, U64_MAX); - } else { - /* Cannot overflow (as long as bounds are consistent) */ - reg_set_urange64(dst_reg, umin_ptr - umax_val, umax_ptr - umin_val); - } + dst_reg->r64 = cnum64_add(ptr_reg->r64, cnum64_negate(off_reg->r64)); dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off); dst_reg->raw = ptr_reg->raw; if (reg_is_pkt_pointer(ptr_reg)) { @@ -13986,139 +13578,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg) { - s32 smin = reg_s32_min(dst_reg); - s32 smax = reg_s32_max(dst_reg); - u32 umin = reg_u32_min(dst_reg); - u32 umax = reg_u32_max(dst_reg); - u32 umin_val = reg_u32_min(src_reg); - u32 umax_val = reg_u32_max(src_reg); - bool min_overflow, max_overflow; - - if (check_add_overflow(smin, reg_s32_min(src_reg), &smin) || - check_add_overflow(smax, reg_s32_max(src_reg), &smax)) { - smin = S32_MIN; - smax = S32_MAX; - } - - /* If either all additions overflow or no additions overflow, then - * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax = - * dst_umax + src_umax. Otherwise (some additions overflow), set - * the output bounds to unbounded. 
- */ - min_overflow = check_add_overflow(umin, umin_val, &umin); - max_overflow = check_add_overflow(umax, umax_val, &umax); - - if (!min_overflow && max_overflow) { - umin = 0; - umax = U32_MAX; - } - - reg_set_srange32(dst_reg, smin, smax); - reg_set_urange32(dst_reg, umin, umax); + dst_reg->r32 = cnum32_add(dst_reg->r32, src_reg->r32); } static void scalar_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg) { - s64 smin = reg_smin(dst_reg); - s64 smax = reg_smax(dst_reg); - u64 umin = reg_umin(dst_reg); - u64 umax = reg_umax(dst_reg); - u64 umin_val = reg_umin(src_reg); - u64 umax_val = reg_umax(src_reg); - bool min_overflow, max_overflow; - - if (check_add_overflow(smin, reg_smin(src_reg), &smin) || - check_add_overflow(smax, reg_smax(src_reg), &smax)) { - smin = S64_MIN; - smax = S64_MAX; - } - - /* If either all additions overflow or no additions overflow, then - * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax = - * dst_umax + src_umax. Otherwise (some additions overflow), set - * the output bounds to unbounded. 
- */ - min_overflow = check_add_overflow(umin, umin_val, &umin); - max_overflow = check_add_overflow(umax, umax_val, &umax); - - if (!min_overflow && max_overflow) { - umin = 0; - umax = U64_MAX; - } - - reg_set_srange64(dst_reg, smin, smax); - reg_set_urange64(dst_reg, umin, umax); + dst_reg->r64 = cnum64_add(dst_reg->r64, src_reg->r64); } static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg) { - s32 smin = reg_s32_min(dst_reg); - s32 smax = reg_s32_max(dst_reg); - u32 umin = reg_u32_min(dst_reg); - u32 umax = reg_u32_max(dst_reg); - u32 umin_val = reg_u32_min(src_reg); - u32 umax_val = reg_u32_max(src_reg); - bool min_underflow, max_underflow; - - if (check_sub_overflow(smin, reg_s32_max(src_reg), &smin) || - check_sub_overflow(smax, reg_s32_min(src_reg), &smax)) { - /* Overflow possible, we know nothing */ - smin = S32_MIN; - smax = S32_MAX; - } - - /* If either all subtractions underflow or no subtractions - * underflow, it is okay to set: dst_umin = dst_umin - src_umax, - * dst_umax = dst_umax - src_umin. Otherwise (some subtractions - * underflow), set the output bounds to unbounded. 
- */ - min_underflow = check_sub_overflow(umin, umax_val, &umin); - max_underflow = check_sub_overflow(umax, umin_val, &umax); - - if (min_underflow && !max_underflow) { - umin = 0; - umax = U32_MAX; - } - - reg_set_srange32(dst_reg, smin, smax); - reg_set_urange32(dst_reg, umin, umax); + dst_reg->r32 = cnum32_add(dst_reg->r32, cnum32_negate(src_reg->r32)); } static void scalar_min_max_sub(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg) { - s64 smin = reg_smin(dst_reg); - s64 smax = reg_smax(dst_reg); - u64 umin = reg_umin(dst_reg); - u64 umax = reg_umax(dst_reg); - u64 umin_val = reg_umin(src_reg); - u64 umax_val = reg_umax(src_reg); - bool min_underflow, max_underflow; - - if (check_sub_overflow(smin, reg_smax(src_reg), &smin) || - check_sub_overflow(smax, reg_smin(src_reg), &smax)) { - /* Overflow possible, we know nothing */ - smin = S64_MIN; - smax = S64_MAX; - } - - /* If either all subtractions underflow or no subtractions - * underflow, it is okay to set: dst_umin = dst_umin - src_umax, - * dst_umax = dst_umax - src_umin. Otherwise (some subtractions - * underflow), set the output bounds to unbounded. 
- */ - min_underflow = check_sub_overflow(umin, umax_val, &umin); - max_underflow = check_sub_overflow(umax, umin_val, &umax); - - if (min_underflow && !max_underflow) { - umin = 0; - umax = U64_MAX; - } - - reg_set_srange64(dst_reg, smin, smax); - reg_set_urange64(dst_reg, umin, umax); + dst_reg->r64 = cnum64_add(dst_reg->r64, cnum64_negate(src_reg->r64)); } static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg, @@ -14148,8 +13626,8 @@ static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg, smax = max_array(tmp_prod, 4); } - reg_set_srange32(dst_reg, smin, smax); - reg_set_urange32(dst_reg, umin, umax); + dst_reg->r32 = cnum32_intersect(cnum32_from_urange(umin, umax), + cnum32_from_srange(smin, smax)); } static void scalar_min_max_mul(struct bpf_reg_state *dst_reg, @@ -14179,8 +13657,8 @@ static void scalar_min_max_mul(struct bpf_reg_state *dst_reg, smax = max_array(tmp_prod, 4); } - reg_set_srange64(dst_reg, smin, smax); - reg_set_urange64(dst_reg, umin, umax); + dst_reg->r64 = cnum64_intersect(cnum64_from_urange(umin, umax), + cnum64_from_srange(smin, smax)); } static void scalar32_min_max_udiv(struct bpf_reg_state *dst_reg, @@ -14192,7 +13670,6 @@ static void scalar32_min_max_udiv(struct bpf_reg_state *dst_reg, reg_u32_max(dst_reg) / src_val); /* Reset other ranges/tnum to unbounded/unknown. */ - reg_set_srange32(dst_reg, S32_MIN, S32_MAX); reset_reg64_and_tnum(dst_reg); } @@ -14205,7 +13682,6 @@ static void scalar_min_max_udiv(struct bpf_reg_state *dst_reg, div64_u64(reg_umax(dst_reg), src_val)); /* Reset other ranges/tnum to unbounded/unknown. */ - reg_set_srange64(dst_reg, S64_MIN, S64_MAX); reset_reg32_and_tnum(dst_reg); } @@ -14242,7 +13718,6 @@ static void scalar32_min_max_sdiv(struct bpf_reg_state *dst_reg, reset: reg_set_srange32(dst_reg, smin, smax); /* Reset other ranges/tnum to unbounded/unknown. 
*/ - reg_set_urange32(dst_reg, 0, U32_MAX); reset_reg64_and_tnum(dst_reg); } @@ -14279,7 +13754,6 @@ static void scalar_min_max_sdiv(struct bpf_reg_state *dst_reg, reset: reg_set_srange64(dst_reg, smin, smax); /* Reset other ranges/tnum to unbounded/unknown. */ - reg_set_urange64(dst_reg, 0, U64_MAX); reset_reg32_and_tnum(dst_reg); } @@ -14299,7 +13773,6 @@ static void scalar32_min_max_umod(struct bpf_reg_state *dst_reg, reg_set_urange32(dst_reg, 0, min(reg_u32_max(dst_reg), res_max)); /* Reset other ranges/tnum to unbounded/unknown. */ - reg_set_srange32(dst_reg, S32_MIN, S32_MAX); reset_reg64_and_tnum(dst_reg); } @@ -14319,7 +13792,6 @@ static void scalar_min_max_umod(struct bpf_reg_state *dst_reg, reg_set_urange64(dst_reg, 0, min(reg_umax(dst_reg), res_max)); /* Reset other ranges/tnum to unbounded/unknown. */ - reg_set_srange64(dst_reg, S64_MIN, S64_MAX); reset_reg32_and_tnum(dst_reg); } @@ -14359,7 +13831,6 @@ static void scalar32_min_max_smod(struct bpf_reg_state *dst_reg, } /* Reset other ranges/tnum to unbounded/unknown. */ - reg_set_urange32(dst_reg, 0, U32_MAX); reset_reg64_and_tnum(dst_reg); } @@ -14399,7 +13870,6 @@ static void scalar_min_max_smod(struct bpf_reg_state *dst_reg, } /* Reset other ranges/tnum to unbounded/unknown. */ - reg_set_urange64(dst_reg, 0, U64_MAX); reset_reg32_and_tnum(dst_reg); } @@ -14419,15 +13889,9 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, /* We get our minimum from the var_off, since that's inherently * bitwise. Our maximum is the minimum of the operands' maxima. */ - reg_set_urange32(dst_reg, var32_off.value, min(reg_u32_max(dst_reg), umax_val)); - - /* Safe to set s32 bounds by casting u32 result into s32 when u32 - * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded. 
- */ - if ((s32)reg_u32_min(dst_reg) <= (s32)reg_u32_max(dst_reg)) - reg_set_srange32(dst_reg, reg_u32_min(dst_reg), reg_u32_max(dst_reg)); - else - reg_set_srange32(dst_reg, S32_MIN, S32_MAX); + reg_set_urange32(dst_reg, + var32_off.value, + min(reg_u32_max(dst_reg), umax_val)); } static void scalar_min_max_and(struct bpf_reg_state *dst_reg, @@ -14445,15 +13909,10 @@ static void scalar_min_max_and(struct bpf_reg_state *dst_reg, /* We get our minimum from the var_off, since that's inherently * bitwise. Our maximum is the minimum of the operands' maxima. */ - reg_set_urange64(dst_reg, dst_reg->var_off.value, min(reg_umax(dst_reg), umax_val)); + reg_set_urange64(dst_reg, + dst_reg->var_off.value, + min(reg_umax(dst_reg), umax_val)); - /* Safe to set s64 bounds by casting u64 result into s64 when u64 - * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded. - */ - if ((s64)reg_umin(dst_reg) <= (s64)reg_umax(dst_reg)) - reg_set_srange64(dst_reg, reg_umin(dst_reg), reg_umax(dst_reg)); - else - reg_set_srange64(dst_reg, S64_MIN, S64_MAX); /* We may learn something more from the var_off */ __update_reg_bounds(dst_reg); } @@ -14474,16 +13933,9 @@ static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, /* We get our maximum from the var_off, and our minimum is the * maximum of the operands' minima */ - reg_set_urange32(dst_reg, max(reg_u32_min(dst_reg), umin_val), + reg_set_urange32(dst_reg, + max(reg_u32_min(dst_reg), umin_val), var32_off.value | var32_off.mask); - - /* Safe to set s32 bounds by casting u32 result into s32 when u32 - * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded. 
- */ - if ((s32)reg_u32_min(dst_reg) <= (s32)reg_u32_max(dst_reg)) - reg_set_srange32(dst_reg, reg_u32_min(dst_reg), reg_u32_max(dst_reg)); - else - reg_set_srange32(dst_reg, S32_MIN, S32_MAX); } static void scalar_min_max_or(struct bpf_reg_state *dst_reg, @@ -14501,16 +13953,10 @@ static void scalar_min_max_or(struct bpf_reg_state *dst_reg, /* We get our maximum from the var_off, and our minimum is the * maximum of the operands' minima */ - reg_set_urange64(dst_reg, max(reg_umin(dst_reg), umin_val), + reg_set_urange64(dst_reg, + max(reg_umin(dst_reg), umin_val), dst_reg->var_off.value | dst_reg->var_off.mask); - /* Safe to set s64 bounds by casting u64 result into s64 when u64 - * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded. - */ - if ((s64)reg_umin(dst_reg) <= (s64)reg_umax(dst_reg)) - reg_set_srange64(dst_reg, reg_umin(dst_reg), reg_umax(dst_reg)); - else - reg_set_srange64(dst_reg, S64_MIN, S64_MAX); /* We may learn something more from the var_off */ __update_reg_bounds(dst_reg); } @@ -14529,14 +13975,6 @@ static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg, /* We get both minimum and maximum from the var32_off. */ reg_set_urange32(dst_reg, var32_off.value, var32_off.value | var32_off.mask); - - /* Safe to set s32 bounds by casting u32 result into s32 when u32 - * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded. - */ - if ((s32)reg_u32_min(dst_reg) <= (s32)reg_u32_max(dst_reg)) - reg_set_srange32(dst_reg, reg_u32_min(dst_reg), reg_u32_max(dst_reg)); - else - reg_set_srange32(dst_reg, S32_MIN, S32_MAX); } static void scalar_min_max_xor(struct bpf_reg_state *dst_reg, @@ -14552,31 +13990,21 @@ static void scalar_min_max_xor(struct bpf_reg_state *dst_reg, } /* We get both minimum and maximum from the var_off. 
*/ - reg_set_urange64(dst_reg, dst_reg->var_off.value, + reg_set_urange64(dst_reg, + dst_reg->var_off.value, dst_reg->var_off.value | dst_reg->var_off.mask); - - /* Safe to set s64 bounds by casting u64 result into s64 when u64 - * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded. - */ - if ((s64)reg_umin(dst_reg) <= (s64)reg_umax(dst_reg)) - reg_set_srange64(dst_reg, reg_umin(dst_reg), reg_umax(dst_reg)); - else - reg_set_srange64(dst_reg, S64_MIN, S64_MAX); - - __update_reg_bounds(dst_reg); } static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, u64 umin_val, u64 umax_val) { - /* We lose all sign bit information (except what we can pick - * up from var_off) - */ - reg_set_srange32(dst_reg, S32_MIN, S32_MAX); /* If we might shift our top bit out, then we know nothing */ if (umax_val > 31 || reg_u32_max(dst_reg) > 1ULL << (31 - umax_val)) reg_set_urange32(dst_reg, 0, U32_MAX); else + /* We lose all sign bit information (except what we can pick + * up from var_off) + */ reg_set_urange32(dst_reg, reg_u32_min(dst_reg) << umin_val, reg_u32_max(dst_reg) << umax_val); } @@ -14602,23 +14030,27 @@ static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg, u64 umin_val, u64 umax_val) { + struct cnum64 u, s; + /* Special case <<32 because it is a common compiler pattern to sign * extend subreg by doing <<32 s>>32. smin/smax assignments are correct * because s32 bounds don't flip sign when shifting to the left by * 32bits. 
*/ if (umin_val == 32 && umax_val == 32) - reg_set_srange64(dst_reg, (s64)reg_s32_min(dst_reg) << 32, - (s64)reg_s32_max(dst_reg) << 32); + s = cnum64_from_srange((s64)reg_s32_min(dst_reg) << 32, + (s64)reg_s32_max(dst_reg) << 32); else - reg_set_srange64(dst_reg, S64_MIN, S64_MAX); + s = CNUM64_UNBOUNDED; /* If we might shift our top bit out, then we know nothing */ if (reg_umax(dst_reg) > 1ULL << (63 - umax_val)) - reg_set_urange64(dst_reg, 0, U64_MAX); + u = CNUM64_UNBOUNDED; else - reg_set_urange64(dst_reg, reg_umin(dst_reg) << umin_val, - reg_umax(dst_reg) << umax_val); + u = cnum64_from_urange(reg_umin(dst_reg) << umin_val, + reg_umax(dst_reg) << umax_val); + + dst_reg->r64 = cnum64_intersect(u, s); } static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg, @@ -14657,7 +14089,6 @@ static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg, * and rely on inferring new ones from the unsigned bounds and * var_off of the result. */ - reg_set_srange32(dst_reg, S32_MIN, S32_MAX); dst_reg->var_off = tnum_rshift(subreg, umin_val); reg_set_urange32(dst_reg, reg_u32_min(dst_reg) >> umax_val, @@ -14687,7 +14118,6 @@ static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg, * and rely on inferring new ones from the unsigned bounds and * var_off of the result. */ - reg_set_srange64(dst_reg, S64_MIN, S64_MAX); dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val); reg_set_urange64(dst_reg, reg_umin(dst_reg) >> umax_val, reg_umax(dst_reg) >> umin_val); @@ -14707,6 +14137,8 @@ static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg, /* Upon reaching here, src_known is true and * umax_val is equal to umin_val. + * Blow away the dst_reg umin_value/umax_value and rely on + * dst_reg var_off to refine the result. 
*/ reg_set_srange32(dst_reg, (u32)(((s32)reg_s32_min(dst_reg)) >> umin_val), @@ -14714,11 +14146,6 @@ static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg, dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32); - /* blow away the dst_reg umin_value/umax_value and rely on - * dst_reg var_off to refine the result. - */ - reg_set_urange32(dst_reg, 0, U32_MAX); - __mark_reg64_unbounded(dst_reg); __update_reg32_bounds(dst_reg); } @@ -14736,11 +14163,6 @@ static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg, dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64); - /* blow away the dst_reg umin_value/umax_value and rely on - * dst_reg var_off to refine the result. - */ - reg_set_urange64(dst_reg, 0, U64_MAX); - /* Its not easy to operate on alu32 bounds here because it depends * on bits being shifted in from upper 32-bits. Take easy way out * and mark unbounded so we can recalculate later from tnum. @@ -15829,23 +15251,15 @@ static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state switch (opcode) { case BPF_JEQ: if (is_jmp32) { - reg_set_urange32(reg1, max(reg_u32_min(reg1), reg_u32_min(reg2)), - min(reg_u32_max(reg1), reg_u32_max(reg2))); - reg_set_srange32(reg1, max(reg_s32_min(reg1), reg_s32_min(reg2)), - min(reg_s32_max(reg1), reg_s32_max(reg2))); - reg_set_urange32(reg2, reg_u32_min(reg1), reg_u32_max(reg1)); - reg_set_srange32(reg2, reg_s32_min(reg1), reg_s32_max(reg1)); + reg1->r32 = cnum32_intersect(reg1->r32, reg2->r32); + reg2->r32 = reg1->r32; t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off)); reg1->var_off = tnum_with_subreg(reg1->var_off, t); reg2->var_off = tnum_with_subreg(reg2->var_off, t); } else { - reg_set_urange64(reg1, max(reg_umin(reg1), reg_umin(reg2)), - min(reg_umax(reg1), reg_umax(reg2))); - reg_set_srange64(reg1, max(reg_smin(reg1), reg_smin(reg2)), - min(reg_smax(reg1), reg_smax(reg2))); - reg_set_urange64(reg2, reg_umin(reg1), reg_umax(reg1)); 
- reg_set_srange64(reg2, reg_smin(reg1), reg_smax(reg1)); + reg1->r64 = cnum64_intersect(reg1->r64, reg2->r64); + reg2->r64 = reg1->r64; reg1->var_off = tnum_intersect(reg1->var_off, reg2->var_off); reg2->var_off = reg1->var_off; @@ -15862,32 +15276,11 @@ static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state */ val = reg_const_value(reg2, is_jmp32); if (is_jmp32) { - /* u32_min is not equal to 0xffffffff at this point, - * because otherwise u32_max is 0xffffffff as well, - * in such a case both reg1 and reg2 would be constants, - * jump would be predicted and regs_refine_cond_op() - * wouldn't be called. - * - * Same reasoning works for all {u,s}{min,max}{32,64} cases - * below. - */ - if (reg_u32_min(reg1) == (u32)val) - reg_set_urange32(reg1, reg_u32_min(reg1) + 1, reg_u32_max(reg1)); - if (reg_u32_max(reg1) == (u32)val) - reg_set_urange32(reg1, reg_u32_min(reg1), reg_u32_max(reg1) - 1); - if (reg_s32_min(reg1) == (s32)val) - reg_set_srange32(reg1, reg_s32_min(reg1) + 1, reg_s32_max(reg1)); - if (reg_s32_max(reg1) == (s32)val) - reg_set_srange32(reg1, reg_s32_min(reg1), reg_s32_max(reg1) - 1); + /* Complement of the range [val, val] as cnum32. */ + cnum32_intersect_with(&reg1->r32, (struct cnum32){ val + 1, U32_MAX - 1 }); } else { - if (reg_umin(reg1) == (u64)val) - reg_set_urange64(reg1, reg_umin(reg1) + 1, reg_umax(reg1)); - if (reg_umax(reg1) == (u64)val) - reg_set_urange64(reg1, reg_umin(reg1), reg_umax(reg1) - 1); - if (reg_smin(reg1) == (s64)val) - reg_set_srange64(reg1, reg_smin(reg1) + 1, reg_smax(reg1)); - if (reg_smax(reg1) == (s64)val) - reg_set_srange64(reg1, reg_smin(reg1), reg_smax(reg1) - 1); + /* Complement of the range [val, val] as cnum64. 
*/ + cnum64_intersect_with(&reg1->r64, (struct cnum64){ val + 1, U64_MAX - 1 }); } break; case BPF_JSET: @@ -15934,38 +15327,38 @@ static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state break; case BPF_JLE: if (is_jmp32) { - reg_set_urange32(reg1, reg_u32_min(reg1), min(reg_u32_max(reg1), reg_u32_max(reg2))); - reg_set_urange32(reg2, max(reg_u32_min(reg1), reg_u32_min(reg2)), reg_u32_max(reg2)); + cnum32_intersect_with_urange(&reg1->r32, 0, reg_u32_max(reg2)); + cnum32_intersect_with_urange(&reg2->r32, reg_u32_min(reg1), U32_MAX); } else { - reg_set_urange64(reg1, reg_umin(reg1), min(reg_umax(reg1), reg_umax(reg2))); - reg_set_urange64(reg2, max(reg_umin(reg1), reg_umin(reg2)), reg_umax(reg2)); + cnum64_intersect_with_urange(&reg1->r64, 0, reg_umax(reg2)); + cnum64_intersect_with_urange(&reg2->r64, reg_umin(reg1), U64_MAX); } break; case BPF_JLT: if (is_jmp32) { - reg_set_urange32(reg1, reg_u32_min(reg1), min(reg_u32_max(reg1), reg_u32_max(reg2) - 1)); - reg_set_urange32(reg2, max(reg_u32_min(reg1) + 1, reg_u32_min(reg2)), reg_u32_max(reg2)); + cnum32_intersect_with_urange(&reg1->r32, 0, reg_u32_max(reg2) - 1); + cnum32_intersect_with_urange(&reg2->r32, reg_u32_min(reg1) + 1, U32_MAX); } else { - reg_set_urange64(reg1, reg_umin(reg1), min(reg_umax(reg1), reg_umax(reg2) - 1)); - reg_set_urange64(reg2, max(reg_umin(reg1) + 1, reg_umin(reg2)), reg_umax(reg2)); + cnum64_intersect_with_urange(&reg1->r64, 0, reg_umax(reg2) - 1); + cnum64_intersect_with_urange(&reg2->r64, reg_umin(reg1) + 1, U64_MAX); } break; case BPF_JSLE: if (is_jmp32) { - reg_set_srange32(reg1, reg_s32_min(reg1), min(reg_s32_max(reg1), reg_s32_max(reg2))); - reg_set_srange32(reg2, max(reg_s32_min(reg1), reg_s32_min(reg2)), reg_s32_max(reg2)); + cnum32_intersect_with_srange(&reg1->r32, S32_MIN, reg_s32_max(reg2)); + cnum32_intersect_with_srange(&reg2->r32, reg_s32_min(reg1), S32_MAX); } else { - reg_set_srange64(reg1, reg_smin(reg1), min(reg_smax(reg1), reg_smax(reg2))); - reg_set_srange64(reg2, 
max(reg_smin(reg1), reg_smin(reg2)), reg_smax(reg2)); + cnum64_intersect_with_srange(&reg1->r64, S64_MIN, reg_smax(reg2)); + cnum64_intersect_with_srange(&reg2->r64, reg_smin(reg1), S64_MAX); } break; case BPF_JSLT: if (is_jmp32) { - reg_set_srange32(reg1, reg_s32_min(reg1), min(reg_s32_max(reg1), reg_s32_max(reg2) - 1)); - reg_set_srange32(reg2, max(reg_s32_min(reg1) + 1, reg_s32_min(reg2)), reg_s32_max(reg2)); + cnum32_intersect_with_srange(&reg1->r32, S32_MIN, reg_s32_max(reg2) - 1); + cnum32_intersect_with_srange(&reg2->r32, reg_s32_min(reg1) + 1, S32_MAX); } else { - reg_set_srange64(reg1, reg_smin(reg1), min(reg_smax(reg1), reg_smax(reg2) - 1)); - reg_set_srange64(reg2, max(reg_smin(reg1) + 1, reg_smin(reg2)), reg_smax(reg2)); + cnum64_intersect_with_srange(&reg1->r64, S64_MIN, reg_smax(reg2) - 1); + cnum64_intersect_with_srange(&reg2->r64, reg_smin(reg1) + 1, S64_MAX); } break; default: diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 71f5240cc5b7..7f170a69d1d8 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -478,6 +478,52 @@ static struct range range_refine_in_halves(enum num_t x_t, struct range x, } +static __always_inline u64 next_u32_block(u64 x) { return x + (1ULL << 32); } +static __always_inline u64 prev_u32_block(u64 x) { return x - (1ULL << 32); } + +/* Is v within the circular u64 range [base, base + len]? */ +static __always_inline bool u64_range_contains(u64 v, u64 base, u64 len) +{ + return v - base <= len; +} + +/* Is v within the circular u32 range [base, base + len]? 
*/ +static __always_inline bool u32_range_contains(u32 v, u32 base, u32 len) +{ + return v - base <= len; +} + +static bool range64_range32_intersect(enum num_t a_t, + struct range a /* 64 */, + struct range b /* 32 */, + struct range *out /* 64 */) +{ + u64 b_len = (u32)(b.b - b.a); + u64 a_len = a.b - a.a; + u64 lo, hi; + + if (u32_range_contains((u32)a.a, (u32)b.a, b_len)) { + lo = a.a; + } else { + lo = swap_low32(a.a, (u32)b.a); + if (!u64_range_contains(lo, a.a, a_len)) + lo = next_u32_block(lo); + if (!u64_range_contains(lo, a.a, a_len)) + return false; + } + if (u32_range_contains(a.b, (u32)b.a, b_len)) { + hi = a.b; + } else { + hi = swap_low32(a.b, (u32)b.b); + if (!u64_range_contains(hi, a.a, a_len)) + hi = prev_u32_block(hi); + if (!u64_range_contains(hi, a.a, a_len)) + return false; + } + *out = range(a_t, lo, hi); + return true; +} + static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, struct range y) { struct range y_cast; @@ -533,23 +579,12 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, } } - /* the case when new range knowledge, *y*, is a 32-bit subregister - * range, while previous range knowledge, *x*, is a full register - * 64-bit range, needs special treatment to take into account upper 32 - * bits of full register range - */ if (t_is_32(y_t) && !t_is_32(x_t)) { - struct range x_swap; + struct range x1; - /* some combinations of upper 32 bits and sign bit can lead to - * invalid ranges, in such cases it's easier to detect them - * after cast/swap than try to enumerate all the conditions - * under which transformation and knowledge transfer is valid - */ - x_swap = range(x_t, swap_low32(x.a, y_cast.a), swap_low32(x.b, y_cast.b)); - if (!is_valid_range(x_t, x_swap)) - return x; - return range_intersection(x_t, x, x_swap); + if (range64_range32_intersect(x_t, x, y, &x1)) + return x1; + return x; } /* otherwise, plain range cast and intersection works */ @@ -1300,6 +1335,26 @@ static 
bool assert_range_eq(enum num_t t, struct range x, struct range y, return false; } +/* For a pair of signed/unsigned t1/t2 checks if r1/r2 intersect in two intervals. */ +static bool needs_two_arcs(enum num_t t1, struct range r1, + enum num_t t2, struct range r2) +{ + u64 lo = cast_t(t1, r2.a); + u64 hi = cast_t(t1, r2.b); + + /* does r2 wrap in t1's domain: [0, hi] ∪ [lo, MAX]? */ + return lo > hi && r1.a <= hi && r1.b >= lo; +} + +static bool reg_state_needs_two_arcs(struct reg_state *s) +{ + if (!s->valid) + return false; + + return needs_two_arcs(U64, s->r[U64], S64, s->r[S64]) || + needs_two_arcs(U32, s->r[U32], S32, s->r[S32]); +} + /* Validate that register states match, and print details if they don't */ static bool assert_reg_state_eq(struct reg_state *r, struct reg_state *e, const char *ctx) { @@ -1524,6 +1579,11 @@ static int verify_case_op(enum num_t init_t, enum num_t cond_t, !assert_reg_state_eq(&fr2, &fe2, "false_reg2") || !assert_reg_state_eq(&tr1, &te1, "true_reg1") || !assert_reg_state_eq(&tr2, &te2, "true_reg2")) { + if (reg_state_needs_two_arcs(&fe1) || reg_state_needs_two_arcs(&fe2) || + reg_state_needs_two_arcs(&te1) || reg_state_needs_two_arcs(&te2)) { + test__skip(); + return 0; + } failed = true; } diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index c1ae013dee29..f0b3fbbbb627 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -1239,7 +1239,8 @@ l0_%=: r0 = 0; \ SEC("tc") __description("multiply mixed sign bounds. 
test 1") __success __log_level(2) -__msg("r6 *= r7 {{.*}}; R6=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,umax32=0xfffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))") +__msg("r6 *= r7 {{.*}}; R6=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))") +/* cnum can't represent both [0, 0xffff_feff] and [0x8000_0000, 0x7fff_feff], so it picks one */ __naked void mult_mixed0_sign(void) { asm volatile ( @@ -1648,7 +1649,8 @@ l0_%=: r0 = 0; \ SEC("socket") __description("bounds deduction cross sign boundary, two overlaps") __failure -__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)") +__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=-128,smax=smax32=127)") +/* smin=-128 includes point 0xffffffffffffff80 */ __msg("frame pointer is read only") __naked void bounds_deduct_two_overlaps(void) { @@ -2043,7 +2045,8 @@ __naked void signed_unsigned_intersection32_case2(void *ctx) */ SEC("socket") __description("bounds refinement: 64bits ranges not overwritten by 32bits ranges") -__msg("3: (65) if r0 s> 0x2 {{.*}} R0=scalar(smin=0x8000000000000002,smax=2,umin=smin32=umin32=2,umax=0xffffffff00000003,smax32=umax32=3") +__msg("3: (65) if r0 s> 0x2 {{.*}} R0=scalar(smin=0x8000000000000002,smax=2,smin32=umin32=2,smax32=umax32=3,var_off{{.*}}))") +/* Can't represent both [S64_MIN+2, 2] and [2, U64_MAX - U32_MAX + 2] at the same time, picks shorter interval */ __msg("4: (25) if r0 > 0x13 {{.*}} R0=2") __success __log_level(2) __naked void refinement_32bounds_not_overwriting_64bounds(void *ctx) diff --git a/tools/testing/selftests/bpf/progs/verifier_subreg.c b/tools/testing/selftests/bpf/progs/verifier_subreg.c index 31832a306f91..73b5b0cf6706 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subreg.c +++ b/tools/testing/selftests/bpf/progs/verifier_subreg.c @@ -558,7 +558,8 @@ __description("arsh32 imm sign 
negative extend check") __success __retval(0) __log_level(2) __msg("3: (17) r6 -= 4095 ; R6=scalar(smin=smin32=-4095,smax=smax32=0)") -__msg("4: (67) r6 <<= 32 ; R6=scalar(smin=0xfffff00100000000,smax=smax32=umax32=0,umax=0xffffffff00000000,smin32=0,var_off=(0x0; 0xffffffff00000000))") +__msg("4: (67) r6 <<= 32 ; R6=scalar(smin=0xfffff00100000000,smax=smax32=umax32=0,smin32=0,var_off=(0x0; 0xffffffff00000000))") +/* represents shorter of signed / unsigned 64-bit ranges */ __msg("5: (c7) r6 s>>= 32 ; R6=scalar(smin=smin32=-4095,smax=smax32=0)") __naked void arsh32_imm_sign_extend_negative_check(void) { @@ -581,7 +582,8 @@ __description("arsh32 imm sign extend check") __success __retval(0) __log_level(2) __msg("3: (17) r6 -= 2047 ; R6=scalar(smin=smin32=-2047,smax=smax32=2048)") -__msg("4: (67) r6 <<= 32 ; R6=scalar(smin=0xfffff80100000000,smax=0x80000000000,umax=0xffffffff00000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xffffffff00000000))") +__msg("4: (67) r6 <<= 32 ; R6=scalar(smin=0xfffff80100000000,smax=0x80000000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xffffffff00000000))") +/* represents shorter of signed / unsigned 64-bit ranges */ __msg("5: (c7) r6 s>>= 32 ; R6=scalar(smin=smin32=-2047,smax=smax32=2048)") __naked void arsh32_imm_sign_extend_check(void) { -- cgit v1.2.3 From 4c0710ab011ec144fa96670f960a0686bdeb153a Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 24 Apr 2026 15:52:45 -0700 Subject: selftests/bpf: new cases handled by 32->64 range refinements 1. 32-bit range starts before 64-bit range's low bits in each block, causing intersection to skip entire blocks. 2. 32-bit range crosses the U32_MAX/0 boundary, represented as s32 range crossing sign boundary. 
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20260424-cnums-everywhere-rfc-v1-v3-4-ca434b39a486@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/verifier_bounds.c | 80 ++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index f0b3fbbbb627..5dd243e653c9 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -2187,4 +2187,84 @@ __naked void tnums_equal_impossible_constant(void *ctx) : __clobber_all); } +/* + * 32-bit range starts before 64-bit range low bits in each 2^32 block. + * + * N*2^32 (N+1)*2^32 (N+2)*2^32 (N+3)*2^32 + * ||----|=====|--|----------||----|=====|-------------||--|-|=====|-------------|| + * |< b >| | |< b >| | |< b >| + * | | | | + * |<---------------+- a -+---------------->| + * | | + * |< t >| refined r0 range + * + * a = u64 [0x1'00000008, 0x3'00000001] + * b = u32 [2, 5] + * t = u64 [0x2'00000002, 0x2'00000005] + */ +SEC("socket") +__success +__flag(BPF_F_TEST_REG_INVARIANTS) +__naked void deduce64_from_32_before_block_start(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r1 = 0x100000008 ll; \ + if r0 < r1 goto 2f; \ + r1 = 0x300000001 ll; \ + if r0 > r1 goto 2f; /* u64: [0x1'00000008, 0x3'00000001] */ \ + if w0 < 2 goto 2f; \ + if w0 > 5 goto 2f; /* u32: [2, 5] */ \ + r2 = 0x200000002 ll; \ + r3 = 0x200000005 ll; \ + if r0 >= r2 goto 1f; /* should be always true */ \ + r10 = 0; /* dead code */ \ +1: if r0 <= r3 goto 2f; /* should be always true */ \ + r10 = 0; /* dead code */ \ +2: exit; \ + " + :: __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* + * 32-bit range crossing U32_MAX / 0 boundary. 
+ * + * N*2^32 (N+1)*2^32 (N+2)*2^32 (N+3)*2^32 + * ||===|---------|------|===||===|----------------|===||===|---------|------|===|| + * |b >| | |< b||b >| |< b||b >| | |< b| + * | | | | + * |<-----+----------------- a --------------+-------->| + * | | + * |<---------------- t ------------->| refined r0 range + * + * a = u64 [0x1'00000006, 0x2'FFFFFFEF] + * b = s32 [-16, 5] (u32 wrapping [0xFFFFFFF0, 0x00000005]) + * t = u64 [0x1'FFFFFFF0, 0x2'00000005] + */ +SEC("socket") +__success +__flag(BPF_F_TEST_REG_INVARIANTS) +__naked void deduce64_from_32_wrapping_32bit(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r1 = 0x100000006 ll; \ + if r0 < r1 goto 2f; \ + r1 = 0x2ffffffef ll; \ + if r0 > r1 goto 2f; /* u64: [0x1'00000006, 0x2'FFFFFFEF] */ \ + if w0 s< -16 goto 2f; \ + if w0 s> 5 goto 2f; /* s32: [-16, 5] */ \ + r1 = 0x1fffffff0 ll; \ + r2 = 0x200000005 ll; \ + if r0 >= r1 goto 1f; /* should be always true */ \ + r10 = 0; /* dead code */ \ +1: if r0 <= r2 goto 2f; /* should be always true */ \ + r10 = 0; /* dead code */ \ +2: exit; \ + " + :: __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From ac985e7bf840e34a8dafe0808cc571fd85896c30 Mon Sep 17 00:00:00 2001 From: Gregory Bell Date: Fri, 17 Apr 2026 11:41:21 -0400 Subject: selftests/bpf: Use local type for flow_offload_tuple_rhash in xdp_flowtable Define flow_offload_tuple_rhash___local and use it in place of the forward-declared kernel type for the bpf_xdp_flow_lookup kfunc return type and tuplehash variable. This is consistent with how bpf_flowtable_opts___local is already handled in the same file and avoids relying on a forward declaration of the struct. 
Fixes: eeb23b54e447 ("selftests/bpf: fix compilation failure when CONFIG_NF_FLOW_TABLE=m") Signed-off-by: Gregory Bell Link: https://lore.kernel.org/r/20260417154122.2558890-2-grbell@redhat.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/xdp_flowtable.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/xdp_flowtable.c b/tools/testing/selftests/bpf/progs/xdp_flowtable.c index 7fdc7b23ee74..e67daa02749d 100644 --- a/tools/testing/selftests/bpf/progs/xdp_flowtable.c +++ b/tools/testing/selftests/bpf/progs/xdp_flowtable.c @@ -15,7 +15,10 @@ struct bpf_flowtable_opts___local { s32 error; }; -struct flow_offload_tuple_rhash * +struct flow_offload_tuple_rhash___local { +}; + +struct flow_offload_tuple_rhash___local * bpf_xdp_flow_lookup(struct xdp_md *, struct bpf_fib_lookup *, struct bpf_flowtable_opts___local *, u32) __ksym; @@ -67,7 +70,7 @@ int xdp_flowtable_do_lookup(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; struct bpf_flowtable_opts___local opts = {}; - struct flow_offload_tuple_rhash *tuplehash; + struct flow_offload_tuple_rhash___local *tuplehash; struct bpf_fib_lookup tuple = { .ifindex = ctx->ingress_ifindex, }; -- cgit v1.2.3 From afb0450be061907a0f5d36bd8b010ca30eda3d3b Mon Sep 17 00:00:00 2001 From: Gregory Bell Date: Fri, 17 Apr 2026 11:41:22 -0400 Subject: selftests/bpf: Use local type for bpf_fou_encap in test_tunnel_kern Replace the forward-declared struct bpf_fou_encap with the existing bpf_fou_encap___local type in the bpf_skb_set_fou_encap and bpf_skb_get_fou_encap declarations. This removes the need for the forward declaration and the explicit casts at each call. 
Fixes: d17f9b370df6 ("selftests/bpf: Fix compilation failure when CONFIG_NET_FOU!=y") Signed-off-by: Gregory Bell Link: https://lore.kernel.org/r/20260417154122.2558890-3-grbell@redhat.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/test_tunnel_kern.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 32127f1cd687..30f1de458669 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -6,6 +6,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define BPF_NO_KFUNC_PROTOTYPES #include "vmlinux.h" #include #include @@ -36,12 +37,10 @@ enum bpf_fou_encap_type___local { FOU_BPF_ENCAP_GUE___local, }; -struct bpf_fou_encap; - int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx, - struct bpf_fou_encap *encap, int type) __ksym; + struct bpf_fou_encap___local *encap, int type) __ksym; int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, - struct bpf_fou_encap *encap) __ksym; + struct bpf_fou_encap___local *encap) __ksym; struct xfrm_state * bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, u32 opts__sz) __ksym; @@ -781,7 +780,7 @@ int ipip_gue_set_tunnel(struct __sk_buff *skb) encap.sport = 0; encap.dport = bpf_htons(5555); - ret = bpf_skb_set_fou_encap(skb, (struct bpf_fou_encap *)&encap, + ret = bpf_skb_set_fou_encap(skb, &encap, bpf_core_enum_value(enum bpf_fou_encap_type___local, FOU_BPF_ENCAP_GUE___local)); if (ret < 0) { @@ -820,7 +819,7 @@ int ipip_fou_set_tunnel(struct __sk_buff *skb) encap.sport = 0; encap.dport = bpf_htons(5555); - ret = bpf_skb_set_fou_encap(skb, (struct bpf_fou_encap *)&encap, + ret = bpf_skb_set_fou_encap(skb, &encap, FOU_BPF_ENCAP_FOU___local); if (ret < 0) { log_err(ret); @@ 
-843,7 +842,7 @@ int ipip_encap_get_tunnel(struct __sk_buff *skb) return TC_ACT_SHOT; } - ret = bpf_skb_get_fou_encap(skb, (struct bpf_fou_encap *)&encap); + ret = bpf_skb_get_fou_encap(skb, &encap); if (ret < 0) { log_err(ret); return TC_ACT_SHOT; -- cgit v1.2.3 From 1fb8e9b32e19f7fa444863a251a5310c54585172 Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Sun, 26 Apr 2026 15:03:31 -0400 Subject: selftests/bpf: Add ifdef guard for WRITE_ONCE macro in bpf_atomic.h The WRITE_ONCE macro is identically defined both in bpf_atomic.h and in bpf_arena_common.h. However, the bpf_atomic.h definition has no ifdef guard. If bpf_atomic.h is included after bpf_arena_common.h, compilation fails because of the duplicate definition. Guard the definition in bpf_atomic.h with an ifdef to let programs include the two headers in any order. Duplicating the definition is the simplest solution out of all the alternatives: - Keeping one of the two existing definitions is not possible because both BPF atomics and arena programs need the macro, and the two features are independent. Using one should not require the header for the other. - Factoring out the definition into a new header that only includes it is more churn than just duplicating it. - Factoring out the definition into bpf_experimental.h requires all users of WRITE_ONCE to include the header. However, the arena library introduced in subsequent commits must be self-contained, while bpf_experimental.h is in the base selftests/bpf directory. Both headers are moved to the arena library in a subsequent patch. 
Signed-off-by: Emil Tsalapatis Reviewed-by: Matt Bobrowski Link: https://lore.kernel.org/r/20260426190338.4615-2-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_atomic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_atomic.h b/tools/testing/selftests/bpf/bpf_atomic.h index c550e5711967..d89a22d63c1c 100644 --- a/tools/testing/selftests/bpf/bpf_atomic.h +++ b/tools/testing/selftests/bpf/bpf_atomic.h @@ -42,7 +42,9 @@ extern bool CONFIG_X86_64 __kconfig __weak; #define READ_ONCE(x) (*(volatile typeof(x) *)&(x)) +#ifndef WRITE_ONCE #define WRITE_ONCE(x, val) ((*(volatile typeof(x) *)&(x)) = (val)) +#endif #define cmpxchg(p, old, new) __sync_val_compare_and_swap((p), old, new) -- cgit v1.2.3 From d5327480a12a031f283c85c3c9c9201685099036 Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Sun, 26 Apr 2026 15:03:32 -0400 Subject: selftests/bpf: Add basic libarena scaffolding Add initial code and a Makefile for an arena-based BPF library. Modules can be added just by including the source file in the library's src/ subdirectory. Future commits will introduce the library code itself. The code includes workarounds that are removed in subsequent patches that ensure bisectability. 
Signed-off-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260426190338.4615-3-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 27 ++++++ tools/testing/selftests/bpf/libarena/Makefile | 69 +++++++++++++++ .../bpf/libarena/include/libarena/common.h | 79 +++++++++++++++++ .../bpf/libarena/include/libarena/userspace.h | 99 ++++++++++++++++++++++ .../selftests/bpf/libarena/src/common.bpf.c | 29 +++++++ 5 files changed, 303 insertions(+) create mode 100644 tools/testing/selftests/bpf/libarena/Makefile create mode 100644 tools/testing/selftests/bpf/libarena/include/libarena/common.h create mode 100644 tools/testing/selftests/bpf/libarena/include/libarena/userspace.h create mode 100644 tools/testing/selftests/bpf/libarena/src/common.bpf.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index ac676d2a4a29..9fe30a665c2e 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -151,6 +151,7 @@ override define CLEAN $(Q)$(RM) -r $(TEST_KMODS) $(Q)$(RM) -r $(EXTRA_CLEAN) $(Q)$(MAKE) -C test_kmods clean + $(Q)$(MAKE) -C libarena clean $(Q)$(MAKE) docs-clean endef @@ -522,6 +523,7 @@ LINKED_BPF_OBJS := $(foreach skel,$(LINKED_SKELS),$($(skel)-deps)) LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(LINKED_BPF_OBJS)) HEADERS_FOR_BPF_OBJS := $(wildcard $(BPFDIR)/*.bpf.h) \ + $(wildcard $(CURDIR)/libarena/include/*.[ch]) \ $(addprefix $(BPFDIR)/, bpf_core_read.h \ bpf_endian.h \ bpf_helpers.h \ @@ -737,6 +739,29 @@ $(VERIFY_SIG_HDR): $(VERIFICATION_CERT) echo "};"; \ echo "unsigned int test_progs_verification_cert_len = $$(wc -c < $<);") > $@ +LIBARENA_MAKE_ARGS = \ + BPFTOOL="$(BPFTOOL)" \ + INCLUDE_DIR="$(INCLUDE_DIR)" \ + LIBBPF_INCLUDE="$(HOST_INCLUDE_DIR)" \ + BPFOBJ="$(BPFOBJ)" \ + LDLIBS="$(LDLIBS) -lzstd" \ + CLANG="$(CLANG)" \ + BPF_CFLAGS="$(BPF_CFLAGS) $(CLANG_CFLAGS)" \ + BPF_TARGET_ENDIAN="$(BPF_TARGET_ENDIAN)" \ + 
Q="$(Q)" + +LIBARENA_BPF_DEPS := $(wildcard libarena/Makefile \ + libarena/include/* \ + libarena/include/libarena/* \ + libarena/src/* \ + libarena/selftests/* \ + libarena/*.bpf.o) + +LIBARENA_SKEL := libarena/libarena.skel.h + +$(LIBARENA_SKEL): $(INCLUDE_DIR)/vmlinux.h $(BPFOBJ) $(LIBARENA_BPF_DEPS) + +$(MAKE) -C libarena libarena.skel.h $(LIBARENA_MAKE_ARGS) + # Define test_progs test runner. TRUNNER_TESTS_DIR := prog_tests TRUNNER_BPF_PROGS_DIR := progs @@ -930,3 +955,5 @@ override define INSTALL_RULE rsync -a $(OUTPUT)/$$DIR/*.bpf.o $(INSTALL_PATH)/$$DIR;\ done endef + +libarena: $(LIBARENA_SKEL) diff --git a/tools/testing/selftests/bpf/libarena/Makefile b/tools/testing/selftests/bpf/libarena/Makefile new file mode 100644 index 000000000000..e85b3ad96890 --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/Makefile @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +# Copyright (c) 2026 Meta Platforms, Inc. and affiliates. + +.PHONY: clean + +# Defaults for standalone builds + +CLANG ?= clang +BPFTOOL ?= bpftool +LDLIBS ?= -lbpf -lelf -lz -lrt -lpthread -lzstd + +ifeq ($(V),1) +Q = +msg = +else +Q ?= @ +msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))"; +endif + +IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - $@ + +libarena.bpf.o: $(LIBARENA_OBJECTS) + $(call msg,GEN-OBJ,libarena,$@) + $(Q)$(BPFTOOL) gen object $@ $^ + +%.bpf.o: %.bpf.c + $(call msg,CLNG-BPF,libarena,$@) + $(Q)$(CLANG) $(BPF_CFLAGS) $(BPF_TARGET_ENDIAN) -c $< -o $@ + +clean: + $(Q)rm -f *.skel.h *.bpf.o diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/common.h b/tools/testing/selftests/bpf/libarena/include/libarena/common.h new file mode 100644 index 000000000000..92b67b20ed15 --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/include/libarena/common.h @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ +#pragma once + +#ifdef __BPF__ + +#include + +#include "../../bpf_arena_common.h" +#include "../../progs/bpf_arena_spin_lock.h" + +#include + +#ifndef __BPF_FEATURE_ADDR_SPACE_CAST +#error "Arena allocators require bpf_addr_space_cast feature" +#endif + +#define arena_stdout(fmt, ...) bpf_stream_printk(1, (fmt), ##__VA_ARGS__) +#define arena_stderr(fmt, ...) bpf_stream_printk(2, (fmt), ##__VA_ARGS__) + +#ifndef __maybe_unused +#define __maybe_unused __attribute__((__unused__)) +#endif + +#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) + +#define ARENA_PAGES (1UL << (32 - __builtin_ffs(__PAGE_SIZE) + 1)) + +struct { + __uint(type, BPF_MAP_TYPE_ARENA); + __uint(map_flags, BPF_F_MMAPABLE); + __uint(max_entries, ARENA_PAGES); /* number of pages */ +#if defined(__TARGET_ARCH_arm64) || defined(__aarch64__) + __ulong(map_extra, (1ull << 32)); /* start of mmap() region */ +#else + __ulong(map_extra, (1ull << 44)); /* start of mmap() region */ +#endif +} arena __weak SEC(".maps"); + +/* + * This is a variable used to aid verification. The may_goto directive + * permits open-coded for loops, but requires that the index variable is + * imprecise. To force the variable to be imprecise, initialize it with + * the opaque volatile variable 0 instead of the constant 0. + */ +extern const volatile u32 zero; + +int arena_fls(__u64 word); + +#else /* ! __BPF__ */ + +#include + +#define __arena + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; +typedef int8_t s8; +typedef int16_t s16; +typedef int32_t s32; +typedef int64_t s64; + +/* Dummy "definition" for userspace. */ +#define arena_spinlock_t int + +#endif /* __BPF__ */ + +struct arena_get_info_args { + void __arena *arena_base; +}; + +struct arena_alloc_reserve_args { + u64 nr_pages; +}; + +/* Reasonable default number of pages reserved by arena_alloc_reserve. 
*/ +#define ARENA_RESERVE_PAGES_DFL (8) diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/userspace.h b/tools/testing/selftests/bpf/libarena/include/libarena/userspace.h new file mode 100644 index 000000000000..0438a751d5fd --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/include/libarena/userspace.h @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ +#pragma once + +#include +#include +#include +#include + +#include +#include + +static inline int libarena_run_prog(int prog_fd) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + int ret; + + ret = bpf_prog_test_run_opts(prog_fd, &opts); + if (ret) + return ret; + + return opts.retval; +} + +static inline bool libarena_is_test_prog(const char *name) +{ + return strstr(name, "test_") == name; +} + +static inline int libarena_run_prog_args(int prog_fd, void *args, size_t argsize) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + int ret; + + opts.ctx_in = args; + opts.ctx_size_in = argsize; + + ret = bpf_prog_test_run_opts(prog_fd, &opts); + + return ret ?: opts.retval; +} + +static inline int libarena_get_arena_base(int arena_get_info_fd, + void **arena_base) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct arena_get_info_args args = { .arena_base = NULL }; + int ret; + + opts.ctx_in = &args; + opts.ctx_size_in = sizeof(args); + + ret = bpf_prog_test_run_opts(arena_get_info_fd, &opts); + if (ret) + return ret; + if (opts.retval) + return opts.retval; + + *arena_base = args.arena_base; + return 0; +} + +static inline int libarena_get_globals_pages(int arena_get_globals_fd, + size_t arena_all_pages, + u64 *globals_pages) +{ + size_t pgsize = sysconf(_SC_PAGESIZE); + void *arena_base; + ssize_t i; + u8 *vec; + int ret; + + ret = libarena_get_arena_base(arena_get_globals_fd, &arena_base); + if (ret) + return ret; + + if (!arena_base) + return -EINVAL; + + vec = calloc(arena_all_pages, sizeof(*vec)); + if (!vec) + return 
-ENOMEM; + + if (mincore(arena_base, arena_all_pages * pgsize, vec) < 0) { + ret = -errno; + free(vec); + return ret; + } + + *globals_pages = 0; + for (i = arena_all_pages - 1; i >= 0; i--) { + if (!(vec[i] & 0x1)) + break; + *globals_pages += 1; + } + + free(vec); + return 0; +} diff --git a/tools/testing/selftests/bpf/libarena/src/common.bpf.c b/tools/testing/selftests/bpf/libarena/src/common.bpf.c new file mode 100644 index 000000000000..659ccead5624 --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/src/common.bpf.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ +#include + +const volatile u32 zero = 0; + +int arena_fls(__u64 word) +{ + if (!word) + return 0; + + return 64 - __builtin_clzll(word); +} + +SEC("syscall") +__weak int arena_get_info(struct arena_get_info_args *args) +{ + args->arena_base = arena_base(&arena); + + return 0; +} + +SEC("syscall") +__weak int arena_alloc_reserve(struct arena_alloc_reserve_args *args) +{ + return bpf_arena_reserve_pages(&arena, NULL, args->nr_pages); +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 8c1e1c33fe5ad867bc0b6ba121911d70e7881d88 Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Sun, 26 Apr 2026 15:03:33 -0400 Subject: selftests/bpf: Move arena-related headers into libarena The BPF selftest headers include functionality that is specific to arenas and is required by libarena. Keep libarena self-contained by moving all functionality into its include/ directory. Also add libarena/include to the standard include paths for the selftests to make the moved headers easy to access by existing selftests. Some functionality is required by libarena but not strictly arena-related. We still move it to the libarena/include path, which is an upgrade from directly accessing them from the selftests/bpf directory using relative paths. A new bpf_may_goto.h file is split off of bpf_experimental.h. 
bpf_arena_spin_lock.h and bpf_arena_common.h are moved to libarena/include. bpf_atomic.h is also moved to libarena because it is necessary for arena spinlocks. For bpf_arena_spin_lock.h, mark the spinlock state array as __weak to define the spinlock state array in the header while also being compatible with multi-compilation unit programs. While we're at it, we remove unnecessary definitions from existing test programs. Signed-off-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260426190338.4615-4-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/bpf_arena_alloc.h | 2 +- tools/testing/selftests/bpf/bpf_arena_common.h | 75 --- tools/testing/selftests/bpf/bpf_arena_list.h | 2 +- tools/testing/selftests/bpf/bpf_arena_strsearch.h | 2 +- tools/testing/selftests/bpf/bpf_atomic.h | 142 ------ tools/testing/selftests/bpf/bpf_experimental.h | 84 +--- .../bpf/libarena/include/bpf_arena_common.h | 75 +++ .../bpf/libarena/include/bpf_arena_spin_lock.h | 547 +++++++++++++++++++++ .../selftests/bpf/libarena/include/bpf_atomic.h | 142 ++++++ .../selftests/bpf/libarena/include/bpf_may_goto.h | 84 ++++ .../bpf/libarena/include/libarena/common.h | 4 +- .../selftests/bpf/prog_tests/arena_spin_lock.c | 7 - tools/testing/selftests/bpf/progs/arena_atomics.c | 2 +- .../testing/selftests/bpf/progs/arena_spin_lock.c | 2 +- .../selftests/bpf/progs/bpf_arena_spin_lock.h | 542 -------------------- .../selftests/bpf/progs/compute_live_registers.c | 2 +- tools/testing/selftests/bpf/progs/lpm_trie_bench.c | 2 +- tools/testing/selftests/bpf/progs/stream.c | 2 +- tools/testing/selftests/bpf/progs/verifier_arena.c | 2 +- .../selftests/bpf/progs/verifier_arena_globals1.c | 2 +- .../selftests/bpf/progs/verifier_arena_globals2.c | 2 +- .../selftests/bpf/progs/verifier_arena_large.c | 2 +- tools/testing/selftests/bpf/progs/verifier_ldsx.c | 2 +- 24 files changed, 867 insertions(+), 865 deletions(-) delete mode 100644 
tools/testing/selftests/bpf/bpf_arena_common.h delete mode 100644 tools/testing/selftests/bpf/bpf_atomic.h create mode 100644 tools/testing/selftests/bpf/libarena/include/bpf_arena_common.h create mode 100644 tools/testing/selftests/bpf/libarena/include/bpf_arena_spin_lock.h create mode 100644 tools/testing/selftests/bpf/libarena/include/bpf_atomic.h create mode 100644 tools/testing/selftests/bpf/libarena/include/bpf_may_goto.h delete mode 100644 tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 9fe30a665c2e..71c7873c4b15 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -56,7 +56,8 @@ CFLAGS += -g $(OPT_FLAGS) -rdynamic -std=gnu11 \ -Wno-unused-but-set-variable \ $(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ - -I$(TOOLSINCDIR) -I$(TOOLSARCHINCDIR) -I$(APIDIR) -I$(OUTPUT) + -I$(TOOLSINCDIR) -I$(TOOLSARCHINCDIR) -I$(APIDIR) -I$(OUTPUT) \ + -I$(CURDIR)/libarena/include LDFLAGS += $(SAN_LDFLAGS) LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread @@ -444,6 +445,7 @@ endif CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ + -I$(CURDIR)/libarena/include \ -I$(abspath $(OUTPUT)/../usr/include) \ -std=gnu11 \ -fno-strict-aliasing \ diff --git a/tools/testing/selftests/bpf/bpf_arena_alloc.h b/tools/testing/selftests/bpf/bpf_arena_alloc.h index c27678299e0c..cda147fd9d25 100644 --- a/tools/testing/selftests/bpf/bpf_arena_alloc.h +++ b/tools/testing/selftests/bpf/bpf_arena_alloc.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. 
*/ #pragma once -#include "bpf_arena_common.h" +#include <bpf_arena_common.h> #ifndef __round_mask #define __round_mask(x, y) ((__typeof__(x))((y)-1)) diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h deleted file mode 100644 index 16f8ce832004..000000000000 --- a/tools/testing/selftests/bpf/bpf_arena_common.h +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ -/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ -#pragma once - -#ifndef WRITE_ONCE -#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val)) -#endif - -#ifndef NUMA_NO_NODE -#define NUMA_NO_NODE (-1) -#endif - -#ifndef arena_container_of -#define arena_container_of(ptr, type, member) \ - ({ \ - void __arena *__mptr = (void __arena *)(ptr); \ - ((type *)(__mptr - offsetof(type, member))); \ - }) -#endif - -#ifdef __BPF__ /* when compiled as bpf program */ - -#ifndef PAGE_SIZE -#define PAGE_SIZE __PAGE_SIZE -/* - * for older kernels try sizeof(struct genradix_node) - * or flexible: - * static inline long __bpf_page_size(void) { - * return bpf_core_enum_value(enum page_size_enum___l, __PAGE_SIZE___l) ?: sizeof(struct genradix_node); - * } - * but generated code is not great. - */ -#endif - -#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM) -#define __arena __attribute__((address_space(1))) -#define __arena_global __attribute__((address_space(1))) -#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */ -#define cast_user(ptr) /* nop for bpf prog. 
emitted by LLVM */ -#else -#define __arena -#define __arena_global SEC(".addr_space.1") -#define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1) -#define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0) -#endif - -void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt, - int node_id, __u64 flags) __ksym __weak; -int bpf_arena_reserve_pages(void *map, void __arena *addr, __u32 page_cnt) __ksym __weak; -void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak; - -#define arena_base(map) ((void __arena *)((struct bpf_arena *)(map))->user_vm_start) - -#else /* when compiled as user space code */ - -#define __arena -#define __arg_arena -#define cast_kern(ptr) /* nop for user space */ -#define cast_user(ptr) /* nop for user space */ -__weak char arena[1]; - -#ifndef offsetof -#define offsetof(type, member) ((unsigned long)&((type *)0)->member) -#endif - -static inline void __arena* bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt, - int node_id, __u64 flags) -{ - return NULL; -} -static inline void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) -{ -} - -#endif diff --git a/tools/testing/selftests/bpf/bpf_arena_list.h b/tools/testing/selftests/bpf/bpf_arena_list.h index e16fa7d95fcf..1af2ffc27d9c 100644 --- a/tools/testing/selftests/bpf/bpf_arena_list.h +++ b/tools/testing/selftests/bpf/bpf_arena_list.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. 
*/ #pragma once -#include "bpf_arena_common.h" +#include <bpf_arena_common.h> struct arena_list_node; diff --git a/tools/testing/selftests/bpf/bpf_arena_strsearch.h b/tools/testing/selftests/bpf/bpf_arena_strsearch.h index c1b6eaa905bb..f0d575daef5a 100644 --- a/tools/testing/selftests/bpf/bpf_arena_strsearch.h +++ b/tools/testing/selftests/bpf/bpf_arena_strsearch.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ #pragma once -#include "bpf_arena_common.h" +#include <bpf_arena_common.h> __noinline int bpf_arena_strlen(const char __arena *s __arg_arena) { diff --git a/tools/testing/selftests/bpf/bpf_atomic.h b/tools/testing/selftests/bpf/bpf_atomic.h deleted file mode 100644 index d89a22d63c1c..000000000000 --- a/tools/testing/selftests/bpf/bpf_atomic.h +++ /dev/null @@ -1,142 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ -#ifndef BPF_ATOMIC_H -#define BPF_ATOMIC_H - -#include -#include -#include "bpf_experimental.h" - -extern bool CONFIG_X86_64 __kconfig __weak; - -/* - * __unqual_typeof(x) - Declare an unqualified scalar type, leaving - * non-scalar types unchanged, - * - * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char' - * is not type-compatible with 'signed char', and we define a separate case. - * - * This is copied verbatim from kernel's include/linux/compiler_types.h, but - * with default expression (for pointers) changed from (x) to (typeof(x)0). - * - * This is because LLVM has a bug where for lvalue (x), it does not get rid of - * an extra address_space qualifier, but does in case of rvalue (typeof(x)0). - * Hence, for pointers, we need to create an rvalue expression to get the - * desired type. See https://github.com/llvm/llvm-project/issues/53400. 
- */ -#define __scalar_type_to_expr_cases(type) \ - unsigned type : (unsigned type)0, signed type : (signed type)0 - -#define __unqual_typeof(x) \ - typeof(_Generic((x), \ - char: (char)0, \ - __scalar_type_to_expr_cases(char), \ - __scalar_type_to_expr_cases(short), \ - __scalar_type_to_expr_cases(int), \ - __scalar_type_to_expr_cases(long), \ - __scalar_type_to_expr_cases(long long), \ - default: (typeof(x))0)) - -/* No-op for BPF */ -#define cpu_relax() ({}) - -#define READ_ONCE(x) (*(volatile typeof(x) *)&(x)) - -#ifndef WRITE_ONCE -#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *)&(x)) = (val)) -#endif - -#define cmpxchg(p, old, new) __sync_val_compare_and_swap((p), old, new) - -#define try_cmpxchg(p, pold, new) \ - ({ \ - __unqual_typeof(*(pold)) __o = *(pold); \ - __unqual_typeof(*(p)) __r = cmpxchg(p, __o, new); \ - if (__r != __o) \ - *(pold) = __r; \ - __r == __o; \ - }) - -#define try_cmpxchg_relaxed(p, pold, new) try_cmpxchg(p, pold, new) - -#define try_cmpxchg_acquire(p, pold, new) try_cmpxchg(p, pold, new) - -#define smp_mb() \ - ({ \ - volatile unsigned long __val; \ - __sync_fetch_and_add(&__val, 0); \ - }) - -#define smp_rmb() \ - ({ \ - if (!CONFIG_X86_64) \ - smp_mb(); \ - else \ - barrier(); \ - }) - -#define smp_wmb() \ - ({ \ - if (!CONFIG_X86_64) \ - smp_mb(); \ - else \ - barrier(); \ - }) - -/* Control dependency provides LOAD->STORE, provide LOAD->LOAD */ -#define smp_acquire__after_ctrl_dep() ({ smp_rmb(); }) - -#define smp_load_acquire(p) \ - ({ \ - __unqual_typeof(*(p)) __v = READ_ONCE(*(p)); \ - if (!CONFIG_X86_64) \ - smp_mb(); \ - barrier(); \ - __v; \ - }) - -#define smp_store_release(p, val) \ - ({ \ - if (!CONFIG_X86_64) \ - smp_mb(); \ - barrier(); \ - WRITE_ONCE(*(p), val); \ - }) - -#define smp_cond_load_relaxed_label(p, cond_expr, label) \ - ({ \ - typeof(p) __ptr = (p); \ - __unqual_typeof(*(p)) VAL; \ - for (;;) { \ - VAL = (__unqual_typeof(*(p)))READ_ONCE(*__ptr); \ - if (cond_expr) \ - break; \ - 
cond_break_label(label); \ - cpu_relax(); \ - } \ - (typeof(*(p)))VAL; \ - }) - -#define smp_cond_load_acquire_label(p, cond_expr, label) \ - ({ \ - __unqual_typeof(*p) __val = \ - smp_cond_load_relaxed_label(p, cond_expr, label); \ - smp_acquire__after_ctrl_dep(); \ - (typeof(*(p)))__val; \ - }) - -#define atomic_read(p) READ_ONCE((p)->counter) - -#define atomic_cond_read_relaxed_label(p, cond_expr, label) \ - smp_cond_load_relaxed_label(&(p)->counter, cond_expr, label) - -#define atomic_cond_read_acquire_label(p, cond_expr, label) \ - smp_cond_load_acquire_label(&(p)->counter, cond_expr, label) - -#define atomic_try_cmpxchg_relaxed(p, pold, new) \ - try_cmpxchg_relaxed(&(p)->counter, pold, new) - -#define atomic_try_cmpxchg_acquire(p, pold, new) \ - try_cmpxchg_acquire(&(p)->counter, pold, new) - -#endif /* BPF_ATOMIC_H */ diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 2234bd6bc9d3..d1db355e872b 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -5,6 +5,7 @@ #include #include #include +#include #define __contains(name, node) __attribute__((btf_decl_tag("contains:" #name ":" #node))) @@ -204,89 +205,6 @@ l_true: \ }) #endif -/* - * Note that cond_break can only be portably used in the body of a breakable - * construct, whereas can_loop can be used anywhere. 
- */ -#ifdef __BPF_FEATURE_MAY_GOTO -#define can_loop \ - ({ __label__ l_break, l_continue; \ - bool ret = true; \ - asm volatile goto("may_goto %l[l_break]" \ - :::: l_break); \ - goto l_continue; \ - l_break: ret = false; \ - l_continue:; \ - ret; \ - }) - -#define __cond_break(expr) \ - ({ __label__ l_break, l_continue; \ - asm volatile goto("may_goto %l[l_break]" \ - :::: l_break); \ - goto l_continue; \ - l_break: expr; \ - l_continue:; \ - }) -#else -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -#define can_loop \ - ({ __label__ l_break, l_continue; \ - bool ret = true; \ - asm volatile goto("1:.byte 0xe5; \ - .byte 0; \ - .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \ - .short 0" \ - :::: l_break); \ - goto l_continue; \ - l_break: ret = false; \ - l_continue:; \ - ret; \ - }) - -#define __cond_break(expr) \ - ({ __label__ l_break, l_continue; \ - asm volatile goto("1:.byte 0xe5; \ - .byte 0; \ - .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \ - .short 0" \ - :::: l_break); \ - goto l_continue; \ - l_break: expr; \ - l_continue:; \ - }) -#else -#define can_loop \ - ({ __label__ l_break, l_continue; \ - bool ret = true; \ - asm volatile goto("1:.byte 0xe5; \ - .byte 0; \ - .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \ - .short 0" \ - :::: l_break); \ - goto l_continue; \ - l_break: ret = false; \ - l_continue:; \ - ret; \ - }) - -#define __cond_break(expr) \ - ({ __label__ l_break, l_continue; \ - asm volatile goto("1:.byte 0xe5; \ - .byte 0; \ - .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \ - .short 0" \ - :::: l_break); \ - goto l_continue; \ - l_break: expr; \ - l_continue:; \ - }) -#endif -#endif - -#define cond_break __cond_break(break) -#define cond_break_label(label) __cond_break(goto label) - #ifndef bpf_nop_mov #define bpf_nop_mov(var) \ asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var)) diff --git a/tools/testing/selftests/bpf/libarena/include/bpf_arena_common.h b/tools/testing/selftests/bpf/libarena/include/bpf_arena_common.h new 
file mode 100644 index 000000000000..16f8ce832004 --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/include/bpf_arena_common.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#pragma once + +#ifndef WRITE_ONCE +#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val)) +#endif + +#ifndef NUMA_NO_NODE +#define NUMA_NO_NODE (-1) +#endif + +#ifndef arena_container_of +#define arena_container_of(ptr, type, member) \ + ({ \ + void __arena *__mptr = (void __arena *)(ptr); \ + ((type *)(__mptr - offsetof(type, member))); \ + }) +#endif + +#ifdef __BPF__ /* when compiled as bpf program */ + +#ifndef PAGE_SIZE +#define PAGE_SIZE __PAGE_SIZE +/* + * for older kernels try sizeof(struct genradix_node) + * or flexible: + * static inline long __bpf_page_size(void) { + * return bpf_core_enum_value(enum page_size_enum___l, __PAGE_SIZE___l) ?: sizeof(struct genradix_node); + * } + * but generated code is not great. + */ +#endif + +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM) +#define __arena __attribute__((address_space(1))) +#define __arena_global __attribute__((address_space(1))) +#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */ +#define cast_user(ptr) /* nop for bpf prog. 
emitted by LLVM */ +#else +#define __arena +#define __arena_global SEC(".addr_space.1") +#define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1) +#define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0) +#endif + +void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt, + int node_id, __u64 flags) __ksym __weak; +int bpf_arena_reserve_pages(void *map, void __arena *addr, __u32 page_cnt) __ksym __weak; +void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak; + +#define arena_base(map) ((void __arena *)((struct bpf_arena *)(map))->user_vm_start) + +#else /* when compiled as user space code */ + +#define __arena +#define __arg_arena +#define cast_kern(ptr) /* nop for user space */ +#define cast_user(ptr) /* nop for user space */ +__weak char arena[1]; + +#ifndef offsetof +#define offsetof(type, member) ((unsigned long)&((type *)0)->member) +#endif + +static inline void __arena* bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt, + int node_id, __u64 flags) +{ + return NULL; +} +static inline void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) +{ +} + +#endif diff --git a/tools/testing/selftests/bpf/libarena/include/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/libarena/include/bpf_arena_spin_lock.h new file mode 100644 index 000000000000..164638690a4d --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/include/bpf_arena_spin_lock.h @@ -0,0 +1,547 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. 
*/ +#ifndef BPF_ARENA_SPIN_LOCK_H +#define BPF_ARENA_SPIN_LOCK_H + +#include +#include +#include + +#define arch_mcs_spin_lock_contended_label(l, label) smp_cond_load_acquire_label(l, VAL, label) +#define arch_mcs_spin_unlock_contended(l) smp_store_release((l), 1) + +#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST) + +#define EBUSY 16 +#define EOPNOTSUPP 95 +#define ETIMEDOUT 110 + +#ifndef __arena +#define __arena __attribute__((address_space(1))) +#endif + +extern unsigned long CONFIG_NR_CPUS __kconfig; + +/* + * Typically, we'd just rely on the definition in vmlinux.h for qspinlock, but + * PowerPC overrides the definition to define lock->val as u32 instead of + * atomic_t, leading to compilation errors. Import a local definition below so + * that we don't depend on the vmlinux.h version. + */ + +struct __qspinlock { + union { + atomic_t val; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + struct { + u8 locked; + u8 pending; + }; + struct { + u16 locked_pending; + u16 tail; + }; +#else + struct { + u16 tail; + u16 locked_pending; + }; + struct { + u8 reserved[2]; + u8 pending; + u8 locked; + }; +#endif + }; +}; + +#define arena_spinlock_t struct __qspinlock +/* FIXME: Using typedef causes CO-RE relocation error */ +/* typedef struct qspinlock arena_spinlock_t; */ + +struct arena_mcs_spinlock { + struct arena_mcs_spinlock __arena *next; + int locked; + int count; +}; + +struct arena_qnode { + struct arena_mcs_spinlock mcs; +}; + +#define _Q_MAX_NODES 4 +#define _Q_PENDING_LOOPS 1 + +/* + * Bitfields in the atomic value: + * + * 0- 7: locked byte + * 8: pending + * 9-15: not used + * 16-17: tail index + * 18-31: tail cpu (+1) + */ +#define _Q_MAX_CPUS 1024 + +#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ + << _Q_ ## type ## _OFFSET) +#define _Q_LOCKED_OFFSET 0 +#define _Q_LOCKED_BITS 8 +#define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) + +#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#define _Q_PENDING_BITS 8 
+#define _Q_PENDING_MASK _Q_SET_MASK(PENDING) + +#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) +#define _Q_TAIL_IDX_BITS 2 +#define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX) + +#define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS) +#define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET) +#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) + +#define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET +#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK) + +#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) +#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET) + +/* + * The qnodes are marked __weak so we can define them in the header + * while still ensuring all compilation units use the same struct + * instance. + */ +struct arena_qnode __weak __arena __hidden qnodes[_Q_MAX_CPUS][_Q_MAX_NODES]; + +static inline u32 encode_tail(int cpu, int idx) +{ + u32 tail; + + tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET; + tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */ + + return tail; +} + +static inline struct arena_mcs_spinlock __arena *decode_tail(u32 tail) +{ + u32 cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1; + u32 idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; + + return &qnodes[cpu][idx].mcs; +} + +static inline +struct arena_mcs_spinlock __arena *grab_mcs_node(struct arena_mcs_spinlock __arena *base, int idx) +{ + return &((struct arena_qnode __arena *)base + idx)->mcs; +} + +#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) + +/** + * xchg_tail - Put in the new queue tail code word & retrieve previous one + * @lock : Pointer to queued spinlock structure + * @tail : The new queue tail code word + * Return: The previous queue tail code word + * + * xchg(lock, tail) + * + * p,*,* -> n,*,* ; prev = xchg(lock, node) + */ +static __always_inline u32 xchg_tail(arena_spinlock_t __arena *lock, u32 tail) +{ + u32 old, new; + + old = atomic_read(&lock->val); + do { + new = (old & _Q_LOCKED_PENDING_MASK) | tail; + /* + * We can use relaxed semantics since the 
caller ensures that + * the MCS node is properly initialized before updating the + * tail. + */ + /* These loops are not expected to stall, but we still need to + * prove to the verifier they will terminate eventually. + */ + cond_break_label(out); + } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new)); + + return old; +out: + bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__); + return old; +} + +/** + * clear_pending - clear the pending bit. + * @lock: Pointer to queued spinlock structure + * + * *,1,* -> *,0,* + */ +static __always_inline void clear_pending(arena_spinlock_t __arena *lock) +{ + WRITE_ONCE(lock->pending, 0); +} + +/** + * clear_pending_set_locked - take ownership and clear the pending bit. + * @lock: Pointer to queued spinlock structure + * + * *,1,0 -> *,0,1 + * + * Lock stealing is not allowed if this function is used. + */ +static __always_inline void clear_pending_set_locked(arena_spinlock_t __arena *lock) +{ + WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL); +} + +/** + * set_locked - Set the lock bit and own the lock + * @lock: Pointer to queued spinlock structure + * + * *,*,0 -> *,0,1 + */ +static __always_inline void set_locked(arena_spinlock_t __arena *lock) +{ + WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); +} + +static __always_inline +u32 arena_fetch_set_pending_acquire(arena_spinlock_t __arena *lock) +{ + u32 old, new; + + old = atomic_read(&lock->val); + do { + new = old | _Q_PENDING_VAL; + /* + * These loops are not expected to stall, but we still need to + * prove to the verifier they will terminate eventually. 
+ */ + cond_break_label(out); + } while (!atomic_try_cmpxchg_acquire(&lock->val, &old, new)); + + return old; +out: + bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__); + return old; +} + +/** + * arena_spin_trylock - try to acquire the queued spinlock + * @lock : Pointer to queued spinlock structure + * Return: 1 if lock acquired, 0 if failed + */ +static __always_inline int arena_spin_trylock(arena_spinlock_t __arena *lock) +{ + int val = atomic_read(&lock->val); + + if (unlikely(val)) + return 0; + + return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)); +} + +__noinline __weak +int arena_spin_lock_slowpath(arena_spinlock_t __arena __arg_arena *lock, u32 val) +{ + struct arena_mcs_spinlock __arena *prev, *next, *node0, *node; + int ret = -ETIMEDOUT; + u32 old, tail; + int idx; + + /* + * Wait for in-progress pending->locked hand-overs with a bounded + * number of spins so that we guarantee forward progress. + * + * 0,1,0 -> 0,0,1 + */ + if (val == _Q_PENDING_VAL) { + int cnt = _Q_PENDING_LOOPS; + val = atomic_cond_read_relaxed_label(&lock->val, + (VAL != _Q_PENDING_VAL) || !cnt--, + release_err); + } + + /* + * If we observe any contention; queue. + */ + if (val & ~_Q_LOCKED_MASK) + goto queue; + + /* + * trylock || pending + * + * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock + */ + val = arena_fetch_set_pending_acquire(lock); + + /* + * If we observe contention, there is a concurrent locker. + * + * Undo and queue; our setting of PENDING might have made the + * n,0,0 -> 0,0,0 transition fail and it will now be waiting + * on @next to become !NULL. + */ + if (unlikely(val & ~_Q_LOCKED_MASK)) { + + /* Undo PENDING if we set it. */ + if (!(val & _Q_PENDING_MASK)) + clear_pending(lock); + + goto queue; + } + + /* + * We're pending, wait for the owner to go away. 
+ * + * 0,1,1 -> *,1,0 + * + * this wait loop must be a load-acquire such that we match the + * store-release that clears the locked bit and create lock + * sequentiality; this is because not all + * clear_pending_set_locked() implementations imply full + * barriers. + */ + if (val & _Q_LOCKED_MASK) + (void)smp_cond_load_acquire_label(&lock->locked, !VAL, release_err); + + /* + * take ownership and clear the pending bit. + * + * 0,1,0 -> 0,0,1 + */ + clear_pending_set_locked(lock); + return 0; + + /* + * End of pending bit optimistic spinning and beginning of MCS + * queuing. + */ +queue: + node0 = &(qnodes[bpf_get_smp_processor_id()])[0].mcs; + idx = node0->count++; + tail = encode_tail(bpf_get_smp_processor_id(), idx); + + /* + * 4 nodes are allocated based on the assumption that there will not be + * nested NMIs taking spinlocks. That may not be true in some + * architectures even though the chance of needing more than 4 nodes + * will still be extremely unlikely. When that happens, we simply return + * an error. Original qspinlock has a trylock fallback in this case. + */ + if (unlikely(idx >= _Q_MAX_NODES)) { + ret = -EBUSY; + goto release_node_err; + } + + node = grab_mcs_node(node0, idx); + + /* + * Ensure that we increment the head node->count before initialising + * the actual node. If the compiler is kind enough to reorder these + * stores, then an IRQ could overwrite our assignments. + */ + barrier(); + + node->locked = 0; + node->next = NULL; + + /* + * We touched a (possibly) cold cacheline in the per-cpu queue node; + * attempt the trylock once more in the hope someone let go while we + * weren't watching. + */ + if (arena_spin_trylock(lock)) + goto release; + + /* + * Ensure that the initialisation of @node is complete before we + * publish the updated tail via xchg_tail() and potentially link + * @node into the waitqueue via WRITE_ONCE(prev->next, node) below. + */ + smp_wmb(); + + /* + * Publish the updated tail. 
+ * We have already touched the queueing cacheline; don't bother with + * pending stuff. + * + * p,*,* -> n,*,* + */ + old = xchg_tail(lock, tail); + next = NULL; + + /* + * if there was a previous node; link it and wait until reaching the + * head of the waitqueue. + */ + if (old & _Q_TAIL_MASK) { + prev = decode_tail(old); + + /* Link @node into the waitqueue. */ + WRITE_ONCE(prev->next, node); + + (void)arch_mcs_spin_lock_contended_label(&node->locked, release_node_err); + + /* + * While waiting for the MCS lock, the next pointer may have + * been set by another lock waiter. We cannot prefetch here + * due to lack of equivalent instruction in BPF ISA. + */ + next = READ_ONCE(node->next); + } + + /* + * we're at the head of the waitqueue, wait for the owner & pending to + * go away. + * + * *,x,y -> *,0,0 + * + * this wait loop must use a load-acquire such that we match the + * store-release that clears the locked bit and create lock + * sequentiality; this is because the set_locked() function below + * does not imply a full barrier. + */ + val = atomic_cond_read_acquire_label(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK), + release_node_err); + + /* + * claim the lock: + * + * n,0,0 -> 0,0,1 : lock, uncontended + * *,*,0 -> *,*,1 : lock, contended + * + * If the queue head is the only one in the queue (lock value == tail) + * and nobody is pending, clear the tail code and grab the lock. + * Otherwise, we only need to grab the lock. + */ + + /* + * In the PV case we might already have _Q_LOCKED_VAL set, because + * of lock stealing; therefore we must also allow: + * + * n,0,1 -> 0,0,1 + * + * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the + * above wait condition, therefore any concurrent setting of + * PENDING will make the uncontended transition fail. 
+ */ + if ((val & _Q_TAIL_MASK) == tail) { + if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL)) + goto release; /* No contention */ + } + + /* + * Either somebody is queued behind us or _Q_PENDING_VAL got set + * which will then detect the remaining tail and queue behind us + * ensuring we'll see a @next. + */ + set_locked(lock); + + /* + * contended path; wait for next if not observed yet, release. + */ + if (!next) + next = smp_cond_load_relaxed_label(&node->next, (VAL), release_node_err); + + arch_mcs_spin_unlock_contended(&next->locked); + +release:; + /* + * release the node + * + * Doing a normal dec vs this_cpu_dec is fine. An upper context always + * decrements count it incremented before returning, thus we're fine. + * For contexts interrupting us, they either observe our dec or not. + * Just ensure the compiler doesn't reorder this statement, as a + * this_cpu_dec implicitly implied that. + */ + barrier(); + node0->count--; + return 0; +release_node_err: + barrier(); + node0->count--; + goto release_err; +release_err: + return ret; +} + +/** + * arena_spin_lock - acquire a queued spinlock + * @lock: Pointer to queued spinlock structure + * + * On error, returned value will be negative. + * On success, zero is returned. + * + * The return value _must_ be tested against zero for success, + * instead of checking it against negative, for passing the + * BPF verifier. + * + * The user should do: + * if (arena_spin_lock(...) != 0) // failure + * or + * if (arena_spin_lock(...) == 0) // success + * or + * if (arena_spin_lock(...)) // failure + * or + * if (!arena_spin_lock(...)) // success + * instead of: + * if (arena_spin_lock(...) < 0) // failure + * + * The return value can still be inspected later. 
+ */ +static __always_inline int arena_spin_lock(arena_spinlock_t __arena *lock) +{ + int val = 0; + + if (CONFIG_NR_CPUS > 1024) + return -EOPNOTSUPP; + + bpf_preempt_disable(); + if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) + return 0; + + val = arena_spin_lock_slowpath(lock, val); + /* FIXME: bpf_assert_range(-MAX_ERRNO, 0) once we have it working for all cases. */ + if (val) + bpf_preempt_enable(); + return val; +} + +/** + * arena_spin_unlock - release a queued spinlock + * @lock : Pointer to queued spinlock structure + */ +static __always_inline void arena_spin_unlock(arena_spinlock_t __arena *lock) +{ + /* + * unlock() needs release semantics: + */ + smp_store_release(&lock->locked, 0); + bpf_preempt_enable(); +} + +#define arena_spin_lock_irqsave(lock, flags) \ + ({ \ + int __ret; \ + bpf_local_irq_save(&(flags)); \ + __ret = arena_spin_lock((lock)); \ + if (__ret) \ + bpf_local_irq_restore(&(flags)); \ + (__ret); \ + }) + +#define arena_spin_unlock_irqrestore(lock, flags) \ + ({ \ + arena_spin_unlock((lock)); \ + bpf_local_irq_restore(&(flags)); \ + }) + +#endif + +#endif /* BPF_ARENA_SPIN_LOCK_H */ diff --git a/tools/testing/selftests/bpf/libarena/include/bpf_atomic.h b/tools/testing/selftests/bpf/libarena/include/bpf_atomic.h new file mode 100644 index 000000000000..b7b230431929 --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/include/bpf_atomic.h @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#ifndef BPF_ATOMIC_H +#define BPF_ATOMIC_H + +#include +#include +#include + +extern bool CONFIG_X86_64 __kconfig __weak; + +/* + * __unqual_typeof(x) - Declare an unqualified scalar type, leaving + * non-scalar types unchanged, + * + * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char' + * is not type-compatible with 'signed char', and we define a separate case. 
+ * + * This is copied verbatim from kernel's include/linux/compiler_types.h, but + * with default expression (for pointers) changed from (x) to (typeof(x)0). + * + * This is because LLVM has a bug where for lvalue (x), it does not get rid of + * an extra address_space qualifier, but does in case of rvalue (typeof(x)0). + * Hence, for pointers, we need to create an rvalue expression to get the + * desired type. See https://github.com/llvm/llvm-project/issues/53400. + */ +#define __scalar_type_to_expr_cases(type) \ + unsigned type : (unsigned type)0, signed type : (signed type)0 + +#define __unqual_typeof(x) \ + typeof(_Generic((x), \ + char: (char)0, \ + __scalar_type_to_expr_cases(char), \ + __scalar_type_to_expr_cases(short), \ + __scalar_type_to_expr_cases(int), \ + __scalar_type_to_expr_cases(long), \ + __scalar_type_to_expr_cases(long long), \ + default: (typeof(x))0)) + +/* No-op for BPF */ +#define cpu_relax() ({}) + +#define READ_ONCE(x) (*(volatile typeof(x) *)&(x)) + +#ifndef WRITE_ONCE +#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *)&(x)) = (val)) +#endif + +#define cmpxchg(p, old, new) __sync_val_compare_and_swap((p), old, new) + +#define try_cmpxchg(p, pold, new) \ + ({ \ + __unqual_typeof(*(pold)) __o = *(pold); \ + __unqual_typeof(*(p)) __r = cmpxchg(p, __o, new); \ + if (__r != __o) \ + *(pold) = __r; \ + __r == __o; \ + }) + +#define try_cmpxchg_relaxed(p, pold, new) try_cmpxchg(p, pold, new) + +#define try_cmpxchg_acquire(p, pold, new) try_cmpxchg(p, pold, new) + +#define smp_mb() \ + ({ \ + volatile unsigned long __val; \ + __sync_fetch_and_add(&__val, 0); \ + }) + +#define smp_rmb() \ + ({ \ + if (!CONFIG_X86_64) \ + smp_mb(); \ + else \ + barrier(); \ + }) + +#define smp_wmb() \ + ({ \ + if (!CONFIG_X86_64) \ + smp_mb(); \ + else \ + barrier(); \ + }) + +/* Control dependency provides LOAD->STORE, provide LOAD->LOAD */ +#define smp_acquire__after_ctrl_dep() ({ smp_rmb(); }) + +#define smp_load_acquire(p) \ + ({ \ + __unqual_typeof(*(p)) 
__v = READ_ONCE(*(p)); \ + if (!CONFIG_X86_64) \ + smp_mb(); \ + barrier(); \ + __v; \ + }) + +#define smp_store_release(p, val) \ + ({ \ + if (!CONFIG_X86_64) \ + smp_mb(); \ + barrier(); \ + WRITE_ONCE(*(p), val); \ + }) + +#define smp_cond_load_relaxed_label(p, cond_expr, label) \ + ({ \ + typeof(p) __ptr = (p); \ + __unqual_typeof(*(p)) VAL; \ + for (;;) { \ + VAL = (__unqual_typeof(*(p)))READ_ONCE(*__ptr); \ + if (cond_expr) \ + break; \ + cond_break_label(label); \ + cpu_relax(); \ + } \ + (typeof(*(p)))VAL; \ + }) + +#define smp_cond_load_acquire_label(p, cond_expr, label) \ + ({ \ + __unqual_typeof(*p) __val = \ + smp_cond_load_relaxed_label(p, cond_expr, label); \ + smp_acquire__after_ctrl_dep(); \ + (typeof(*(p)))__val; \ + }) + +#define atomic_read(p) READ_ONCE((p)->counter) + +#define atomic_cond_read_relaxed_label(p, cond_expr, label) \ + smp_cond_load_relaxed_label(&(p)->counter, cond_expr, label) + +#define atomic_cond_read_acquire_label(p, cond_expr, label) \ + smp_cond_load_acquire_label(&(p)->counter, cond_expr, label) + +#define atomic_try_cmpxchg_relaxed(p, pold, new) \ + try_cmpxchg_relaxed(&(p)->counter, pold, new) + +#define atomic_try_cmpxchg_acquire(p, pold, new) \ + try_cmpxchg_acquire(&(p)->counter, pold, new) + +#endif /* BPF_ATOMIC_H */ diff --git a/tools/testing/selftests/bpf/libarena/include/bpf_may_goto.h b/tools/testing/selftests/bpf/libarena/include/bpf_may_goto.h new file mode 100644 index 000000000000..9ba90689d6ba --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/include/bpf_may_goto.h @@ -0,0 +1,84 @@ +#pragma once + +/* + * Note that cond_break can only be portably used in the body of a breakable + * construct, whereas can_loop can be used anywhere. 
+ */ +#ifdef __BPF_FEATURE_MAY_GOTO +#define can_loop \ + ({ __label__ l_break, l_continue; \ + bool ret = true; \ + asm volatile goto("may_goto %l[l_break]" \ + :::: l_break); \ + goto l_continue; \ + l_break: ret = false; \ + l_continue:; \ + ret; \ + }) + +#define __cond_break(expr) \ + ({ __label__ l_break, l_continue; \ + asm volatile goto("may_goto %l[l_break]" \ + :::: l_break); \ + goto l_continue; \ + l_break: expr; \ + l_continue:; \ + }) +#else +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define can_loop \ + ({ __label__ l_break, l_continue; \ + bool ret = true; \ + asm volatile goto("1:.byte 0xe5; \ + .byte 0; \ + .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \ + .short 0" \ + :::: l_break); \ + goto l_continue; \ + l_break: ret = false; \ + l_continue:; \ + ret; \ + }) + +#define __cond_break(expr) \ + ({ __label__ l_break, l_continue; \ + asm volatile goto("1:.byte 0xe5; \ + .byte 0; \ + .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \ + .short 0" \ + :::: l_break); \ + goto l_continue; \ + l_break: expr; \ + l_continue:; \ + }) +#else +#define can_loop \ + ({ __label__ l_break, l_continue; \ + bool ret = true; \ + asm volatile goto("1:.byte 0xe5; \ + .byte 0; \ + .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \ + .short 0" \ + :::: l_break); \ + goto l_continue; \ + l_break: ret = false; \ + l_continue:; \ + ret; \ + }) + +#define __cond_break(expr) \ + ({ __label__ l_break, l_continue; \ + asm volatile goto("1:.byte 0xe5; \ + .byte 0; \ + .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \ + .short 0" \ + :::: l_break); \ + goto l_continue; \ + l_break: expr; \ + l_continue:; \ + }) +#endif +#endif + +#define cond_break __cond_break(break) +#define cond_break_label(label) __cond_break(goto label) diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/common.h b/tools/testing/selftests/bpf/libarena/include/libarena/common.h index 92b67b20ed15..d088f3e75798 100644 --- 
a/tools/testing/selftests/bpf/libarena/include/libarena/common.h +++ b/tools/testing/selftests/bpf/libarena/include/libarena/common.h @@ -6,8 +6,8 @@ #include -#include "../../bpf_arena_common.h" -#include "../../progs/bpf_arena_spin_lock.h" +#include +#include #include diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c index 693fd86fbde6..acb9d53b5973 100644 --- a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c @@ -5,13 +5,6 @@ #include struct __qspinlock { int val; }; -typedef struct __qspinlock arena_spinlock_t; - -struct arena_qnode { - unsigned long next; - int count; - int locked; -}; #include "arena_spin_lock.skel.h" diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c index d1841aac94a2..2e7751a85399 100644 --- a/tools/testing/selftests/bpf/progs/arena_atomics.c +++ b/tools/testing/selftests/bpf/progs/arena_atomics.c @@ -5,7 +5,7 @@ #include #include #include -#include "bpf_arena_common.h" +#include #include "../../../include/linux/filter.h" #include "bpf_misc.h" diff --git a/tools/testing/selftests/bpf/progs/arena_spin_lock.c b/tools/testing/selftests/bpf/progs/arena_spin_lock.c index 086b57a426cf..7236d92d382f 100644 --- a/tools/testing/selftests/bpf/progs/arena_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/arena_spin_lock.c @@ -4,7 +4,7 @@ #include #include #include "bpf_misc.h" -#include "bpf_arena_spin_lock.h" +#include struct { __uint(type, BPF_MAP_TYPE_ARENA); diff --git a/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h deleted file mode 100644 index f90531cf3ee5..000000000000 --- a/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h +++ /dev/null @@ -1,542 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. 
*/ -#ifndef BPF_ARENA_SPIN_LOCK_H -#define BPF_ARENA_SPIN_LOCK_H - -#include -#include -#include "bpf_atomic.h" - -#define arch_mcs_spin_lock_contended_label(l, label) smp_cond_load_acquire_label(l, VAL, label) -#define arch_mcs_spin_unlock_contended(l) smp_store_release((l), 1) - -#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST) - -#define EBUSY 16 -#define EOPNOTSUPP 95 -#define ETIMEDOUT 110 - -#ifndef __arena -#define __arena __attribute__((address_space(1))) -#endif - -extern unsigned long CONFIG_NR_CPUS __kconfig; - -/* - * Typically, we'd just rely on the definition in vmlinux.h for qspinlock, but - * PowerPC overrides the definition to define lock->val as u32 instead of - * atomic_t, leading to compilation errors. Import a local definition below so - * that we don't depend on the vmlinux.h version. - */ - -struct __qspinlock { - union { - atomic_t val; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - struct { - u8 locked; - u8 pending; - }; - struct { - u16 locked_pending; - u16 tail; - }; -#else - struct { - u16 tail; - u16 locked_pending; - }; - struct { - u8 reserved[2]; - u8 pending; - u8 locked; - }; -#endif - }; -}; - -#define arena_spinlock_t struct __qspinlock -/* FIXME: Using typedef causes CO-RE relocation error */ -/* typedef struct qspinlock arena_spinlock_t; */ - -struct arena_mcs_spinlock { - struct arena_mcs_spinlock __arena *next; - int locked; - int count; -}; - -struct arena_qnode { - struct arena_mcs_spinlock mcs; -}; - -#define _Q_MAX_NODES 4 -#define _Q_PENDING_LOOPS 1 - -/* - * Bitfields in the atomic value: - * - * 0- 7: locked byte - * 8: pending - * 9-15: not used - * 16-17: tail index - * 18-31: tail cpu (+1) - */ -#define _Q_MAX_CPUS 1024 - -#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ - << _Q_ ## type ## _OFFSET) -#define _Q_LOCKED_OFFSET 0 -#define _Q_LOCKED_BITS 8 -#define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) - -#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) -#define 
_Q_PENDING_BITS 8 -#define _Q_PENDING_MASK _Q_SET_MASK(PENDING) - -#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) -#define _Q_TAIL_IDX_BITS 2 -#define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX) - -#define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS) -#define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET) -#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) - -#define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET -#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK) - -#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) -#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET) - -struct arena_qnode __arena qnodes[_Q_MAX_CPUS][_Q_MAX_NODES]; - -static inline u32 encode_tail(int cpu, int idx) -{ - u32 tail; - - tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET; - tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */ - - return tail; -} - -static inline struct arena_mcs_spinlock __arena *decode_tail(u32 tail) -{ - u32 cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1; - u32 idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; - - return &qnodes[cpu][idx].mcs; -} - -static inline -struct arena_mcs_spinlock __arena *grab_mcs_node(struct arena_mcs_spinlock __arena *base, int idx) -{ - return &((struct arena_qnode __arena *)base + idx)->mcs; -} - -#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) - -/** - * xchg_tail - Put in the new queue tail code word & retrieve previous one - * @lock : Pointer to queued spinlock structure - * @tail : The new queue tail code word - * Return: The previous queue tail code word - * - * xchg(lock, tail) - * - * p,*,* -> n,*,* ; prev = xchg(lock, node) - */ -static __always_inline u32 xchg_tail(arena_spinlock_t __arena *lock, u32 tail) -{ - u32 old, new; - - old = atomic_read(&lock->val); - do { - new = (old & _Q_LOCKED_PENDING_MASK) | tail; - /* - * We can use relaxed semantics since the caller ensures that - * the MCS node is properly initialized before updating the - * tail. 
- */ - /* These loops are not expected to stall, but we still need to - * prove to the verifier they will terminate eventually. - */ - cond_break_label(out); - } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new)); - - return old; -out: - bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__); - return old; -} - -/** - * clear_pending - clear the pending bit. - * @lock: Pointer to queued spinlock structure - * - * *,1,* -> *,0,* - */ -static __always_inline void clear_pending(arena_spinlock_t __arena *lock) -{ - WRITE_ONCE(lock->pending, 0); -} - -/** - * clear_pending_set_locked - take ownership and clear the pending bit. - * @lock: Pointer to queued spinlock structure - * - * *,1,0 -> *,0,1 - * - * Lock stealing is not allowed if this function is used. - */ -static __always_inline void clear_pending_set_locked(arena_spinlock_t __arena *lock) -{ - WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL); -} - -/** - * set_locked - Set the lock bit and own the lock - * @lock: Pointer to queued spinlock structure - * - * *,*,0 -> *,0,1 - */ -static __always_inline void set_locked(arena_spinlock_t __arena *lock) -{ - WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); -} - -static __always_inline -u32 arena_fetch_set_pending_acquire(arena_spinlock_t __arena *lock) -{ - u32 old, new; - - old = atomic_read(&lock->val); - do { - new = old | _Q_PENDING_VAL; - /* - * These loops are not expected to stall, but we still need to - * prove to the verifier they will terminate eventually. 
- */ - cond_break_label(out); - } while (!atomic_try_cmpxchg_acquire(&lock->val, &old, new)); - - return old; -out: - bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__); - return old; -} - -/** - * arena_spin_trylock - try to acquire the queued spinlock - * @lock : Pointer to queued spinlock structure - * Return: 1 if lock acquired, 0 if failed - */ -static __always_inline int arena_spin_trylock(arena_spinlock_t __arena *lock) -{ - int val = atomic_read(&lock->val); - - if (unlikely(val)) - return 0; - - return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)); -} - -__noinline -int arena_spin_lock_slowpath(arena_spinlock_t __arena __arg_arena *lock, u32 val) -{ - struct arena_mcs_spinlock __arena *prev, *next, *node0, *node; - int ret = -ETIMEDOUT; - u32 old, tail; - int idx; - - /* - * Wait for in-progress pending->locked hand-overs with a bounded - * number of spins so that we guarantee forward progress. - * - * 0,1,0 -> 0,0,1 - */ - if (val == _Q_PENDING_VAL) { - int cnt = _Q_PENDING_LOOPS; - val = atomic_cond_read_relaxed_label(&lock->val, - (VAL != _Q_PENDING_VAL) || !cnt--, - release_err); - } - - /* - * If we observe any contention; queue. - */ - if (val & ~_Q_LOCKED_MASK) - goto queue; - - /* - * trylock || pending - * - * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock - */ - val = arena_fetch_set_pending_acquire(lock); - - /* - * If we observe contention, there is a concurrent locker. - * - * Undo and queue; our setting of PENDING might have made the - * n,0,0 -> 0,0,0 transition fail and it will now be waiting - * on @next to become !NULL. - */ - if (unlikely(val & ~_Q_LOCKED_MASK)) { - - /* Undo PENDING if we set it. */ - if (!(val & _Q_PENDING_MASK)) - clear_pending(lock); - - goto queue; - } - - /* - * We're pending, wait for the owner to go away. 
- * - * 0,1,1 -> *,1,0 - * - * this wait loop must be a load-acquire such that we match the - * store-release that clears the locked bit and create lock - * sequentiality; this is because not all - * clear_pending_set_locked() implementations imply full - * barriers. - */ - if (val & _Q_LOCKED_MASK) - (void)smp_cond_load_acquire_label(&lock->locked, !VAL, release_err); - - /* - * take ownership and clear the pending bit. - * - * 0,1,0 -> 0,0,1 - */ - clear_pending_set_locked(lock); - return 0; - - /* - * End of pending bit optimistic spinning and beginning of MCS - * queuing. - */ -queue: - node0 = &(qnodes[bpf_get_smp_processor_id()])[0].mcs; - idx = node0->count++; - tail = encode_tail(bpf_get_smp_processor_id(), idx); - - /* - * 4 nodes are allocated based on the assumption that there will not be - * nested NMIs taking spinlocks. That may not be true in some - * architectures even though the chance of needing more than 4 nodes - * will still be extremely unlikely. When that happens, we simply return - * an error. Original qspinlock has a trylock fallback in this case. - */ - if (unlikely(idx >= _Q_MAX_NODES)) { - ret = -EBUSY; - goto release_node_err; - } - - node = grab_mcs_node(node0, idx); - - /* - * Ensure that we increment the head node->count before initialising - * the actual node. If the compiler is kind enough to reorder these - * stores, then an IRQ could overwrite our assignments. - */ - barrier(); - - node->locked = 0; - node->next = NULL; - - /* - * We touched a (possibly) cold cacheline in the per-cpu queue node; - * attempt the trylock once more in the hope someone let go while we - * weren't watching. - */ - if (arena_spin_trylock(lock)) - goto release; - - /* - * Ensure that the initialisation of @node is complete before we - * publish the updated tail via xchg_tail() and potentially link - * @node into the waitqueue via WRITE_ONCE(prev->next, node) below. - */ - smp_wmb(); - - /* - * Publish the updated tail. 
- * We have already touched the queueing cacheline; don't bother with - * pending stuff. - * - * p,*,* -> n,*,* - */ - old = xchg_tail(lock, tail); - next = NULL; - - /* - * if there was a previous node; link it and wait until reaching the - * head of the waitqueue. - */ - if (old & _Q_TAIL_MASK) { - prev = decode_tail(old); - - /* Link @node into the waitqueue. */ - WRITE_ONCE(prev->next, node); - - (void)arch_mcs_spin_lock_contended_label(&node->locked, release_node_err); - - /* - * While waiting for the MCS lock, the next pointer may have - * been set by another lock waiter. We cannot prefetch here - * due to lack of equivalent instruction in BPF ISA. - */ - next = READ_ONCE(node->next); - } - - /* - * we're at the head of the waitqueue, wait for the owner & pending to - * go away. - * - * *,x,y -> *,0,0 - * - * this wait loop must use a load-acquire such that we match the - * store-release that clears the locked bit and create lock - * sequentiality; this is because the set_locked() function below - * does not imply a full barrier. - */ - val = atomic_cond_read_acquire_label(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK), - release_node_err); - - /* - * claim the lock: - * - * n,0,0 -> 0,0,1 : lock, uncontended - * *,*,0 -> *,*,1 : lock, contended - * - * If the queue head is the only one in the queue (lock value == tail) - * and nobody is pending, clear the tail code and grab the lock. - * Otherwise, we only need to grab the lock. - */ - - /* - * In the PV case we might already have _Q_LOCKED_VAL set, because - * of lock stealing; therefore we must also allow: - * - * n,0,1 -> 0,0,1 - * - * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the - * above wait condition, therefore any concurrent setting of - * PENDING will make the uncontended transition fail. 
- */ - if ((val & _Q_TAIL_MASK) == tail) { - if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL)) - goto release; /* No contention */ - } - - /* - * Either somebody is queued behind us or _Q_PENDING_VAL got set - * which will then detect the remaining tail and queue behind us - * ensuring we'll see a @next. - */ - set_locked(lock); - - /* - * contended path; wait for next if not observed yet, release. - */ - if (!next) - next = smp_cond_load_relaxed_label(&node->next, (VAL), release_node_err); - - arch_mcs_spin_unlock_contended(&next->locked); - -release:; - /* - * release the node - * - * Doing a normal dec vs this_cpu_dec is fine. An upper context always - * decrements count it incremented before returning, thus we're fine. - * For contexts interrupting us, they either observe our dec or not. - * Just ensure the compiler doesn't reorder this statement, as a - * this_cpu_dec implicitly implied that. - */ - barrier(); - node0->count--; - return 0; -release_node_err: - barrier(); - node0->count--; - goto release_err; -release_err: - return ret; -} - -/** - * arena_spin_lock - acquire a queued spinlock - * @lock: Pointer to queued spinlock structure - * - * On error, returned value will be negative. - * On success, zero is returned. - * - * The return value _must_ be tested against zero for success, - * instead of checking it against negative, for passing the - * BPF verifier. - * - * The user should do: - * if (arena_spin_lock(...) != 0) // failure - * or - * if (arena_spin_lock(...) == 0) // success - * or - * if (arena_spin_lock(...)) // failure - * or - * if (!arena_spin_lock(...)) // success - * instead of: - * if (arena_spin_lock(...) < 0) // failure - * - * The return value can still be inspected later. 
- */ -static __always_inline int arena_spin_lock(arena_spinlock_t __arena *lock) -{ - int val = 0; - - if (CONFIG_NR_CPUS > 1024) - return -EOPNOTSUPP; - - bpf_preempt_disable(); - if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) - return 0; - - val = arena_spin_lock_slowpath(lock, val); - /* FIXME: bpf_assert_range(-MAX_ERRNO, 0) once we have it working for all cases. */ - if (val) - bpf_preempt_enable(); - return val; -} - -/** - * arena_spin_unlock - release a queued spinlock - * @lock : Pointer to queued spinlock structure - */ -static __always_inline void arena_spin_unlock(arena_spinlock_t __arena *lock) -{ - /* - * unlock() needs release semantics: - */ - smp_store_release(&lock->locked, 0); - bpf_preempt_enable(); -} - -#define arena_spin_lock_irqsave(lock, flags) \ - ({ \ - int __ret; \ - bpf_local_irq_save(&(flags)); \ - __ret = arena_spin_lock((lock)); \ - if (__ret) \ - bpf_local_irq_restore(&(flags)); \ - (__ret); \ - }) - -#define arena_spin_unlock_irqrestore(lock, flags) \ - ({ \ - arena_spin_unlock((lock)); \ - bpf_local_irq_restore(&(flags)); \ - }) - -#endif - -#endif /* BPF_ARENA_SPIN_LOCK_H */ diff --git a/tools/testing/selftests/bpf/progs/compute_live_registers.c b/tools/testing/selftests/bpf/progs/compute_live_registers.c index f05e120f3450..d055fc7b3b95 100644 --- a/tools/testing/selftests/bpf/progs/compute_live_registers.c +++ b/tools/testing/selftests/bpf/progs/compute_live_registers.c @@ -3,7 +3,7 @@ #include #include #include "../../../include/linux/filter.h" -#include "bpf_arena_common.h" +#include #include "bpf_misc.h" struct { diff --git a/tools/testing/selftests/bpf/progs/lpm_trie_bench.c b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c index a0e6ebd5507a..2831cf4445e8 100644 --- a/tools/testing/selftests/bpf/progs/lpm_trie_bench.c +++ b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c @@ -7,7 +7,7 @@ #include #include #include "bpf_misc.h" -#include "bpf_atomic.h" +#include #include "progs/lpm_trie.h" 
#define BPF_OBJ_NAME_LEN 16U diff --git a/tools/testing/selftests/bpf/progs/stream.c b/tools/testing/selftests/bpf/progs/stream.c index 6f999ba951a3..92ba1d72e0ec 100644 --- a/tools/testing/selftests/bpf/progs/stream.c +++ b/tools/testing/selftests/bpf/progs/stream.c @@ -5,7 +5,7 @@ #include #include "bpf_misc.h" #include "bpf_experimental.h" -#include "bpf_arena_common.h" +#include struct arr_elem { struct bpf_res_spin_lock lock; diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c index 62e282f4448a..89d72c8d756a 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena.c @@ -8,7 +8,7 @@ #include #include "bpf_misc.h" #include "bpf_experimental.h" -#include "bpf_arena_common.h" +#include #define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8))) diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_globals1.c b/tools/testing/selftests/bpf/progs/verifier_arena_globals1.c index 83182ddbfb95..45d364b0bc85 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena_globals1.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena_globals1.c @@ -6,7 +6,7 @@ #include #include #include "bpf_experimental.h" -#include "bpf_arena_common.h" +#include #include "bpf_misc.h" #define ARENA_PAGES (1UL<< (32 - __builtin_ffs(__PAGE_SIZE) + 1)) diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_globals2.c b/tools/testing/selftests/bpf/progs/verifier_arena_globals2.c index e6bd7b61f9f1..b51594dbc005 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena_globals2.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena_globals2.c @@ -7,7 +7,7 @@ #include #include "bpf_misc.h" #include "bpf_experimental.h" -#include "bpf_arena_common.h" +#include #define ARENA_PAGES (32) diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c index 
5f7e7afee169..6ab8730d4878 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c @@ -7,7 +7,7 @@ #include #include "bpf_misc.h" #include "bpf_experimental.h" -#include "bpf_arena_common.h" +#include #define ARENA_SIZE (1ull << 32) diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c index c8494b682c31..1026524a1983 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c @@ -3,7 +3,7 @@ #include #include #include "bpf_misc.h" -#include "bpf_arena_common.h" +#include #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ -- cgit v1.2.3 From 9ab78691eb5fd0d3ad0a1994d4103223678eb78b Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Sun, 26 Apr 2026 15:03:34 -0400 Subject: selftests/bpf: Add arena ASAN runtime to libarena Add an address sanitizer (ASAN) runtime to the arena library. The ASAN runtime implements the functions injected into BPF binaries by LLVM sanitization when ASAN is enabled during compilation. The runtime also includes functions called explicitly by memory allocation code to mark memory as poisoned/unpoisoned to ASAN. This code is a no-op when sanitization is turned off. 
Signed-off-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260426190338.4615-5-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/libarena/include/libarena/asan.h | 103 ++++ .../bpf/libarena/include/libarena/common.h | 1 + .../testing/selftests/bpf/libarena/src/asan.bpf.c | 553 +++++++++++++++++++++ 3 files changed, 657 insertions(+) create mode 100644 tools/testing/selftests/bpf/libarena/include/libarena/asan.h create mode 100644 tools/testing/selftests/bpf/libarena/src/asan.bpf.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/asan.h b/tools/testing/selftests/bpf/libarena/include/libarena/asan.h new file mode 100644 index 000000000000..eb9fc69d9eb0 --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/include/libarena/asan.h @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ +#pragma once + +struct asan_init_args { + u64 arena_all_pages; + u64 arena_globals_pages; +}; + +int asan_init(struct asan_init_args *args); + +extern volatile u64 __asan_shadow_memory_dynamic_address; +extern volatile u32 asan_reported; +extern volatile bool asan_inited; +extern volatile bool asan_report_once; + +#ifdef __BPF__ + +#define ASAN_SHADOW_SHIFT 3 +#define ASAN_SHADOW_SCALE (1ULL << ASAN_SHADOW_SHIFT) +#define ASAN_GRANULE_MASK ((1ULL << ASAN_SHADOW_SHIFT) - 1) +#define ASAN_GRANULE(addr) ((s8)((u32)(u64)((addr)) & ASAN_GRANULE_MASK)) + +#define __noasan __attribute__((no_sanitize("address"))) + +#ifdef BPF_ARENA_ASAN + +typedef s8 __arena s8a; + +static inline +s8a *mem_to_shadow(void __arena __arg_arena *addr) +{ + return (s8a *)(((u32)(u64)addr >> ASAN_SHADOW_SHIFT) + + __asan_shadow_memory_dynamic_address); +} + +__weak __noasan +bool asan_ready(void) +{ + return __asan_shadow_memory_dynamic_address; +} + +int asan_poison(void __arena *addr, s8 val, size_t size); +int asan_unpoison(void __arena *addr, size_t size); 
+bool asan_shadow_set(void __arena *addr); + +/* + * Dummy calls to ensure the ASAN runtime's BTF information is present + * in every object file when compiling the runtime and local BPF code + * separately. The runtime calls are injected into the LLVM IR file + */ +#define DECLARE_ASAN_LOAD_STORE_SIZE(size) \ + void __asan_store##size(intptr_t addr); \ + void __asan_store##size##_noabort(intptr_t addr); \ + void __asan_load##size(intptr_t addr); \ + void __asan_load##size##_noabort(intptr_t addr); \ + void __asan_report_store##size(intptr_t addr); \ + void __asan_report_store##size##_noabort(intptr_t addr); \ + void __asan_report_load##size(intptr_t addr); \ + void __asan_report_load##size##_noabort(intptr_t addr); + +DECLARE_ASAN_LOAD_STORE_SIZE(1); +DECLARE_ASAN_LOAD_STORE_SIZE(2); +DECLARE_ASAN_LOAD_STORE_SIZE(4); +DECLARE_ASAN_LOAD_STORE_SIZE(8); + +void __asan_storeN(intptr_t addr, ssize_t size); +void __asan_storeN_noabort(intptr_t addr, ssize_t size); +void __asan_loadN(intptr_t addr, ssize_t size); +void __asan_loadN_noabort(intptr_t addr, ssize_t size); + +/* + * Force LLVM to emit BTF information for the stubs, + * because the ASAN pass in LLVM by itself doesn't. 
+ */ +#define ASAN_LOAD_STORE_SIZE(size) \ + __asan_store##size, \ + __asan_store##size##_noabort, \ + __asan_load##size, \ + __asan_load##size##_noabort, \ + __asan_report_store##size, \ + __asan_report_store##size##_noabort, \ + __asan_report_load##size, \ + __asan_report_load##size##_noabort + +__attribute__((used)) +static void (*__asan_btf_anchors[])(intptr_t) = { + ASAN_LOAD_STORE_SIZE(1), + ASAN_LOAD_STORE_SIZE(2), + ASAN_LOAD_STORE_SIZE(4), + ASAN_LOAD_STORE_SIZE(8), +}; + +#else /* BPF_ARENA_ASAN */ + +static inline int asan_poison(void __arena *addr, s8 val, size_t size) { return 0; } +static inline int asan_unpoison(void __arena *addr, size_t size) { return 0; } +static inline bool asan_shadow_set(void __arena *addr) { return 0; } +__weak bool asan_ready(void) { return true; } + +#endif /* BPF_ARENA_ASAN */ + +#endif /* __BPF__ */ diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/common.h b/tools/testing/selftests/bpf/libarena/include/libarena/common.h index d088f3e75798..21eb18bf4533 100644 --- a/tools/testing/selftests/bpf/libarena/include/libarena/common.h +++ b/tools/testing/selftests/bpf/libarena/include/libarena/common.h @@ -44,6 +44,7 @@ struct { * the opaque volatile variable 0 instead of the constant 0. */ extern const volatile u32 zero; +extern volatile u64 asan_violated; int arena_fls(__u64 word); diff --git a/tools/testing/selftests/bpf/libarena/src/asan.bpf.c b/tools/testing/selftests/bpf/libarena/src/asan.bpf.c new file mode 100644 index 000000000000..64c5b990086c --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/src/asan.bpf.c @@ -0,0 +1,553 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include + + +enum { + /* + * Is the access checked by check_region_inline + * a read or a write? + */ + ASAN_READ = 0x0U, + ASAN_WRITE = 0x1U, +}; + +/* + * Address sanitizer (ASAN) for arena-based BPF programs, inspired + * by KASAN. 
+ * + * The API + * ------- + * + * The implementation includes two kinds of components: Implementation + * of ASAN hooks injected by LLVM into the program, and API calls that + * allocators use to mark memory as valid or invalid. The full list is: + * + * LLVM stubs: + * + * void __asan_{load, store}{1, 2, 4, 8}(intptr_t addr) + * Checks whether an access is valid. All variations covered + * by check_region_inline(). + * + * void __asan_{store, load}N(intptr_t addr, ssize_t size) + * + * void __asan_report_{load, store}{1, 2, 4, 8}(intptr_t addr) + * Report an access violation for the program. Used when LLVM + * uses direct code generation for shadow map checks. + * + * void *__asan_memcpy(void *d, const void *s, size_t n) + * void *__asan_memmove(void *d, const void *s, size_t n) + * void *__asan_memset(void *p, int c, size_t n) + * Hooks for ASAN instrumentation of the LLVM mem* builtins. + * Currently unimplemented just like the builtins themselves. + * + * API methods: + * + * asan_init() + * Initialize the ASAN map for the arena. + * + * asan_poison() + * Mark a region of memory as poisoned. Accessing poisoned memory + * causes asan_report() to fire. Invoked during free(). + * + * asan_unpoison() + * Mark a region as unpoisoned after alloc(). + * + * asan_shadow_set() + * Check directly whether a byte is poisoned. + * + * The Algorithm In Brief + * ---------------------- + * Each group of 8 bytes is mapped to a "granule" in the shadow map. This + * granule is one byte in size and describes which bytes are valid. + * Possible values are: + * + * 0: All bytes are valid. Makes checks in the middle of an allocated region + * (most of them) fast. + * (0, 7]: How many consecutive bytes are valid, starting from the lowest one. + * The tradeoff is that we can't poison individual bytes in the middle of a + * valid region. + * [0x80, 0xff]: Special poison values, can be used to denote specific error + * modes (e.g., recently freed vs uninitialized memory). 
+ * + * The mapping between a memory location and its shadow is: + * shadow_addr = shadow_base + (addr >> 3). We retain the 8:1 data:shadow + * ratio of existing ASAN implementations as a compromise between tracking + * granularity and space usage/scan overhead. + */ + +#ifdef BPF_ARENA_ASAN + +#pragma clang attribute push(__attribute__((no_sanitize("address"))), \ + apply_to = function) + +#define SHADOW_ALL_ZEROES ((u64)-1) + +/* + * Canary variable for ASAN violations. Set to the offending address. + */ +volatile u64 asan_violated = 0; + +/* + * Base address of the shadow map. + */ +volatile u64 __asan_shadow_memory_dynamic_address; + +volatile u32 asan_reported = false; +volatile bool asan_inited = false; + +/* + * Set during program load. + */ +volatile bool asan_report_once = false; + +/* + * BPF does not currently support the memset/memcpy/memcmp intrinsics. + * For large sequential copies, or assignments of large data structures, + * the frontend will generate an intrinsic that causes the BPF backend + * to exit due to a missing implementation. Provide a simple implementation + * just for memset to use it for poisoning/unpoisoning the map. + */ +__weak int asan_memset(s8a __arg_arena *dst, s8 val, size_t size) +{ + size_t i; + + for (i = zero; i < size && can_loop; i++) + dst[i] = val; + + return 0; +} + +/* Validate a 1-byte access, always within a single byte. */ +static __always_inline bool memory_is_poisoned_1(s8a *addr) +{ + s8 shadow_value = *(s8a *)mem_to_shadow(addr); + + /* Byte is 0, access is valid. */ + if (likely(!shadow_value)) + return false; + + /* + * Byte is non-zero. Access is valid if granule offset in [0, shadow_value), + * so the memory is poisoned if shadow_value is negative or not greater than + * the granule offset. + */ + + return ASAN_GRANULE(addr) >= shadow_value; +} + +/* Validate a 2-, 4-, or 8-byte access, shadow spans up to 2 bytes. 
*/ +static __always_inline bool memory_is_poisoned_2_4_8(s8a *addr, u64 size) +{ + u64 end = (u64)addr + size - 1; + + /* + * Region fully within a single byte (addition didn't + * overflow above ASAN_GRANULE). + */ + if (likely(ASAN_GRANULE(end) >= size - 1)) + return memory_is_poisoned_1((s8a *)end); + + /* + * Otherwise first byte must be fully unpoisoned, and second byte + * must be unpoisoned up to the end of the accessed region. + */ + + return *(s8a *)mem_to_shadow(addr) || memory_is_poisoned_1((s8a *)end); +} + +__weak bool asan_shadow_set(void __arena __arg_arena *addr) +{ + return memory_is_poisoned_1(addr); +} + +static __always_inline u64 first_nonzero_byte(u64 addr, size_t size) +{ + while (size && can_loop) { + if (unlikely(*(s8a *)addr)) + return addr; + addr += 1; + size -= 1; + } + + return SHADOW_ALL_ZEROES; +} + +static __always_inline bool memory_is_poisoned_n(s8a *addr, u64 size) +{ + u64 ret; + u64 start; + u64 end; + + /* Size of [start, end] is end - start + 1. */ + start = (u64)mem_to_shadow(addr); + end = (u64)mem_to_shadow(addr + size - 1); + + ret = first_nonzero_byte(start, (end - start) + 1); + if (likely(ret == SHADOW_ALL_ZEROES)) + return false; + + return unlikely(ret != end || ASAN_GRANULE(addr + size - 1) >= *(s8a *)end); +} + +__weak int asan_report(s8a __arg_arena *addr, size_t sz, u32 flags) +{ + u32 reported = __sync_val_compare_and_swap(&asan_reported, false, true); + + /* Only report the first ASAN violation. */ + if (reported && asan_report_once) + return 0; + + asan_violated = (u64)addr; + + arena_stderr("Memory violation for address %p (0x%lx) for %s of size %ld\n", + addr, (u64)addr, + (flags & ASAN_WRITE) ? "write" : "read", + sz); + bpf_stream_print_stack(BPF_STDERR); + + return 0; +} + +static __always_inline bool check_asan_args(s8a *addr, size_t size, + bool *result) +{ + bool valid = true; + + /* Size 0 accesses are valid even if the address is invalid. 
*/ + if (unlikely(size == 0)) + goto confirmed_valid; + + /* + * Wraparound is possible for values close to the edge of the + * 4GiB boundary of the arena (last valid address is (1UL << 32) - 1). + * + * + * The wraparound detection below works for small sizes. check_asan_args is + * always called from the builtin ASAN checks, so 1 <= size <= 64. Even + * for storeN/loadN, which we do not expect to encounter, the intrinsics will + * not have a large enough size that: + * + * - addr + size > MAX_U32 + * - (u32)(addr + size) > (u32) addr + * + * which would defeat wraparound detection. + */ + if (unlikely((u32)(u64)(addr + size) < (u32)(u64)addr)) + goto confirmed_invalid; + + return false; + +confirmed_invalid: + valid = false; + + /* FALLTHROUGH */ +confirmed_valid: + *result = valid; + + return true; +} + +static __always_inline bool check_region_inline(intptr_t ptr, size_t size, + u32 flags) +{ + s8a *addr = (s8a *)(u64)ptr; + bool is_poisoned, is_valid; + + if (check_asan_args(addr, size, &is_valid)) { + if (!is_valid) + asan_report(addr, size, flags); + return is_valid; + } + + switch (size) { + case 1: + is_poisoned = memory_is_poisoned_1(addr); + break; + case 2: + case 4: + case 8: + is_poisoned = memory_is_poisoned_2_4_8(addr, size); + break; + default: + is_poisoned = memory_is_poisoned_n(addr, size); + } + + if (is_poisoned) { + asan_report(addr, size, flags); + return false; + } + + return true; +} + +/* + * __alias is not supported for BPF so define *__noabort() variants as wrappers. 
+ */ +#define DEFINE_ASAN_LOAD_STORE(size) \ + __hidden void __asan_store##size(intptr_t addr) \ + { \ + check_region_inline(addr, size, ASAN_WRITE); \ + } \ + __hidden void __asan_store##size##_noabort(intptr_t addr) \ + { \ + check_region_inline(addr, size, ASAN_WRITE); \ + } \ + __hidden void __asan_load##size(intptr_t addr) \ + { \ + check_region_inline(addr, size, ASAN_READ); \ + } \ + __hidden void __asan_load##size##_noabort(intptr_t addr) \ + { \ + check_region_inline(addr, size, ASAN_READ); \ + } \ + __hidden void __asan_report_store##size(intptr_t addr) \ + { \ + asan_report((s8a *)addr, size, ASAN_WRITE); \ + } \ + __hidden void __asan_report_store##size##_noabort(intptr_t addr) \ + { \ + asan_report((s8a *)addr, size, ASAN_WRITE); \ + } \ + __hidden void __asan_report_load##size(intptr_t addr) \ + { \ + asan_report((s8a *)addr, size, ASAN_READ); \ + } \ + __hidden void __asan_report_load##size##_noabort(intptr_t addr) \ + { \ + asan_report((s8a *)addr, size, ASAN_READ); \ + } + +DEFINE_ASAN_LOAD_STORE(1); +DEFINE_ASAN_LOAD_STORE(2); +DEFINE_ASAN_LOAD_STORE(4); +DEFINE_ASAN_LOAD_STORE(8); + +void __asan_storeN(intptr_t addr, ssize_t size) +{ + check_region_inline(addr, size, ASAN_WRITE); +} + +void __asan_storeN_noabort(intptr_t addr, ssize_t size) +{ + check_region_inline(addr, size, ASAN_WRITE); +} + +void __asan_loadN(intptr_t addr, ssize_t size) +{ + check_region_inline(addr, size, ASAN_READ); +} + +void __asan_loadN_noabort(intptr_t addr, ssize_t size) +{ + check_region_inline(addr, size, ASAN_READ); +} + +/* + * We currently do not sanitize globals. + */ +void __asan_register_globals(intptr_t globals, size_t n) +{ +} + +void __asan_unregister_globals(intptr_t globals, size_t n) +{ +} + +/* + * We do not currently have memcpy/memmove/memset intrinsics + * in LLVM. Do not implement sanitization. 
+ */ +void *__asan_memcpy(void *d, const void *s, size_t n) +{ + arena_stderr("ASAN: Unexpected %s call", __func__); + return NULL; +} + +void *__asan_memmove(void *d, const void *s, size_t n) +{ + arena_stderr("ASAN: Unexpected %s call", __func__); + return NULL; +} + +void *__asan_memset(void *p, int c, size_t n) +{ + arena_stderr("ASAN: Unexpected %s call", __func__); + return NULL; +} + +/* + * Poisoning code, used when we add more freed memory to the allocator by: + * a) pulling memory from the arena segment using bpf_arena_alloc_pages() + * b) freeing memory from application code + */ +__hidden __noasan int asan_poison(void __arena *addr, s8 val, size_t size) +{ + s8a *shadow; + size_t len; + + /* + * Poisoning from a non-granule address makes no sense: We can only allocate + * memory to the application that has a granule-aligned starting address, + * and bpf_arena_alloc_pages returns page-aligned memory. A non-aligned + * addr then implies we're freeing a different address than the one we + * allocated. + */ + if (unlikely((u64)addr & ASAN_GRANULE_MASK)) + return -EINVAL; + + /* + * We cannot free an unaligned region because it'd be possible that we + * cannot describe the resulting poisoning state of the granule in + * the ASAN encoding. + * + * Every granule represents a region of memory that looks like the + * following (P for poisoned bytes, C for clear): + * + * + * [ C C C ... P P ] + * + * The value of the granule's shadow map is the number of clear bytes in + * it. We cannot represent granules with the following state: + * + * [ P P ... C C ... P P ] + * + * That would be possible if we could free unaligned regions, so prevent that. + */ + if (unlikely(size & ASAN_GRANULE_MASK)) + return -EINVAL; + + shadow = mem_to_shadow(addr); + len = size >> ASAN_SHADOW_SHIFT; + + asan_memset(shadow, val, len); + + return 0; +} + +/* + * Unpoisoning code for marking memory as valid during allocation calls. 
+ * + * Very similar to asan_poison, except we need to round up instead of + * down, then partially poison the last granule if necessary. + * + * Partial poisoning is useful for keeping the padding poisoned. Allocations + * are granule-aligned, so we're reserving granule-aligned sizes for the + * allocation. However, we want to still treat accesses to the padding as + * invalid. Partial poisoning takes care of that. Freeing and poisoning the + * memory is still done in granule-aligned sizes and repoisons the already + * poisoned padding. + */ +__hidden __noasan int asan_unpoison(void __arena *addr, size_t size) +{ + size_t partial = size & ASAN_GRANULE_MASK; + s8a *shadow; + size_t len; + + /* + * We cannot allocate in the middle of the granule. The ASAN shadow + * map encoding only describes regions of memory where every granule + * follows this format (P for poisoned, C for clear): + * + * + * [ C C C ... P P ] + * + * This is so we can use a single number in [0, ASAN_SHADOW_SCALE) + * to represent the poison state of the granule. + */ + if (unlikely((u64)addr & ASAN_GRANULE_MASK)) + return -EINVAL; + + shadow = mem_to_shadow(addr); + len = size >> ASAN_SHADOW_SHIFT; + + asan_memset(shadow, 0, len); + + /* + * If we are allocating a non-granule aligned region, we need to adjust + * the last byte of the shadow map to list how many bytes in the granule + * are unpoisoned. If the region is aligned, then the memset call above + * was enough. + */ + if (partial) + shadow[len] = partial; + + return 0; +} + +/* + * Initialize ASAN state when necessary. Triggered from userspace before + * allocator startup. + */ +SEC("syscall") +__weak __noasan int asan_init(struct asan_init_args *args) +{ + u64 globals_pages = args->arena_globals_pages; + u64 all_pages = args->arena_all_pages; + u64 shadow_map, shadow_pgoff; + u64 shadow_pages; + + if (asan_inited) + return 0; + + /* + * Round up the shadow map size to the nearest page. 
+ */ + shadow_pages = all_pages >> ASAN_SHADOW_SHIFT; + if ((all_pages & ((1 << ASAN_SHADOW_SHIFT) - 1))) + shadow_pages += 1; + + if (all_pages > (1ULL << 32) / __PAGE_SIZE) { + arena_stderr("error: arena size %lx too large", all_pages); + return -EINVAL; + } + + if (globals_pages > all_pages) { + arena_stderr("error: globals %lx do not fit in arena %lx", + globals_pages, all_pages); + return -EINVAL; + } + + if (globals_pages + shadow_pages >= all_pages) { + arena_stderr("error: globals %lx do not leave room for shadow map %lx " + "(arena pages %lx)", + globals_pages, shadow_pages, all_pages); + return -EINVAL; + } + + shadow_pgoff = all_pages - shadow_pages - globals_pages; + __asan_shadow_memory_dynamic_address = shadow_pgoff * __PAGE_SIZE; + + /* + * Allocate the last (1/ASAN_SHADOW_SCALE)th of an arena's pages for the map. + * We find the offset and size from the arena map. + * + * The allocated map pages are zeroed out, meaning all memory is marked as valid + * even if it's not allocated already. This is expected: Since the actual memory + * pages are not allocated, accesses to them will trigger page faults and will be + * reported through BPF streams. Any pages allocated through bpf_arena_alloc_pages + * should be poisoned by the allocator right after the call succeeds. 
+ */ + shadow_map = (u64)bpf_arena_alloc_pages( + &arena, (void __arena *)__asan_shadow_memory_dynamic_address, + shadow_pages, NUMA_NO_NODE, 0); + if (!shadow_map) { + arena_stderr("Could not allocate shadow map\n"); + + __asan_shadow_memory_dynamic_address = 0; + + return -ENOMEM; + } + + asan_inited = true; + + return 0; +} + +#pragma clang attribute pop + +#endif /* BPF_ARENA_ASAN */ + +__weak char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From cfc00618b9dfc75cd507f1a4f0d83b4429627399 Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Sun, 26 Apr 2026 15:03:35 -0400 Subject: selftests/bpf: Add ASAN support for libarena selftests Expand the arena library selftest infrastructure to support address sanitization. Add the compiler flags necessary to compile the library under ASAN when supported. Signed-off-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260426190338.4615-6-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 22 ++++++++- tools/testing/selftests/bpf/libarena/Makefile | 25 ++++++++++- .../bpf/libarena/include/libarena/userspace.h | 33 ++++++++++++++ .../bpf/libarena/selftests/st_asan_common.h | 52 ++++++++++++++++++++++ .../selftests/bpf/libarena/src/common.bpf.c | 2 + 5 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/libarena/selftests/st_asan_common.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 71c7873c4b15..97ee61f2ade5 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -79,6 +79,12 @@ ifneq ($(shell $(CLANG) --target=bpf -mcpu=help 2>&1 | grep 'v4'),) CLANG_CPUV4 := 1 endif +# Check whether clang supports BPF address sanitizer (requires LLVM 22+) +CLANG_HAS_ARENA_ASAN := $(shell echo 'int x;' | \ + $(CLANG) --target=bpf -fsanitize=kernel-address \ + -mllvm -asan-shadow-addr-space=1 \ + -x c -c - -o /dev/null 2>/dev/null && 
echo 1) + # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \ test_sockmap \ @@ -764,6 +770,14 @@ LIBARENA_SKEL := libarena/libarena.skel.h $(LIBARENA_SKEL): $(INCLUDE_DIR)/vmlinux.h $(BPFOBJ) $(LIBARENA_BPF_DEPS) +$(MAKE) -C libarena libarena.skel.h $(LIBARENA_MAKE_ARGS) +ifneq ($(CLANG_HAS_ARENA_ASAN),) +LIBARENA_ASAN_SKEL := libarena/libarena_asan.skel.h +CFLAGS += -DHAS_BPF_ARENA_ASAN + +$(LIBARENA_ASAN_SKEL): $(INCLUDE_DIR)/vmlinux.h $(BPFOBJ) $(LIBARENA_BPF_DEPS) + +$(MAKE) -C libarena libarena_asan.skel.h $(LIBARENA_MAKE_ARGS) +endif + # Define test_progs test runner. TRUNNER_TESTS_DIR := prog_tests TRUNNER_BPF_PROGS_DIR := progs @@ -788,7 +802,9 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \ flow_dissector_load.h \ ip_check_defrag_frags.h \ bpftool_helpers.c \ - usdt_1.c usdt_2.c + usdt_1.c usdt_2.c \ + $(LIBARENA_SKEL) \ + $(LIBARENA_ASAN_SKEL) TRUNNER_LIB_SOURCES := find_bit.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ $(OUTPUT)/liburandom_read.so \ @@ -959,3 +975,7 @@ override define INSTALL_RULE endef libarena: $(LIBARENA_SKEL) + +ifneq ($(CLANG_HAS_ARENA_ASAN),) +libarena_asan: $(LIBARENA_ASAN_SKEL) +endif diff --git a/tools/testing/selftests/bpf/libarena/Makefile b/tools/testing/selftests/bpf/libarena/Makefile index e85b3ad96890..5e2ab514805e 100644 --- a/tools/testing/selftests/bpf/libarena/Makefile +++ b/tools/testing/selftests/bpf/libarena/Makefile @@ -30,6 +30,7 @@ LIBBPF_INCLUDE ?= $(INCLUDE_DIR) # Scan src/ and selftests/ to generate the final binaries LIBARENA_SOURCES = $(wildcard $(LIBARENA)/src/*.bpf.c) $(wildcard $(LIBARENA)/selftests/*.bpf.c) LIBARENA_OBJECTS = $(notdir $(LIBARENA_SOURCES:.bpf.c=.bpf.o)) +LIBARENA_OBJECTS_ASAN = $(notdir $(LIBARENA_SOURCES:.bpf.c=_asan.bpf.o)) INCLUDES = -I$(LIBARENA)/include -I$(BPFDIR) ifneq ($(INCLUDE_DIR),) @@ -39,6 +40,13 @@ ifneq ($(LIBBPF_INCLUDE),) INCLUDES += -I$(LIBBPF_INCLUDE) endif +ASAN_FLAGS = -fsanitize=kernel-address 
-fno-stack-protector -fno-builtin +ASAN_FLAGS += -mllvm -asan-instrument-address-spaces=1 -mllvm -asan-shadow-addr-space=1 +ASAN_FLAGS += -mllvm -asan-use-stack-safety=0 -mllvm -asan-stack=0 +ASAN_FLAGS += -mllvm -asan-kernel=1 +ASAN_FLAGS += -mllvm -asan-constructor-kind=none +ASAN_FLAGS += -mllvm -asan-destructor-kind=none + # ENABLE_ATOMICS_TESTS required because we use arena spinlocks override BPF_CFLAGS += -DENABLE_ATOMICS_TESTS override BPF_CFLAGS += -O2 -g @@ -53,17 +61,32 @@ CFLAGS += $(INCLUDES) vpath %.bpf.c $(LIBARENA)/src $(LIBARENA)/selftests vpath %.c $(LIBARENA)/src $(LIBARENA)/selftests +skeletons: libarena.skel.h libarena_asan.skel.h +.PHONY: skeletons + +libarena_asan.skel.h: libarena_asan.bpf.o + $(call msg,GEN-SKEL,libarena,$@) + $(Q)$(BPFTOOL) gen skeleton $< name "libarena_asan" > $@ + libarena.skel.h: libarena.bpf.o $(call msg,GEN-SKEL,libarena,$@) $(Q)$(BPFTOOL) gen skeleton $< name "libarena" > $@ +libarena_asan.bpf.o: $(LIBARENA_OBJECTS_ASAN) + $(call msg,GEN-OBJ,libarena,$@) + $(Q)$(BPFTOOL) gen object $@ $^ + libarena.bpf.o: $(LIBARENA_OBJECTS) $(call msg,GEN-OBJ,libarena,$@) $(Q)$(BPFTOOL) gen object $@ $^ +%_asan.bpf.o: %.bpf.c + $(call msg,CLNG-BPF,libarena,$@) + $(Q)$(CLANG) $(BPF_CFLAGS) $(ASAN_FLAGS) -DBPF_ARENA_ASAN $(BPF_TARGET_ENDIAN) -c $< -o $@ + %.bpf.o: %.bpf.c $(call msg,CLNG-BPF,libarena,$@) $(Q)$(CLANG) $(BPF_CFLAGS) $(BPF_TARGET_ENDIAN) -c $< -o $@ clean: - $(Q)rm -f *.skel.h *.bpf.o + $(Q)rm -f *.skel.h *.bpf.o *.linked*.o diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/userspace.h b/tools/testing/selftests/bpf/libarena/include/libarena/userspace.h index 0438a751d5fd..88b68ac73cca 100644 --- a/tools/testing/selftests/bpf/libarena/include/libarena/userspace.h +++ b/tools/testing/selftests/bpf/libarena/include/libarena/userspace.h @@ -27,6 +27,11 @@ static inline bool libarena_is_test_prog(const char *name) return strstr(name, "test_") == name; } +static inline bool libarena_is_asan_test_prog(const char 
*name) +{ + return strstr(name, "asan_test") == name; +} + static inline int libarena_run_prog_args(int prog_fd, void *args, size_t argsize) { LIBBPF_OPTS(bpf_test_run_opts, opts); @@ -97,3 +102,31 @@ static inline int libarena_get_globals_pages(int arena_get_globals_fd, free(vec); return 0; } + +static inline int libarena_asan_init(int arena_asan_init_fd, + int asan_init_fd, + size_t arena_all_pages) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct asan_init_args args; + u64 globals_pages; + int ret; + + ret = libarena_get_globals_pages(arena_asan_init_fd, + arena_all_pages, &globals_pages); + if (ret) + return ret; + + args = (struct asan_init_args){ + .arena_all_pages = arena_all_pages, + .arena_globals_pages = globals_pages, + }; + + opts.ctx_in = &args; + opts.ctx_size_in = sizeof(args); + + ret = bpf_prog_test_run_opts(asan_init_fd, &opts); + if (ret) + return ret; + return opts.retval; +} diff --git a/tools/testing/selftests/bpf/libarena/selftests/st_asan_common.h b/tools/testing/selftests/bpf/libarena/selftests/st_asan_common.h new file mode 100644 index 000000000000..1d3edc4372ac --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/selftests/st_asan_common.h @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#pragma once + +#define ST_PAGES 64 + +static inline void print_asan_map_state(void __arena *addr) +{ + arena_stdout("%s:%d ASAN %p -> (val: %x gran: %x set: [%s])", + __func__, __LINE__, addr, + *(s8a *)(addr), ASAN_GRANULE(addr), + asan_shadow_set(addr) ? "yes" : "no"); +} + +/* + * Emit an error and force the current function to exit if the ASAN + * violation state is unexpected. Reset the violation state after. 
+ */ +static inline int asan_validate_addr(bool cond, void __arena *addr) +{ + if ((asan_violated != 0) == cond) { + asan_violated = 0; + return 0; + } + + arena_stdout("%s:%d ASAN asan_violated %lx", __func__, __LINE__, + (u64)asan_violated); + print_asan_map_state(addr); + + asan_violated = 0; + + return -EINVAL; +} + +static inline int asan_validate(void) +{ + if (!asan_violated) + return 0; + + arena_stdout("%s:%d Found ASAN violation at %lx", __func__, __LINE__, + asan_violated); + + asan_violated = 0; + + return -EINVAL; +} + +struct blob { + volatile u8 mem[59]; + u8 oob; +}; diff --git a/tools/testing/selftests/bpf/libarena/src/common.bpf.c b/tools/testing/selftests/bpf/libarena/src/common.bpf.c index 659ccead5624..84e8a8b7d42e 100644 --- a/tools/testing/selftests/bpf/libarena/src/common.bpf.c +++ b/tools/testing/selftests/bpf/libarena/src/common.bpf.c @@ -2,6 +2,8 @@ /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ #include +#include + const volatile u32 zero = 0; int arena_fls(__u64 word) -- cgit v1.2.3 From 86426a28c52d756a5edbe29885716128b8915991 Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Sun, 26 Apr 2026 15:03:36 -0400 Subject: selftests/bpf: Add buddy allocator for libarena Add a byte-oriented buddy allocator for libarena. The buddy allocator provides an alloc/free interface for small arena allocations ranging from 16 bytes to 512 KiB. Lower allocations values are rounded up to 16 bytes. The buddy allocator does not handle larger allocations that can instead use the existing bpf_arena_{alloc, free}_pages() kfunc. 
Signed-off-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260426190338.4615-7-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/default.profraw | Bin 0 -> 160 bytes tools/testing/selftests/bpf/libarena/Makefile | 2 + .../bpf/libarena/include/libarena/buddy.h | 92 +++ .../bpf/libarena/include/libarena/common.h | 14 + .../testing/selftests/bpf/libarena/src/buddy.bpf.c | 903 +++++++++++++++++++++ .../selftests/bpf/libarena/src/common.bpf.c | 23 +- 6 files changed, 1033 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/default.profraw create mode 100644 tools/testing/selftests/bpf/libarena/include/libarena/buddy.h create mode 100644 tools/testing/selftests/bpf/libarena/src/buddy.bpf.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/default.profraw b/tools/testing/selftests/bpf/default.profraw new file mode 100644 index 000000000000..e865e87829f8 Binary files /dev/null and b/tools/testing/selftests/bpf/default.profraw differ diff --git a/tools/testing/selftests/bpf/libarena/Makefile b/tools/testing/selftests/bpf/libarena/Makefile index 5e2ab514805e..3c695f9c0054 100644 --- a/tools/testing/selftests/bpf/libarena/Makefile +++ b/tools/testing/selftests/bpf/libarena/Makefile @@ -51,6 +51,8 @@ ASAN_FLAGS += -mllvm -asan-destructor-kind=none override BPF_CFLAGS += -DENABLE_ATOMICS_TESTS override BPF_CFLAGS += -O2 -g override BPF_CFLAGS += -Wno-incompatible-pointer-types-discards-qualifiers +# Required to define our own arena-based free() +override BPF_CFLAGS += -Wno-incompatible-library-redeclaration # Required for suppressing harmless vmlinux.h-related warnings. 
override BPF_CFLAGS += -Wno-missing-declarations override BPF_CFLAGS += $(INCLUDES) diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/buddy.h b/tools/testing/selftests/bpf/libarena/include/libarena/buddy.h new file mode 100644 index 000000000000..00e2437128ef --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/include/libarena/buddy.h @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ +#pragma once + +struct buddy_chunk; +typedef struct buddy_chunk __arena buddy_chunk_t; + +struct buddy_header; +typedef struct buddy_header __arena buddy_header_t; + +enum buddy_consts { + /* + * Minimum allocation is 1 << BUDDY_MIN_ALLOC_SHIFT. + * Larger sizes increase internal fragmentation, but smaller + * sizes increase the space overhead of the block metadata. + */ + BUDDY_MIN_ALLOC_SHIFT = 4, + BUDDY_MIN_ALLOC_BYTES = 1 << BUDDY_MIN_ALLOC_SHIFT, + + /* + * How many orders the buddy allocator can serve. Minimum block + * size is 1 << BUDDY_MIN_ALLOC_SHIFT, maximum block size is + * 1 << (BUDDY_MIN_ALLOC_SHIFT + BUDDY_CHUNK_NUM_ORDERS - 1): + * Each block has size 1 << BUDDY_MIN_ALLOC_SHIFT, and the + * allocation orders are in [0, BUDDY_CHUNK_NUM_ORDERS). + * We keep two blocks of the maximum size to retain the + * property in the code that all blocks have a buddy. + * Higher values increase the maximum allocation size, + * but also the size of the metadata for each block. + */ + BUDDY_CHUNK_NUM_ORDERS = 1 << 4, + BUDDY_CHUNK_BYTES = BUDDY_MIN_ALLOC_BYTES << (BUDDY_CHUNK_NUM_ORDERS), + + /* Offset of the buddy header within a free block, see buddy.bpf.c for details */ + BUDDY_HEADER_OFF = 8, + + /* The maximum number of blocks a chunk may have to track. */ + BUDDY_CHUNK_ITEMS = 1 << (BUDDY_CHUNK_NUM_ORDERS), + BUDDY_CHUNK_OFFSET_MASK = BUDDY_CHUNK_BYTES - 1, + + /* + * Alignment for chunk allocations based on bpf_arena_alloc_pages. 
+ * The arena allocation kfunc does not have an alignment argument, + * but that is required for all block calculations in the chunk to + * work. + */ + BUDDY_VADDR_OFFSET = BUDDY_CHUNK_BYTES, + + /* Total arena virtual address space the allocator can consume. */ + BUDDY_VADDR_SIZE = BUDDY_CHUNK_BYTES << 10 +}; + +struct buddy_header { + u32 prev_index; /* "Pointer" to the previous available allocation of the same size. */ + u32 next_index; /* Same for the next allocation. */ +}; + +/* + * We bring memory into the allocator 1 MiB at a time. + */ +struct buddy_chunk { + /* The order of the current allocation for an item. 4 bits per order. */ + u8 orders[BUDDY_CHUNK_ITEMS / 2]; + /* + * One bit per item to denote whether the block is allocated. Size of the + * allocated/free block is found from the orders array. + */ + u8 allocated[BUDDY_CHUNK_ITEMS / 8]; + /* Freelists for O(1) allocation. */ + u64 freelists[BUDDY_CHUNK_NUM_ORDERS]; + buddy_chunk_t *next; +}; + +struct buddy { + buddy_chunk_t *first_chunk; /* Pointer to the chunk linked list. 
*/ + arena_spinlock_t lock; /* Allocator lock */ + u64 vaddr; /* Allocation into reserved vaddr */ +}; + +typedef struct buddy __arena buddy_t; + +#ifdef __BPF__ + +int buddy_init(buddy_t *buddy); +int buddy_destroy(buddy_t *buddy); +int buddy_free_internal(buddy_t *buddy, u64 free); +#define buddy_free(buddy, ptr) buddy_free_internal((buddy), (u64)(ptr)) +u64 buddy_alloc_internal(buddy_t *buddy, size_t size); +#define buddy_alloc(alloc, size) ((void __arena *)buddy_alloc_internal((alloc), (size))) + + +#endif /* __BPF__ */ diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/common.h b/tools/testing/selftests/bpf/libarena/include/libarena/common.h index 21eb18bf4533..e54cb7b869bd 100644 --- a/tools/testing/selftests/bpf/libarena/include/libarena/common.h +++ b/tools/testing/selftests/bpf/libarena/include/libarena/common.h @@ -48,6 +48,20 @@ extern volatile u64 asan_violated; int arena_fls(__u64 word); +u64 malloc_internal(size_t size); +#define malloc(size) ((void __arena *)malloc_internal((size))) +void free(void __arena *ptr); + +/* + * The verifier associates arenas with programs by checking LD.IMM + * instruction operands for an arena and populating the program state + * with the first instance it finds. This requires accessing our global + * arena variable, but subprogs do not necessarily do so while still + * using pointers from that arena. Insert an LD.IMM instruction to + * access the arena and help the verifier. + */ +#define arena_subprog_init() do { asm volatile ("" :: "r"(&arena)); } while (0) + #else /* ! __BPF__ */ #include diff --git a/tools/testing/selftests/bpf/libarena/src/buddy.bpf.c b/tools/testing/selftests/bpf/libarena/src/buddy.bpf.c new file mode 100644 index 000000000000..865e00803daa --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/src/buddy.bpf.c @@ -0,0 +1,903 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include + +/* + * Buddy allocator arena-based implementation. + * + * Memory is organized into chunks. These chunks + * cannot be coalesced or split. Allocating + * chunks allocates their memory eagerly. + * + * Internally, each chunk is organized into blocks. + * Blocks _can_ be coalesced/split, but only inside + * the chunk. Each block can be allocated or + * unallocated. If allocated, the entire block holds + * user data. If unallocated, the block is mostly + * invalid memory, with the exception of a header + * used for freelist tracking. + * + * The header is placed at an offset inside the block + * to prevent off-by-one errors from the previous block + * from trivially overwriting the header. Such an error + * is also not catchable by ASAN, since the header remains + * valid memory even after the block is freed. It is still + * theoretically possible for the header to be corrupted + * without being caught by ASAN, but harder. + * + * Since the allocator needs to track order information for + * both allocated and free blocks, and allocated blocks cannot + * store a header, the allocator also stores per-chunk order + * information in a reserved region at the beginning of the + * chunk. The header includes a bitmap with the order of blocks + * and their allocation state. It also includes the freelist + * heads for the allocation itself. + */ + + +enum { + BUDDY_POISONED = (s8)0xef, + + /* Number of pages to be allocated per chunk. */ + BUDDY_CHUNK_PAGES = BUDDY_CHUNK_BYTES / __PAGE_SIZE +}; + +static inline int buddy_lock(buddy_t *buddy) +{ + return arena_spin_lock(&buddy->lock); +} + +static inline void buddy_unlock(buddy_t *buddy) +{ + arena_spin_unlock(&buddy->lock); +} + +/* + * Reserve part of the arena address space for the allocator. We use + * this to get aligned addresses for the chunks, since the arena + * page alloc kfuncs do not support aligning to a boundary (in this + * case 1 MiB, see buddy.h on how this is derived). 
+ */ +static int buddy_reserve_arena_vaddr(buddy_t *buddy) +{ + buddy->vaddr = 0; + + return bpf_arena_reserve_pages(&arena, + (void __arena *)BUDDY_VADDR_OFFSET, + BUDDY_VADDR_SIZE / __PAGE_SIZE); +} + +/* + * Free up any unused address space. Used only during teardown. + */ +static void buddy_unreserve_arena_vaddr(buddy_t *buddy) +{ + bpf_arena_free_pages( + &arena, (void __arena *)(BUDDY_VADDR_OFFSET + buddy->vaddr), + (BUDDY_VADDR_SIZE - buddy->vaddr) / __PAGE_SIZE); + + buddy->vaddr = 0; +} + +/* + * Carve out part of the reserved address space and hand it over + * to the buddy allocator. + * + * We are assuming the buddy allocator is the only allocator in the + * system, so there is no race between this function reserving a + * page range and some other allocator actually making the BPF call + * to really create and reserve it. + * + * However, bump allocation must still be atomic because this function + * is called without the buddy lock from multiple threads concurrently. + */ +__weak int buddy_alloc_arena_vaddr(buddy_t __arg_arena *buddy, u64 *vaddrp) +{ + u64 vaddr, old, new; + + if (!buddy || !vaddrp) + return -EINVAL; + + do { + vaddr = buddy->vaddr; + new = vaddr + BUDDY_CHUNK_BYTES; + + if (new > BUDDY_VADDR_SIZE) + return -EINVAL; + + old = __sync_val_compare_and_swap(&buddy->vaddr, vaddr, new); + } while (old != vaddr && can_loop); + + if (old != vaddr) + return -EINVAL; + + *vaddrp = BUDDY_VADDR_OFFSET + vaddr; + + return 0; +} + +static u64 arena_next_pow2(__u64 n) +{ + n--; + n |= n >> 1; + n |= n >> 2; + n |= n >> 4; + n |= n >> 8; + n |= n >> 16; + n |= n >> 32; + n++; + + return n; +} + +__weak +int idx_set_allocated(buddy_chunk_t __arg_arena *chunk, u64 idx, bool allocated) +{ + bool already_allocated; + + if (unlikely(idx >= BUDDY_CHUNK_ITEMS)) { + arena_stderr("setting state of invalid idx (%ld, max %d)\n", idx, + BUDDY_CHUNK_ITEMS); + return -EINVAL; + } + + already_allocated = chunk->allocated[idx / 8] & (1 << (idx % 8)); + if 
(unlikely(already_allocated == allocated)) { + arena_stderr("Double %s of idx %ld for chunk %p", + allocated ? "alloc" : "free", + idx, chunk); + return -EINVAL; + } + + if (allocated) + chunk->allocated[idx / 8] |= 1 << (idx % 8); + else + chunk->allocated[idx / 8] &= ~(1 << (idx % 8)); + + return 0; +} + +static int idx_is_allocated(buddy_chunk_t *chunk, u64 idx, bool *allocated) +{ + if (unlikely(idx >= BUDDY_CHUNK_ITEMS)) { + arena_stderr("getting state of invalid idx (%llu, max %d)\n", idx, + BUDDY_CHUNK_ITEMS); + return -EINVAL; + } + + *allocated = chunk->allocated[idx / 8] & (1 << (idx % 8)); + return 0; +} + +__weak +int idx_set_order(buddy_chunk_t __arg_arena *chunk, u64 idx, u8 order) +{ + u8 prev_order; + + if (unlikely(order >= BUDDY_CHUNK_NUM_ORDERS)) { + arena_stderr("setting invalid order %u\n", order); + return -EINVAL; + } + + if (unlikely(idx >= BUDDY_CHUNK_ITEMS)) { + arena_stderr("setting order of invalid idx (%d, max %d)\n", idx, + BUDDY_CHUNK_ITEMS); + return -EINVAL; + } + + /* + * We store two order instances per byte, one per nibble. + * Retain the existing nibble. + */ + prev_order = chunk->orders[idx / 2]; + if (idx & 0x1) { + order &= 0xf; + order |= (prev_order & 0xf0); + } else { + order <<= 4; + order |= (prev_order & 0xf); + } + + chunk->orders[idx / 2] = order; + + return 0; +} + +static u8 idx_get_order(buddy_chunk_t *chunk, u64 idx) +{ + u8 result; + + _Static_assert(BUDDY_CHUNK_NUM_ORDERS <= 16, + "order must fit in 4 bits"); + + if (unlikely(idx >= BUDDY_CHUNK_ITEMS)) { + arena_stderr("getting order of invalid idx %u\n", idx); + return BUDDY_CHUNK_NUM_ORDERS; + } + + result = chunk->orders[idx / 2]; + + return (idx & 0x1) ? 
(result & 0xf) : (result >> 4); +} + +static void __arena *idx_to_addr(buddy_chunk_t *chunk, size_t idx) +{ + u64 address; + + if (unlikely(idx >= BUDDY_CHUNK_ITEMS)) { + arena_stderr("translating invalid idx %u\n", idx); + return NULL; + } + + /* + * The data blocks start in the chunk after the metadata block. + * We find the actual address by indexing into the region at an + * BUDDY_MIN_ALLOC_BYTES granularity, the minimum allowed. + * The index number already accounts for the fact that the first + * blocks in the chunk are occupied by the metadata, so we do + * not need to offset it. + */ + + address = (u64)chunk + (idx * BUDDY_MIN_ALLOC_BYTES); + + return (void __arena *)address; +} + +static buddy_header_t *idx_to_header(buddy_chunk_t *chunk, size_t idx) +{ + bool allocated; + u64 address; + + if (unlikely(idx_is_allocated(chunk, idx, &allocated))) { + arena_stderr("accessing invalid idx 0x%lx\n", idx); + return NULL; + } + + if (unlikely(allocated)) { + arena_stderr("accessing allocated idx 0x%lx as header\n", idx); + return NULL; + } + + address = (u64)idx_to_addr(chunk, idx); + if (!address) + return NULL; + + /* + * Offset the header within the block. This avoids accidental overwrites + * to the header because of off-by-one errors when using adjacent blocks. + * + * The offset has been chosen as a compromise between ASAN effectiveness + * and allocator granularity: + * 1) ASAN dictates valid data runs are 8-byte aligned. + * 2) We want to keep a low minimum allocation size (currently 16). + * + * As a result, we have only two possible positions for the header: Bytes + * 0 and 8. Keeping the header in byte 0 means off-by-ones from the previous + * block touch the header, and, since the header must be accessible, ASAN + * will not trigger. Keeping the header on byte 8 means off-by-one errors from + * the previous block are caught by ASAN. 
Negative offsets are rarer, so + * while accesses into the block from the next block are possible, they are + * less probable. + */ + + return (buddy_header_t *)(address + BUDDY_HEADER_OFF); +} + +static void header_add_freelist(buddy_chunk_t *chunk, buddy_header_t *header, + u64 idx, u8 order) +{ + buddy_header_t *tmp_header; + + idx_set_order(chunk, idx, order); + + header->next_index = chunk->freelists[order]; + header->prev_index = BUDDY_CHUNK_ITEMS; + + if (header->next_index != BUDDY_CHUNK_ITEMS) { + tmp_header = idx_to_header(chunk, header->next_index); + tmp_header->prev_index = idx; + } + + chunk->freelists[order] = idx; +} + +static void header_remove_freelist(buddy_chunk_t *chunk, + buddy_header_t *header, u8 order) +{ + buddy_header_t *tmp_header; + + if (header->prev_index != BUDDY_CHUNK_ITEMS) { + tmp_header = idx_to_header(chunk, header->prev_index); + tmp_header->next_index = header->next_index; + } + + if (header->next_index != BUDDY_CHUNK_ITEMS) { + tmp_header = idx_to_header(chunk, header->next_index); + tmp_header->prev_index = header->prev_index; + } + + /* Pop off the list head if necessary. */ + if (idx_to_header(chunk, chunk->freelists[order]) == header) + chunk->freelists[order] = header->next_index; + + header->prev_index = BUDDY_CHUNK_ITEMS; + header->next_index = BUDDY_CHUNK_ITEMS; +} + +static u64 size_to_order(size_t size) +{ + u64 order; + + /* + * Legal sizes are [1, 4GiB] (the biggest possible arena). + * Of course, sizes close to 4GiB are practically impossible + * to fulfill and allocation will fail, but that's taken care + * of by the caller. + */ + + if (unlikely(size == 0 || size > (1UL << 32))) { + arena_stderr("illegal size request %lu\n", size); + return 64; + } + /* + * To find the order of the allocation we find the first power of two + * >= the requested size, take the log2, then adjust it for the minimum + * allocation size by removing the minimum shift from it.
Requests + * smaller than the minimum allocation size are rounded up. + */ + order = arena_fls(arena_next_pow2(size)) - 1; + if (order < BUDDY_MIN_ALLOC_SHIFT) + return 0; + + return order - BUDDY_MIN_ALLOC_SHIFT; +} + +__weak +int add_leftovers_to_freelist(buddy_chunk_t __arg_arena *chunk, u32 cur_idx, + u64 min_order, u64 max_order) +{ + buddy_header_t *header; + u64 ord; + u32 idx; + + for (ord = min_order; ord < max_order && can_loop; ord++) { + /* Mark the buddy as free and add it to the freelists. */ + idx = cur_idx + (1 << ord); + + header = idx_to_header(chunk, idx); + if (unlikely(!header)) { + arena_stderr("idx %u has no header", idx); + return -EINVAL; + } + + asan_unpoison(header, sizeof(*header)); + + header_add_freelist(chunk, header, idx, ord); + } + + return 0; +} + +static buddy_chunk_t *buddy_chunk_get(buddy_t *buddy) +{ + u64 order, ord, min_order, max_order; + buddy_chunk_t *chunk; + size_t left; + int power2; + u64 vaddr; + u32 idx; + int ret; + + /* + * Step 1: Allocate a properly aligned chunk, and + * prep it for insertion into the buddy allocator. + * We don't need the allocator lock until step 2. + */ + + ret = buddy_alloc_arena_vaddr(buddy, &vaddr); + if (ret) + return NULL; + + /* Addresses must be aligned to the chunk boundary. */ + if (vaddr % BUDDY_CHUNK_BYTES) + return NULL; + + /* Unreserve the address space. */ + bpf_arena_free_pages(&arena, (void __arena *)vaddr, + BUDDY_CHUNK_PAGES); + + chunk = bpf_arena_alloc_pages(&arena, (void __arena *)vaddr, + BUDDY_CHUNK_PAGES, NUMA_NO_NODE, 0); + if (!chunk) { + arena_stderr("[ALLOC FAILED]"); + return NULL; + } + + if (buddy_lock(buddy)) { + /* + * We cannot reclaim the vaddr space, but that is ok - this + * operation should always succeed. The error path is to catch + * accidental deadlocks that will cause -ENOMEMs to the program as + * the allocator fails to refill itself, in which case vaddr usage + * is the least of our worries. 
+ */ + bpf_arena_free_pages(&arena, (void __arena *)vaddr, BUDDY_CHUNK_PAGES); + return NULL; + } + + asan_poison(chunk, BUDDY_POISONED, BUDDY_CHUNK_PAGES * __PAGE_SIZE); + + /* Unpoison the chunk itself. */ + asan_unpoison(chunk, sizeof(*chunk)); + + /* Mark all freelists as empty. */ + for (ord = zero; ord < BUDDY_CHUNK_NUM_ORDERS && can_loop; ord++) + chunk->freelists[ord] = BUDDY_CHUNK_ITEMS; + + /* + * Initialize the chunk by carving out a page range to hold the metadata + * struct above, then dumping the rest of the pages into the allocator. + */ + + _Static_assert(BUDDY_CHUNK_PAGES * __PAGE_SIZE >= + BUDDY_MIN_ALLOC_BYTES * + BUDDY_CHUNK_ITEMS, + "chunk must fit within the allocation"); + + /* + * Step 2: Reserve a chunk for the chunk metadata, then break + * the rest of the full allocation into the different buckets. + * We allocate the memory by grabbing blocks of progressively + * smaller sizes from the allocator, which are guaranteed to be + * contiguous. + * + * This operation also populates the allocator. + * + * Algorithm: + * + * - max_order: The last order allocation we made + * - left: How many bytes are left to allocate + * - cur_index: Current index into the top-level block we are + * allocating from. + * + * Step 3: + * - Find the largest power-of-2 allocation still smaller than left (infimum) + * - Reserve a chunk of that size, along with its buddy + * - For every order from [infimum + 1, last order), carve out a block + * and put it into the allocator. + * + * Example: Chunk size 0b1010000 (80 bytes) + * + * Step 1: + * + * idx infimum 1 << max_order + * 0 64 128 1 << 20 + * |________|_________|______________________| + * + * Blocks set aside: + * [0, 64) - Completely allocated + * [64, 128) - Will be further split in the next iteration + * + * Blocks added to the allocator: + * [128, 256) + * [256, 512) + * ...
+ * [1 << 18, 1 << 19) + * [1 << 19, 1 << 20) + * + * Step 2: + * + * idx infimum idx + 1 << max_order + * 64 80 96 64 + 1 << 6 = 128 + * |________|_________|______________________| + * + * Blocks set aside: + * [64, 80) - Completely allocated + * + * Blocks added to the allocator: + * [80, 96) - left == 0 so the buddy is unused and marked as freed + * [96, 128) + */ + max_order = BUDDY_CHUNK_NUM_ORDERS; + left = sizeof(*chunk); + idx = 0; + while (left && can_loop) { + power2 = arena_fls(left) - 1; + /* + * Note: The condition below only triggers to catch serious bugs + * early. There is no sane way to undo any block insertions from + * the allocated chunk, so just leak any leftover allocations, + * emit a diagnostic, unlock and exit. + * + */ + if (unlikely(power2 >= BUDDY_CHUNK_NUM_ORDERS)) { + arena_stderr( + "buddy chunk metadata require allocation of order %d\n", + power2); + arena_stderr( + "chunk has size of 0x%lx bytes (left %lx bytes)\n", + sizeof(*chunk), left); + buddy_unlock(buddy); + + return NULL; + } + + /* Round up allocations that are too small. */ + + left -= (power2 >= BUDDY_MIN_ALLOC_SHIFT) ? 1 << power2 : left; + order = (power2 >= BUDDY_MIN_ALLOC_SHIFT) ? power2 - BUDDY_MIN_ALLOC_SHIFT : 0; + + if (idx_set_allocated(chunk, idx, true)) { + buddy_unlock(buddy); + return NULL; + } + + /* + * Starting an order above the one we allocated, populate + * the allocator with free blocks. If this is the last + * allocation (left == 0), also mark the buddy as free. + * + * See comment above about error handling: The error path + * is only there as a way to mitigate deeply buggy allocator + * states by emitting a diagnostic in add_leftovers_to_freelist() + * and leaking any memory not added in the freelists. + */ + min_order = left ? order + 1 : order; + if (add_leftovers_to_freelist(chunk, idx, min_order, max_order)) { + buddy_unlock(buddy); + return NULL; + } + + /* Adjust the index. 
*/ + idx += 1 << order; + max_order = order; + } + + buddy_unlock(buddy); + + return chunk; +} + +__weak int buddy_init(buddy_t __arg_arena *buddy) +{ + buddy_chunk_t *chunk; + int ret; + + if (!asan_ready()) + return -EINVAL; + + /* Reserve enough address space to ensure allocations are aligned. */ + ret = buddy_reserve_arena_vaddr(buddy); + if (ret) + return ret; + + _Static_assert(BUDDY_CHUNK_PAGES > 0, + "chunk must use one or more pages"); + + chunk = buddy_chunk_get(buddy); + + if (buddy_lock(buddy)) { + bpf_arena_free_pages(&arena, chunk, BUDDY_CHUNK_PAGES); + return -EINVAL; + } + + /* Chunk is already properly unpoisoned if allocated. */ + if (chunk) + chunk->next = buddy->first_chunk; + + /* Put the chunk at the beginning of the list. */ + buddy->first_chunk = chunk; + + buddy_unlock(buddy); + + return chunk ? 0 : -ENOMEM; +} + +/* + * Destroy the allocator. This does not check whether there are any allocations + * currently in use, so any pages being accessed will start taking arena faults. + * We do not take a lock because we are freeing arena pages, and nobody should + * be using the allocator at that point in the execution. + */ +__weak int buddy_destroy(buddy_t __arg_arena *buddy) +{ + buddy_chunk_t *chunk, *next; + + if (!buddy) + return -EINVAL; + + /* + * Traverse all buddy chunks and free them back to the arena + * with the same granularity they were allocated with. + */ + for (chunk = buddy->first_chunk; chunk && can_loop; chunk = next) { + next = chunk->next; + + /* Wholesale poison the entire block. */ + asan_poison(chunk, BUDDY_POISONED, + BUDDY_CHUNK_PAGES * __PAGE_SIZE); + bpf_arena_free_pages(&arena, chunk, BUDDY_CHUNK_PAGES); + } + + /* Free up any part of the address space that did not get used. */ + buddy_unreserve_arena_vaddr(buddy); + + /* Clear all fields. 
*/ + buddy->first_chunk = NULL; + + return 0; +} + +__weak u64 buddy_chunk_alloc(buddy_chunk_t __arg_arena *chunk, int order_req) +{ + buddy_header_t *header, *tmp_header, *next_header; + u32 idx, tmpidx, retidx; + u64 address; + u64 order = 0; + u64 i; + + for (order = order_req; order < BUDDY_CHUNK_NUM_ORDERS && can_loop; order++) { + if (chunk->freelists[order] != BUDDY_CHUNK_ITEMS) + break; + } + + if (order >= BUDDY_CHUNK_NUM_ORDERS) + return (u64)NULL; + + retidx = chunk->freelists[order]; + header = idx_to_header(chunk, retidx); + if (unlikely(!header)) + return (u64) NULL; + + chunk->freelists[order] = header->next_index; + + if (header->next_index != BUDDY_CHUNK_ITEMS) { + next_header = idx_to_header(chunk, header->next_index); + next_header->prev_index = BUDDY_CHUNK_ITEMS; + } + + header->prev_index = BUDDY_CHUNK_ITEMS; + header->next_index = BUDDY_CHUNK_ITEMS; + if (idx_set_order(chunk, retidx, order_req)) + return (u64)NULL; + + if (idx_set_allocated(chunk, retidx, true)) + return (u64)NULL; + + /* + * Do not unpoison the address yet, will be done by the caller + * because the caller has the exact allocation size requested. + */ + address = (u64)idx_to_addr(chunk, retidx); + if (!address) + return (u64)NULL; + + /* If we allocated from a larger-order chunk, split the buddies. */ + for (i = order_req; i < order && can_loop; i++) { + /* + * Flip the bit for the current order (the bit is guaranteed + * to be 0, so just add 1 << i). + */ + idx = retidx + (1 << i); + + /* Add the buddy of the allocation to the free list. */ + header = idx_to_header(chunk, idx); + /* Unpoison the buddy header */ + asan_unpoison(header, sizeof(*header)); + + if (idx_set_order(chunk, idx, i)) + return (u64)NULL; + + /* Push the header to the beginning of the freelists list. 
*/ + tmpidx = chunk->freelists[i]; + + header->prev_index = BUDDY_CHUNK_ITEMS; + header->next_index = tmpidx; + + if (tmpidx != BUDDY_CHUNK_ITEMS) { + tmp_header = idx_to_header(chunk, tmpidx); + tmp_header->prev_index = idx; + } + + chunk->freelists[i] = idx; + } + + return address; +} + +/* Scan the existing chunks for available memory. */ +static u64 buddy_alloc_from_existing_chunks(buddy_t *buddy, int order) +{ + buddy_chunk_t *chunk; + u64 address; + + for (chunk = buddy->first_chunk; chunk != NULL && can_loop; + chunk = chunk->next) { + address = buddy_chunk_alloc(chunk, order); + if (address) + return address; + } + + return (u64)NULL; +} + +/* + * Try an allocation from a newly allocated chunk. Also + * incorporate the chunk into the linked list. + */ +static u64 buddy_alloc_from_new_chunk(buddy_t *buddy, buddy_chunk_t *chunk, int order) +{ + u64 address; + + if (buddy_lock(buddy)) + return (u64)NULL; + + + /* + * Add the chunk into the allocator and try + * to allocate specifically from that chunk. + */ + chunk->next = buddy->first_chunk; + buddy->first_chunk = chunk; + + address = buddy_chunk_alloc(buddy->first_chunk, order); + + buddy_unlock(buddy); + + return (u64)address; +} +__weak +u64 buddy_alloc_internal(buddy_t __arg_arena *buddy, size_t size) +{ + buddy_chunk_t *chunk; + u64 address = (u64)NULL; + int order; + + if (!buddy) + return (u64)NULL; + + order = size_to_order(size); + if (order >= BUDDY_CHUNK_NUM_ORDERS || order < 0) { + arena_stderr("invalid order %d (sz %lu)\n", order, size); + return (u64)NULL; + } + + if (buddy_lock(buddy)) + return (u64)NULL; + + address = buddy_alloc_from_existing_chunks(buddy, order); + buddy_unlock(buddy); + if (address) + goto done; + + /* Get a new chunk. */ + chunk = buddy_chunk_get(buddy); + if (chunk) + address = buddy_alloc_from_new_chunk(buddy, chunk, order); + +done: + /* If we failed to allocate memory, return NULL. 
*/ + if (!address) + return (u64)NULL; + + /* + * Unpoison exactly the amount of bytes requested. If the + * data is smaller than the header, we must poison any + * unused bytes that were part of the header. + */ + if (size < BUDDY_HEADER_OFF + sizeof(buddy_header_t)) + asan_poison((u8 __arena *)address + BUDDY_HEADER_OFF, + BUDDY_POISONED, sizeof(buddy_header_t)); + + asan_unpoison((u8 __arena *)address, size); + + return address; +} + +static __always_inline int buddy_free_unlocked(buddy_t *buddy, u64 addr) +{ + buddy_header_t *header, *buddy_header; + u64 idx, buddy_idx, tmp_idx; + buddy_chunk_t *chunk; + bool allocated; + u8 order; + int ret; + + if (!buddy) + return -EINVAL; + + if (addr & (BUDDY_MIN_ALLOC_BYTES - 1)) { + arena_stderr("Freeing unaligned address %llx\n", addr); + return -EINVAL; + } + + /* Get (chunk, idx) out of the address. */ + chunk = (void __arena *)(addr & ~BUDDY_CHUNK_OFFSET_MASK); + idx = (addr & BUDDY_CHUNK_OFFSET_MASK) / BUDDY_MIN_ALLOC_BYTES; + + /* Mark the block as unallocated so we can access the header. */ + ret = idx_set_allocated(chunk, idx, false); + if (ret) + return ret; + + order = idx_get_order(chunk, idx); + header = idx_to_header(chunk, idx); + + /* The header is in the block itself, keep it unpoisoned. */ + asan_poison((u8 __arena *)addr, BUDDY_POISONED, + BUDDY_MIN_ALLOC_BYTES << order); + asan_unpoison(header, sizeof(*header)); + + /* + * Coalescing loop. Merge with free buddies of equal order. + * For every coalescing step, keep the left buddy and + * drop the right buddy's header. + */ + for (; order < BUDDY_CHUNK_NUM_ORDERS && can_loop; order++) { + buddy_idx = idx ^ (1 << order); + + /* Check if the buddy is actually free. */ + idx_is_allocated(chunk, buddy_idx, &allocated); + if (allocated) + break; + + /* + * If buddy is not the same order as the chunk + * being freed, then we're done coalescing. 
+ */ + if (idx_get_order(chunk, buddy_idx) != order) + break; + + buddy_header = idx_to_header(chunk, buddy_idx); + header_remove_freelist(chunk, buddy_header, order); + + /* Keep the left header out of the two buddies, drop the other one. */ + if (buddy_idx < idx) { + tmp_idx = idx; + idx = buddy_idx; + buddy_idx = tmp_idx; + } + + /* Remove the buddy from the freelists so that we can merge it. */ + idx_set_order(chunk, buddy_idx, order); + + buddy_header = idx_to_header(chunk, buddy_idx); + asan_poison(buddy_header, BUDDY_POISONED, + sizeof(*buddy_header)); + } + + /* Header properly freed but not in any freelists yet .*/ + idx_set_order(chunk, idx, order); + + header = idx_to_header(chunk, idx); + header_add_freelist(chunk, header, idx, order); + + return 0; +} + +__weak int buddy_free_internal(buddy_t __arg_arena *buddy, u64 addr) +{ + int ret; + + if (!buddy) + return -EINVAL; + + /* Freeing NULL is a valid no-op. */ + if (!addr) + return 0; + + ret = buddy_lock(buddy); + if (ret) + return ret; + + ret = buddy_free_unlocked(buddy, addr); + + buddy_unlock(buddy); + + return ret; +} + +__weak char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/libarena/src/common.bpf.c b/tools/testing/selftests/bpf/libarena/src/common.bpf.c index 84e8a8b7d42e..e5da1e37e83e 100644 --- a/tools/testing/selftests/bpf/libarena/src/common.bpf.c +++ b/tools/testing/selftests/bpf/libarena/src/common.bpf.c @@ -1,11 +1,13 @@ // SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ #include - #include +#include const volatile u32 zero = 0; +buddy_t buddy; + int arena_fls(__u64 word) { if (!word) @@ -28,4 +30,23 @@ __weak int arena_alloc_reserve(struct arena_alloc_reserve_args *args) return bpf_arena_reserve_pages(&arena, NULL, args->nr_pages); } +SEC("syscall") +__weak int arena_buddy_reset(void) +{ + buddy_destroy(&buddy); + + return buddy_init(&buddy); +} + +__weak u64 malloc_internal(size_t size) +{ + return buddy_alloc_internal(&buddy, size); +} + +__weak void free(void __arg_arena __arena *ptr) +{ + buddy_free_internal(&buddy, (u64)ptr); +} + + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From b1487dc1b181ad6aaea95357030a421bb180d8e7 Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Sun, 26 Apr 2026 15:03:37 -0400 Subject: selftests/bpf: Add selftests for libarena buddy allocator Introduce selftests for the buddy allocator with and without ASAN. Add the libarena selftests both to the libarena test runner and to test_progs, so that they are a) available when libarena is pulled as a standalone library, and b) exercised along with all other test programs in this directory. ASAN for libarena requires LLVM 22. Add logic in the top-level selftests Makefile to only compile the ASAN variant if the compiler supports it, otherwise skip the test. 
Signed-off-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260426190338.4615-8-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- .../bpf/libarena/selftests/st_asan_buddy.bpf.c | 240 +++++++++++++++++++++ .../bpf/libarena/selftests/st_buddy.bpf.c | 209 ++++++++++++++++++ tools/testing/selftests/bpf/prog_tests/libarena.c | 66 ++++++ .../selftests/bpf/prog_tests/libarena_asan.c | 91 ++++++++ 4 files changed, 606 insertions(+) create mode 100644 tools/testing/selftests/bpf/libarena/selftests/st_asan_buddy.bpf.c create mode 100644 tools/testing/selftests/bpf/libarena/selftests/st_buddy.bpf.c create mode 100644 tools/testing/selftests/bpf/prog_tests/libarena.c create mode 100644 tools/testing/selftests/bpf/prog_tests/libarena_asan.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/libarena/selftests/st_asan_buddy.bpf.c b/tools/testing/selftests/bpf/libarena/selftests/st_asan_buddy.bpf.c new file mode 100644 index 000000000000..9dd2980b5d6c --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/selftests/st_asan_buddy.bpf.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include + +extern buddy_t buddy; + +#ifdef BPF_ARENA_ASAN + +#include "st_asan_common.h" + +static __always_inline int asan_test_buddy_oob_single(size_t alloc_size) +{ + u8 __arena *mem; + int ret, i; + + ret = asan_validate(); + if (ret < 0) + return ret; + + mem = buddy_alloc(&buddy, alloc_size); + if (!mem) { + arena_stdout("buddy_alloc failed for size %lu", alloc_size); + return -ENOMEM; + } + + ret = asan_validate(); + if (ret < 0) + return ret; + + for (i = zero; i < alloc_size && can_loop; i++) { + mem[i] = 0xba; + ret = asan_validate_addr(false, &mem[i]); + if (ret < 0) + return ret; + } + + mem[alloc_size] = 0xba; + ret = asan_validate_addr(true, &mem[alloc_size]); + if (ret < 0) + return ret; + + buddy_free(&buddy, mem); + + return 0; +} + +/* + * Factored out because asan_validate_addr is complex enough to cause + * verification failures if verified with the rest of asan_test_buddy_uaf_single. + */ +__weak int asan_test_buddy_byte(u8 __arena __arg_arena *mem, int i, bool freed) +{ + int ret; + + /* The header in freed blocks doesn't get poisoned. 
*/ + if (freed && BUDDY_HEADER_OFF <= i && + i < BUDDY_HEADER_OFF + sizeof(struct buddy_header)) + return 0; + + mem[i] = 0xba; + ret = asan_validate_addr(freed, &mem[i]); + if (ret < 0) + return ret; + + return 0; +} + +__weak int asan_test_buddy_uaf_single(size_t alloc_size) +{ + u8 __arena *mem; + int ret; + int i; + + mem = buddy_alloc(&buddy, alloc_size); + if (!mem) { + arena_stdout("buddy_alloc failed for size %lu", alloc_size); + return -ENOMEM; + } + + ret = asan_validate(); + if (ret < 0) + return ret; + + for (i = zero; i < alloc_size && can_loop; i++) { + ret = asan_test_buddy_byte(mem, i, false); + if (ret) + return ret; + } + + ret = asan_validate(); + if (ret < 0) + return ret; + + buddy_free(&buddy, mem); + + for (i = zero; i < alloc_size && can_loop; i++) { + ret = asan_test_buddy_byte(mem, i, true); + if (ret) + return ret; + } + + return 0; +} + +struct buddy_blob { + volatile u8 mem[48]; + u8 oob; +}; + +static __always_inline int asan_test_buddy_blob_single(void) +{ + volatile struct buddy_blob __arena *blob; + const size_t alloc_size = sizeof(struct buddy_blob) - 1; + int ret; + + blob = buddy_alloc(&buddy, alloc_size); + if (!blob) + return -ENOMEM; + + blob->mem[0] = 0xba; + ret = asan_validate_addr(false, &blob->mem[0]); + if (ret < 0) + return ret; + + blob->mem[47] = 0xba; + ret = asan_validate_addr(false, &blob->mem[47]); + if (ret < 0) + return ret; + + blob->oob = 0; + ret = asan_validate_addr(true, &blob->oob); + if (ret < 0) + return ret; + + buddy_free(&buddy, (void __arena *)blob); + + return 0; +} + +SEC("syscall") +__weak int asan_test_buddy_oob(void) +{ + size_t sizes[] = { + 7, 8, 17, 18, 64, 256, 317, 512, 1024, + }; + int ret, i; + + ret = buddy_init(&buddy); + if (ret) { + arena_stdout("buddy_init failed with %d", ret); + return ret; + } + + for (i = zero; i < sizeof(sizes) / sizeof(sizes[0]) && can_loop; i++) { + ret = asan_test_buddy_oob_single(sizes[i]); + if (ret) { + arena_stdout("%s:%d Failed for size %lu", __func__, + 
__LINE__, sizes[i]); + buddy_destroy(&buddy); + return ret; + } + } + + buddy_destroy(&buddy); + + ret = asan_validate(); + if (ret < 0) + return ret; + + return 0; +} + +SEC("syscall") +__weak int asan_test_buddy_uaf(void) +{ + size_t sizes[] = { 16, 32, 64, 128, 256, 512, 1024, 16384 }; + int ret, i; + + ret = buddy_init(&buddy); + if (ret) { + arena_stdout("buddy_init failed with %d", ret); + return ret; + } + + for (i = zero; i < sizeof(sizes) / sizeof(sizes[0]) && can_loop; i++) { + ret = asan_test_buddy_uaf_single(sizes[i]); + if (ret) { + arena_stdout("%s:%d Failed for size %lu", __func__, + __LINE__, sizes[i]); + buddy_destroy(&buddy); + return ret; + } + } + + buddy_destroy(&buddy); + + ret = asan_validate(); + if (ret < 0) + return ret; + + return 0; +} + +SEC("syscall") +__weak int asan_test_buddy_blob(void) +{ + const int iters = 10; + int ret, i; + + ret = buddy_init(&buddy); + if (ret) { + arena_stdout("buddy_init failed with %d", ret); + return ret; + } + + for (i = zero; i < iters && can_loop; i++) { + ret = asan_test_buddy_blob_single(); + if (ret) { + arena_stdout("%s:%d Failed on iteration %d", __func__, + __LINE__, i); + buddy_destroy(&buddy); + return ret; + } + } + + buddy_destroy(&buddy); + + ret = asan_validate(); + if (ret < 0) + return ret; + + return 0; +} + +#endif + +__weak char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/libarena/selftests/st_buddy.bpf.c b/tools/testing/selftests/bpf/libarena/selftests/st_buddy.bpf.c new file mode 100644 index 000000000000..79e6f0baabfe --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/selftests/st_buddy.bpf.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ + +#include + +#include +#include + +extern buddy_t buddy; + +struct segarr_entry { + u8 __arena *block; + size_t sz; + u8 poison; +}; + +#define SEGARRLEN (512) +static struct segarr_entry __arena segarr[SEGARRLEN]; +static void __arena *ptrs[17]; +size_t __arena alloc_sizes[] = { 3, 17, 1025, 129, 16350, 333, 9, 517 }; +size_t __arena alloc_multiple_sizes[] = { 3, 17, 1025, 129, 16350, 333, 9, 517, 2099 }; +size_t __arena alloc_free_sizes[] = { 3, 17, 64, 129, 256, 333, 512, 517 }; +size_t __arena alignment_sizes[] = { 1, 3, 7, 8, 9, 15, 16, 17, 31, + 32, 64, 100, 128, 255, 256, 512, 1000 }; + +SEC("syscall") +__weak int test_buddy_create(void) +{ + const int iters = 10; + int ret, i; + + for (i = zero; i < iters && can_loop; i++) { + ret = buddy_init(&buddy); + if (ret) + return ret; + + ret = buddy_destroy(&buddy); + if (ret) + return ret; + } + + return 0; +} + +SEC("syscall") +__weak int test_buddy_alloc(void) +{ + void __arena *mem; + int ret, i; + + for (i = zero; i < 8 && can_loop; i++) { + ret = buddy_init(&buddy); + if (ret) + return ret; + + mem = buddy_alloc(&buddy, alloc_sizes[i]); + if (!mem) { + buddy_destroy(&buddy); + return -ENOMEM; + } + + buddy_destroy(&buddy); + } + + return 0; +} + +SEC("syscall") +__weak int test_buddy_alloc_free(void) +{ + const int iters = 800; + void __arena *mem; + int ret, i; + + ret = buddy_init(&buddy); + if (ret) + return ret; + + for (i = zero; i < iters && can_loop; i++) { + mem = buddy_alloc(&buddy, alloc_free_sizes[(i * 5) % 8]); + if (!mem) { + buddy_destroy(&buddy); + return -ENOMEM; + } + + buddy_free(&buddy, mem); + } + + buddy_destroy(&buddy); + + return 0; +} + +SEC("syscall") +__weak int test_buddy_alloc_multiple(void) +{ + int ret, j; + u32 i, idx; + u8 __arena *mem; + size_t sz; + u8 poison; + + ret = buddy_init(&buddy); + if (ret) + return ret; + + /* + * Cycle through each size, allocating an entry in the + * segarr. Continue for SEGARRLEN iterations. 
For every + * allocation write down the size, use the current index + * as a poison value, and log it with the pointer in the + * segarr entry. Use the poison value to poison the entire + * allocated memory according to the size given. + */ + for (i = zero; i < SEGARRLEN && can_loop; i++) { + sz = alloc_multiple_sizes[i % 9]; + poison = (u8)i; + + mem = buddy_alloc(&buddy, sz); + if (!mem) { + buddy_destroy(&buddy); + arena_stdout("%s:%d", __func__, __LINE__); + return -ENOMEM; + } + + segarr[i].block = mem; + segarr[i].sz = sz; + segarr[i].poison = poison; + + for (j = zero; j < sz && can_loop; j++) { + mem[j] = poison; + if (mem[j] != poison) { + buddy_destroy(&buddy); + return -EINVAL; + } + } + } + + /* + * Go to (i * 17) % SEGARRLEN, and free the block pointed to. + * Before freeing, check all bytes have the poisoned value + * corresponding to the element. If any values are unexpected, + * return an error. Skip some elements to test destroying the + * buddy allocator while data is still allocated. 
+ */ + for (i = 10; i < SEGARRLEN && can_loop; i++) { + idx = (i * 17) % SEGARRLEN; + + mem = segarr[idx].block; + sz = segarr[idx].sz; + poison = segarr[idx].poison; + + for (j = zero; j < sz && can_loop; j++) { + if (mem[j] != poison) { + buddy_destroy(&buddy); + arena_stdout("%s:%d %lx %u vs %u", __func__, + __LINE__, (uintptr_t)&mem[j], + mem[j], poison); + return -EINVAL; + } + } + + buddy_free(&buddy, mem); + } + + buddy_destroy(&buddy); + + return 0; +} + +SEC("syscall") +__weak int test_buddy_alignment(void) +{ + int ret, i; + + ret = buddy_init(&buddy); + if (ret) + return ret; + + /* Allocate various sizes and check alignment */ + for (i = zero; i < 17 && can_loop; i++) { + ptrs[i] = buddy_alloc(&buddy, alignment_sizes[i]); + if (!ptrs[i]) { + arena_stdout("alignment test: alloc failed for size %lu", + alignment_sizes[i]); + buddy_destroy(&buddy); + return -ENOMEM; + } + + /* Check 8-byte alignment */ + if ((u64)ptrs[i] & 0x7) { + arena_stdout( + "alignment test: ptr %llx not 8-byte aligned (size %lu)", + (u64)ptrs[i], alignment_sizes[i]); + buddy_destroy(&buddy); + return -EINVAL; + } + } + + /* Free all allocations */ + for (i = zero; i < 17 && can_loop; i++) + buddy_free(&buddy, ptrs[i]); + + buddy_destroy(&buddy); + + return 0; +} + +__weak char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/prog_tests/libarena.c b/tools/testing/selftests/bpf/prog_tests/libarena.c new file mode 100644 index 000000000000..81bdb084c271 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/libarena.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ +#include +#include + +#include +#include +#include +#include + +#include "libarena/libarena.skel.h" + +static void run_libarena_test(struct libarena *skel, struct bpf_program *prog, + const char *name) +{ + int ret; + + if (!strstr(name, "test_buddy")) { + ret = libarena_run_prog(bpf_program__fd(skel->progs.arena_buddy_reset)); + if (!ASSERT_OK(ret, "arena_buddy_reset")) + return; + } + + ret = libarena_run_prog(bpf_program__fd(prog)); + + ASSERT_OK(ret, name); + +} + +void test_libarena(void) +{ + struct arena_alloc_reserve_args args; + struct libarena *skel; + struct bpf_program *prog; + int ret; + + skel = libarena__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + ret = libarena__attach(skel); + if (!ASSERT_OK(ret, "attach")) + goto out; + + args.nr_pages = ARENA_RESERVE_PAGES_DFL; + + ret = libarena_run_prog_args(bpf_program__fd(skel->progs.arena_alloc_reserve), + &args, sizeof(args)); + if (!ASSERT_OK(ret, "arena_alloc_reserve")) + goto out; + + bpf_object__for_each_program(prog, skel->obj) { + const char *name = bpf_program__name(prog); + + if (!libarena_is_test_prog(name)) + continue; + + if (!test__start_subtest(name)) + continue; + + run_libarena_test(skel, prog, name); + } + +out: + libarena__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/libarena_asan.c b/tools/testing/selftests/bpf/prog_tests/libarena_asan.c new file mode 100644 index 000000000000..b4fba10cdfbf --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/libarena_asan.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ +#include + +#ifdef HAS_BPF_ARENA_ASAN +#include + +#include +#include +#include +#include + +#include "libarena/libarena_asan.skel.h" + +static void run_libarena_asan_test(struct libarena_asan *skel, + struct bpf_program *prog, const char *name) +{ + int ret; + + if (!strstr(name, "test_buddy")) { + ret = libarena_run_prog(bpf_program__fd(skel->progs.arena_buddy_reset)); + if (!ASSERT_OK(ret, "arena_buddy_reset")) + return; + } + + ret = libarena_run_prog(bpf_program__fd(prog)); + ASSERT_OK(ret, name); +} + +static void run_test(void) +{ + struct arena_alloc_reserve_args args; + struct libarena_asan *skel; + struct bpf_program *prog; + int ret; + + skel = libarena_asan__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + ret = libarena_asan__attach(skel); + if (!ASSERT_OK(ret, "attach")) + goto out; + + args.nr_pages = ARENA_RESERVE_PAGES_DFL; + + ret = libarena_run_prog_args(bpf_program__fd(skel->progs.arena_alloc_reserve), + &args, sizeof(args)); + if (!ASSERT_OK(ret, "arena_alloc_reserve")) + goto out; + + ret = libarena_asan_init( + bpf_program__fd(skel->progs.arena_get_info), + bpf_program__fd(skel->progs.asan_init), + (1ULL << 32) / sysconf(_SC_PAGESIZE)); + if (!ASSERT_OK(ret, "libarena_asan_init")) + goto out; + + bpf_object__for_each_program(prog, skel->obj) { + const char *name = bpf_program__name(prog); + + if (!libarena_is_asan_test_prog(name)) + continue; + + if (!test__start_subtest(name)) + continue; + + run_libarena_asan_test(skel, prog, name); + } + +out: + libarena_asan__destroy(skel); +} + +#endif /* HAS_BPF_ARENA_ASAN */ + +/* + * Run the test depending on whether LLVM can compile arena ASAN + * programs. 
+ */ +void test_libarena_asan(void) +{ +#ifdef HAS_BPF_ARENA_ASAN + run_test(); +#else + test__skip(); +#endif + + return; +} + -- cgit v1.2.3 From 554e4eb9e4b75358f73733e2be7a59aaf4b7875e Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Sun, 26 Apr 2026 15:03:38 -0400 Subject: selftests/bpf: Reuse stderr parsing for libarena ASAN tests Add code to directly test the output of libarena ASAN tests. The code reuses testing infrastructure originally for BPF streams to verify that ASAN emits call stacks when the selftests trigger a memory error. Since stderr() testing uses logic from test_progs, it is only available on the test_progs-based selftest runner. The standalone runner still uses internal ASAN state to verify access errors are triaged as expected. Signed-off-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260426190338.4615-9-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- .../bpf/libarena/selftests/st_asan_buddy.bpf.c | 18 ++++++++ .../bpf/libarena/selftests/test_progs_compat.h | 15 +++++++ .../selftests/bpf/prog_tests/libarena_asan.c | 2 + tools/testing/selftests/bpf/test_loader.c | 51 +++++++++++++++++----- tools/testing/selftests/bpf/test_progs.h | 2 + 5 files changed, 76 insertions(+), 12 deletions(-) create mode 100644 tools/testing/selftests/bpf/libarena/selftests/test_progs_compat.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/libarena/selftests/st_asan_buddy.bpf.c b/tools/testing/selftests/bpf/libarena/selftests/st_asan_buddy.bpf.c index 9dd2980b5d6c..97acd50ffa5c 100644 --- a/tools/testing/selftests/bpf/libarena/selftests/st_asan_buddy.bpf.c +++ b/tools/testing/selftests/bpf/libarena/selftests/st_asan_buddy.bpf.c @@ -5,6 +5,9 @@ #include #include +/* Required for parsing the ASAN call stacks. 
*/ +#include "test_progs_compat.h" + extern buddy_t buddy; #ifdef BPF_ARENA_ASAN @@ -141,6 +144,11 @@ static __always_inline int asan_test_buddy_blob_single(void) } SEC("syscall") +__stderr("Memory violation for address {{.*}} for write of size 1") +__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}") +__stderr("Call trace:\n" +"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" +"|[ \t]+[^\n]+\n)*}}") __weak int asan_test_buddy_oob(void) { size_t sizes[] = { @@ -174,6 +182,11 @@ __weak int asan_test_buddy_oob(void) } SEC("syscall") +__stderr("Memory violation for address {{.*}} for write of size 1") +__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}") +__stderr("Call trace:\n" +"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" +"|[ \t]+[^\n]+\n)*}}") __weak int asan_test_buddy_uaf(void) { size_t sizes[] = { 16, 32, 64, 128, 256, 512, 1024, 16384 }; @@ -205,6 +218,11 @@ __weak int asan_test_buddy_uaf(void) } SEC("syscall") +__stderr("Memory violation for address {{.*}} for write of size 1") +__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}") +__stderr("Call trace:\n" +"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" +"|[ \t]+[^\n]+\n)*}}") __weak int asan_test_buddy_blob(void) { const int iters = 10; diff --git a/tools/testing/selftests/bpf/libarena/selftests/test_progs_compat.h b/tools/testing/selftests/bpf/libarena/selftests/test_progs_compat.h new file mode 100644 index 000000000000..9d431376c42f --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/selftests/test_progs_compat.h @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ +#pragma once + +#ifdef __BPF__ + +/* Selftests use these tags for compatibility with test_progs. 
*/ +#define __test_tag(tag) __attribute__((btf_decl_tag("comment:" XSTR(__COUNTER__) ":" tag))) +#define __stderr(msg) __test_tag("test_expect_stderr=" msg) +#define __stderr_unpriv(msg) __test_tag("test_expect_stderr_unpriv=" msg) + +#define XSTR(s) STR(s) +#define STR(s) #s + +#endif diff --git a/tools/testing/selftests/bpf/prog_tests/libarena_asan.c b/tools/testing/selftests/bpf/prog_tests/libarena_asan.c index b4fba10cdfbf..d59d9dd12ef2 100644 --- a/tools/testing/selftests/bpf/prog_tests/libarena_asan.c +++ b/tools/testing/selftests/bpf/prog_tests/libarena_asan.c @@ -25,6 +25,8 @@ static void run_libarena_asan_test(struct libarena_asan *skel, ret = libarena_run_prog(bpf_program__fd(prog)); ASSERT_OK(ret, name); + + verify_test_stderr(skel->obj, prog); } static void run_test(void) diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index c4c34cae6102..ee637809a1d4 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -93,7 +93,7 @@ void test_loader_fini(struct test_loader *tester) free(tester->log_buf); } -static void free_msgs(struct expected_msgs *msgs) +void free_msgs(struct expected_msgs *msgs) { int i; @@ -789,6 +789,43 @@ static void emit_stderr(const char *stderr, bool force) fprintf(stdout, "STDERR:\n=============\n%s=============\n", stderr); } +static void verify_stderr(int prog_fd, struct expected_msgs *msgs) +{ + LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); + char *buf; + int ret; + + if (!msgs->cnt) + return; + + buf = malloc(TEST_LOADER_LOG_BUF_SZ); + if (!ASSERT_OK_PTR(buf, "malloc")) + return; + + ret = bpf_prog_stream_read(prog_fd, 2, buf, TEST_LOADER_LOG_BUF_SZ - 1, + &ropts); + if (ret > 0) { + buf[ret] = '\0'; + emit_stderr(buf, false); + validate_msgs(buf, msgs, emit_stderr); + } else { + ASSERT_GT(ret, 0, "stderr stream read"); + } + + free(buf); +} + +void verify_test_stderr(struct bpf_object *obj, struct bpf_program *prog) +{ + struct 
test_spec spec = {}; + + if (parse_test_spec(NULL, obj, prog, &spec)) + return; + + verify_stderr(bpf_program__fd(prog), &spec.priv.stderr); + free_test_spec(&spec); +} + static void emit_stdout(const char *bpf_stdout, bool force) { if (!force && env.verbosity == VERBOSE_NONE) @@ -1314,17 +1351,7 @@ void run_subtest(struct test_loader *tester, goto tobj_cleanup; } - if (subspec->stderr.cnt) { - err = get_stream(2, bpf_program__fd(tprog), - tester->log_buf, tester->log_buf_sz); - if (err <= 0) { - PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n", - err, errno); - goto tobj_cleanup; - } - emit_stderr(tester->log_buf, false /*force*/); - validate_msgs(tester->log_buf, &subspec->stderr, emit_stderr); - } + verify_stderr(bpf_program__fd(tprog), &subspec->stderr); if (subspec->stdout.cnt) { err = get_stream(1, bpf_program__fd(tprog), diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 1a44467f4310..37955a8ad385 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -563,5 +563,7 @@ struct expected_msgs { void validate_msgs(const char *log_buf, struct expected_msgs *msgs, void (*emit_fn)(const char *buf, bool force)); +void free_msgs(struct expected_msgs *msgs); +void verify_test_stderr(struct bpf_object *obj, struct bpf_program *prog); #endif /* __TEST_PROGS_H */ -- cgit v1.2.3 From af469e10b4bc1446391514f69eeede843f29cf9c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 25 Apr 2026 15:48:24 -0700 Subject: selftests/bpf: a test for proper cnums compare in is_state_visited() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test case demonstrating a bug in cnum comparison logic fixed by the previous commit. A pruning point is reached with r0 in two states: 1. 32-bit range of [0x7FFFFFF0, U32_MAX] ∪ [0, 0x10] 2.
32-bit range of [0x100, 0x200] At the pruning point, the buggy is_state_visited() logic would assume range (2) to be a subset of (1) and fail to explore the path performing division by zero. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20260425-cnum-range-within-v1-2-2fdca70cb09d@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/verifier_bounds.c | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 5dd243e653c9..a3e4c0945137 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -2267,4 +2267,31 @@ __naked void deduce64_from_32_wrapping_32bit(void) : __clobber_all); } +/* Check that range_within() compares cnum ranges, not min/max projections. */ +SEC("socket") +__failure __msg("div by zero") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void range_within_cnum_cross_both_boundaries(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r1 = 0x80000020; \ + if r0 > r1 goto 1f; \ + r0 += 0x7FFFFFF0; /* PATH 1 */ \ + goto 2f; \ +1: call %[bpf_get_prandom_u32]; /* PATH 2 */ \ + if r0 < 0x100 goto 3f; \ + if r0 > 0x200 goto 3f; \ +2: /* PATH 1: r0 ∈ [0x7FFFFFF0, U32_MAX] ∪ [0, 0x10] */ \ + /* PATH 2: r0 ∈ [0x100, 0x200] */ \ + if r0 != 0x100 goto 3f; /* True only on PATH 2 */ \ + r0 /= 0; \ +3: exit; \ + " + :: __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b), + __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 9f5b3ffc3f1dac7204e32eeeff84bc5cc55c393e Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Tue, 28 Apr 2026 06:42:52 -0700 Subject: selftests/bpf: Rename libarena malloc/free methods The s390 architecture uses the token "free" for an enum, conflicting with the malloc/free definitions.
Rename the calls to arena_malloc and arena_free instead to prevent collisions. Reported-by: Ihor Solodrai Signed-off-by: Emil Tsalapatis Fixes: 86426a28c52d ("selftests/bpf: Add buddy allocator for libarena") Acked-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260428134252.2783519-1-etsal@meta.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/libarena/Makefile | 2 -- tools/testing/selftests/bpf/libarena/include/libarena/common.h | 6 +++--- tools/testing/selftests/bpf/libarena/src/common.bpf.c | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/libarena/Makefile b/tools/testing/selftests/bpf/libarena/Makefile index 3c695f9c0054..5e2ab514805e 100644 --- a/tools/testing/selftests/bpf/libarena/Makefile +++ b/tools/testing/selftests/bpf/libarena/Makefile @@ -51,8 +51,6 @@ ASAN_FLAGS += -mllvm -asan-destructor-kind=none override BPF_CFLAGS += -DENABLE_ATOMICS_TESTS override BPF_CFLAGS += -O2 -g override BPF_CFLAGS += -Wno-incompatible-pointer-types-discards-qualifiers -# Required to define our own arena-based free() -override BPF_CFLAGS += -Wno-incompatible-library-redeclaration # Required for suppressing harmless vmlinux.h-related warnings. 
override BPF_CFLAGS += -Wno-missing-declarations override BPF_CFLAGS += $(INCLUDES) diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/common.h b/tools/testing/selftests/bpf/libarena/include/libarena/common.h index e54cb7b869bd..ca1a6c1d6477 100644 --- a/tools/testing/selftests/bpf/libarena/include/libarena/common.h +++ b/tools/testing/selftests/bpf/libarena/include/libarena/common.h @@ -48,9 +48,9 @@ extern volatile u64 asan_violated; int arena_fls(__u64 word); -u64 malloc_internal(size_t size); -#define malloc(size) ((void __arena *)malloc_internal((size))) -void free(void __arena *ptr); +u64 arena_malloc_internal(size_t size); +#define arena_malloc(size) ((void __arena *)arena_malloc_internal((size))) +void arena_free(void __arena *ptr); /* * The verifier associates arenas with programs by checking LD.IMM diff --git a/tools/testing/selftests/bpf/libarena/src/common.bpf.c b/tools/testing/selftests/bpf/libarena/src/common.bpf.c index e5da1e37e83e..544bf9e1cb38 100644 --- a/tools/testing/selftests/bpf/libarena/src/common.bpf.c +++ b/tools/testing/selftests/bpf/libarena/src/common.bpf.c @@ -38,12 +38,12 @@ __weak int arena_buddy_reset(void) return buddy_init(&buddy); } -__weak u64 malloc_internal(size_t size) +__weak u64 arena_malloc_internal(size_t size) { return buddy_alloc_internal(&buddy, size); } -__weak void free(void __arg_arena __arena *ptr) +__weak void arena_free(void __arg_arena __arena *ptr) { buddy_free_internal(&buddy, (u64)ptr); } -- cgit v1.2.3 From 2ca6723a5f7b68c739dba47b2639e3eaa7884b09 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Thu, 30 Apr 2026 10:45:24 +0200 Subject: selftests/bpf: Test insns processed breakdown This patch extends the global subprog selftests to cover the new verifier log line that breaks down the instructions processed by global subprogs. The test ensures the log line is present and that it has the right number of subcounts.
Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/bpf/3a5157f4573edaa8846f6fc4041f715136f693b1.1777538384.git.paul.chaignon@gmail.com Signed-off-by: Kumar Kartikeya Dwivedi --- tools/testing/selftests/bpf/progs/verifier_global_subprogs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c index 1e08aff7532e..dc09d0e2d8ad 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c @@ -46,12 +46,13 @@ __noinline long global_dead(void) } SEC("?raw_tp") -__success __log_level(2) +__success __log_level(6) /* main prog is validated completely first */ __msg("('global_calls_good_only') is global and assumed valid.") /* eventually global_good() is transitively validated as well */ __msg("Validating global_good() func") __msg("('global_good') is safe for any args that match its prototype") +__msg("insns processed {{[0-9]+\\+[0-9]+\\+[0-9]+$}}") int chained_global_func_calls_success(void) { int sum = 0; -- cgit v1.2.3 From 2b6f0a1e4c9e0f618179c4a108249cc4a0442d11 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Mon, 27 Apr 2026 16:22:58 -0700 Subject: selftests/bpf: Add bench_force_done() for early benchmark completion The bench framework waits for duration_sec to elapse before collecting results. Benchmarks that know exactly how many samples they need can call bench_force_done() to signal completion early, avoiding wasted wall-clock time. Also refactor collect_measurements() to reuse bench_force_done() instead of open-coding the same mutex/cond_signal sequence. 
Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20260427232313.1582588-2-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bench.c | 14 +++++++++----- tools/testing/selftests/bpf/bench.h | 1 + 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 029b3e21f438..47a4e72208d6 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -741,6 +741,13 @@ static void setup_benchmark(void) static pthread_mutex_t bench_done_mtx = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t bench_done = PTHREAD_COND_INITIALIZER; +void bench_force_done(void) +{ + pthread_mutex_lock(&bench_done_mtx); + pthread_cond_signal(&bench_done); + pthread_mutex_unlock(&bench_done_mtx); +} + static void collect_measurements(long delta_ns) { int iter = state.res_cnt++; struct bench_res *res = &state.results[iter]; @@ -750,11 +757,8 @@ static void collect_measurements(long delta_ns) { if (bench->report_progress) bench->report_progress(iter, res, delta_ns); - if (iter == env.duration_sec + env.warmup_sec) { - pthread_mutex_lock(&bench_done_mtx); - pthread_cond_signal(&bench_done); - pthread_mutex_unlock(&bench_done_mtx); - } + if (iter == env.duration_sec + env.warmup_sec) + bench_force_done(); } int main(int argc, char **argv) diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index 7cf21936e7ed..89a3fc72f70e 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -70,6 +70,7 @@ extern struct env env; extern const struct bench *bench; void setup_libbpf(void); +void bench_force_done(void); void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns); void hits_drops_report_final(struct bench_res res[], int res_cnt); void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); -- cgit v1.2.3 From 
08158c111d7d87d88269d9f873a2fc54b87bcb99 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Mon, 27 Apr 2026 16:22:59 -0700 Subject: selftests/bpf: Add BPF batch-timing library Add a reusable timing library for BPF benchmarks that need to measure BPF program execution time. The BPF side (progs/bench_bpf_timing.bpf.h) provides per-CPU sample arrays and BENCH_BPF_LOOP(), a macro that brackets batch_iters iterations with bpf_ktime_get_ns() reads and records the elapsed time. One extra untimed iteration runs afterward for output validation. The userspace side (benchs/bench_bpf_timing.c) collects samples from the skeleton BSS, computes percentile statistics, and auto-calibrates batch_iters to target ~10 ms per batch. Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20260427232313.1582588-3-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 2 + tools/testing/selftests/bpf/bench_bpf_timing.h | 50 ++++ .../selftests/bpf/benchs/bench_bpf_timing.c | 272 +++++++++++++++++++++ .../selftests/bpf/progs/bench_bpf_timing.bpf.h | 69 ++++++ 4 files changed, 393 insertions(+) create mode 100644 tools/testing/selftests/bpf/bench_bpf_timing.h create mode 100644 tools/testing/selftests/bpf/benchs/bench_bpf_timing.c create mode 100644 tools/testing/selftests/bpf/progs/bench_bpf_timing.bpf.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 97ee61f2ade5..3d516f10f29e 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -906,6 +906,7 @@ $(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h $(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h $(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h $(OUTPUT)/bench_lpm_trie_map.o: $(OUTPUT)/lpm_trie_bench.skel.h $(OUTPUT)/lpm_trie_map.skel.h +$(OUTPUT)/bench_bpf_timing.o: bench_bpf_timing.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) 
$(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ @@ -928,6 +929,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_bpf_crypto.o \ $(OUTPUT)/bench_sockmap.o \ $(OUTPUT)/bench_lpm_trie_map.o \ + $(OUTPUT)/bench_bpf_timing.o \ $(OUTPUT)/usdt_1.o \ $(OUTPUT)/usdt_2.o \ # diff --git a/tools/testing/selftests/bpf/bench_bpf_timing.h b/tools/testing/selftests/bpf/bench_bpf_timing.h new file mode 100644 index 000000000000..6ef23b6d6639 --- /dev/null +++ b/tools/testing/selftests/bpf/bench_bpf_timing.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#ifndef __BENCH_BPF_TIMING_H__ +#define __BENCH_BPF_TIMING_H__ + +#include +#include +#include "bench.h" + +#ifndef BENCH_NR_SAMPLES +#define BENCH_NR_SAMPLES 4096 +#endif +#ifndef BENCH_NR_CPUS +#define BENCH_NR_CPUS 256 +#endif + +typedef void (*bpf_bench_run_fn)(void *ctx); + +struct bpf_bench_timing { + __u64 (*samples)[BENCH_NR_SAMPLES]; /* skel->bss->timing_samples */ + __u32 *idx; /* skel->bss->timing_idx */ + volatile __u32 *timing_enabled; /* &skel->bss->timing_enabled */ + volatile __u32 *batch_iters_bss; /* &skel->bss->batch_iters */ + __u32 batch_iters; + __u32 target_samples; + __u32 nr_cpus; + int warmup_ticks; + bool done; + bool machine_readable; +}; + +#define BENCH_TIMING_INIT(t, skel, iters) do { \ + (t)->samples = (skel)->bss->timing_samples; \ + (t)->idx = (skel)->bss->timing_idx; \ + (t)->timing_enabled = &(skel)->bss->timing_enabled; \ + (t)->batch_iters_bss = &(skel)->bss->batch_iters; \ + (t)->batch_iters = (iters); \ + (t)->target_samples = 200; \ + (t)->nr_cpus = env.nr_cpus; \ + (t)->warmup_ticks = 0; \ + (t)->done = false; \ + (t)->machine_readable = false; \ +} while (0) + +void bpf_bench_timing_measure(struct bpf_bench_timing *t, struct bench_res *res); +void bpf_bench_timing_report(struct bpf_bench_timing *t, const char *name, const char *desc); +void bpf_bench_calibrate(struct bpf_bench_timing *t, 
bpf_bench_run_fn run_fn, void *ctx); + +#endif /* __BENCH_BPF_TIMING_H__ */ diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_timing.c b/tools/testing/selftests/bpf/benchs/bench_bpf_timing.c new file mode 100644 index 000000000000..75a39da69655 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_timing.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include "bench_bpf_timing.h" +#include "bpf_util.h" + +struct timing_stats { + double min, max; + double median, p99; + double mean, stddev; + int count; +}; + +static int cmp_double(const void *a, const void *b) +{ + double da = *(const double *)a; + double db = *(const double *)b; + + if (da < db) + return -1; + if (da > db) + return 1; + return 0; +} + +static double percentile(const double *sorted, int n, double pct) +{ + int idx = (int)(n * pct / 100.0); + + if (idx >= n) + idx = n - 1; + return sorted[idx]; +} + +static int collect_samples(struct bpf_bench_timing *t, + double *out, int max_out) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + __u32 timed_iters = t->batch_iters; + int total = 0; + + if (nr_cpus > BENCH_NR_CPUS) + nr_cpus = BENCH_NR_CPUS; + + for (unsigned int cpu = 0; cpu < nr_cpus; cpu++) { + __u32 count = t->idx[cpu]; + + if (count > BENCH_NR_SAMPLES) + count = BENCH_NR_SAMPLES; + + for (__u32 i = 0; i < count && total < max_out; i++) { + __u64 sample = t->samples[cpu][i]; + + if (sample == 0) + continue; + out[total++] = (double)sample / timed_iters; + } + } + + qsort(out, total, sizeof(double), cmp_double); + return total; +} + +static void compute_stats(const double *sorted, int n, + struct timing_stats *s) +{ + double sum = 0, var_sum = 0; + + memset(s, 0, sizeof(*s)); + s->count = n; + + if (n == 0) + return; + + s->min = sorted[0]; + s->max = sorted[n - 1]; + s->median = sorted[n / 2]; + s->p99 = percentile(sorted, n, 99); + + for (int i = 0; i < n; 
i++) + sum += sorted[i]; + s->mean = sum / n; + + for (int i = 0; i < n; i++) { + double d = sorted[i] - s->mean; + + var_sum += d * d; + } + s->stddev = n > 1 ? sqrt(var_sum / (n - 1)) : 0; +} + +void bpf_bench_timing_measure(struct bpf_bench_timing *t, struct bench_res *res) +{ + unsigned int nr_cpus; + __u32 total_samples; + int i; + + t->warmup_ticks++; + + if (t->warmup_ticks < env.warmup_sec) + return; + + if (t->warmup_ticks == env.warmup_sec) { + *t->timing_enabled = 1; + return; + } + + nr_cpus = bpf_num_possible_cpus(); + if (nr_cpus > BENCH_NR_CPUS) + nr_cpus = BENCH_NR_CPUS; + + total_samples = 0; + for (i = 0; i < (int)nr_cpus; i++) { + __u32 cnt = t->idx[i]; + + if (cnt > BENCH_NR_SAMPLES) + cnt = BENCH_NR_SAMPLES; + total_samples += cnt; + } + + if (total_samples >= (__u32)env.producer_cnt * t->target_samples && !t->done) { + t->done = true; + *t->timing_enabled = 0; + bench_force_done(); + } +} + +void bpf_bench_timing_report(struct bpf_bench_timing *t, const char *name, const char *description) +{ + int max_out = BENCH_NR_CPUS * BENCH_NR_SAMPLES; + struct timing_stats s; + double *all; + int total; + + all = calloc(max_out, sizeof(*all)); + if (!all) { + fprintf(stderr, "failed to allocate timing buffer\n"); + return; + } + + total = collect_samples(t, all, max_out); + + if (total == 0) { + printf("No timing samples collected.\n"); + free(all); + return; + } + + compute_stats(all, total, &s); + + if (t->machine_readable) { + printf("RESULT scenario=%s samples=%d median=%.2f stddev=%.2f cv=%.2f min=%.2f " + "p99=%.2f max=%.2f\n", name, total, s.median, s.stddev, + s.mean > 0 ? 
s.stddev / s.mean * 100.0 : 0.0, s.min, s.p99, s.max); + } else { + printf("%s: median %.2f ns/op, stddev %.2f, p99 %.2f (%d samples)\n", name, + s.median, s.stddev, s.p99, total); + } + + free(all); +} + +#define CALIBRATE_SEED_BATCH 100 +#define CALIBRATE_MIN_BATCH 100 +#define CALIBRATE_MAX_BATCH 10000000 +#define CALIBRATE_TARGET_MS 10 +#define CALIBRATE_RUNS 5 +#define PROPORTIONALITY_TOL 0.05 /* 5% */ + +static void reset_timing(struct bpf_bench_timing *t) +{ + *t->timing_enabled = 0; + memset(t->samples, 0, sizeof(__u64) * BENCH_NR_CPUS * BENCH_NR_SAMPLES); + memset(t->idx, 0, sizeof(__u32) * BENCH_NR_CPUS); +} + +static __u64 measure_elapsed(struct bpf_bench_timing *t, bpf_bench_run_fn run_fn, void *run_ctx, + __u32 iters, int runs) +{ + __u64 buf[CALIBRATE_RUNS]; + int n = 0, i, j; + + reset_timing(t); + *t->batch_iters_bss = iters; + *t->timing_enabled = 1; + + for (i = 0; i < runs; i++) + run_fn(run_ctx); + + *t->timing_enabled = 0; + + for (i = 0; i < BENCH_NR_CPUS && n < runs; i++) { + __u32 cnt = t->idx[i]; + + for (j = 0; j < (int)cnt && n < runs; j++) + buf[n++] = t->samples[i][j]; + } + + if (n == 0) + return 0; + + for (i = 1; i < n; i++) { + __u64 key = buf[i]; + + j = i - 1; + while (j >= 0 && buf[j] > key) { + buf[j + 1] = buf[j]; + j--; + } + buf[j + 1] = key; + } + + return buf[n / 2]; +} + +static __u32 compute_batch_iters(__u64 per_op_ns) +{ + __u64 target_ns = (__u64)CALIBRATE_TARGET_MS * 1000000ULL; + __u32 iters; + + if (per_op_ns == 0) + return CALIBRATE_MIN_BATCH; + + iters = target_ns / per_op_ns; + + if (iters < CALIBRATE_MIN_BATCH) + iters = CALIBRATE_MIN_BATCH; + if (iters > CALIBRATE_MAX_BATCH) + iters = CALIBRATE_MAX_BATCH; + + return iters; +} + +void bpf_bench_calibrate(struct bpf_bench_timing *t, bpf_bench_run_fn run_fn, void *run_ctx) +{ + __u64 elapsed, per_op_ns; + __u64 time_n, time_2n; + double ratio; + + elapsed = measure_elapsed(t, run_fn, run_ctx, CALIBRATE_SEED_BATCH, CALIBRATE_RUNS); + if (elapsed == 0) { + 
fprintf(stderr, "calibration: no timing samples, using default\n"); + t->batch_iters = 10000; + *t->batch_iters_bss = t->batch_iters; + reset_timing(t); + return; + } + + per_op_ns = elapsed / CALIBRATE_SEED_BATCH; + t->batch_iters = compute_batch_iters(per_op_ns); + + time_n = measure_elapsed(t, run_fn, run_ctx, t->batch_iters, CALIBRATE_RUNS); + time_2n = measure_elapsed(t, run_fn, run_ctx, t->batch_iters * 2, CALIBRATE_RUNS); + + if (time_n > 0 && time_2n > 0) { + ratio = (double)time_2n / (double)time_n; + + if (fabs(ratio - 2.0) / 2.0 > PROPORTIONALITY_TOL) + fprintf(stderr, + "WARNING: proportionality check failed (2N/N ratio=%.3f, " + "expected=2.000, error=%.1f%%)\n System noise may be affecting " + "results.\n", + ratio, fabs(ratio - 2.0) / 2.0 * 100.0); + } + + *t->batch_iters_bss = t->batch_iters; + reset_timing(t); +} diff --git a/tools/testing/selftests/bpf/progs/bench_bpf_timing.bpf.h b/tools/testing/selftests/bpf/progs/bench_bpf_timing.bpf.h new file mode 100644 index 000000000000..6a1ad75f1fd7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bench_bpf_timing.bpf.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ + +#ifndef __BENCH_BPF_TIMING_BPF_H__ +#define __BENCH_BPF_TIMING_BPF_H__ + +#include +#include +#include +#include + +#ifndef BENCH_NR_SAMPLES +#define BENCH_NR_SAMPLES 4096 +#endif +#ifndef BENCH_NR_CPUS +#define BENCH_NR_CPUS 256 +#endif +#define BENCH_CPU_MASK (BENCH_NR_CPUS - 1) + +__u64 timing_samples[BENCH_NR_CPUS][BENCH_NR_SAMPLES]; +__u32 timing_idx[BENCH_NR_CPUS]; + +volatile __u32 batch_iters; +volatile __u32 timing_enabled; + +static __always_inline void bench_record_sample(__u64 elapsed_ns) +{ + __u32 cpu, idx; + + if (!timing_enabled) + return; + + cpu = bpf_get_smp_processor_id() & BENCH_CPU_MASK; + idx = timing_idx[cpu]; + + if (idx >= BENCH_NR_SAMPLES) + return; + + timing_samples[cpu][idx] = elapsed_ns; + timing_idx[cpu] = idx + 1; +} + +/* + * @body: expression to time; return value (int) stored in __bench_result. + * @reset: undo body's side-effects so each iteration starts identically. + * May reference __bench_result. Use ({}) for empty reset. + * + * Runs batch_iters timed iterations, then one untimed iteration whose + * return value the macro evaluates to (for validation). + */ +#define BENCH_BPF_LOOP(body, reset) ({ \ + __u64 __bench_start = bpf_ktime_get_ns(); \ + __u32 __bench_i; \ + int __bench_result; \ + \ + for (__bench_i = 0; \ + __bench_i < batch_iters && can_loop; \ + __bench_i++) { \ + __bench_result = (body); \ + reset; \ + } \ + \ + bench_record_sample(bpf_ktime_get_ns() - __bench_start); \ + \ + __bench_result = (body); \ + __bench_result; \ +}) + +#endif /* __BENCH_BPF_TIMING_BPF_H__ */ -- cgit v1.2.3 From dcf11479c2a8d3520953e8366f587ec2a36505a8 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Mon, 27 Apr 2026 16:23:00 -0700 Subject: selftests/bpf: Add bpf-nop benchmark for timing overhead baseline Add a minimal benchmark that measures the overhead of the batch-timing infrastructure itself. 
The BPF program runs an empty BENCH_BPF_LOOP body (~1.5-2 ns/op), establishing the floor cost that all timing-library benchmarks include. [root@virtme-ng tools/testing/selftests/bpf]# sudo ./bench -a -p8 bpf-nop Setting up benchmark 'bpf-nop'... Benchmark 'bpf-nop' started. bpf-nop: median 1.82 ns/op, stddev 0.01, p99 1.86 (1754 samples) Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20260427232313.1582588-4-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 2 + tools/testing/selftests/bpf/bench.c | 2 + tools/testing/selftests/bpf/benchs/bench_bpf_nop.c | 84 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/bpf_nop_bench.c | 14 ++++ 4 files changed, 102 insertions(+) create mode 100644 tools/testing/selftests/bpf/benchs/bench_bpf_nop.c create mode 100644 tools/testing/selftests/bpf/progs/bpf_nop_bench.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 3d516f10f29e..97f9fbd41244 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -906,6 +906,7 @@ $(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h $(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h $(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h $(OUTPUT)/bench_lpm_trie_map.o: $(OUTPUT)/lpm_trie_bench.skel.h $(OUTPUT)/lpm_trie_map.skel.h +$(OUTPUT)/bench_bpf_nop.o: $(OUTPUT)/bpf_nop_bench.skel.h bench_bpf_timing.h $(OUTPUT)/bench_bpf_timing.o: bench_bpf_timing.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm @@ -930,6 +931,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_sockmap.o \ $(OUTPUT)/bench_lpm_trie_map.o \ $(OUTPUT)/bench_bpf_timing.o \ + $(OUTPUT)/bench_bpf_nop.o \ $(OUTPUT)/usdt_1.o \ $(OUTPUT)/usdt_2.o \ # diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 47a4e72208d6..1696de5d6780 100644 --- 
a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -575,6 +575,7 @@ extern const struct bench bench_lpm_trie_insert; extern const struct bench bench_lpm_trie_update; extern const struct bench bench_lpm_trie_delete; extern const struct bench bench_lpm_trie_free; +extern const struct bench bench_bpf_nop; static const struct bench *benchs[] = { &bench_count_global, @@ -653,6 +654,7 @@ static const struct bench *benchs[] = { &bench_lpm_trie_update, &bench_lpm_trie_delete, &bench_lpm_trie_free, + &bench_bpf_nop, }; static void find_benchmark(void) diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_nop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_nop.c new file mode 100644 index 000000000000..e2d8c2ccf384 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_nop.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#include "bench.h" +#include "bench_bpf_timing.h" +#include "bpf_nop_bench.skel.h" +#include "bpf_util.h" + +static struct ctx { + struct bpf_nop_bench *skel; + struct bpf_bench_timing timing; + int prog_fd; +} ctx; + +static void nop_validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumers\n"); + exit(1); + } +} + +static void nop_run_once(void *unused __always_unused) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + + bpf_prog_test_run_opts(ctx.prog_fd, &topts); +} + +static void nop_setup(void) +{ + struct bpf_nop_bench *skel; + int err; + + setup_libbpf(); + + skel = bpf_nop_bench__open(); + if (!skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + err = bpf_nop_bench__load(skel); + if (err) { + fprintf(stderr, "failed to load skeleton: %s\n", strerror(-err)); + bpf_nop_bench__destroy(skel); + exit(1); + } + + ctx.skel = skel; + ctx.prog_fd = bpf_program__fd(skel->progs.bench_nop); + + BENCH_TIMING_INIT(&ctx.timing, skel, 0); + bpf_bench_calibrate(&ctx.timing, 
nop_run_once, NULL); + + env.duration_sec = 600; +} + +static void *nop_producer(void *input) +{ + while (true) + nop_run_once(NULL); + + return NULL; +} + +static void nop_measure(struct bench_res *res) +{ + bpf_bench_timing_measure(&ctx.timing, res); +} + +static void nop_report_final(struct bench_res res[], int res_cnt) +{ + bpf_bench_timing_report(&ctx.timing, "bpf-nop", NULL); +} + +const struct bench bench_bpf_nop = { + .name = "bpf-nop", + .validate = nop_validate, + .setup = nop_setup, + .producer_thread = nop_producer, + .measure = nop_measure, + .report_final = nop_report_final, +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_nop_bench.c b/tools/testing/selftests/bpf/progs/bpf_nop_bench.c new file mode 100644 index 000000000000..01ed284c1bb3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_nop_bench.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include "bench_bpf_timing.bpf.h" + +SEC("syscall") +int bench_nop(void *ctx) +{ + return BENCH_BPF_LOOP(0, ({})); +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 6b4003a7b333602fb24b514a27067e7a2c98136e Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Mon, 27 Apr 2026 16:23:01 -0700 Subject: selftests/bpf: Add XDP load-balancer common definitions Add the shared header for the XDP load-balancer benchmark. This defines the data structures used by both the BPF program and userspace: flow_key, vip_definition, real_definition, and the stats/control structures. Also provides the encapsulation source-address helpers shared between the BPF datapath (for encap) and userspace (for building expected output packets used in validation). 
Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20260427232313.1582588-5-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/xdp_lb_bench_common.h | 112 ++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 tools/testing/selftests/bpf/xdp_lb_bench_common.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdp_lb_bench_common.h b/tools/testing/selftests/bpf/xdp_lb_bench_common.h new file mode 100644 index 000000000000..aed20a963701 --- /dev/null +++ b/tools/testing/selftests/bpf/xdp_lb_bench_common.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#ifndef XDP_LB_BENCH_COMMON_H +#define XDP_LB_BENCH_COMMON_H + +#define F_IPV6 (1 << 0) +#define F_LRU_BYPASS (1 << 1) + +#define CH_RING_SIZE 65537 /* per-VIP consistent hash ring slots */ +#define MAX_VIPS 16 +#define CH_RINGS_SIZE (MAX_VIPS * CH_RING_SIZE) +#define MAX_REALS 512 +#define DEFAULT_LRU_SIZE 100000 /* connection tracking cache size */ +#define ONE_SEC 1000000000U /* 1 sec in nanosec */ +#define MAX_CONN_RATE 100000000 /* high enough to never trigger in bench */ +#define LRU_UDP_TIMEOUT 30000000000ULL /* 30 sec in nanosec */ +#define PCKT_FRAGMENTED 0x3FFF +#define KNUTH_HASH_MULT 2654435761U +#define IPIP_V4_PREFIX 4268 /* 172.16/12 in network order */ +#define IPIP_V6_PREFIX1 1 /* 0100::/64 (RFC 6666 discard) */ +#define IPIP_V6_PREFIX2 0 +#define IPIP_V6_PREFIX3 0 + +/* Stats indices (0..MAX_VIPS-1 are per-VIP packet/byte counters) */ +#define STATS_LRU (MAX_VIPS + 0) /* v1: total VIP packets, v2: LRU misses */ +#define STATS_XDP_TX (MAX_VIPS + 1) +#define STATS_XDP_PASS (MAX_VIPS + 2) +#define STATS_XDP_DROP (MAX_VIPS + 3) +#define STATS_NEW_CONN (MAX_VIPS + 4) /* v1: conn count, v2: last reset ts */ +#define STATS_LRU_MISS (MAX_VIPS + 5) /* v1: TCP LRU misses */ +#define STATS_SIZE (MAX_VIPS + 6) + +#ifdef __BPF__ +#define lb_htons(x) bpf_htons(x) 
+#define LB_INLINE static __always_inline +#else +#define lb_htons(x) htons(x) +#define LB_INLINE static inline +#endif + +LB_INLINE __be32 create_encap_ipv4_src(__u16 port, __be32 src) +{ + __u32 ip_suffix = lb_htons(port); + + ip_suffix <<= 16; + ip_suffix ^= src; + return (0xFFFF0000 & ip_suffix) | IPIP_V4_PREFIX; +} + +LB_INLINE void create_encap_ipv6_src(__u16 port, __be32 src, __be32 *saddr) +{ + saddr[0] = IPIP_V6_PREFIX1; + saddr[1] = IPIP_V6_PREFIX2; + saddr[2] = IPIP_V6_PREFIX3; + saddr[3] = src ^ port; +} + +struct flow_key { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; + __u8 pad[3]; +}; + +struct vip_definition { + union { + __be32 vip; + __be32 vipv6[4]; + }; + __u16 port; + __u8 proto; + __u8 pad; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_pos_lru { + __u32 pos; + __u64 atime; +}; + +struct real_definition { + __be32 dst; + __be32 dstv6[4]; + __u8 flags; +}; + +struct lb_stats { + __u64 v1; + __u64 v2; +}; + +struct ctl_value { + __u8 mac[6]; + __u8 pad[2]; +}; + +#endif /* XDP_LB_BENCH_COMMON_H */ -- cgit v1.2.3 From 4b4f2229104c9010005d50125ccbfb1b4be68be5 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Mon, 27 Apr 2026 16:23:02 -0700 Subject: selftests/bpf: Add XDP load-balancer BPF program Add the BPF datapath for the XDP load-balancer benchmark, a simplified L4 load-balancer inspired by katran. The pipeline: L3/L4 parse -> VIP lookup -> per-CPU LRU connection table or consistent-hash fallback -> real server lookup -> per-VIP and per-real stats -> IPIP/IP6IP6 encapsulation. TCP SYN forces the consistent-hash path (skipping LRU); TCP RST skips LRU insert to avoid polluting the table. process_packet() is marked __noinline so that the BENCH_BPF_LOOP reset block (which strips encapsulation) operates on valid packet pointers after bpf_xdp_adjust_head(). 
Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20260427232313.1582588-6-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/xdp_lb_bench.c | 647 +++++++++++++++++++++++ 1 file changed, 647 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/xdp_lb_bench.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/xdp_lb_bench.c b/tools/testing/selftests/bpf/progs/xdp_lb_bench.c new file mode 100644 index 000000000000..b9fd848c035d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdp_lb_bench.c @@ -0,0 +1,647 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_compiler.h" +#include "xdp_lb_bench_common.h" +#include "bench_bpf_timing.bpf.h" + +#ifndef IPPROTO_FRAGMENT +#define IPPROTO_FRAGMENT 44 +#endif + +/* jhash helpers */ + +static inline __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +static inline __u32 __jhash_nwords(__u32 a, __u32 b, __u32 c, __u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static inline __u32 jhash_2words(__u32 a, __u32 b, __u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); 
+} + +static inline __u32 jhash2_4words(const __u32 *k, __u32 initval) +{ + __u32 a, b, c; + + a = b = c = JHASH_INITVAL + (4 << 2) + initval; + + a += k[0]; b += k[1]; c += k[2]; + __jhash_mix(a, b, c); + + a += k[3]; + __jhash_final(a, b, c); + + return c; +} + +static __always_inline void ipv4_csum(struct iphdr *iph) +{ + __u16 *next_iph = (__u16 *)iph; + __u32 csum = 0; + int i; + + __pragma_loop_unroll_full + for (i = 0; i < (int)(sizeof(*iph) >> 1); i++) + csum += *next_iph++; + + csum = (csum & 0xffff) + (csum >> 16); + csum = (csum & 0xffff) + (csum >> 16); + iph->check = ~csum; +} + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 64); + __type(key, struct vip_definition); + __type(value, struct vip_meta); +} vip_map SEC(".maps"); + +struct lru_inner_map { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __type(key, struct flow_key); + __type(value, struct real_pos_lru); + __uint(max_entries, DEFAULT_LRU_SIZE); +} lru_inner SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, BENCH_NR_CPUS); + __array(values, struct lru_inner_map); +} lru_mapping SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, CH_RINGS_SIZE); + __type(key, __u32); + __type(value, __u32); +} ch_rings SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, MAX_REALS); + __type(key, __u32); + __type(value, struct real_definition); +} reals SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, STATS_SIZE); + __type(key, __u32); + __type(value, struct lb_stats); +} stats SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, MAX_REALS); + __type(key, __u32); + __type(value, struct lb_stats); +} reals_stats SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct ctl_value); +} ctl_array 
SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct vip_definition); +} vip_miss_stats SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, MAX_REALS); + __type(key, __u32); + __type(value, __u32); +} lru_miss_stats SEC(".maps"); + +volatile __u32 flow_mask; +volatile __u32 cold_lru; +__u32 batch_gen; + +/* + * old_eth MUST be read BEFORE writing the outer header because + * bpf_xdp_adjust_head makes them overlap. + */ +static __always_inline int encap_v4(struct xdp_md *xdp, __be32 saddr, __be32 daddr, + __u16 payload_len, const __u8 *dst_mac) +{ + struct ethhdr *new_eth, *old_eth; + void *data, *data_end; + struct iphdr *iph; + + if (bpf_xdp_adjust_head(xdp, -(int)sizeof(struct iphdr))) + return -1; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + + new_eth = data; + iph = data + sizeof(struct ethhdr); + old_eth = data + sizeof(struct iphdr); + + if (new_eth + 1 > data_end || old_eth + 1 > data_end || iph + 1 > data_end) + return -1; + + __builtin_memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source)); + __builtin_memcpy(new_eth->h_dest, dst_mac, sizeof(new_eth->h_dest)); + new_eth->h_proto = bpf_htons(ETH_P_IP); + + __builtin_memset(iph, 0, sizeof(*iph)); + iph->version = 4; + iph->ihl = sizeof(*iph) >> 2; + iph->protocol = IPPROTO_IPIP; + iph->tot_len = bpf_htons(payload_len + sizeof(*iph)); + iph->ttl = 64; + iph->saddr = saddr; + iph->daddr = daddr; + ipv4_csum(iph); + + return 0; +} + +static __always_inline int encap_v6(struct xdp_md *xdp, const __be32 saddr[4], + const __be32 daddr[4], __u8 nexthdr, __u16 payload_len, + const __u8 *dst_mac) +{ + struct ethhdr *new_eth, *old_eth; + void *data, *data_end; + struct ipv6hdr *ip6h; + + if (bpf_xdp_adjust_head(xdp, -(int)sizeof(struct ipv6hdr))) + return -1; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + + new_eth = 
data; + ip6h = data + sizeof(struct ethhdr); + old_eth = data + sizeof(struct ipv6hdr); + + if (new_eth + 1 > data_end || old_eth + 1 > data_end || ip6h + 1 > data_end) + return -1; + + __builtin_memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source)); + __builtin_memcpy(new_eth->h_dest, dst_mac, sizeof(new_eth->h_dest)); + new_eth->h_proto = bpf_htons(ETH_P_IPV6); + + __builtin_memset(ip6h, 0, sizeof(*ip6h)); + ip6h->version = 6; + ip6h->nexthdr = nexthdr; + ip6h->payload_len = bpf_htons(payload_len); + ip6h->hop_limit = 64; + __builtin_memcpy(&ip6h->saddr, saddr, sizeof(ip6h->saddr)); + __builtin_memcpy(&ip6h->daddr, daddr, sizeof(ip6h->daddr)); + + return 0; +} + +static __always_inline void update_stats(void *map, __u32 key, __u16 bytes) +{ + struct lb_stats *st = bpf_map_lookup_elem(map, &key); + + if (st) { + st->v1 += 1; + st->v2 += bytes; + } +} + +static __always_inline void count_action(int action) +{ + struct lb_stats *st; + __u32 key; + + if (action == XDP_TX) + key = STATS_XDP_TX; + else if (action == XDP_PASS) + key = STATS_XDP_PASS; + else + key = STATS_XDP_DROP; + + st = bpf_map_lookup_elem(&stats, &key); + if (st) + st->v1 += 1; +} + +static __always_inline bool is_under_flood(void) +{ + __u32 key = STATS_NEW_CONN; + struct lb_stats *conn_st = bpf_map_lookup_elem(&stats, &key); + __u64 cur_time; + + if (!conn_st) + return true; + + cur_time = bpf_ktime_get_ns(); + if ((cur_time - conn_st->v2) > ONE_SEC) { + conn_st->v1 = 1; + conn_st->v2 = cur_time; + } else { + conn_st->v1 += 1; + if (conn_st->v1 > MAX_CONN_RATE) + return true; + } + return false; +} + +static __always_inline struct real_definition *connection_table_lookup(void *lru_map, + struct flow_key *flow, + __u32 *out_pos) +{ + struct real_pos_lru *dst_lru; + struct real_definition *real; + __u32 key; + + dst_lru = bpf_map_lookup_elem(lru_map, flow); + if (!dst_lru) + return NULL; + + /* UDP connections use atime-based timeout instead of FIN/RST */ + if (flow->proto == 
IPPROTO_UDP) { + __u64 cur_time = bpf_ktime_get_ns(); + + if (cur_time - dst_lru->atime > LRU_UDP_TIMEOUT) + return NULL; + dst_lru->atime = cur_time; + } + + key = dst_lru->pos; + *out_pos = key; + real = bpf_map_lookup_elem(&reals, &key); + return real; +} + +static __always_inline bool get_packet_dst(struct real_definition **real, struct flow_key *flow, + struct vip_meta *vip_info, bool is_v6, void *lru_map, + bool is_rst, __u32 *out_pos) +{ + bool under_flood; + __u32 hash, ch_key; + __u32 *ch_val; + __u32 real_pos; + + under_flood = is_under_flood(); + + if (is_v6) { + __u32 src_hash = jhash2_4words((__u32 *)flow->srcv6, MAX_VIPS); + + hash = jhash_2words(src_hash, flow->ports, CH_RING_SIZE); + } else { + hash = jhash_2words(flow->src, flow->ports, CH_RING_SIZE); + } + + ch_key = CH_RING_SIZE * vip_info->vip_num + hash % CH_RING_SIZE; + ch_val = bpf_map_lookup_elem(&ch_rings, &ch_key); + if (!ch_val) + return false; + real_pos = *ch_val; + + *real = bpf_map_lookup_elem(&reals, &real_pos); + if (!(*real)) + return false; + + if (!(vip_info->flags & F_LRU_BYPASS) && !under_flood && !is_rst) { + struct real_pos_lru new_lru = { .pos = real_pos }; + + if (flow->proto == IPPROTO_UDP) + new_lru.atime = bpf_ktime_get_ns(); + bpf_map_update_elem(lru_map, flow, &new_lru, BPF_ANY); + } + + *out_pos = real_pos; + return true; +} + +static __always_inline void update_vip_lru_miss_stats(struct vip_definition *vip, bool is_v6, + __u32 real_idx) +{ + struct vip_definition *miss_vip; + __u32 key = 0; + __u32 *cnt; + + miss_vip = bpf_map_lookup_elem(&vip_miss_stats, &key); + if (!miss_vip) + return; + + if (is_v6) { + if (miss_vip->vipv6[0] != vip->vipv6[0] || miss_vip->vipv6[1] != vip->vipv6[1] || + miss_vip->vipv6[2] != vip->vipv6[2] || miss_vip->vipv6[3] != vip->vipv6[3]) + return; + } else { + if (miss_vip->vip != vip->vip) + return; + } + + if (miss_vip->port != vip->port || miss_vip->proto != vip->proto) + return; + + cnt = bpf_map_lookup_elem(&lru_miss_stats, &real_idx); 
+ if (cnt) + *cnt += 1; +} + +static __noinline int process_packet(struct xdp_md *xdp) +{ + void *data = (void *)(long)xdp->data; + void *data_end = (void *)(long)xdp->data_end; + struct ethhdr *eth = data; + struct real_definition *dst = NULL; + struct vip_definition vip_def = {}; + struct ctl_value *cval; + struct flow_key flow = {}; + struct vip_meta *vip_info; + struct lb_stats *data_stats; + struct udphdr *uh; + __be32 tnl_src[4]; + void *lru_map; + void *l4; + __u16 payload_len; + __u32 real_pos = 0, cpu_num, key; + __u8 proto; + int action = XDP_DROP; + bool is_v6, is_syn = false, is_rst = false; + + if (eth + 1 > data_end) + goto out; + + if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + is_v6 = true; + } else if (eth->h_proto == bpf_htons(ETH_P_IP)) { + is_v6 = false; + } else { + action = XDP_PASS; + goto out; + } + + if (is_v6) { + struct ipv6hdr *ip6h = (void *)(eth + 1); + + if (ip6h + 1 > data_end) + goto out; + if (ip6h->nexthdr == IPPROTO_FRAGMENT) + goto out; + + payload_len = sizeof(struct ipv6hdr) + bpf_ntohs(ip6h->payload_len); + proto = ip6h->nexthdr; + + __builtin_memcpy(flow.srcv6, &ip6h->saddr, sizeof(flow.srcv6)); + __builtin_memcpy(flow.dstv6, &ip6h->daddr, sizeof(flow.dstv6)); + __builtin_memcpy(vip_def.vipv6, &ip6h->daddr, sizeof(vip_def.vipv6)); + l4 = (void *)(ip6h + 1); + } else { + struct iphdr *iph = (void *)(eth + 1); + + if (iph + 1 > data_end) + goto out; + if (iph->ihl != 5) + goto out; + if (iph->frag_off & bpf_htons(PCKT_FRAGMENTED)) + goto out; + + payload_len = bpf_ntohs(iph->tot_len); + proto = iph->protocol; + + flow.src = iph->saddr; + flow.dst = iph->daddr; + vip_def.vip = iph->daddr; + l4 = (void *)(iph + 1); + } + + /* TCP and UDP share the same port layout at offset 0 */ + if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) { + action = XDP_PASS; + goto out; + } + + uh = l4; + if ((void *)(uh + 1) > data_end) + goto out; + flow.port16[0] = uh->source; + flow.port16[1] = uh->dest; + + if (proto == IPPROTO_TCP) { + struct 
tcphdr *th = l4; + + if ((void *)(th + 1) > data_end) + goto out; + is_syn = th->syn; + is_rst = th->rst; + } + + flow.proto = proto; + vip_def.port = flow.port16[1]; + vip_def.proto = proto; + + vip_info = bpf_map_lookup_elem(&vip_map, &vip_def); + if (!vip_info) { + action = XDP_PASS; + goto out; + } + + key = STATS_LRU; + data_stats = bpf_map_lookup_elem(&stats, &key); + if (!data_stats) + goto out; + data_stats->v1 += 1; + + cpu_num = bpf_get_smp_processor_id(); + lru_map = bpf_map_lookup_elem(&lru_mapping, &cpu_num); + if (!lru_map) + goto out; + + if (!(vip_info->flags & F_LRU_BYPASS) && !is_syn) + dst = connection_table_lookup(lru_map, &flow, &real_pos); + + if (!dst) { + if (flow.proto == IPPROTO_TCP) { + struct lb_stats *miss_st; + + key = STATS_LRU_MISS; + miss_st = bpf_map_lookup_elem(&stats, &key); + if (miss_st) + miss_st->v1 += 1; + } + + if (!get_packet_dst(&dst, &flow, vip_info, is_v6, lru_map, is_rst, &real_pos)) + goto out; + + update_vip_lru_miss_stats(&vip_def, is_v6, real_pos); + data_stats->v2 += 1; + } + + key = 0; + cval = bpf_map_lookup_elem(&ctl_array, &key); + if (!cval) + goto out; + + update_stats(&stats, vip_info->vip_num, payload_len); + update_stats(&reals_stats, real_pos, payload_len); + + if (is_v6) { + create_encap_ipv6_src(flow.port16[0], flow.srcv6[0], tnl_src); + if (encap_v6(xdp, tnl_src, dst->dstv6, IPPROTO_IPV6, payload_len, cval->mac)) + goto out; + } else if (dst->flags & F_IPV6) { + create_encap_ipv6_src(flow.port16[0], flow.src, tnl_src); + if (encap_v6(xdp, tnl_src, dst->dstv6, IPPROTO_IPIP, payload_len, cval->mac)) + goto out; + } else { + if (encap_v4(xdp, create_encap_ipv4_src(flow.port16[0], flow.src), dst->dst, + payload_len, cval->mac)) + goto out; + } + + action = XDP_TX; + +out: + count_action(action); + return action; +} + +static __always_inline int strip_encap(struct xdp_md *xdp, const struct ethhdr *saved_eth) +{ + void *data = (void *)(long)xdp->data; + void *data_end = (void *)(long)xdp->data_end; + struct 
ethhdr *eth = data; + int hdr_sz; + + if (eth + 1 > data_end) + return -1; + + hdr_sz = (eth->h_proto == bpf_htons(ETH_P_IPV6)) ? (int)sizeof(struct ipv6hdr) + : (int)sizeof(struct iphdr); + + if (bpf_xdp_adjust_head(xdp, hdr_sz)) + return -1; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + eth = data; + + if (eth + 1 > data_end) + return -1; + + __builtin_memcpy(eth, saved_eth, sizeof(*saved_eth)); + return 0; +} + +static __always_inline void randomize_src(struct xdp_md *xdp, int saddr_off, __u32 *rand_state) +{ + void *data = (void *)(long)xdp->data; + void *data_end = (void *)(long)xdp->data_end; + __u32 *saddr = data + saddr_off; + + *rand_state ^= *rand_state << 13; + *rand_state ^= *rand_state >> 17; + *rand_state ^= *rand_state << 5; + + if ((void *)(saddr + 1) <= data_end) + *saddr = *rand_state & flow_mask; +} + +SEC("xdp") +int xdp_lb_bench(struct xdp_md *xdp) +{ + void *data = (void *)(long)xdp->data; + void *data_end = (void *)(long)xdp->data_end; + struct ethhdr *eth = data; + struct ethhdr saved_eth; + __u32 rand_state = 0; + __u32 batch_hash = 0; + int saddr_off = 0; + bool is_v6; + + if (eth + 1 > data_end) + return XDP_DROP; + + __builtin_memcpy(&saved_eth, eth, sizeof(saved_eth)); + + is_v6 = (saved_eth.h_proto == bpf_htons(ETH_P_IPV6)); + + saddr_off = sizeof(struct ethhdr) + (is_v6 ? 
offsetof(struct ipv6hdr, saddr) : + offsetof(struct iphdr, saddr)); + + if (flow_mask) + rand_state = bpf_get_prandom_u32() | 1; + + if (cold_lru) { + __u32 *saddr = data + saddr_off; + + batch_gen++; + batch_hash = (batch_gen ^ bpf_get_smp_processor_id()) * KNUTH_HASH_MULT; + if ((void *)(saddr + 1) <= data_end) + *saddr ^= batch_hash; + } + + return BENCH_BPF_LOOP( + process_packet(xdp), + ({ + if (__bench_result == XDP_TX) { + if (strip_encap(xdp, &saved_eth)) + return XDP_DROP; + if (rand_state) + randomize_src(xdp, saddr_off, &rand_state); + } + if (cold_lru) { + void *d = (void *)(long)xdp->data; + void *de = (void *)(long)xdp->data_end; + __u32 *__sa = d + saddr_off; + + if ((void *)(__sa + 1) <= de) + *__sa ^= batch_hash; + } + }) + ); +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From a4b5ba8187cb184aacac4ac8c86b4ef4821a4aa6 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Mon, 27 Apr 2026 16:23:03 -0700 Subject: selftests/bpf: Add XDP load-balancer benchmark driver Wire up the userspace side of the XDP load-balancer benchmark. 24 scenarios cover the full code-path matrix: TCP/UDP, IPv4/IPv6, cross-AF encap, LRU hit/miss/diverse/cold, consistent-hash bypass, SYN/RST flag handling, and early exits (unknown VIP, non-IP, ICMP, fragments, IP options). Before benchmarking, each scenario validates correctness: the output packet is compared byte-for-byte against a pre-built expected packet and BPF map counters are checked against the expected values. 
Usage: sudo ./bench -a -w3 -p1 xdp-lb --scenario tcp-v4-lru-hit sudo ./bench xdp-lb --list-scenarios Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20260427232313.1582588-7-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 2 + tools/testing/selftests/bpf/bench.c | 4 + tools/testing/selftests/bpf/benchs/bench_xdp_lb.c | 1113 +++++++++++++++++++++ 3 files changed, 1119 insertions(+) create mode 100644 tools/testing/selftests/bpf/benchs/bench_xdp_lb.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 97f9fbd41244..bc049620c774 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -907,6 +907,7 @@ $(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h $(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h $(OUTPUT)/bench_lpm_trie_map.o: $(OUTPUT)/lpm_trie_bench.skel.h $(OUTPUT)/lpm_trie_map.skel.h $(OUTPUT)/bench_bpf_nop.o: $(OUTPUT)/bpf_nop_bench.skel.h bench_bpf_timing.h +$(OUTPUT)/bench_xdp_lb.o: $(OUTPUT)/xdp_lb_bench.skel.h bench_bpf_timing.h $(OUTPUT)/bench_bpf_timing.o: bench_bpf_timing.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm @@ -932,6 +933,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_lpm_trie_map.o \ $(OUTPUT)/bench_bpf_timing.o \ $(OUTPUT)/bench_bpf_nop.o \ + $(OUTPUT)/bench_xdp_lb.o \ $(OUTPUT)/usdt_1.o \ $(OUTPUT)/usdt_2.o \ # diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 1696de5d6780..6155ce455c27 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -286,6 +286,7 @@ extern struct argp bench_trigger_batch_argp; extern struct argp bench_crypto_argp; extern struct argp bench_sockmap_argp; extern struct argp bench_lpm_trie_map_argp; +extern struct argp bench_xdp_lb_argp; static const struct argp_child bench_parsers[] = { { 
&bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, @@ -302,6 +303,7 @@ static const struct argp_child bench_parsers[] = { { &bench_crypto_argp, 0, "bpf crypto benchmark", 0 }, { &bench_sockmap_argp, 0, "bpf sockmap benchmark", 0 }, { &bench_lpm_trie_map_argp, 0, "LPM trie map benchmark", 0 }, + { &bench_xdp_lb_argp, 0, "XDP load-balancer benchmark", 0 }, {}, }; @@ -576,6 +578,7 @@ extern const struct bench bench_lpm_trie_update; extern const struct bench bench_lpm_trie_delete; extern const struct bench bench_lpm_trie_free; extern const struct bench bench_bpf_nop; +extern const struct bench bench_xdp_lb; static const struct bench *benchs[] = { &bench_count_global, @@ -655,6 +658,7 @@ static const struct bench *benchs[] = { &bench_lpm_trie_delete, &bench_lpm_trie_free, &bench_bpf_nop, + &bench_xdp_lb, }; static void find_benchmark(void) diff --git a/tools/testing/selftests/bpf/benchs/bench_xdp_lb.c b/tools/testing/selftests/bpf/benchs/bench_xdp_lb.c new file mode 100644 index 000000000000..0b6709a2b03c --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_xdp_lb.c @@ -0,0 +1,1113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bench.h" +#include "bench_bpf_timing.h" +#include "xdp_lb_bench.skel.h" +#include "xdp_lb_bench_common.h" +#include "bpf_util.h" + +#define IP4(a, b, c, d) (((__u32)(a) << 24) | ((__u32)(b) << 16) | ((__u32)(c) << 8) | (__u32)(d)) + +#define IP6(a, b, c, d) { (__u32)(a), (__u32)(b), (__u32)(c), (__u32)(d) } + +#define TNL_DST IP4(192, 168, 1, 2) +#define REAL_INDEX 1 +#define REAL_INDEX_V6 2 +#define MAX_PKT_SIZE 256 +#define IP_MF 0x2000 + +static const __u32 tnl_dst_v6[4] = { 0xfd000000, 0, 0, 2 }; + +static const __u8 lb_mac[ETH_ALEN] = {0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff}; +static const __u8 client_mac[ETH_ALEN] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; +static const __u8 router_mac[ETH_ALEN] = {0xde, 0xad, 0xbe, 0xef, 0x00, 0x01}; + +enum scenario_id { + S_TCP_V4_LRU_HIT, + S_TCP_V4_CH, + S_TCP_V6_LRU_HIT, + S_TCP_V6_CH, + S_UDP_V4_LRU_HIT, + S_UDP_V6_LRU_HIT, + S_TCP_V4V6_LRU_HIT, + S_TCP_V4_LRU_DIVERSE, + S_TCP_V4_CH_DIVERSE, + S_TCP_V6_LRU_DIVERSE, + S_TCP_V6_CH_DIVERSE, + S_UDP_V4_LRU_DIVERSE, + S_TCP_V4_LRU_MISS, + S_UDP_V4_LRU_MISS, + S_TCP_V4_LRU_WARMUP, + S_TCP_V4_SYN, + S_TCP_V4_RST_MISS, + S_PASS_V4_NO_VIP, + S_PASS_V6_NO_VIP, + S_PASS_V4_ICMP, + S_PASS_NON_IP, + S_DROP_V4_FRAG, + S_DROP_V4_OPTIONS, + S_DROP_V6_FRAG, + NUM_SCENARIOS, +}; + +enum lru_miss_type { + LRU_MISS_AUTO = 0, /* compute from scenario flags (default) */ + LRU_MISS_NONE, /* 0 misses (all LRU hits) */ + LRU_MISS_ALL, /* batch_iters+1 misses (every op misses) */ + LRU_MISS_FIRST, /* 1 miss (first miss, then hits) */ +}; + +#define S_BASE_ENCAP_V4 \ + .expected_retval = XDP_TX, .expect_encap = true, \ + .tunnel_dst = TNL_DST + +#define S_BASE_ENCAP_V6 \ + .expected_retval = XDP_TX, .expect_encap = true, \ + .is_v6 = true, .encap_v6_outer = true, \ + .tunnel_dst_v6 = { 0xfd000000, 0, 0, 2 } + +#define S_BASE_ENCAP_V4V6 \ + .expected_retval = XDP_TX, .expect_encap = true, \ + 
.encap_v6_outer = true, \ + .tunnel_dst_v6 = { 0xfd000000, 0, 0, 2 } + +struct test_scenario { + const char *name; + const char *description; + int expected_retval; + bool expect_encap; + bool is_v6; + __u32 vip_addr; + __u32 src_addr; + __u32 tunnel_dst; + __u32 vip_addr_v6[4]; + __u32 src_addr_v6[4]; + __u32 tunnel_dst_v6[4]; + __u16 dst_port; + __u16 src_port; + __u8 ip_proto; + __u32 vip_flags; + __u32 vip_num; + bool prepopulate_lru; + bool set_frag; + __u16 eth_proto; + bool encap_v6_outer; + __u32 flow_mask; + bool cold_lru; + bool set_syn; + bool set_rst; + bool set_ip_options; + __u32 fixed_batch_iters; /* 0 = auto-calibrate, >0 = use this value */ + enum lru_miss_type lru_miss; /* expected LRU miss pattern */ +}; + +static const struct test_scenario scenarios[NUM_SCENARIOS] = { + /* Single-flow baseline */ + [S_TCP_V4_LRU_HIT] = { + S_BASE_ENCAP_V4, .ip_proto = IPPROTO_TCP, + .name = "tcp-v4-lru-hit", + .description = "IPv4 TCP, LRU hit, IPIP encap", + .vip_addr = IP4(10, 10, 1, 1), .dst_port = 80, + .src_addr = IP4(10, 10, 2, 1), .src_port = 12345, + .prepopulate_lru = true, .lru_miss = LRU_MISS_NONE, + }, + [S_TCP_V4_CH] = { + S_BASE_ENCAP_V4, .ip_proto = IPPROTO_TCP, + .name = "tcp-v4-ch", + .description = "IPv4 TCP, CH (LRU bypass), IPIP encap", + .vip_addr = IP4(10, 10, 1, 2), .dst_port = 80, + .src_addr = IP4(10, 10, 2, 2), .src_port = 54321, + .vip_flags = F_LRU_BYPASS, .vip_num = 1, + .lru_miss = LRU_MISS_ALL, + }, + [S_TCP_V6_LRU_HIT] = { + S_BASE_ENCAP_V6, .ip_proto = IPPROTO_TCP, + .name = "tcp-v6-lru-hit", + .description = "IPv6 TCP, LRU hit, IP6IP6 encap", + .vip_addr_v6 = IP6(0xfd000100, 0, 0, 1), .dst_port = 80, + .src_addr_v6 = IP6(0xfd000200, 0, 0, 1), .src_port = 12345, + .vip_num = 10, + .prepopulate_lru = true, .lru_miss = LRU_MISS_NONE, + }, + [S_TCP_V6_CH] = { + S_BASE_ENCAP_V6, .ip_proto = IPPROTO_TCP, + .name = "tcp-v6-ch", + .description = "IPv6 TCP, CH (LRU bypass), IP6IP6 encap", + .vip_addr_v6 = IP6(0xfd000100, 0, 0, 2), 
.dst_port = 80, + .src_addr_v6 = IP6(0xfd000200, 0, 0, 2), .src_port = 54321, + .vip_flags = F_LRU_BYPASS, .vip_num = 12, + .lru_miss = LRU_MISS_ALL, + }, + [S_UDP_V4_LRU_HIT] = { + S_BASE_ENCAP_V4, .ip_proto = IPPROTO_UDP, + .name = "udp-v4-lru-hit", + .description = "IPv4 UDP, LRU hit, IPIP encap", + .vip_addr = IP4(10, 10, 1, 1), .dst_port = 443, + .src_addr = IP4(10, 10, 3, 1), .src_port = 11111, + .vip_num = 2, + .prepopulate_lru = true, .lru_miss = LRU_MISS_NONE, + }, + [S_UDP_V6_LRU_HIT] = { + S_BASE_ENCAP_V6, .ip_proto = IPPROTO_UDP, + .name = "udp-v6-lru-hit", + .description = "IPv6 UDP, LRU hit, IP6IP6 encap", + .vip_addr_v6 = IP6(0xfd000100, 0, 0, 1), .dst_port = 443, + .src_addr_v6 = IP6(0xfd000200, 0, 0, 3), .src_port = 22222, + .vip_num = 14, + .prepopulate_lru = true, .lru_miss = LRU_MISS_NONE, + }, + [S_TCP_V4V6_LRU_HIT] = { + S_BASE_ENCAP_V4V6, .ip_proto = IPPROTO_TCP, + .name = "tcp-v4v6-lru-hit", + .description = "IPv4 TCP, LRU hit, IPv4-in-IPv6 encap", + .vip_addr = IP4(10, 10, 1, 4), .dst_port = 80, + .src_addr = IP4(10, 10, 2, 4), .src_port = 12347, + .vip_num = 13, + .prepopulate_lru = true, .lru_miss = LRU_MISS_NONE, + }, + + /* Diverse flows (4K src addrs) */ + [S_TCP_V4_LRU_DIVERSE] = { + S_BASE_ENCAP_V4, .ip_proto = IPPROTO_TCP, + .name = "tcp-v4-lru-diverse", + .description = "IPv4 TCP, diverse flows, warm LRU", + .vip_addr = IP4(10, 10, 1, 1), .dst_port = 80, + .src_addr = IP4(10, 10, 2, 1), .src_port = 12345, + .prepopulate_lru = true, .flow_mask = 0xFFF, + .lru_miss = LRU_MISS_NONE, + }, + [S_TCP_V4_CH_DIVERSE] = { + S_BASE_ENCAP_V4, .ip_proto = IPPROTO_TCP, + .name = "tcp-v4-ch-diverse", + .description = "IPv4 TCP, diverse flows, CH (LRU bypass)", + .vip_addr = IP4(10, 10, 1, 2), .dst_port = 80, + .src_addr = IP4(10, 10, 2, 2), .src_port = 54321, + .vip_flags = F_LRU_BYPASS, .vip_num = 1, + .flow_mask = 0xFFF, .lru_miss = LRU_MISS_ALL, + }, + [S_TCP_V6_LRU_DIVERSE] = { + S_BASE_ENCAP_V6, .ip_proto = IPPROTO_TCP, + .name = 
"tcp-v6-lru-diverse", + .description = "IPv6 TCP, diverse flows, warm LRU", + .vip_addr_v6 = IP6(0xfd000100, 0, 0, 1), .dst_port = 80, + .src_addr_v6 = IP6(0xfd000200, 0, 0, 1), .src_port = 12345, + .vip_num = 10, + .prepopulate_lru = true, .flow_mask = 0xFFF, + .lru_miss = LRU_MISS_NONE, + }, + [S_TCP_V6_CH_DIVERSE] = { + S_BASE_ENCAP_V6, .ip_proto = IPPROTO_TCP, + .name = "tcp-v6-ch-diverse", + .description = "IPv6 TCP, diverse flows, CH (LRU bypass)", + .vip_addr_v6 = IP6(0xfd000100, 0, 0, 2), .dst_port = 80, + .src_addr_v6 = IP6(0xfd000200, 0, 0, 2), .src_port = 54321, + .vip_flags = F_LRU_BYPASS, .vip_num = 12, + .flow_mask = 0xFFF, .lru_miss = LRU_MISS_ALL, + }, + [S_UDP_V4_LRU_DIVERSE] = { + S_BASE_ENCAP_V4, .ip_proto = IPPROTO_UDP, + .name = "udp-v4-lru-diverse", + .description = "IPv4 UDP, diverse flows, warm LRU", + .vip_addr = IP4(10, 10, 1, 1), .dst_port = 443, + .src_addr = IP4(10, 10, 3, 1), .src_port = 11111, + .vip_num = 2, + .prepopulate_lru = true, .flow_mask = 0xFFF, + .lru_miss = LRU_MISS_NONE, + }, + + /* LRU stress */ + [S_TCP_V4_LRU_MISS] = { + S_BASE_ENCAP_V4, .ip_proto = IPPROTO_TCP, + .name = "tcp-v4-lru-miss", + .description = "IPv4 TCP, LRU miss (16M flow space), CH lookup", + .vip_addr = IP4(10, 10, 1, 1), .dst_port = 80, + .src_addr = IP4(10, 10, 2, 1), .src_port = 12345, + .flow_mask = 0xFFFFFF, .cold_lru = true, + .lru_miss = LRU_MISS_FIRST, + }, + [S_UDP_V4_LRU_MISS] = { + S_BASE_ENCAP_V4, .ip_proto = IPPROTO_UDP, + .name = "udp-v4-lru-miss", + .description = "IPv4 UDP, LRU miss (16M flow space), CH lookup", + .vip_addr = IP4(10, 10, 1, 1), .dst_port = 443, + .src_addr = IP4(10, 10, 3, 1), .src_port = 11111, + .vip_num = 2, + .flow_mask = 0xFFFFFF, .cold_lru = true, + .lru_miss = LRU_MISS_FIRST, + }, + [S_TCP_V4_LRU_WARMUP] = { + S_BASE_ENCAP_V4, .ip_proto = IPPROTO_TCP, + .name = "tcp-v4-lru-warmup", + .description = "IPv4 TCP, 4K flows, ~50% LRU miss", + .vip_addr = IP4(10, 10, 1, 1), .dst_port = 80, + .src_addr = IP4(10, 10, 2, 
1), .src_port = 12345, + .flow_mask = 0xFFF, .cold_lru = true, + .fixed_batch_iters = 6500, + .lru_miss = LRU_MISS_FIRST, + }, + + /* TCP flags */ + [S_TCP_V4_SYN] = { + S_BASE_ENCAP_V4, .ip_proto = IPPROTO_TCP, + .name = "tcp-v4-syn", + .description = "IPv4 TCP SYN, skip LRU, CH + LRU insert", + .vip_addr = IP4(10, 10, 1, 1), .dst_port = 80, + .src_addr = IP4(10, 10, 8, 2), .src_port = 60001, + .set_syn = true, .lru_miss = LRU_MISS_ALL, + }, + [S_TCP_V4_RST_MISS] = { + S_BASE_ENCAP_V4, .ip_proto = IPPROTO_TCP, + .name = "tcp-v4-rst-miss", + .description = "IPv4 TCP RST, CH lookup, no LRU insert", + .vip_addr = IP4(10, 10, 1, 1), .dst_port = 80, + .src_addr = IP4(10, 10, 8, 1), .src_port = 60000, + .flow_mask = 0xFFFFFF, .cold_lru = true, + .set_rst = true, .lru_miss = LRU_MISS_ALL, + }, + + /* Early exits */ + [S_PASS_V4_NO_VIP] = { + .name = "pass-v4-no-vip", + .description = "IPv4 TCP, unknown VIP, XDP_PASS", + .expected_retval = XDP_PASS, + .ip_proto = IPPROTO_TCP, + .vip_addr = IP4(10, 10, 9, 9), .dst_port = 80, + .src_addr = IP4(10, 10, 4, 1), .src_port = 33333, + }, + [S_PASS_V6_NO_VIP] = { + .name = "pass-v6-no-vip", + .description = "IPv6 TCP, unknown VIP, XDP_PASS", + .expected_retval = XDP_PASS, .is_v6 = true, + .ip_proto = IPPROTO_TCP, + .vip_addr_v6 = IP6(0xfd009900, 0, 0, 1), .dst_port = 80, + .src_addr_v6 = IP6(0xfd000400, 0, 0, 1), .src_port = 33333, + }, + [S_PASS_V4_ICMP] = { + .name = "pass-v4-icmp", + .description = "IPv4 ICMP, non-TCP/UDP protocol, XDP_PASS", + .expected_retval = XDP_PASS, + .ip_proto = IPPROTO_ICMP, + .vip_addr = IP4(10, 10, 1, 1), + .src_addr = IP4(10, 10, 6, 1), + }, + [S_PASS_NON_IP] = { + .name = "pass-non-ip", + .description = "Non-IP (ARP), earliest XDP_PASS exit", + .expected_retval = XDP_PASS, + .eth_proto = ETH_P_ARP, + }, + [S_DROP_V4_FRAG] = { + .name = "drop-v4-frag", + .description = "IPv4 fragmented, XDP_DROP", + .expected_retval = XDP_DROP, .ip_proto = IPPROTO_TCP, + .vip_addr = IP4(10, 10, 1, 1), .dst_port = 
80, + .src_addr = IP4(10, 10, 5, 1), .src_port = 44444, + .set_frag = true, + }, + [S_DROP_V4_OPTIONS] = { + .name = "drop-v4-options", + .description = "IPv4 with IP options (ihl>5), XDP_DROP", + .expected_retval = XDP_DROP, .ip_proto = IPPROTO_TCP, + .vip_addr = IP4(10, 10, 1, 1), .dst_port = 80, + .src_addr = IP4(10, 10, 7, 1), .src_port = 55555, + .set_ip_options = true, + }, + [S_DROP_V6_FRAG] = { + .name = "drop-v6-frag", + .description = "IPv6 fragment extension header, XDP_DROP", + .expected_retval = XDP_DROP, .is_v6 = true, + .ip_proto = IPPROTO_TCP, + .vip_addr_v6 = IP6(0xfd000100, 0, 0, 1), .dst_port = 80, + .src_addr_v6 = IP6(0xfd000500, 0, 0, 1), .src_port = 44444, + .set_frag = true, + }, +}; + +#define MAX_ENCAP_SIZE (MAX_PKT_SIZE + sizeof(struct ipv6hdr)) + +static __u8 pkt_buf[NUM_SCENARIOS][MAX_PKT_SIZE]; +static __u32 pkt_len[NUM_SCENARIOS]; +static __u8 expected_buf[NUM_SCENARIOS][MAX_ENCAP_SIZE]; +static __u32 expected_len[NUM_SCENARIOS]; + +static int lru_inner_fds[BENCH_NR_CPUS]; +static int nr_inner_maps; + +static struct ctx { + struct xdp_lb_bench *skel; + struct bpf_bench_timing timing; + int prog_fd; +} ctx; + +static struct { + int scenario; + bool machine_readable; +} args = { + .scenario = -1, +}; + +static __u16 ip_checksum(const void *hdr, int len) +{ + const __u16 *p = hdr; + __u32 csum = 0; + int i; + + for (i = 0; i < len / 2; i++) + csum += p[i]; + + while (csum >> 16) + csum = (csum & 0xffff) + (csum >> 16); + + return ~csum; +} + +static void htonl_v6(__be32 dst[4], const __u32 src[4]) +{ + int i; + + for (i = 0; i < 4; i++) + dst[i] = htonl(src[i]); +} + +static void build_flow_key(struct flow_key *fk, const struct test_scenario *sc) +{ + memset(fk, 0, sizeof(*fk)); + if (sc->is_v6) { + htonl_v6(fk->srcv6, sc->src_addr_v6); + htonl_v6(fk->dstv6, sc->vip_addr_v6); + } else { + fk->src = htonl(sc->src_addr); + fk->dst = htonl(sc->vip_addr); + } + fk->proto = sc->ip_proto; + fk->port16[0] = htons(sc->src_port); + fk->port16[1] = 
htons(sc->dst_port); +} + +static void build_l4(const struct test_scenario *sc, __u8 *p, __u32 *off) +{ + if (sc->ip_proto == IPPROTO_TCP) { + struct tcphdr tcp = {}; + + tcp.source = htons(sc->src_port); + tcp.dest = htons(sc->dst_port); + tcp.doff = 5; + tcp.syn = sc->set_syn ? 1 : 0; + tcp.rst = sc->set_rst ? 1 : 0; + tcp.window = htons(8192); + memcpy(p + *off, &tcp, sizeof(tcp)); + *off += sizeof(tcp); + } else if (sc->ip_proto == IPPROTO_UDP) { + struct udphdr udp = {}; + + udp.source = htons(sc->src_port); + udp.dest = htons(sc->dst_port); + udp.len = htons(sizeof(udp) + 16); + memcpy(p + *off, &udp, sizeof(udp)); + *off += sizeof(udp); + } +} + +static void build_packet(int idx) +{ + const struct test_scenario *sc = &scenarios[idx]; + __u8 *p = pkt_buf[idx]; + struct ethhdr eth = {}; + __u16 proto; + __u32 off = 0; + + memcpy(eth.h_dest, lb_mac, ETH_ALEN); + memcpy(eth.h_source, client_mac, ETH_ALEN); + + if (sc->eth_proto) + proto = sc->eth_proto; + else if (sc->is_v6) + proto = ETH_P_IPV6; + else + proto = ETH_P_IP; + + eth.h_proto = htons(proto); + memcpy(p, &eth, sizeof(eth)); + off += sizeof(eth); + + if (proto != ETH_P_IP && proto != ETH_P_IPV6) { + memcpy(p + off, "bench___payload!", 16); + off += 16; + pkt_len[idx] = off; + return; + } + + if (sc->is_v6) { + struct ipv6hdr ip6h = {}; + __u32 ip6_off = off; + + ip6h.version = 6; + ip6h.nexthdr = sc->set_frag ? 44 : sc->ip_proto; + ip6h.hop_limit = 64; + htonl_v6((__be32 *)&ip6h.saddr, sc->src_addr_v6); + htonl_v6((__be32 *)&ip6h.daddr, sc->vip_addr_v6); + off += sizeof(ip6h); + + if (sc->set_frag) { + memset(p + off, 0, 8); + p[off] = sc->ip_proto; + off += 8; + } + + build_l4(sc, p, &off); + + memcpy(p + off, "bench___payload!", 16); + off += 16; + + ip6h.payload_len = htons(off - ip6_off - sizeof(ip6h)); + memcpy(p + ip6_off, &ip6h, sizeof(ip6h)); + } else { + struct iphdr iph = {}; + __u32 ip_off = off; + + iph.version = 4; + iph.ihl = sc->set_ip_options ? 
6 : 5; + iph.ttl = 64; + iph.protocol = sc->ip_proto; + iph.saddr = htonl(sc->src_addr); + iph.daddr = htonl(sc->vip_addr); + iph.frag_off = sc->set_frag ? htons(IP_MF) : 0; + off += sizeof(iph); + + if (sc->set_ip_options) { + /* NOP option padding (4 bytes = 1 word) */ + __u32 nop = htonl(0x01010101); + + memcpy(p + off, &nop, sizeof(nop)); + off += sizeof(nop); + } + + build_l4(sc, p, &off); + + memcpy(p + off, "bench___payload!", 16); + off += 16; + + iph.tot_len = htons(off - ip_off); + iph.check = ip_checksum(&iph, sizeof(iph)); + memcpy(p + ip_off, &iph, sizeof(iph)); + } + + pkt_len[idx] = off; +} + +static void populate_vip(struct xdp_lb_bench *skel, const struct test_scenario *sc) +{ + struct vip_definition key = {}; + struct vip_meta val = {}; + int err; + + if (sc->is_v6) + htonl_v6(key.vipv6, sc->vip_addr_v6); + else + key.vip = htonl(sc->vip_addr); + key.port = htons(sc->dst_port); + key.proto = sc->ip_proto; + val.flags = sc->vip_flags; + val.vip_num = sc->vip_num; + + err = bpf_map_update_elem(bpf_map__fd(skel->maps.vip_map), &key, &val, BPF_ANY); + if (err) { + fprintf(stderr, "vip_map [%s]: %s\n", sc->name, strerror(errno)); + exit(1); + } +} + +static void create_per_cpu_lru_maps(struct xdp_lb_bench *skel) +{ + int outer_fd = bpf_map__fd(skel->maps.lru_mapping); + unsigned int nr_cpus = bpf_num_possible_cpus(); + int i, inner_fd, err; + __u32 cpu; + + if (nr_cpus > BENCH_NR_CPUS) + nr_cpus = BENCH_NR_CPUS; + + for (i = 0; i < (int)nr_cpus; i++) { + LIBBPF_OPTS(bpf_map_create_opts, opts); + + inner_fd = bpf_map_create(BPF_MAP_TYPE_LRU_HASH, "lru_inner", + sizeof(struct flow_key), + sizeof(struct real_pos_lru), + DEFAULT_LRU_SIZE, &opts); + if (inner_fd < 0) { + fprintf(stderr, "lru_inner[%d]: %s\n", i, strerror(errno)); + exit(1); + } + + cpu = i; + err = bpf_map_update_elem(outer_fd, &cpu, &inner_fd, BPF_ANY); + if (err) { + fprintf(stderr, "lru_mapping[%d]: %s\n", i, strerror(errno)); + close(inner_fd); + exit(1); + } + + lru_inner_fds[i] = 
inner_fd; + } + + nr_inner_maps = nr_cpus; +} + +static void populate_lru(const struct test_scenario *sc, __u32 real_idx) +{ + struct real_pos_lru lru = { .pos = real_idx }; + struct flow_key fk; + int i, err; + + build_flow_key(&fk, sc); + + /* Insert into every per-CPU inner LRU so the entry is found + * regardless of which CPU runs the BPF program. + */ + for (i = 0; i < nr_inner_maps; i++) { + err = bpf_map_update_elem(lru_inner_fds[i], &fk, &lru, BPF_ANY); + if (err) { + fprintf(stderr, "lru_inner[%d] [%s]: %s\n", i, sc->name, + strerror(errno)); + exit(1); + } + } +} + +static void populate_maps(struct xdp_lb_bench *skel) +{ + struct real_definition real_v4 = {}; + struct real_definition real_v6 = {}; + struct ctl_value cval = {}; + __u32 key, real_idx = REAL_INDEX; + int ch_fd, err, i; + + if (scenarios[args.scenario].expect_encap) + populate_vip(skel, &scenarios[args.scenario]); + + ch_fd = bpf_map__fd(skel->maps.ch_rings); + for (i = 0; i < CH_RINGS_SIZE; i++) { + __u32 k = i; + + err = bpf_map_update_elem(ch_fd, &k, &real_idx, BPF_ANY); + if (err) { + fprintf(stderr, "ch_rings[%d]: %s\n", i, strerror(errno)); + exit(1); + } + } + + memcpy(cval.mac, router_mac, ETH_ALEN); + key = 0; + err = bpf_map_update_elem(bpf_map__fd(skel->maps.ctl_array), &key, &cval, BPF_ANY); + if (err) { + fprintf(stderr, "ctl_array: %s\n", strerror(errno)); + exit(1); + } + + key = REAL_INDEX; + real_v4.dst = htonl(TNL_DST); + htonl_v6(real_v4.dstv6, tnl_dst_v6); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.reals), &key, &real_v4, BPF_ANY); + if (err) { + fprintf(stderr, "reals[%d]: %s\n", REAL_INDEX, strerror(errno)); + exit(1); + } + + key = REAL_INDEX_V6; + htonl_v6(real_v6.dstv6, tnl_dst_v6); + real_v6.flags = F_IPV6; + err = bpf_map_update_elem(bpf_map__fd(skel->maps.reals), &key, &real_v6, BPF_ANY); + if (err) { + fprintf(stderr, "reals[%d]: %s\n", REAL_INDEX_V6, strerror(errno)); + exit(1); + } + + create_per_cpu_lru_maps(skel); + + if 
(scenarios[args.scenario].prepopulate_lru) { + const struct test_scenario *sc = &scenarios[args.scenario]; + __u32 ridx = sc->encap_v6_outer ? REAL_INDEX_V6 : REAL_INDEX; + + populate_lru(sc, ridx); + } + + if (scenarios[args.scenario].expect_encap) { + const struct test_scenario *sc = &scenarios[args.scenario]; + struct vip_definition miss_vip = {}; + + if (sc->is_v6) + htonl_v6(miss_vip.vipv6, sc->vip_addr_v6); + else + miss_vip.vip = htonl(sc->vip_addr); + miss_vip.port = htons(sc->dst_port); + miss_vip.proto = sc->ip_proto; + + key = 0; + err = bpf_map_update_elem(bpf_map__fd(skel->maps.vip_miss_stats), + &key, &miss_vip, BPF_ANY); + if (err) { + fprintf(stderr, "vip_miss_stats: %s\n", strerror(errno)); + exit(1); + } + } +} + +static void build_expected_packet(int idx) +{ + const struct test_scenario *sc = &scenarios[idx]; + __u8 *p = expected_buf[idx]; + struct ethhdr eth = {}; + const __u8 *in = pkt_buf[idx]; + __u32 in_len = pkt_len[idx]; + __u32 off = 0; + __u32 inner_len = in_len - sizeof(struct ethhdr); + + if (sc->expected_retval == XDP_DROP) { + expected_len[idx] = 0; + return; + } + + if (sc->expected_retval == XDP_PASS) { + memcpy(p, in, in_len); + expected_len[idx] = in_len; + return; + } + + memcpy(eth.h_dest, router_mac, ETH_ALEN); + memcpy(eth.h_source, lb_mac, ETH_ALEN); + eth.h_proto = htons(sc->encap_v6_outer ? ETH_P_IPV6 : ETH_P_IP); + memcpy(p, &eth, sizeof(eth)); + off += sizeof(eth); + + if (sc->encap_v6_outer) { + struct ipv6hdr ip6h = {}; + __u8 nexthdr = sc->is_v6 ? IPPROTO_IPV6 : IPPROTO_IPIP; + + ip6h.version = 6; + ip6h.nexthdr = nexthdr; + ip6h.payload_len = htons(inner_len); + ip6h.hop_limit = 64; + + create_encap_ipv6_src(htons(sc->src_port), + sc->is_v6 ? 
htonl(sc->src_addr_v6[0]) + : htonl(sc->src_addr), + (__be32 *)&ip6h.saddr); + htonl_v6((__be32 *)&ip6h.daddr, sc->tunnel_dst_v6); + + memcpy(p + off, &ip6h, sizeof(ip6h)); + off += sizeof(ip6h); + } else { + struct iphdr iph = {}; + + iph.version = 4; + iph.ihl = sizeof(iph) >> 2; + iph.protocol = IPPROTO_IPIP; + iph.tot_len = htons(inner_len + sizeof(iph)); + iph.ttl = 64; + iph.saddr = create_encap_ipv4_src(htons(sc->src_port), + htonl(sc->src_addr)); + iph.daddr = htonl(sc->tunnel_dst); + iph.check = ip_checksum(&iph, sizeof(iph)); + + memcpy(p + off, &iph, sizeof(iph)); + off += sizeof(iph); + } + + memcpy(p + off, in + sizeof(struct ethhdr), inner_len); + off += inner_len; + + expected_len[idx] = off; +} + +static void print_hex_diff(const char *name, const __u8 *got, __u32 got_len, const __u8 *exp, + __u32 exp_len) +{ + __u32 max_len = got_len > exp_len ? got_len : exp_len; + __u32 i, ndiffs = 0; + + fprintf(stderr, " [%s] got %u bytes, expected %u bytes\n", + name, got_len, exp_len); + + for (i = 0; i < max_len && ndiffs < 8; i++) { + __u8 g = i < got_len ? got[i] : 0; + __u8 e = i < exp_len ? exp[i] : 0; + + if (g != e || i >= got_len || i >= exp_len) { + fprintf(stderr, " offset 0x%03x: got 0x%02x expected 0x%02x\n", + i, g, e); + ndiffs++; + } + } + + if (ndiffs >= 8 && i < max_len) + fprintf(stderr, " ... 
(more differences)\n"); +} + +static void read_stat(int stats_fd, __u32 key, __u64 *v1_out, __u64 *v2_out) +{ + struct lb_stats values[BENCH_NR_CPUS]; + unsigned int nr_cpus = bpf_num_possible_cpus(); + __u64 v1 = 0, v2 = 0; + unsigned int i; + + if (nr_cpus > BENCH_NR_CPUS) + nr_cpus = BENCH_NR_CPUS; + + if (bpf_map_lookup_elem(stats_fd, &key, values) == 0) { + for (i = 0; i < nr_cpus; i++) { + v1 += values[i].v1; + v2 += values[i].v2; + } + } + + *v1_out = v1; + *v2_out = v2; +} + +static void reset_stats(int stats_fd) +{ + struct lb_stats zeros[BENCH_NR_CPUS]; + __u32 key; + + memset(zeros, 0, sizeof(zeros)); + for (key = 0; key < STATS_SIZE; key++) + bpf_map_update_elem(stats_fd, &key, zeros, BPF_ANY); +} + +static bool validate_counters(int idx) +{ + const struct test_scenario *sc = &scenarios[idx]; + int stats_fd = bpf_map__fd(ctx.skel->maps.stats); + __u64 xdp_tx, xdp_pass, xdp_drop, lru_pkts, lru_misses, tcp_misses; + __u64 expected_misses; + __u64 dummy; + /* + * BENCH_BPF_LOOP runs batch_iters timed + 1 untimed iteration. + * Each iteration calls process_packet -> count_action, so all + * counters are incremented (batch_iters + 1) times. 
+ */ + __u64 n = ctx.timing.batch_iters + 1; + bool pass = true; + + read_stat(stats_fd, STATS_XDP_TX, &xdp_tx, &dummy); + read_stat(stats_fd, STATS_XDP_PASS, &xdp_pass, &dummy); + read_stat(stats_fd, STATS_XDP_DROP, &xdp_drop, &dummy); + read_stat(stats_fd, STATS_LRU, &lru_pkts, &lru_misses); + read_stat(stats_fd, STATS_LRU_MISS, &tcp_misses, &dummy); + + if (sc->expected_retval == XDP_TX && xdp_tx != n) { + fprintf(stderr, " [%s] COUNTER FAIL: STATS_XDP_TX=%llu, expected %llu\n", sc->name, + (unsigned long long)xdp_tx, (unsigned long long)n); + pass = false; + } + if (sc->expected_retval == XDP_PASS && xdp_pass != n) { + fprintf(stderr, " [%s] COUNTER FAIL: STATS_XDP_PASS=%llu, expected %llu\n", + sc->name, (unsigned long long)xdp_pass, (unsigned long long)n); + pass = false; + } + if (sc->expected_retval == XDP_DROP && xdp_drop != n) { + fprintf(stderr, " [%s] COUNTER FAIL: STATS_XDP_DROP=%llu, expected %llu\n", + sc->name, (unsigned long long)xdp_drop, (unsigned long long)n); + pass = false; + } + + if (!sc->expect_encap) + goto out; + + if (lru_pkts != n) { + fprintf(stderr, " [%s] COUNTER FAIL: STATS_LRU.v1=%llu, expected %llu\n", + sc->name, (unsigned long long)lru_pkts, (unsigned long long)n); + pass = false; + } + + switch (sc->lru_miss) { + case LRU_MISS_NONE: + expected_misses = 0; + break; + case LRU_MISS_ALL: + expected_misses = n; + break; + case LRU_MISS_FIRST: + expected_misses = 1; + break; + default: + /* LRU_MISS_AUTO: compute from scenario flags */ + if (sc->prepopulate_lru && !sc->set_syn) + expected_misses = 0; + else if (sc->set_syn || sc->set_rst || + (sc->vip_flags & F_LRU_BYPASS)) + expected_misses = n; + else if (sc->cold_lru) + expected_misses = 1; + else + expected_misses = n; + break; + } + + if (lru_misses != expected_misses) { + fprintf(stderr, " [%s] COUNTER FAIL: LRU misses=%llu, expected %llu\n", + sc->name, (unsigned long long)lru_misses, + (unsigned long long)expected_misses); + pass = false; + } + + if (sc->ip_proto == 
IPPROTO_TCP && lru_misses > 0) { + if (tcp_misses != lru_misses) { + fprintf(stderr, " [%s] COUNTER FAIL: TCP LRU misses=%llu, expected %llu\n", + sc->name, (unsigned long long)tcp_misses, + (unsigned long long)lru_misses); + pass = false; + } + } + +out: + reset_stats(stats_fd); + return pass; +} + +static const char *xdp_action_str(int action) +{ + switch (action) { + case XDP_DROP: return "XDP_DROP"; + case XDP_PASS: return "XDP_PASS"; + case XDP_TX: return "XDP_TX"; + default: return "UNKNOWN"; + } +} + +static bool validate_scenario(int idx) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + const struct test_scenario *sc = &scenarios[idx]; + __u8 out[MAX_ENCAP_SIZE]; + int err; + + topts.data_in = pkt_buf[idx]; + topts.data_size_in = pkt_len[idx]; + topts.data_out = out; + topts.data_size_out = sizeof(out); + topts.repeat = 1; + + err = bpf_prog_test_run_opts(ctx.prog_fd, &topts); + if (err) { + fprintf(stderr, " [%s] FAIL: test_run: %s\n", sc->name, strerror(errno)); + return false; + } + + if ((int)topts.retval != sc->expected_retval) { + fprintf(stderr, " [%s] FAIL: retval %s, expected %s\n", sc->name, + xdp_action_str(topts.retval), xdp_action_str(sc->expected_retval)); + return false; + } + + /* + * Compare output packet when it's deterministic. + * Skip for XDP_DROP (no output) and cold_lru (source IP poisoned). 
+ */ + if (sc->expected_retval != XDP_DROP && !sc->cold_lru) { + if (topts.data_size_out != expected_len[idx] || + memcmp(out, expected_buf[idx], expected_len[idx]) != 0) { + fprintf(stderr, " [%s] FAIL: output packet mismatch\n", sc->name); + print_hex_diff(sc->name, out, topts.data_size_out, expected_buf[idx], + expected_len[idx]); + return false; + } + } + + if (!validate_counters(idx)) + return false; + return true; +} + +static int find_scenario(const char *name) +{ + int i; + + for (i = 0; i < NUM_SCENARIOS; i++) { + if (strcmp(scenarios[i].name, name) == 0) + return i; + } + return -1; +} + +static void xdp_lb_validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumers\n"); + exit(1); + } + if (bpf_num_possible_cpus() > BENCH_NR_CPUS) { + fprintf(stderr, "too many CPUs (%d > %d), increase BENCH_NR_CPUS\n", + bpf_num_possible_cpus(), BENCH_NR_CPUS); + exit(1); + } +} + +static void xdp_lb_run_once(void *unused __always_unused) +{ + int idx = args.scenario; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = pkt_buf[idx], + .data_size_in = pkt_len[idx], + .repeat = 1, + ); + + bpf_prog_test_run_opts(ctx.prog_fd, &topts); +} + +static void xdp_lb_setup(void) +{ + struct xdp_lb_bench *skel; + int err; + + if (args.scenario < 0) { + fprintf(stderr, "--scenario is required. 
Use --list-scenarios to see options.\n"); + exit(1); + } + + setup_libbpf(); + + skel = xdp_lb_bench__open(); + if (!skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + err = xdp_lb_bench__load(skel); + if (err) { + fprintf(stderr, "failed to load skeleton: %s\n", strerror(-err)); + xdp_lb_bench__destroy(skel); + exit(1); + } + + ctx.skel = skel; + ctx.prog_fd = bpf_program__fd(skel->progs.xdp_lb_bench); + + build_packet(args.scenario); + build_expected_packet(args.scenario); + + populate_maps(skel); + + BENCH_TIMING_INIT(&ctx.timing, skel, 0); + ctx.timing.machine_readable = args.machine_readable; + + if (scenarios[args.scenario].fixed_batch_iters) { + ctx.timing.batch_iters = scenarios[args.scenario].fixed_batch_iters; + skel->bss->batch_iters = ctx.timing.batch_iters; + } else { + bpf_bench_calibrate(&ctx.timing, xdp_lb_run_once, NULL); + } + + env.duration_sec = 600; + + /* + * Enable cold_lru before validation so LRU miss counters are + * correct. Seed the LRU with one run so the original flow is + * present; validation then sees exactly 1 miss (the poisoned + * flow) regardless of whether calibration ran. 
+ */ + if (scenarios[args.scenario].cold_lru) { + skel->bss->cold_lru = 1; + xdp_lb_run_once(NULL); + } + + reset_stats(bpf_map__fd(skel->maps.stats)); + + if (!validate_scenario(args.scenario)) { + fprintf(stderr, "Validation FAILED - aborting benchmark\n"); + exit(1); + } + + if (scenarios[args.scenario].flow_mask) + skel->bss->flow_mask = scenarios[args.scenario].flow_mask; +} + +static void *xdp_lb_producer(void *input) +{ + while (true) + xdp_lb_run_once(NULL); + + return NULL; +} + +static void xdp_lb_measure(struct bench_res *res) +{ + bpf_bench_timing_measure(&ctx.timing, res); +} + +static void xdp_lb_report_final(struct bench_res res[], int res_cnt) +{ + bpf_bench_timing_report(&ctx.timing, scenarios[args.scenario].name, + scenarios[args.scenario].description); +} + +enum { + ARG_SCENARIO = 9001, + ARG_LIST_SCENARIOS = 9002, + ARG_MACHINE_READABLE = 9003, +}; + +static const struct argp_option opts[] = { + { "scenario", ARG_SCENARIO, "NAME", 0, + "Scenario to benchmark (required)" }, + { "list-scenarios", ARG_LIST_SCENARIOS, NULL, 0, + "List available scenarios and exit" }, + { "machine-readable", ARG_MACHINE_READABLE, NULL, 0, + "Print only a machine-readable RESULT line" }, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + int i; + + switch (key) { + case ARG_SCENARIO: + args.scenario = find_scenario(arg); + if (args.scenario < 0) { + fprintf(stderr, "unknown scenario: '%s'\n", arg); + fprintf(stderr, "use --list-scenarios to see options\n"); + argp_usage(state); + } + break; + case ARG_LIST_SCENARIOS: + printf("Available scenarios:\n"); + for (i = 0; i < NUM_SCENARIOS; i++) + printf(" %-20s %s\n", scenarios[i].name, scenarios[i].description); + exit(0); + case ARG_MACHINE_READABLE: + args.machine_readable = true; + env.quiet = true; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_xdp_lb_argp = { + .options = opts, + .parser = parse_arg, +}; + +const struct bench 
bench_xdp_lb = { + .name = "xdp-lb", + .argp = &bench_xdp_lb_argp, + .validate = xdp_lb_validate, + .setup = xdp_lb_setup, + .producer_thread = xdp_lb_producer, + .measure = xdp_lb_measure, + .report_final = xdp_lb_report_final, +}; -- cgit v1.2.3 From 51312b6360a92e7bccd7b05b028ba2066b093305 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Mon, 27 Apr 2026 16:23:04 -0700 Subject: selftests/bpf: Add XDP load-balancer benchmark run script Add a convenience script that runs all 24 XDP load-balancer scenarios and formats the results as a table with median, stddev, and p99 columns. ./benchs/run_bench_xdp_lb.sh Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20260427232313.1582588-8-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/benchs/run_bench_xdp_lb.sh | 79 ++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_xdp_lb.sh (limited to 'tools') diff --git a/tools/testing/selftests/bpf/benchs/run_bench_xdp_lb.sh b/tools/testing/selftests/bpf/benchs/run_bench_xdp_lb.sh new file mode 100755 index 000000000000..f65cf46214a3 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_xdp_lb.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +WARMUP=${WARMUP:-3} + +RUN="sudo ./bench -q -w${WARMUP} -a xdp-lb --machine-readable" + +SEP=" +----------------------------------+----------+---------+----------+" +HDR=" | %-32s | %8s | %7s | %8s |\n" +ROW=" | %-32s | %8s | %7s | %8s |\n" + +function group_header() +{ + printf "%s\n" "$SEP" + printf "$HDR" "$1" "p50" "stddev" "p99" + printf "%s\n" "$SEP" +} + +function rval() +{ + echo "$1" | sed -nE "s/.*$2=([^ ]+).*/\1/p" +} + +function run_scenario() +{ + local sc="$1" + shift + local output rline + + output=$($RUN --scenario "$sc" "$@" 2>&1) || true + rline=$(echo "$output" | grep '^RESULT ' || true) + + if [ -z "$rline" ]; then + printf "$ROW" 
"$sc" "ERR" "-" "-" + return + fi + + printf "$ROW" "$sc" \ + "$(rval "$rline" median)" \ + "$(rval "$rline" stddev)" \ + "$(rval "$rline" p99)" +} + +header "XDP load-balancer benchmark" + +group_header "Single-flow baseline" +for sc in tcp-v4-lru-hit tcp-v4-ch \ + tcp-v6-lru-hit tcp-v6-ch \ + udp-v4-lru-hit udp-v6-lru-hit \ + tcp-v4v6-lru-hit; do + run_scenario "$sc" +done + +group_header "Diverse flows (4K src addrs)" +for sc in tcp-v4-lru-diverse tcp-v4-ch-diverse \ + tcp-v6-lru-diverse tcp-v6-ch-diverse \ + udp-v4-lru-diverse; do + run_scenario "$sc" +done + +group_header "TCP flags" +run_scenario tcp-v4-syn +run_scenario tcp-v4-rst-miss + +group_header "LRU stress" +run_scenario tcp-v4-lru-miss +run_scenario udp-v4-lru-miss +run_scenario tcp-v4-lru-warmup + +group_header "Early exits" +for sc in pass-v4-no-vip pass-v6-no-vip pass-v4-icmp pass-non-ip drop-v4-frag drop-v4-options \ + drop-v6-frag; do + run_scenario "$sc" +done +printf "%s\n" "$SEP" -- cgit v1.2.3 From 25bb05dd06ccffd209c26465f84851f1fd344c8c Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Fri, 8 May 2026 17:57:30 -0700 Subject: selftests/bpf: Use both hrtimer enqueue helpers in vmlinux test The vmlinux selftest triggers nanosleep and checks that both kprobe and fentry programs observe the hrtimer enqueue path. After the hrtimer_start_expires_user() conversion [1], nanosleep reaches hrtimer_start_range_ns_user() instead of hrtimer_start_range_ns(). Hard-coding either symbol makes the test fail either on bpf tree or on linux-next [2]. Update the test to resolve the target symbol at runtime via libbpf_find_vmlinux_btf_id(). This is a nice example of how to modify a BPF program to work on both older and newer kernel revision. 
[1] https://lore.kernel.org/all/20260408114952.062400833@kernel.org/ [2] https://github.com/kernel-patches/bpf/actions/runs/25485909958/job/74782902203 Signed-off-by: Ihor Solodrai Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20260509005730.250956-1-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/vmlinux.c | 45 ++++++++++++++++++++++-- tools/testing/selftests/bpf/progs/test_vmlinux.c | 4 +-- 2 files changed, 45 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c index 6fb2217d940b..b5fdd593910d 100644 --- a/tools/testing/selftests/bpf/prog_tests/vmlinux.c +++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c @@ -14,21 +14,61 @@ static void nsleep() (void)syscall(__NR_nanosleep, &ts, NULL); } +static const char *hrtimer_func = "hrtimer_start_range_ns"; + +static int setup_hrtimer_progs(struct test_vmlinux *skel) +{ + int err; + + if (libbpf_find_vmlinux_btf_id("hrtimer_start_range_ns_user", BPF_TRACE_FENTRY) > 0) + hrtimer_func = "hrtimer_start_range_ns_user"; + + err = bpf_program__set_attach_target(skel->progs.handle__fentry, 0, hrtimer_func); + if (err) + return err; + + /* + * Bare SEC("kprobe") has no target function, so attach it manually + * later after selecting the hrtimer function to probe. 
+ */ + bpf_program__set_autoattach(skel->progs.handle__kprobe, false); + + return 0; +} + void test_vmlinux(void) { int err; struct test_vmlinux* skel; struct test_vmlinux__bss *bss; + struct bpf_link *kprobe_link = NULL; - skel = test_vmlinux__open_and_load(); - if (!ASSERT_OK_PTR(skel, "test_vmlinux__open_and_load")) + skel = test_vmlinux__open(); + if (!ASSERT_OK_PTR(skel, "test_vmlinux__open")) return; + + err = setup_hrtimer_progs(skel); + if (!ASSERT_OK(err, "setup_hrtimer_progs")) + goto cleanup; + + err = test_vmlinux__load(skel); + if (!ASSERT_OK(err, "test_vmlinux__load")) + goto cleanup; + bss = skel->bss; err = test_vmlinux__attach(skel); if (!ASSERT_OK(err, "test_vmlinux__attach")) goto cleanup; + /* manually attach kprobe with the selected function */ + if (hrtimer_func) { + kprobe_link = bpf_program__attach_kprobe(skel->progs.handle__kprobe, + false /* retprobe */, hrtimer_func); + if (!ASSERT_OK_PTR(kprobe_link, "bpf_program__attach_kprobe")) + goto cleanup; + } + /* trigger everything */ nsleep(); @@ -39,5 +79,6 @@ void test_vmlinux(void) ASSERT_TRUE(bss->fentry_called, "fentry"); cleanup: + bpf_link__destroy(kprobe_link); test_vmlinux__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c index 78b23934d9f8..eea556940df6 100644 --- a/tools/testing/selftests/bpf/progs/test_vmlinux.c +++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c @@ -69,7 +69,7 @@ int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id) return 0; } -SEC("kprobe/hrtimer_start_range_ns") +SEC("kprobe") int BPF_KPROBE(handle__kprobe, struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode) { @@ -78,7 +78,7 @@ int BPF_KPROBE(handle__kprobe, struct hrtimer *timer, ktime_t tim, u64 delta_ns, return 0; } -SEC("fentry/hrtimer_start_range_ns") +SEC("fentry") int BPF_PROG(handle__fentry, struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode) { -- cgit v1.2.3 
From f28771c0691bcb7f477a0f35550b17b88c32dea8 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Tue, 12 May 2026 23:31:50 +0800 Subject: bpf: Extend BPF syscall with common attributes support Add generic BPF syscall support for passing common attributes. The initial set of common attributes includes: 1. 'log_buf': User-provided buffer for storing logs. 2. 'log_size': Size of the log buffer. 3. 'log_level': Log verbosity level. 4. 'log_true_size': Actual log size reported by kernel. The common-attribute pointer and its size are passed as the 4th and 5th syscall arguments. A new command bit, 'BPF_COMMON_ATTRS' ('1 << 16'), indicates that common attributes are supplied. This commit adds syscall and uapi plumbing. Command-specific handling is added in follow-up patches. Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20260512153157.28382-2-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/syscalls.h | 3 ++- include/uapi/linux/bpf.h | 8 ++++++++ kernel/bpf/syscall.c | 25 +++++++++++++++++++++---- tools/include/uapi/linux/bpf.h | 8 ++++++++ 4 files changed, 39 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f5639d5ac331..50055ab73649 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -936,7 +936,8 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, asmlinkage long sys_getrandom(char __user *buf, size_t count, unsigned int flags); asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); -asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size); +asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size, + struct bpf_common_attr __user *attr_common, unsigned int size_common); asmlinkage long sys_execveat(int dfd, const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp, int flags); diff --git 
a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 552bc5d9afbd..aec171ccb6ef 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -994,6 +994,7 @@ enum bpf_cmd { BPF_PROG_STREAM_READ_BY_FD, BPF_PROG_ASSOC_STRUCT_OPS, __MAX_BPF_CMD, + BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying syscall common attrs. */ }; enum bpf_map_type { @@ -1500,6 +1501,13 @@ struct bpf_stack_build_id { }; }; +struct bpf_common_attr { + __aligned_u64 log_buf; + __u32 log_size; + __u32 log_level; + __u32 log_true_size; +}; + #define BPF_OBJ_NAME_LEN 16U enum { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3b1f0ba02f61..354f6f471a08 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -6211,8 +6211,10 @@ put_prog: return ret; } -static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) +static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, + bpfptr_t uattr_common, unsigned int size_common) { + struct bpf_common_attr attr_common; union bpf_attr attr; int err; @@ -6226,6 +6228,20 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) if (copy_from_bpfptr(&attr, uattr, size) != 0) return -EFAULT; + memset(&attr_common, 0, sizeof(attr_common)); + if (cmd & BPF_COMMON_ATTRS) { + err = bpf_check_uarg_tail_zero(uattr_common, sizeof(attr_common), size_common); + if (err) + return err; + + cmd &= ~BPF_COMMON_ATTRS; + size_common = min_t(u32, size_common, sizeof(attr_common)); + if (copy_from_bpfptr(&attr_common, uattr_common, size_common) != 0) + return -EFAULT; + } else { + size_common = 0; + } + err = security_bpf(cmd, &attr, size, uattr.is_kernel); if (err < 0) return err; @@ -6361,9 +6377,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) return err; } -SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) +SYSCALL_DEFINE5(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size, + struct bpf_common_attr __user *, 
uattr_common, unsigned int, size_common) { - return __sys_bpf(cmd, USER_BPFPTR(uattr), size); + return __sys_bpf(cmd, USER_BPFPTR(uattr), size, USER_BPFPTR(uattr_common), size_common); } static bool syscall_prog_is_valid_access(int off, int size, @@ -6393,7 +6410,7 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size) default: return -EINVAL; } - return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size); + return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size, KERNEL_BPFPTR(NULL), 0); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 677be9a47347..37142e6d911a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -994,6 +994,7 @@ enum bpf_cmd { BPF_PROG_STREAM_READ_BY_FD, BPF_PROG_ASSOC_STRUCT_OPS, __MAX_BPF_CMD, + BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying syscall common attrs. */ }; enum bpf_map_type { @@ -1500,6 +1501,13 @@ struct bpf_stack_build_id { }; }; +struct bpf_common_attr { + __aligned_u64 log_buf; + __u32 log_size; + __u32 log_level; + __u32 log_true_size; +}; + #define BPF_OBJ_NAME_LEN 16U enum { -- cgit v1.2.3 From b1bff40809429bcf80c201255a2bcdf1c5eec06e Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Tue, 12 May 2026 23:31:51 +0800 Subject: libbpf: Add support for extended BPF syscall To support the extended BPF syscall introduced in the previous commit, introduce the following internal APIs: * 'sys_bpf_ext()' * 'sys_bpf_ext_fd()' They wrap the raw 'syscall()' interface to support passing extended attributes. * 'probe_sys_bpf_ext()' Check whether current kernel supports the BPF syscall common attributes. 
Acked-by: Andrii Nakryiko Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20260512153157.28382-3-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 36 ++++++++++++++++++++++++++++++++++++ tools/lib/bpf/features.c | 8 ++++++++ tools/lib/bpf/libbpf_internal.h | 3 +++ 3 files changed, 47 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 5846de364209..9d8740761b7a 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -69,6 +69,42 @@ static inline __u64 ptr_to_u64(const void *ptr) return (__u64) (unsigned long) ptr; } +static inline int sys_bpf_ext(enum bpf_cmd cmd, union bpf_attr *attr, + unsigned int size, + struct bpf_common_attr *attr_common, + unsigned int size_common) +{ + cmd = attr_common ? (cmd | BPF_COMMON_ATTRS) : (cmd & ~BPF_COMMON_ATTRS); + return syscall(__NR_bpf, cmd, attr, size, attr_common, size_common); +} + +static inline int sys_bpf_ext_fd(enum bpf_cmd cmd, union bpf_attr *attr, + unsigned int size, + struct bpf_common_attr *attr_common, + unsigned int size_common) +{ + int fd; + + fd = sys_bpf_ext(cmd, attr, size, attr_common, size_common); + return ensure_good_fd(fd); +} + +int probe_sys_bpf_ext(void) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); + union bpf_attr attr; + int fd; + + memset(&attr, 0, attr_sz); + fd = syscall(__NR_bpf, BPF_PROG_LOAD | BPF_COMMON_ATTRS, &attr, attr_sz, NULL, + sizeof(struct bpf_common_attr)); + if (fd >= 0) { + close(fd); + return -EINVAL; + } + return errno == EFAULT ? 
1 : 0; +} + static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size) { diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index 4f19a0d79b0c..b7e388f99d0b 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -615,6 +615,11 @@ static int probe_kern_btf_layout(int token_fd) (char *)layout, token_fd)); } +static int probe_bpf_syscall_common_attrs(int token_fd) +{ + return probe_sys_bpf_ext(); +} + typedef int (*feature_probe_fn)(int /* token_fd */); static struct kern_feature_cache feature_cache; @@ -699,6 +704,9 @@ static struct kern_feature_desc { [FEAT_BTF_LAYOUT] = { "kernel supports BTF layout", probe_kern_btf_layout, }, + [FEAT_BPF_SYSCALL_COMMON_ATTRS] = { + "BPF syscall common attributes support", probe_bpf_syscall_common_attrs, + }, }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 3781c45b46d3..7d93c6c01d60 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -398,6 +398,8 @@ enum kern_feature_id { FEAT_UPROBE_SYSCALL, /* Kernel supports BTF layout information */ FEAT_BTF_LAYOUT, + /* Kernel supports BPF syscall common attributes */ + FEAT_BPF_SYSCALL_COMMON_ATTRS, __FEAT_CNT, }; @@ -768,4 +770,5 @@ int probe_fd(int fd); #define SHA256_DWORD_SIZE SHA256_DIGEST_LENGTH / sizeof(__u64) void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]); +int probe_sys_bpf_ext(void); #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ -- cgit v1.2.3 From 702259006f9303c8773f99a06d1b698f05f082ac Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Tue, 12 May 2026 23:31:56 +0800 Subject: libbpf: Add syscall common attributes support for map_create With the previous commit adding common attribute support for BPF_MAP_CREATE, users can now retrieve detailed error messages when map creation fails via the log_buf field. 
Introduce struct bpf_log_opts with the following fields: log_buf, log_size, log_level, and log_true_size. Extend bpf_map_create_opts with a new field log_opts, allowing users to capture and inspect log messages on map creation failures. Acked-by: Andrii Nakryiko Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20260512153157.28382-8-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 16 +++++++++++++++- tools/lib/bpf/bpf.h | 17 ++++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9d8740761b7a..483c02cf21d1 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -209,6 +209,9 @@ int bpf_map_create(enum bpf_map_type map_type, const struct bpf_map_create_opts *opts) { const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size); + const size_t attr_common_sz = sizeof(struct bpf_common_attr); + struct bpf_common_attr attr_common; + struct bpf_log_opts *log_opts; union bpf_attr attr; int fd; @@ -242,7 +245,18 @@ int bpf_map_create(enum bpf_map_type map_type, attr.excl_prog_hash = ptr_to_u64(OPTS_GET(opts, excl_prog_hash, NULL)); attr.excl_prog_hash_size = OPTS_GET(opts, excl_prog_hash_size, 0); - fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); + log_opts = OPTS_GET(opts, log_opts, NULL); + if (log_opts && feat_supported(NULL, FEAT_BPF_SYSCALL_COMMON_ATTRS)) { + memset(&attr_common, 0, attr_common_sz); + attr_common.log_buf = ptr_to_u64(OPTS_GET(log_opts, buf, NULL)); + attr_common.log_size = OPTS_GET(log_opts, size, 0); + attr_common.log_level = OPTS_GET(log_opts, level, 0); + fd = sys_bpf_ext_fd(BPF_MAP_CREATE, &attr, attr_sz, &attr_common, attr_common_sz); + OPTS_SET(log_opts, true_size, attr_common.log_true_size); + } else { + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); + OPTS_SET(log_opts, true_size, 0); + } return libbpf_err_errno(fd); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 
2c8e88ddb674..2312900a3263 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -37,6 +37,18 @@ extern "C" { LIBBPF_API int libbpf_set_memlock_rlim(size_t memlock_bytes); +struct bpf_log_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + + char *buf; + __u32 size; + __u32 level; + __u32 true_size; /* out parameter set by kernel */ + + size_t :0; +}; +#define bpf_log_opts__last_field true_size + struct bpf_map_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ @@ -57,9 +69,12 @@ struct bpf_map_create_opts { const void *excl_prog_hash; __u32 excl_prog_hash_size; + + struct bpf_log_opts *log_opts; + size_t :0; }; -#define bpf_map_create_opts__last_field excl_prog_hash_size +#define bpf_map_create_opts__last_field log_opts LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, -- cgit v1.2.3 From f675483cac1d762e11f134be1bbd80f876bf2e2f Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Tue, 12 May 2026 23:31:57 +0800 Subject: selftests/bpf: Add tests to verify map create failure log Add tests to verify that the kernel reports the expected error messages and correct log_true_size when map creation fails. 
Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20260512153157.28382-9-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/map_init.c | 166 ++++++++++++++++++++++ 1 file changed, 166 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c index 14a31109dd0e..5c61c8e37306 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_init.c +++ b/tools/testing/selftests/bpf/prog_tests/map_init.c @@ -212,3 +212,169 @@ void test_map_init(void) if (test__start_subtest("pcpu_lru_map_init")) test_pcpu_lru_map_init(); } + +static void test_map_create(enum bpf_map_type map_type, const char *map_name, + struct bpf_map_create_opts *opts, const char *exp_msg) +{ + const int key_size = 4, value_size = 4, max_entries = 1; + char log_buf[128]; + int fd; + LIBBPF_OPTS(bpf_log_opts, log_opts); + + log_buf[0] = '\0'; + log_opts.buf = log_buf; + log_opts.size = sizeof(log_buf); + log_opts.level = 1; + opts->log_opts = &log_opts; + fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, opts); + if (!ASSERT_LT(fd, 0, "bpf_map_create")) { + close(fd); + return; + } + + ASSERT_STREQ(log_buf, exp_msg, "log_buf"); + ASSERT_EQ(log_opts.true_size, strlen(exp_msg) + 1, "true_size"); +} + +static void test_map_create_array(struct bpf_map_create_opts *opts, const char *exp_msg) +{ + test_map_create(BPF_MAP_TYPE_ARRAY, "test_map_create", opts, exp_msg); +} + +static void test_invalid_vmlinux_value_type_id_struct_ops(void) +{ + const char *msg = "btf_vmlinux_value_type_id can only be used with struct_ops maps.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_vmlinux_value_type_id = 1, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_vmlinux_value_type_id_kv_type_id(void) +{ + const char *msg = "btf_vmlinux_value_type_id is mutually exclusive with btf_key_type_id and btf_value_type_id.\n"; + 
LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_vmlinux_value_type_id = 1, + .btf_key_type_id = 1, + ); + + test_map_create(BPF_MAP_TYPE_STRUCT_OPS, "test_map_create", &opts, msg); +} + +static void test_invalid_value_type_id(void) +{ + const char *msg = "Invalid btf_value_type_id.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_key_type_id = 1, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_map_extra(void) +{ + const char *msg = "Invalid map_extra.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_extra = 1, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_numa_node(void) +{ + const char *msg = "Invalid numa_node.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_NUMA_NODE, + .numa_node = 0xFF, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_map_type(void) +{ + const char *msg = "Invalid map_type.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts); + + test_map_create(__MAX_BPF_MAP_TYPE, "test_map_create", &opts, msg); +} + +static void test_invalid_token_fd(void) +{ + const char *msg = "Invalid map_token_fd.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_TOKEN_FD, + .token_fd = 0xFF, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_map_name(void) +{ + const char *msg = "Invalid map_name.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts); + + test_map_create(BPF_MAP_TYPE_ARRAY, "test-!@#", &opts, msg); +} + +static void test_invalid_btf_fd(void) +{ + const char *msg = "Invalid btf_fd.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_fd = -1, + .btf_key_type_id = 1, + .btf_value_type_id = 1, + ); + + test_map_create_array(&opts, msg); +} + +static void test_excl_prog_hash_size_1(void) +{ + const char *msg = "Invalid excl_prog_hash_size.\n"; + const char *hash = "DEADCODE"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .excl_prog_hash = hash, + ); + + test_map_create_array(&opts, msg); +} + +static void 
test_excl_prog_hash_size_2(void) +{ + const char *msg = "Invalid excl_prog_hash_size.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .excl_prog_hash_size = 1, + ); + + test_map_create_array(&opts, msg); +} + +void test_map_create_failure(void) +{ + if (test__start_subtest("invalid_vmlinux_value_type_id_struct_ops")) + test_invalid_vmlinux_value_type_id_struct_ops(); + if (test__start_subtest("invalid_vmlinux_value_type_id_kv_type_id")) + test_invalid_vmlinux_value_type_id_kv_type_id(); + if (test__start_subtest("invalid_value_type_id")) + test_invalid_value_type_id(); + if (test__start_subtest("invalid_map_extra")) + test_invalid_map_extra(); + if (test__start_subtest("invalid_numa_node")) + test_invalid_numa_node(); + if (test__start_subtest("invalid_map_type")) + test_invalid_map_type(); + if (test__start_subtest("invalid_token_fd")) + test_invalid_token_fd(); + if (test__start_subtest("invalid_map_name")) + test_invalid_map_name(); + if (test__start_subtest("invalid_btf_fd")) + test_invalid_btf_fd(); + if (test__start_subtest("invalid_excl_prog_hash_size_1")) + test_excl_prog_hash_size_1(); + if (test__start_subtest("invalid_excl_prog_hash_size_2")) + test_excl_prog_hash_size_2(); +} -- cgit v1.2.3 From 79e7ec00634e95e20217ba922906574041b9bbf0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 12 May 2026 21:51:27 -0700 Subject: selftests/bpf: Add tests for BPF function stack arguments Add selftests covering stack argument passing for both BPF-to-BPF subprog calls and kfunc calls with more than 5 arguments. All tests are guarded by __BPF_FEATURE_STACK_ARGUMENT and __TARGET_ARCH_x86. 
BPF-to-BPF subprog call tests (stack_arg.c): - Scalar stack args - Pointer stack args - Mixed pointer/scalar stack args - Nested calls - Dynptr stack arg - Two callees with different stack arg counts - Async callback Kfunc call tests (stack_arg_kfunc.c, with bpf_testmod kfuncs): - Scalar stack args - Pointer stack args - Mixed pointer/scalar stack args - Dynptr stack arg - Memory buffer + size pair - Iterator - Const string pointer - Timer pointer Acked-by: Puranjay Mohan Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20260513045127.2397187-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/stack_arg.c | 139 ++++++++++++ tools/testing/selftests/bpf/progs/stack_arg.c | 252 +++++++++++++++++++++ .../testing/selftests/bpf/progs/stack_arg_kfunc.c | 163 +++++++++++++ .../testing/selftests/bpf/test_kmods/bpf_testmod.c | 65 ++++++ .../selftests/bpf/test_kmods/bpf_testmod_kfunc.h | 20 +- 5 files changed, 638 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/stack_arg.c create mode 100644 tools/testing/selftests/bpf/progs/stack_arg.c create mode 100644 tools/testing/selftests/bpf/progs/stack_arg_kfunc.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/stack_arg.c b/tools/testing/selftests/bpf/prog_tests/stack_arg.c new file mode 100644 index 000000000000..d61bac33f809 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/stack_arg.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include "stack_arg.skel.h" +#include "stack_arg_kfunc.skel.h" + +static void run_subtest(struct bpf_program *prog, int expected) +{ + int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + + prog_fd = bpf_program__fd(prog); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, expected, "retval"); +} + +static void test_global_many(void) +{ + struct stack_arg *skel; + + skel = stack_arg__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + if (!skel->rodata->has_stack_arg) { + test__skip(); + goto out; + } + + if (!ASSERT_OK(stack_arg__load(skel), "load")) + goto out; + + run_subtest(skel->progs.test_global_many_args, 36); + +out: + stack_arg__destroy(skel); +} + +static void test_async_cb_many(void) +{ + struct stack_arg *skel; + + skel = stack_arg__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + if (!skel->rodata->has_stack_arg) { + test__skip(); + goto out; + } + + if (!ASSERT_OK(stack_arg__load(skel), "load")) + goto out; + + run_subtest(skel->progs.test_async_cb_many_args, 0); + + /* Wait for the timer callback to fire and verify the result. 
+ * 10+20+30+40+50+60+70+80 = 360 + */ + usleep(50); + ASSERT_EQ(skel->bss->timer_result, 360, "timer_result"); + +out: + stack_arg__destroy(skel); +} + +static void test_bpf2bpf(void) +{ + struct stack_arg *skel; + + skel = stack_arg__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + if (!skel->rodata->has_stack_arg) { + test__skip(); + goto out; + } + + if (!ASSERT_OK(stack_arg__load(skel), "load")) + goto out; + + run_subtest(skel->progs.test_bpf2bpf_ptr_stack_arg, 45); + run_subtest(skel->progs.test_bpf2bpf_mix_stack_args, 51); + run_subtest(skel->progs.test_bpf2bpf_nesting_stack_arg, 50); + run_subtest(skel->progs.test_bpf2bpf_dynptr_stack_arg, 69); + run_subtest(skel->progs.test_two_callees, 91); + +out: + stack_arg__destroy(skel); +} + +static void test_kfunc(void) +{ + struct stack_arg_kfunc *skel; + + skel = stack_arg_kfunc__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + if (!skel->rodata->has_stack_arg) { + test__skip(); + goto out; + } + + if (!ASSERT_OK(stack_arg_kfunc__load(skel), "load")) + goto out; + + run_subtest(skel->progs.test_stack_arg_scalar, 36); + run_subtest(skel->progs.test_stack_arg_ptr, 45); + run_subtest(skel->progs.test_stack_arg_mix, 51); + run_subtest(skel->progs.test_stack_arg_dynptr, 69); + run_subtest(skel->progs.test_stack_arg_mem, 151); + run_subtest(skel->progs.test_stack_arg_iter, 115); + run_subtest(skel->progs.test_stack_arg_const_str, 15); + run_subtest(skel->progs.test_stack_arg_timer, 15); + +out: + stack_arg_kfunc__destroy(skel); +} + +void test_stack_arg(void) +{ + if (test__start_subtest("global_many_args")) + test_global_many(); + if (test__start_subtest("async_cb_many_args")) + test_async_cb_many(); + if (test__start_subtest("bpf2bpf")) + test_bpf2bpf(); + if (test__start_subtest("kfunc")) + test_kfunc(); +} diff --git a/tools/testing/selftests/bpf/progs/stack_arg.c b/tools/testing/selftests/bpf/progs/stack_arg.c new file mode 100644 index 000000000000..ab6240b997c5 --- /dev/null +++ 
b/tools/testing/selftests/bpf/progs/stack_arg.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include "bpf_kfuncs.h" + +#define CLOCK_MONOTONIC 1 + +struct timer_elem { + struct bpf_timer timer; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct timer_elem); +} timer_map SEC(".maps"); + +int timer_result; + +#if defined(__TARGET_ARCH_x86) && defined(__BPF_FEATURE_STACK_ARGUMENT) + +const volatile bool has_stack_arg = true; + +__noinline static int static_func_many_args(int a, int b, int c, int d, + int e, int f, int g, int h) +{ + return a + b + c + d + e + f + g + h; +} + +__noinline int global_calls_many_args(int a, int b, int c) +{ + return static_func_many_args(a, b, c, 4, 5, 6, 7, 8); +} + +SEC("tc") +int test_global_many_args(void) +{ + return global_calls_many_args(1, 2, 3); +} + +struct test_data { + long x; + long y; +}; + +/* 1 + 2 + 3 + 4 + 5 + 10 + 20 = 45 */ +__noinline static long func_with_ptr_stack_arg(long a, long b, long c, long d, + long e, struct test_data *p) +{ + return a + b + c + d + e + p->x + p->y; +} + +__noinline long global_ptr_stack_arg(long a, long b, long c, long d, long e) +{ + struct test_data data = { .x = 10, .y = 20 }; + + return func_with_ptr_stack_arg(a, b, c, d, e, &data); +} + +SEC("tc") +int test_bpf2bpf_ptr_stack_arg(void) +{ + return global_ptr_stack_arg(1, 2, 3, 4, 5); +} + +/* 1 + 2 + 3 + 4 + 5 + 10 + 6 + 20 = 51 */ +__noinline static long func_with_mix_stack_args(long a, long b, long c, long d, + long e, struct test_data *p, + long f, struct test_data *q) +{ + return a + b + c + d + e + p->x + f + q->y; +} + +__noinline long global_mix_stack_args(long a, long b, long c, long d, long e) +{ + struct test_data p = { .x = 10 }; + struct test_data q = { .y = 20 }; + + return func_with_mix_stack_args(a, b, c, d, e, &p, e + 1, &q); +} + +SEC("tc") +int 
test_bpf2bpf_mix_stack_args(void) +{ + return global_mix_stack_args(1, 2, 3, 4, 5); +} + +/* + * Nesting test: func_outer calls func_inner, both with struct pointer + * as stack arg. + * + * func_inner: (a+1) + (b+1) + (c+1) + (d+1) + (e+1) + p->x + p->y + * = 2 + 3 + 4 + 5 + 6 + 10 + 20 = 50 + */ +__noinline static long func_inner_ptr(long a, long b, long c, long d, + long e, struct test_data *p) +{ + return a + b + c + d + e + p->x + p->y; +} + +__noinline static long func_outer_ptr(long a, long b, long c, long d, + long e, struct test_data *p) +{ + return func_inner_ptr(a + 1, b + 1, c + 1, d + 1, e + 1, p); +} + +__noinline long global_nesting_ptr(long a, long b, long c, long d, long e) +{ + struct test_data data = { .x = 10, .y = 20 }; + + return func_outer_ptr(a, b, c, d, e, &data); +} + +SEC("tc") +int test_bpf2bpf_nesting_stack_arg(void) +{ + return global_nesting_ptr(1, 2, 3, 4, 5); +} + +/* 1 + 2 + 3 + 4 + 5 + sizeof(pkt_v4) = 15 + 54 = 69 */ +__noinline static long func_with_dynptr(long a, long b, long c, long d, + long e, struct bpf_dynptr *ptr) +{ + return a + b + c + d + e + bpf_dynptr_size(ptr); +} + +__noinline long global_dynptr_stack_arg(void *ctx __arg_ctx, long a, long b, + long c, long d) +{ + struct bpf_dynptr ptr; + + bpf_dynptr_from_skb(ctx, 0, &ptr); + return func_with_dynptr(a, b, c, d, d + 1, &ptr); +} + +SEC("tc") +int test_bpf2bpf_dynptr_stack_arg(struct __sk_buff *skb) +{ + return global_dynptr_stack_arg(skb, 1, 2, 3, 4); +} + +/* foo1: a+b+c+d+e+f+g+h */ +__noinline static int foo1(int a, int b, int c, int d, + int e, int f, int g, int h) +{ + return a + b + c + d + e + f + g + h; +} + +/* foo2: a+b+c+d+e+f+g+h+i+j */ +__noinline static int foo2(int a, int b, int c, int d, int e, + int f, int g, int h, int i, int j) +{ + return a + b + c + d + e + f + g + h + i + j; +} + +/* global_two_callees calls foo1 (3 stack args) and foo2 (5 stack args). + * The outgoing stack arg area is sized for foo2 (the larger callee). 
+ * Stores for foo1 are a subset of the area used by foo2. + * Result: foo1(1,2,3,4,5,6,7,8) + foo2(1,2,3,4,5,6,7,8,9,10) = 36 + 55 = 91 + * + * Pass a-e through so the compiler can't constant-fold the stack args away. + */ +__noinline int global_two_callees(int a, int b, int c, int d, int e) +{ + int ret; + + ret = foo1(a, b, c, d, e, a + 5, a + 6, a + 7); + ret += foo2(a, b, c, d, e, a + 5, a + 6, a + 7, a + 8, a + 9); + return ret; +} + +SEC("tc") +int test_two_callees(void) +{ + return global_two_callees(1, 2, 3, 4, 5); +} + +static int timer_cb_many_args(void *map, int *key, struct bpf_timer *timer) +{ + timer_result = static_func_many_args(10, 20, 30, 40, 50, 60, 70, 80); + return 0; +} + +SEC("tc") +int test_async_cb_many_args(void) +{ + struct timer_elem *elem; + int key = 0; + + elem = bpf_map_lookup_elem(&timer_map, &key); + if (!elem) + return -1; + + bpf_timer_init(&elem->timer, &timer_map, CLOCK_MONOTONIC); + bpf_timer_set_callback(&elem->timer, timer_cb_many_args); + bpf_timer_start(&elem->timer, 1, 0); + return 0; +} + +#else + +const volatile bool has_stack_arg = false; + +SEC("tc") +int test_global_many_args(void) +{ + return 0; +} + +SEC("tc") +int test_bpf2bpf_ptr_stack_arg(void) +{ + return 0; +} + +SEC("tc") +int test_bpf2bpf_mix_stack_args(void) +{ + return 0; +} + +SEC("tc") +int test_bpf2bpf_nesting_stack_arg(void) +{ + return 0; +} + +SEC("tc") +int test_bpf2bpf_dynptr_stack_arg(struct __sk_buff *skb) +{ + return 0; +} + +SEC("tc") +int test_two_callees(void) +{ + return 0; +} + +SEC("tc") +int test_async_cb_many_args(void) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/stack_arg_kfunc.c b/tools/testing/selftests/bpf/progs/stack_arg_kfunc.c new file mode 100644 index 000000000000..fa9def876ea5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/stack_arg_kfunc.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. 
and affiliates. */ + +#include +#include +#include "bpf_kfuncs.h" +#include "../test_kmods/bpf_testmod_kfunc.h" + +#if defined(__TARGET_ARCH_x86) && defined(__BPF_FEATURE_STACK_ARGUMENT) + +const volatile bool has_stack_arg = true; + +struct bpf_iter_testmod_seq { + u64 :64; + u64 :64; +}; + +extern int bpf_iter_testmod_seq_new(struct bpf_iter_testmod_seq *it, s64 value, int cnt) __ksym; +extern void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it) __ksym; + +struct timer_map_value { + struct bpf_timer timer; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct timer_map_value); +} kfunc_timer_map SEC(".maps"); + +SEC("tc") +int test_stack_arg_scalar(struct __sk_buff *skb) +{ + return bpf_kfunc_call_stack_arg(1, 2, 3, 4, 5, 6, 7, 8); +} + +SEC("tc") +int test_stack_arg_ptr(struct __sk_buff *skb) +{ + struct prog_test_pass1 p = { .x0 = 10, .x1 = 20 }; + + return bpf_kfunc_call_stack_arg_ptr(1, 2, 3, 4, 5, &p); +} + +SEC("tc") +int test_stack_arg_mix(struct __sk_buff *skb) +{ + struct prog_test_pass1 p = { .x0 = 10 }; + struct prog_test_pass1 q = { .x1 = 20 }; + + return bpf_kfunc_call_stack_arg_mix(1, 2, 3, 4, 5, &p, 6, &q); +} + +/* 1 + 2 + 3 + 4 + 5 + sizeof(pkt_v4) = 15 + 54 = 69 */ +SEC("tc") +int test_stack_arg_dynptr(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + + bpf_dynptr_from_skb(skb, 0, &ptr); + return bpf_kfunc_call_stack_arg_dynptr(1, 2, 3, 4, 5, &ptr); +} + +/* 1 + 2 + 3 + 4 + 5 + (1 + 2 + ... 
+ 16) = 15 + 136 = 151 */ +SEC("tc") +int test_stack_arg_mem(struct __sk_buff *skb) +{ + char buf[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + + return bpf_kfunc_call_stack_arg_mem(1, 2, 3, 4, 5, buf, sizeof(buf)); +} + +/* 1 + 2 + 3 + 4 + 5 + 100 = 115 */ +SEC("tc") +int test_stack_arg_iter(struct __sk_buff *skb) +{ + struct bpf_iter_testmod_seq it; + u64 ret; + + bpf_iter_testmod_seq_new(&it, 100, 10); + ret = bpf_kfunc_call_stack_arg_iter(1, 2, 3, 4, 5, &it); + bpf_iter_testmod_seq_destroy(&it); + return ret; +} + +const char cstr[] = "hello"; + +/* 1 + 2 + 3 + 4 + 5 = 15 */ +SEC("tc") +int test_stack_arg_const_str(struct __sk_buff *skb) +{ + return bpf_kfunc_call_stack_arg_const_str(1, 2, 3, 4, 5, cstr); +} + +/* 1 + 2 + 3 + 4 + 5 = 15 */ +SEC("tc") +int test_stack_arg_timer(struct __sk_buff *skb) +{ + struct timer_map_value *val; + int key = 0; + + val = bpf_map_lookup_elem(&kfunc_timer_map, &key); + if (!val) + return 0; + return bpf_kfunc_call_stack_arg_timer(1, 2, 3, 4, 5, &val->timer); +} + +#else + +const volatile bool has_stack_arg = false; + +SEC("tc") +int test_stack_arg_scalar(struct __sk_buff *skb) +{ + return 0; +} + +SEC("tc") +int test_stack_arg_ptr(struct __sk_buff *skb) +{ + return 0; +} + +SEC("tc") +int test_stack_arg_mix(struct __sk_buff *skb) +{ + return 0; +} + +SEC("tc") +int test_stack_arg_dynptr(struct __sk_buff *skb) +{ + return 0; +} + +SEC("tc") +int test_stack_arg_mem(struct __sk_buff *skb) +{ + return 0; +} + +SEC("tc") +int test_stack_arg_iter(struct __sk_buff *skb) +{ + return 0; +} + +SEC("tc") +int test_stack_arg_const_str(struct __sk_buff *skb) +{ + return 0; +} + +SEC("tc") +int test_stack_arg_timer(struct __sk_buff *skb) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index d876314a4d67..aef2f68b7e83 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c 
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -825,6 +825,63 @@ __bpf_kfunc int bpf_kfunc_call_test5(u8 a, u16 b, u32 c) return 0; } +__bpf_kfunc u64 bpf_kfunc_call_stack_arg(u64 a, u64 b, u64 c, u64 d, + u64 e, u64 f, u64 g, u64 h) +{ + return a + b + c + d + e + f + g + h; +} + +__bpf_kfunc u64 bpf_kfunc_call_stack_arg_ptr(u64 a, u64 b, u64 c, u64 d, u64 e, + struct prog_test_pass1 *p) +{ + return a + b + c + d + e + p->x0 + p->x1; +} + +__bpf_kfunc u64 bpf_kfunc_call_stack_arg_mix(u64 a, u64 b, u64 c, u64 d, u64 e, + struct prog_test_pass1 *p, u64 f, + struct prog_test_pass1 *q) +{ + return a + b + c + d + e + p->x0 + f + q->x1; +} + +__bpf_kfunc u64 bpf_kfunc_call_stack_arg_dynptr(u64 a, u64 b, u64 c, u64 d, u64 e, + struct bpf_dynptr *ptr) +{ + const struct bpf_dynptr_kern *kern_ptr = (void *)ptr; + + return a + b + c + d + e + (kern_ptr->size & 0xFFFFFF); +} + +__bpf_kfunc u64 bpf_kfunc_call_stack_arg_mem(u64 a, u64 b, u64 c, u64 d, u64 e, + void *mem, int mem__sz) +{ + const unsigned char *p = mem; + u64 sum = a + b + c + d + e; + int i; + + for (i = 0; i < mem__sz; i++) + sum += p[i]; + return sum; +} + +__bpf_kfunc u64 bpf_kfunc_call_stack_arg_iter(u64 a, u64 b, u64 c, u64 d, u64 e, + struct bpf_iter_testmod_seq *it__iter) +{ + return a + b + c + d + e + it__iter->value; +} + +__bpf_kfunc u64 bpf_kfunc_call_stack_arg_const_str(u64 a, u64 b, u64 c, u64 d, u64 e, + const char *str__str) +{ + return a + b + c + d + e; +} + +__bpf_kfunc u64 bpf_kfunc_call_stack_arg_timer(u64 a, u64 b, u64 c, u64 d, u64 e, + struct bpf_timer *timer) +{ + return a + b + c + d + e; +} + static struct prog_test_ref_kfunc prog_test_struct = { .a = 42, .b = 108, @@ -1288,6 +1345,14 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test2) BTF_ID_FLAGS(func, bpf_kfunc_call_test3) BTF_ID_FLAGS(func, bpf_kfunc_call_test4) BTF_ID_FLAGS(func, bpf_kfunc_call_test5) +BTF_ID_FLAGS(func, bpf_kfunc_call_stack_arg) +BTF_ID_FLAGS(func, bpf_kfunc_call_stack_arg_ptr) +BTF_ID_FLAGS(func, 
bpf_kfunc_call_stack_arg_mix) +BTF_ID_FLAGS(func, bpf_kfunc_call_stack_arg_dynptr) +BTF_ID_FLAGS(func, bpf_kfunc_call_stack_arg_mem) +BTF_ID_FLAGS(func, bpf_kfunc_call_stack_arg_iter) +BTF_ID_FLAGS(func, bpf_kfunc_call_stack_arg_const_str) +BTF_ID_FLAGS(func, bpf_kfunc_call_stack_arg_timer) BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h index aa0b8d41e71b..2c1cb118f886 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h @@ -26,6 +26,8 @@ struct prog_test_ref_kfunc { }; #endif +struct bpf_iter_testmod_seq; + struct prog_test_pass1 { int x0; struct { @@ -111,7 +113,23 @@ int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym; int bpf_kfunc_call_test5(__u8 a, __u16 b, __u32 c) __ksym; - +__u64 bpf_kfunc_call_stack_arg(__u64 a, __u64 b, __u64 c, __u64 d, + __u64 e, __u64 f, __u64 g, __u64 h) __ksym; +__u64 bpf_kfunc_call_stack_arg_ptr(__u64 a, __u64 b, __u64 c, __u64 d, __u64 e, + struct prog_test_pass1 *p) __ksym; +__u64 bpf_kfunc_call_stack_arg_mix(__u64 a, __u64 b, __u64 c, __u64 d, __u64 e, + struct prog_test_pass1 *p, __u64 f, + struct prog_test_pass1 *q) __ksym; +__u64 bpf_kfunc_call_stack_arg_dynptr(__u64 a, __u64 b, __u64 c, __u64 d, __u64 e, + struct bpf_dynptr *ptr) __ksym; +__u64 bpf_kfunc_call_stack_arg_mem(__u64 a, __u64 b, __u64 c, __u64 d, __u64 e, + void *mem, int mem__sz) __ksym; +__u64 bpf_kfunc_call_stack_arg_iter(__u64 a, __u64 b, __u64 c, __u64 d, __u64 e, + struct bpf_iter_testmod_seq *it__iter) __ksym; +__u64 bpf_kfunc_call_stack_arg_const_str(__u64 a, __u64 
b, __u64 c, __u64 d, __u64 e, + const char *str__str) __ksym; +__u64 bpf_kfunc_call_stack_arg_timer(__u64 a, __u64 b, __u64 c, __u64 d, __u64 e, + struct bpf_timer *timer) __ksym; void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; -- cgit v1.2.3 From 9f42204c62d51d666df0acb83af8d154c7580ace Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 12 May 2026 21:51:32 -0700 Subject: selftests/bpf: Add tests for stack argument validation Add negative tests that verify the kfunc (rejecting kfunc call with >8 byte struct as stack argument) and the verifier (rejecting invalid uses of r11 for stack arguments). Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20260513045132.2398371-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/stack_arg_fail.c | 10 ++ tools/testing/selftests/bpf/progs/stack_arg_fail.c | 114 +++++++++++++++++++++ .../testing/selftests/bpf/test_kmods/bpf_testmod.c | 7 ++ .../selftests/bpf/test_kmods/bpf_testmod_kfunc.h | 8 ++ 4 files changed, 139 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/stack_arg_fail.c create mode 100644 tools/testing/selftests/bpf/progs/stack_arg_fail.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/stack_arg_fail.c b/tools/testing/selftests/bpf/prog_tests/stack_arg_fail.c new file mode 100644 index 000000000000..090af1330953 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/stack_arg_fail.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include "stack_arg_fail.skel.h" + +void test_stack_arg_fail(void) +{ + RUN_TESTS(stack_arg_fail); +} diff --git a/tools/testing/selftests/bpf/progs/stack_arg_fail.c b/tools/testing/selftests/bpf/progs/stack_arg_fail.c new file mode 100644 index 000000000000..ad9d4bfe15dc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/stack_arg_fail.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include "../test_kmods/bpf_testmod_kfunc.h" +#include "bpf_misc.h" + +#if defined(__BPF_FEATURE_STACK_ARGUMENT) + +SEC("tc") +__failure __msg("Unrecognized *(R11-8) type STRUCT") +int test_stack_arg_big(struct __sk_buff *skb) +{ + struct prog_test_big_arg s = { .a = 1, .b = 2 }; + + return bpf_kfunc_call_stack_arg_big(1, 2, 3, 4, 5, s); +} + +SEC("socket") +__description("r11 in ALU instruction") +__failure __msg("R11 is invalid") +__naked void r11_alu_reject(void) +{ + asm volatile ( + "r11 += 1;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__description("r11 store with non-DW size") +__failure __msg("R11 is invalid") +__naked void r11_store_non_dw(void) +{ + asm volatile ( + "*(u32 *)(r11 - 8) = r1;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__description("r11 store with unaligned offset") +__failure __msg("R11 is invalid") +__naked void r11_store_unaligned(void) +{ + asm volatile ( + "*(u64 *)(r11 - 4) = r1;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__description("r11 store with positive offset") +__failure __msg("R11 is invalid") +__naked void r11_store_positive_off(void) +{ + asm volatile ( + "*(u64 *)(r11 + 8) = r1;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__description("r11 load with negative offset") +__failure __msg("R11 is invalid") +__naked void r11_load_negative_off(void) +{ + asm volatile ( + "r0 = *(u64 *)(r11 - 8);" + "exit;" + ::: __clobber_all); +} + +SEC("socket") 
+__description("r11 load with non-DW size") +__failure __msg("R11 is invalid") +__naked void r11_load_non_dw(void) +{ + asm volatile ( + "r0 = *(u32 *)(r11 + 8);" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__description("r11 store with zero offset") +__failure __msg("R11 is invalid") +__naked void r11_store_zero_off(void) +{ + asm volatile ( + "*(u64 *)(r11 + 0) = r1;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +#else + +SEC("tc") +__description("stack_arg_fail: not supported, dummy test") +__success +int test_stack_arg_big(struct __sk_buff *skb) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index aef2f68b7e83..0be918fe3021 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -882,6 +882,12 @@ __bpf_kfunc u64 bpf_kfunc_call_stack_arg_timer(u64 a, u64 b, u64 c, u64 d, u64 e return a + b + c + d + e; } +__bpf_kfunc u64 bpf_kfunc_call_stack_arg_big(u64 a, u64 b, u64 c, u64 d, u64 e, + struct prog_test_big_arg s) +{ + return a + b + c + d + e + s.a + s.b; +} + static struct prog_test_ref_kfunc prog_test_struct = { .a = 42, .b = 108, @@ -1353,6 +1359,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_stack_arg_mem) BTF_ID_FLAGS(func, bpf_kfunc_call_stack_arg_iter) BTF_ID_FLAGS(func, bpf_kfunc_call_stack_arg_const_str) BTF_ID_FLAGS(func, bpf_kfunc_call_stack_arg_timer) +BTF_ID_FLAGS(func, bpf_kfunc_call_stack_arg_big) BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h index 2c1cb118f886..2edc36b66de9 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h +++ 
b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h @@ -50,6 +50,11 @@ struct prog_test_pass2 { } x; }; +struct prog_test_big_arg { + __u64 a; + __u64 b; +}; + struct prog_test_fail1 { void *p; int x; @@ -130,6 +135,9 @@ __u64 bpf_kfunc_call_stack_arg_const_str(__u64 a, __u64 b, __u64 c, __u64 d, __u const char *str__str) __ksym; __u64 bpf_kfunc_call_stack_arg_timer(__u64 a, __u64 b, __u64 c, __u64 d, __u64 e, struct bpf_timer *timer) __ksym; +__u64 bpf_kfunc_call_stack_arg_big(__u64 a, __u64 b, __u64 c, __u64 d, __u64 e, + struct prog_test_big_arg s) __ksym; + void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; -- cgit v1.2.3 From 5b31de88920b867edcbcd8d6d77b8be5b822b3dd Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 12 May 2026 21:51:38 -0700 Subject: selftests/bpf: Add BTF fixup for __naked subprog parameter names When __naked subprogs are used in verifier tests, clang drops parameter names from their BTF FUNC_PROTO entries. This prevents the verifier from resolving stack argument slots by name. Add a __btf_func_path(path) annotation that points to a separate BTF file containing properly-named FUNC entries. The test_loader matches FUNC entries by name, detects anonymous parameters, and replaces the FUNC_PROTO with a new one that carries parameter names from the custom file while preserving the original type IDs. The custom BTF file also serves as btf_custom_path for kfunc resolution when no separate btf_custom_path is specified. 
Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20260513045138.2398886-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/bpf_misc.h | 1 + tools/testing/selftests/bpf/test_loader.c | 136 ++++++++++++++++++++++++++- 2 files changed, 136 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index a0d7b15a24b1..9eeb5b0b63d6 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -152,6 +152,7 @@ #define __auxiliary __test_tag("test_auxiliary") #define __auxiliary_unpriv __test_tag("test_auxiliary_unpriv") #define __btf_path(path) __test_tag("test_btf_path=" path) +#define __btf_func_path(path) __test_tag("test_btf_func_path=" path) #define __arch(arch) __test_tag("test_arch=" arch) #define __arch_x86_64 __arch("X86_64") #define __arch_arm64 __arch("ARM64") diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index ee637809a1d4..abdb9e6e3713 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -63,6 +63,7 @@ struct test_spec { struct test_subspec priv; struct test_subspec unpriv; const char *btf_custom_path; + const char *btf_custom_func_path; int log_level; int prog_flags; int mode_mask; @@ -590,6 +591,8 @@ static int parse_test_spec(struct test_loader *tester, jit_on_next_line = true; } else if ((val = str_has_pfx(s, "test_btf_path="))) { spec->btf_custom_path = val; + } else if ((val = str_has_pfx(s, "test_btf_func_path="))) { + spec->btf_custom_func_path = val; } else if ((val = str_has_pfx(s, "test_caps_unpriv="))) { err = parse_caps(val, &spec->unpriv.caps, "test caps"); if (err) @@ -1175,6 +1178,123 @@ static int get_stream(int stream_id, int prog_fd, char *text, size_t text_sz) return ret; } +/* + * Fix up the program's BTF using BTF from a separate 
file. + * + * For __naked subprogs, clang drops parameter names from BTF. Find FUNC + * entries with anonymous parameters and replace their FUNC_PROTO with the + * properly-named version from the custom file. + */ +static int fixup_btf_from_path(struct bpf_object *obj, const char *path) +{ + struct btf *prog_btf, *custom_btf; + __u32 i, j, cnt, custom_cnt; + int err = 0; + + prog_btf = bpf_object__btf(obj); + if (!prog_btf) + return 0; + + custom_btf = btf__parse(path, NULL); + if (!ASSERT_OK_PTR(custom_btf, "parse_custom_btf")) + return -EINVAL; + + cnt = btf__type_cnt(prog_btf); + custom_cnt = btf__type_cnt(custom_btf); + + /* Fix up FUNC entries with anonymous params. + * Save all data from prog_btf BEFORE calling btf__add_*, + * since those calls may reallocate the BTF data buffer + * and invalidate any pointers obtained from btf__type_by_id. + */ + for (i = 1; i < cnt; i++) { + const struct btf_type *t = btf__type_by_id(prog_btf, i); + const struct btf_type *fp, *custom_t, *custom_fp; + const struct btf_param *params, *custom_params; + __u32 ret_type_id, vlen; + __u32 *prog_param_types = NULL; + const char *name; + int new_proto_id; + + if (!btf_is_func(t)) + continue; + + fp = btf__type_by_id(prog_btf, t->type); + if (!fp || !btf_is_func_proto(fp) || btf_vlen(fp) == 0) + continue; + + /* Check if any param is anonymous */ + params = btf_params(fp); + if (params[0].name_off != 0) + continue; + + /* Find matching FUNC by name in custom BTF */ + name = btf__name_by_offset(prog_btf, t->name_off); + if (!name) + continue; + + for (j = 1; j < custom_cnt; j++) { + const char *cname; + + custom_t = btf__type_by_id(custom_btf, j); + if (!btf_is_func(custom_t)) + continue; + cname = btf__name_by_offset(custom_btf, custom_t->name_off); + if (cname && strcmp(name, cname) == 0) + break; + } + if (j >= custom_cnt) + continue; + + custom_fp = btf__type_by_id(custom_btf, custom_t->type); + if (!custom_fp || !btf_is_func_proto(custom_fp)) + continue; + + vlen = btf_vlen(fp); 
+ if (vlen != btf_vlen(custom_fp)) + continue; + + /* Save data before btf__add_* calls invalidate pointers */ + ret_type_id = fp->type; + prog_param_types = malloc(vlen * sizeof(*prog_param_types)); + if (!prog_param_types) { + err = -ENOMEM; + break; + } + for (j = 0; j < vlen; j++) + prog_param_types[j] = params[j].type; + + /* Add a new FUNC_PROTO: param names from custom, types from prog */ + new_proto_id = btf__add_func_proto(prog_btf, ret_type_id); + if (new_proto_id < 0) { + err = new_proto_id; + free(prog_param_types); + break; + } + + custom_params = btf_params(custom_fp); + for (j = 0; j < vlen; j++) { + const char *pname; + + pname = btf__name_by_offset(custom_btf, custom_params[j].name_off); + err = btf__add_func_param(prog_btf, pname ?: "", prog_param_types[j]); + if (err) + break; + } + free(prog_param_types); + if (err) + break; + + /* Update the FUNC to point to the new FUNC_PROTO (re-fetch + * since btf__add_* may have reallocated the data buffer). + */ + ((struct btf_type *)btf__type_by_id(prog_btf, i))->type = new_proto_id; + } + + btf__free(custom_btf); + return err; +} + /* this function is forced noinline and has short generic name to look better * in test_progs output (in case of a failure) */ @@ -1231,13 +1351,27 @@ void run_subtest(struct test_loader *tester, } } - /* Implicitly reset to NULL if next test case doesn't specify */ + /* Implicitly reset to NULL if next test case doesn't specify. + * btf_custom_func_path also serves as btf_custom_path for kfunc resolution. 
+ */ open_opts->btf_custom_path = spec->btf_custom_path; + if (!open_opts->btf_custom_path) + open_opts->btf_custom_path = spec->btf_custom_func_path; tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, open_opts); if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */ goto subtest_cleanup; + /* Fix up __naked subprog BTF using a separate file with named params */ + if (spec->btf_custom_func_path) { + err = fixup_btf_from_path(tobj, spec->btf_custom_func_path); + if (err) { + PRINT_FAIL("failed to fixup BTF from %s: %d\n", + spec->btf_custom_func_path, err); + goto tobj_cleanup; + } + } + i = 0; bpf_object__for_each_program(tprog_iter, tobj) { spec_iter = &specs[i++]; -- cgit v1.2.3 From 00c3ac4292a6bc3039008cdb45bd423087acb98e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 12 May 2026 21:51:43 -0700 Subject: selftests/bpf: Add verifier tests for stack argument validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add inline-asm based verifier tests that exercise stack argument validation logic directly. 
Positive tests: - Subprog call with 6 args - Two sequential calls to different subprogs (6-arg and 7-arg) - Share an r11 store for both branches Negative tests — verifier rejection: - Read from uninitialized incoming stack arg slot - Gap in outgoing slots: only r11-16 written, r11-8 missing - Write at r11-80, exceeding max 7 stack args - Missing store on one branch with a shared store - First call has proper stack arguments; the second call tries to inherit them without storing its own, which is rejected - r11 load ordering issue Negative tests — pointer/ref tracking: - Pruning type mismatch: one branch stores PTR_TO_STACK, the other stores a scalar, callee dereferences — must not prune - Release invalidation: bpf_sk_release invalidates a socket pointer stored in a stack arg slot - Packet pointer invalidation: bpf_skb_pull_data invalidates a packet pointer stored in a stack arg slot - Null propagation: PTR_TO_MAP_VALUE_OR_NULL stored in stack arg slot, null branch attempts dereference via callee Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20260513045143.2399278-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/verifier.c | 4 + .../bpf/progs/btf__verifier_stack_arg_order.c | 40 ++ .../selftests/bpf/progs/verifier_stack_arg.c | 444 +++++++++++++++++++++ .../selftests/bpf/progs/verifier_stack_arg_order.c | 126 ++++++ 4 files changed, 614 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/btf__verifier_stack_arg_order.c create mode 100644 tools/testing/selftests/bpf/progs/verifier_stack_arg.c create mode 100644 tools/testing/selftests/bpf/progs/verifier_stack_arg_order.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index a96b25ebff23..ee3d929fac8a 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -91,6 +91,8 @@ #include
"verifier_sockmap_mutate.skel.h" #include "verifier_spill_fill.skel.h" #include "verifier_spin_lock.skel.h" +#include "verifier_stack_arg.skel.h" +#include "verifier_stack_arg_order.skel.h" #include "verifier_stack_ptr.skel.h" #include "verifier_store_release.skel.h" #include "verifier_subprog_precision.skel.h" @@ -238,6 +240,8 @@ void test_verifier_sock_addr(void) { RUN(verifier_sock_addr); } void test_verifier_sockmap_mutate(void) { RUN(verifier_sockmap_mutate); } void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); } void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); } +void test_verifier_stack_arg(void) { RUN(verifier_stack_arg); } +void test_verifier_stack_arg_order(void) { RUN(verifier_stack_arg_order); } void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); } void test_verifier_store_release(void) { RUN(verifier_store_release); } void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); } diff --git a/tools/testing/selftests/bpf/progs/btf__verifier_stack_arg_order.c b/tools/testing/selftests/bpf/progs/btf__verifier_stack_arg_order.c new file mode 100644 index 000000000000..83692570d5bc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__verifier_stack_arg_order.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ +#include +#include + +#if defined(__TARGET_ARCH_x86) && defined(__BPF_FEATURE_STACK_ARGUMENT) + +int subprog_bad_order_6args(int a, int b, int c, int d, int e, int f) +{ + return a + b + c + d + e + f; +} + +int subprog_call_before_load_6args(int a, int b, int c, int d, int e, int f) +{ + return a + b + c + d + e + f; +} + +int subprog_pruning_call_before_load_6args(int a, int b, int c, int d, int e, int f) +{ + return a + b + c + d + e + f; +} + +#else + +int subprog_bad_order_6args(void) +{ + return 0; +} + +int subprog_call_before_load_6args(void) +{ + return 0; +} + +int subprog_pruning_call_before_load_6args(void) +{ + return 0; +} + +#endif diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_arg.c b/tools/testing/selftests/bpf/progs/verifier_stack_arg.c new file mode 100644 index 000000000000..6587bf912bc0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_stack_arg.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, long long); + __type(value, long long); +} map_hash_8b SEC(".maps"); + +#if defined(__TARGET_ARCH_x86) && defined(__BPF_FEATURE_STACK_ARGUMENT) + +__noinline __used +static int subprog_6args(int a, int b, int c, int d, int e, int f) +{ + return a + b + c + d + e + f; +} + +__noinline __used +static int subprog_7args(int a, int b, int c, int d, int e, int f, int g) +{ + return a + b + c + d + e + f + g; +} + +__noinline __used +static long subprog_deref_arg6(long a, long b, long c, long d, long e, long *f) +{ + return *f; +} + +SEC("tc") +__description("stack_arg: subprog with 6 args") +__success __retval(21) +__naked void stack_arg_6args(void) +{ + asm volatile ( + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + "*(u64 *)(r11 - 8) = 6;" + "call subprog_6args;" + "exit;" + ::: __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: two subprogs with >5 args") +__success __retval(90) +__naked void stack_arg_two_subprogs(void) +{ + asm volatile ( + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + "*(u64 *)(r11 - 8) = 10;" + "call subprog_6args;" + "r6 = r0;" + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + "*(u64 *)(r11 - 16) = 30;" + "*(u64 *)(r11 - 8) = 20;" + "call subprog_7args;" + "r0 += r6;" + "exit;" + ::: __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: read from uninitialized stack arg slot") +__failure +__msg("invalid read from stack arg off 8 depth 0") +__naked void stack_arg_read_uninitialized(void) +{ + asm volatile ( + "r0 = *(u64 *)(r11 + 8);" + "r0 = 0;" + "exit;" + ::: __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: gap at offset -8, only wrote -16") +__failure +__msg("callee expects 7 args, stack arg1 is not initialized") +__naked void stack_arg_gap_at_minus8(void) +{ + asm volatile ( + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + 
"*(u64 *)(r11 - 16) = 30;" + "call subprog_7args;" + "exit;" + ::: __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: pruning with different stack arg types") +__failure +__flag(BPF_F_TEST_STATE_FREQ) +__msg("R{{[0-9]}} invalid mem access 'scalar'") +__naked void stack_arg_pruning_type_mismatch(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + /* local = 0 on program stack */ + "r7 = 0;" + "*(u64 *)(r10 - 8) = r7;" + /* Branch based on random value */ + "if r6 s> 3 goto l0_%=;" + /* Path 1: store stack pointer to outgoing arg6 */ + "r1 = r10;" + "r1 += -8;" + "*(u64 *)(r11 - 8) = r1;" + "goto l1_%=;" + "l0_%=:" + /* Path 2: store scalar to outgoing arg6 */ + "*(u64 *)(r11 - 8) = 42;" + "l1_%=:" + /* Call subprog that dereferences arg6 */ + "r1 = r6;" + "r2 = 0;" + "r3 = 0;" + "r4 = 0;" + "r5 = 0;" + "call subprog_deref_arg6;" + "exit;" + :: __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: release_reference invalidates stack arg slot") +__failure +__msg("R{{[0-9]}} !read_ok") +__naked void stack_arg_release_ref(void) +{ + asm volatile ( + "r6 = r1;" + /* struct bpf_sock_tuple tuple = {} */ + "r2 = 0;" + "*(u32 *)(r10 - 8) = r2;" + "*(u64 *)(r10 - 16) = r2;" + "*(u64 *)(r10 - 24) = r2;" + "*(u64 *)(r10 - 32) = r2;" + "*(u64 *)(r10 - 40) = r2;" + "*(u64 *)(r10 - 48) = r2;" + /* sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple), 0, 0) */ + "r1 = r6;" + "r2 = r10;" + "r2 += -48;" + "r3 = %[sizeof_bpf_sock_tuple];" + "r4 = 0;" + "r5 = 0;" + "call %[bpf_sk_lookup_tcp];" + /* r0 = sk (PTR_TO_SOCK_OR_NULL) */ + "if r0 == 0 goto l0_%=;" + /* Store sock ref to outgoing arg6 slot */ + "*(u64 *)(r11 - 8) = r0;" + /* Release the reference — invalidates the stack arg slot */ + "r1 = r0;" + "call %[bpf_sk_release];" + /* Call subprog that dereferences arg6 — should fail */ + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + "call subprog_deref_arg6;" + "l0_%=:" + "r0 = 0;" + "exit;" + : + : 
__imm(bpf_sk_lookup_tcp), + __imm(bpf_sk_release), + __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple)) + : __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: pkt pointer in stack arg slot invalidated after pull_data") +__failure +__msg("R{{[0-9]}} !read_ok") +__naked void stack_arg_stale_pkt_ptr(void) +{ + asm volatile ( + "r6 = r1;" + "r7 = *(u32 *)(r6 + %[__sk_buff_data]);" + "r8 = *(u32 *)(r6 + %[__sk_buff_data_end]);" + /* check pkt has at least 1 byte */ + "r0 = r7;" + "r0 += 8;" + "if r0 > r8 goto l0_%=;" + /* Store valid pkt pointer to outgoing arg6 slot */ + "*(u64 *)(r11 - 8) = r7;" + /* bpf_skb_pull_data invalidates all pkt pointers */ + "r1 = r6;" + "r2 = 0;" + "call %[bpf_skb_pull_data];" + /* Call subprog that dereferences arg6 — should fail */ + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + "call subprog_deref_arg6;" + "l0_%=:" + "r0 = 0;" + "exit;" + : + : __imm(bpf_skb_pull_data), + __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)), + __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: null propagation rejects deref on null branch") +__failure +__msg("R{{[0-9]}} invalid mem access 'scalar'") +__naked void stack_arg_null_propagation_fail(void) +{ + asm volatile ( + "r1 = 0;" + "*(u64 *)(r10 - 8) = r1;" + /* r0 = bpf_map_lookup_elem(&map_hash_8b, &key) */ + "r2 = r10;" + "r2 += -8;" + "r1 = %[map_hash_8b] ll;" + "call %[bpf_map_lookup_elem];" + /* Store PTR_TO_MAP_VALUE_OR_NULL to outgoing arg6 slot */ + "*(u64 *)(r11 - 8) = r0;" + /* null check on r0 */ + "if r0 != 0 goto l0_%=;" + /* + * On null branch, outgoing slot is SCALAR(0). + * Call subprog that dereferences arg6 — should fail. 
+ */ + "r1 = 0;" + "r2 = 0;" + "r3 = 0;" + "r4 = 0;" + "r5 = 0;" + "call subprog_deref_arg6;" + "l0_%=:" + "r0 = 0;" + "exit;" + : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: missing store on one branch") +__failure +__msg("callee expects 7 args, stack arg1 is not initialized") +__naked void stack_arg_missing_store_one_branch(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + /* Write arg7 (r11-16) before branch */ + "*(u64 *)(r11 - 16) = 20;" + "if r0 > 0 goto l0_%=;" + /* Path 1: write arg6 and call */ + "*(u64 *)(r11 - 8) = 10;" + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + "call subprog_7args;" + "goto l1_%=;" + "l0_%=:" + /* Path 2: missing arg6 store, call should fail */ + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + "call subprog_7args;" + "l1_%=:" + "r0 = 0;" + "exit;" + :: __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: share a store for both branches") +__success __retval(0) +__naked void stack_arg_shared_store(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + /* Write arg7 (r11-16) before branch */ + "*(u64 *)(r11 - 16) = 20;" + "if r0 > 0 goto l0_%=;" + /* Path 1: write arg6 and call */ + "*(u64 *)(r11 - 8) = 10;" + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + "call subprog_7args;" + "goto l1_%=;" + "l0_%=:" + /* Path 2: also write arg6 and call */ + "*(u64 *)(r11 - 8) = 30;" + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + "call subprog_7args;" + "l1_%=:" + "r0 = 0;" + "exit;" + :: __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: write beyond max outgoing depth") +__failure +__msg("stack arg write offset -80 exceeds max 7 stack args") +__naked void stack_arg_write_beyond_max(void) +{ + asm 
volatile ( + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + /* Write to offset -80, way beyond any callee's needs */ + "*(u64 *)(r11 - 80) = 99;" + "*(u64 *)(r11 - 16) = 20;" + "*(u64 *)(r11 - 8) = 10;" + "call subprog_7args;" + "r0 = 0;" + "exit;" + ::: __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: write unused stack arg slot") +__failure +__msg("func#0 writes 5 stack arg slots, but calls only require 2") +__naked void stack_arg_write_unused_slot(void) +{ + asm volatile ( + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + /* Write to offset -40, unused for the callee */ + "*(u64 *)(r11 - 40) = 99;" + "*(u64 *)(r11 - 16) = 20;" + "*(u64 *)(r11 - 8) = 10;" + "call subprog_7args;" + "r0 = 0;" + "exit;" + ::: __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: sequential calls reuse slots") +__failure +__msg("callee expects 7 args, stack arg1 is not initialized") +__naked void stack_arg_sequential_calls(void) +{ + asm volatile ( + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + "*(u64 *)(r11 - 8) = 6;" + "*(u64 *)(r11 - 16) = 7;" + "call subprog_7args;" + "r6 = r0;" + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + "call subprog_7args;" + "r0 += r6;" + "exit;" + ::: __clobber_all + ); +} + +#else + +SEC("socket") +__description("stack_arg is not supported by compiler or jit, use a dummy test") +__success +int dummy_test(void) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_arg_order.c b/tools/testing/selftests/bpf/progs/verifier_stack_arg_order.c new file mode 100644 index 000000000000..938f4a2f5482 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_stack_arg_order.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include "bpf_misc.h" + +#if defined(__TARGET_ARCH_x86) && defined(__BPF_FEATURE_STACK_ARGUMENT) + +__noinline __used __naked +static int subprog_bad_order_6args(int a, int b, int c, int d, int e, int f) +{ + asm volatile ( + "*(u64 *)(r11 - 8) = r1;" + "r0 = *(u64 *)(r11 + 8);" + "exit;" + ::: __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: r11 load after r11 store") +__failure +__msg("r11 load must be before any r11 store or call insn") +__btf_func_path("btf__verifier_stack_arg_order.bpf.o") +__naked void stack_arg_load_after_store(void) +{ + asm volatile ( + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + "*(u64 *)(r11 - 8) = 6;" + "call subprog_bad_order_6args;" + "exit;" + ::: __clobber_all + ); +} + +__noinline __used __naked +static int subprog_call_before_load_6args(int a, int b, int c, int d, int e, + int f) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r0 = *(u64 *)(r11 + 8);" + "exit;" + :: __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: r11 load after a call") +__failure +__msg("r11 load must be before any r11 store or call insn") +__btf_func_path("btf__verifier_stack_arg_order.bpf.o") +__naked void stack_arg_load_after_call(void) +{ + asm volatile ( + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + "*(u64 *)(r11 - 8) = 6;" + "call subprog_call_before_load_6args;" + "exit;" + ::: __clobber_all + ); +} + +__noinline __used __naked +static int subprog_pruning_call_before_load_6args(int a, int b, int c, int d, + int e, int f) +{ + asm volatile ( + "if r1 s> 0 goto l0_%=;" + "goto l1_%=;" + "l0_%=:" + "call %[bpf_get_prandom_u32];" + "l1_%=:" + "r0 = *(u64 *)(r11 + 8);" + "exit;" + :: __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: pruning keeps r11 load ordering") +__failure +__flag(BPF_F_TEST_STATE_FREQ) +__msg("r11 load must be before any r11 store or call insn") 
+__btf_func_path("btf__verifier_stack_arg_order.bpf.o") +__naked void stack_arg_pruning_load_after_call(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r1 = r0;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + "*(u64 *)(r11 - 8) = 6;" + "call subprog_pruning_call_before_load_6args;" + "exit;" + :: __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +#else + +SEC("socket") +__description("stack_arg order is not supported by compiler or jit, use a dummy test") +__success +int dummy_test(void) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 6e277efbb19dd1a536cbffd9ea5c049a427dc7cb Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 12 May 2026 21:51:48 -0700 Subject: selftests/bpf: Add precision backtracking test for stack arguments Add a test that verifies precision backtracking works correctly across BPF-to-BPF calls when stack arguments are involved. The test passes a size value as incoming stack arg (arg6) to a subprog, which forwards it as the mem__sz parameter (outgoing arg7) to bpf_kfunc_call_stack_arg_mem. The expected __msg annotations verify that precision propagates from the kfunc's mem__sz argument back through the subprog frame to the caller's outgoing stack arg store. A companion BTF file (btf__stack_arg_precision.c) provides named parameter BTF for the __naked subprog via __btf_func_path. 
Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20260513045148.2400087-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/stack_arg_precision.c | 10 ++ .../selftests/bpf/progs/btf__stack_arg_precision.c | 23 ++++ .../selftests/bpf/progs/stack_arg_precision.c | 134 +++++++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/stack_arg_precision.c create mode 100644 tools/testing/selftests/bpf/progs/btf__stack_arg_precision.c create mode 100644 tools/testing/selftests/bpf/progs/stack_arg_precision.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/stack_arg_precision.c b/tools/testing/selftests/bpf/prog_tests/stack_arg_precision.c new file mode 100644 index 000000000000..1ab041d66de3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/stack_arg_precision.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#include +#include "stack_arg_precision.skel.h" + +void test_stack_arg_precision(void) +{ + RUN_TESTS(stack_arg_precision); +} diff --git a/tools/testing/selftests/bpf/progs/btf__stack_arg_precision.c b/tools/testing/selftests/bpf/progs/btf__stack_arg_precision.c new file mode 100644 index 000000000000..296fddfe6804 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__stack_arg_precision.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ +#include +#include +#include "../test_kmods/bpf_testmod_kfunc.h" + +#if defined(__TARGET_ARCH_x86) && defined(__BPF_FEATURE_STACK_ARGUMENT) + +long subprog_call_mem_kfunc(long a, long b, long c, long d, long e, long size) +{ + char buf[8] = {}; + + return bpf_kfunc_call_stack_arg_mem(a, b, c, d, e, buf, size); +} + +#else + +long subprog_call_mem_kfunc(void) +{ + return 0; +} + +#endif diff --git a/tools/testing/selftests/bpf/progs/stack_arg_precision.c b/tools/testing/selftests/bpf/progs/stack_arg_precision.c new file mode 100644 index 000000000000..2a0a344c83ca --- /dev/null +++ b/tools/testing/selftests/bpf/progs/stack_arg_precision.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include "../test_kmods/bpf_testmod_kfunc.h" +#include "bpf_misc.h" + +#if defined(__TARGET_ARCH_x86) && defined(__BPF_FEATURE_STACK_ARGUMENT) + +/* Force kfunc extern BTF generation for inline asm call below. + * Uses its own SEC so it's not included as a .text subprog. + * The '?' prefix sets autoload=false so libbpf won't load it. + */ +SEC("?tc") +int __btf_kfunc_gen(struct __sk_buff *ctx) +{ + char buf[8] = {}; + + return bpf_kfunc_call_stack_arg_mem(0, 0, 0, 0, 0, buf, sizeof(buf)); +} + +/* + * Test precision backtracking across bpf-to-bpf call for kfunc stack arg. + * subprog_call_mem_kfunc receives a size as incoming stack arg (arg6) + * and forwards it as mem__sz (arg7) to bpf_kfunc_call_stack_arg_mem. 
+ */ +__naked __noinline __used +static long subprog_call_mem_kfunc(long a, long b, long c, long d, long e, long size) +{ + asm volatile ( + "r1 = *(u64 *)(r11 + 8);" /* r1 = incoming arg6 (size) */ + "r2 = 0x0807060504030201 ll;" /* r2 = buf contents */ + "*(u64 *)(r10 - 8) = r2;" /* store buf to stack */ + "r2 = r10;" + "r2 += -8;" /* r2 = &buf */ + "*(u64 *)(r11 - 8) = r2;" /* outgoing arg6 = buf */ + "*(u64 *)(r11 - 16) = r1;" /* outgoing arg7 = size */ + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + "call %[bpf_kfunc_call_stack_arg_mem];" + "exit;" + : + : __imm(bpf_kfunc_call_stack_arg_mem) + : __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: precision backtracking across bpf2bpf call for kfunc") +__success +__log_level(2) +__flag(BPF_F_TEST_STATE_FREQ) +__btf_func_path("btf__stack_arg_precision.bpf.o") +__msg("mark_precise: frame1: last_idx 26 first_idx 13 subseq_idx -1") +__msg("mark_precise: frame1: regs= stack= before 25: (b7) r5 = 5") +__msg("mark_precise: frame1: regs= stack= before 24: (b7) r4 = 4") +__msg("mark_precise: frame1: regs= stack= before 23: (b7) r3 = 3") +__msg("mark_precise: frame1: regs= stack= before 22: (b7) r2 = 2") +__msg("mark_precise: frame1: regs= stack= before 21: (b7) r1 = 1") +__msg("mark_precise: frame1: regs= stack= before 20: (7b) *(u64 *)(r11 -16) = r1") +__msg("mark_precise: frame1: regs=r1 stack= before 19: (7b) *(u64 *)(r11 -8) = r2") +__msg("mark_precise: frame1: regs=r1 stack= before 18: (07) r2 += -8") +__msg("mark_precise: frame1: regs=r1 stack= before 17: (bf) r2 = r10") +__msg("mark_precise: frame1: regs=r1 stack= before 16: (7b) *(u64 *)(r10 -8) = r2") +__msg("mark_precise: frame1: regs=r1 stack= before 14: (18) r2 = 0x807060504030201") +__msg("mark_precise: frame1: regs=r1 stack= before 13: (79) r1 = *(u64 *)(r11 +8)") +__msg("mark_precise: frame1: parent state regs= stack=: frame1: R10=fp0") +__msg("mark_precise: frame0: parent state regs= stack=: R10=fp0") +__msg("mark_precise: frame1: 
last_idx 11 first_idx 11 subseq_idx 13") +__msg("mark_precise: frame1: regs= stack= before 11: (85) call pc+1") +__msg("mark_precise: frame0: parent state regs= stack=: R1=1 R2=2 R3=3 R4=4 R5=5 R10=fp0") +__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx 11") +__msg("mark_precise: frame0: regs= stack= before 9: (05) goto pc+1") +__msg("mark_precise: frame0: regs= stack= before 8: (7a) *(u64 *)(r11 -8) = 4") +__msg("mark_precise: frame1: last_idx 26 first_idx 13 subseq_idx -1 ") +__msg("mark_precise: frame1: regs= stack= before 25: (b7) r5 = 5") +__msg("mark_precise: frame1: regs= stack= before 24: (b7) r4 = 4") +__msg("mark_precise: frame1: regs= stack= before 23: (b7) r3 = 3") +__msg("mark_precise: frame1: regs= stack= before 22: (b7) r2 = 2") +__msg("mark_precise: frame1: regs= stack= before 21: (b7) r1 = 1") +__msg("mark_precise: frame1: regs= stack= before 20: (7b) *(u64 *)(r11 -16) = r1") +__msg("mark_precise: frame1: regs=r1 stack= before 19: (7b) *(u64 *)(r11 -8) = r2") +__msg("mark_precise: frame1: regs=r1 stack= before 18: (07) r2 += -8") +__msg("mark_precise: frame1: regs=r1 stack= before 17: (bf) r2 = r10") +__msg("mark_precise: frame1: regs=r1 stack= before 16: (7b) *(u64 *)(r10 -8) = r2") +__msg("mark_precise: frame1: regs=r1 stack= before 14: (18) r2 = 0x807060504030201") +__msg("mark_precise: frame1: regs=r1 stack= before 13: (79) r1 = *(u64 *)(r11 +8)") +__msg("mark_precise: frame1: parent state regs= stack=: frame1: R10=fp0") +__msg("mark_precise: frame0: parent state regs= stack=: R10=fp0") +__msg("mark_precise: frame1: last_idx 11 first_idx 11 subseq_idx 13 ") +__msg("mark_precise: frame1: regs= stack= before 11: (85) call pc+1") +__msg("mark_precise: frame0: parent state regs= stack=: R1=1 R2=2 R3=3 R4=4 R5=5 R10=fp0") +__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx 11 ") +__msg("mark_precise: frame0: regs= stack= before 10: (7a) *(u64 *)(r11 -8) = 6") +__naked void stack_arg_precision_bpf2bpf(void) +{ + asm 
volatile ( + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "r4 = 4;" + "r5 = 5;" + "if r6 < 2 goto l0_%=;" + "*(u64 *)(r11 - 8) = 4;" + "goto l1_%=;" + "l0_%=:" + "*(u64 *)(r11 - 8) = 6;" + "l1_%=:" + "call subprog_call_mem_kfunc;" + "exit;" + :: __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +#else + +SEC("socket") +__description("stack_arg_precision: not supported, dummy test") +__success +int dummy_test(void) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 68e5627579d788d9e992cc06a69760f20b6841d6 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Tue, 12 May 2026 21:51:53 -0700 Subject: bpf, arm64: Map BPF_REG_0 to x8 instead of x7 Move the BPF return value register from x7 to x8, freeing x7 for use as an argument register. AAPCS64 designates x8 as the indirect result location register; it is caller-saved and not used for argument passing, making it a suitable home for BPF_REG_0. This is a prerequisite for stack argument support, which needs x5-x7 to pass arguments 6-8 to native kfuncs following the AAPCS64 calling convention. 
Signed-off-by: Puranjay Mohan Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20260513045153.2402197-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- arch/arm64/net/bpf_jit_comp.c | 4 ++-- arch/arm64/net/bpf_timed_may_goto.S | 8 ++++---- tools/testing/selftests/bpf/progs/verifier_jit_inline.c | 2 +- tools/testing/selftests/bpf/progs/verifier_ldsx.c | 6 +++--- tools/testing/selftests/bpf/progs/verifier_private_stack.c | 10 +++++----- 5 files changed, 15 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 0816c40fc7af..085e650662e3 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -47,7 +47,7 @@ /* Map BPF registers to A64 registers */ static const int bpf2a64[] = { /* return value from in-kernel function, and exit value from eBPF */ - [BPF_REG_0] = A64_R(7), + [BPF_REG_0] = A64_R(8), /* arguments from eBPF program to in-kernel function */ [BPF_REG_1] = A64_R(0), [BPF_REG_2] = A64_R(1), @@ -1048,7 +1048,7 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic) /* Restore FP/LR registers */ emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); - /* Move the return value from bpf:r0 (aka x7) to x0 */ + /* Move the return value from bpf:r0 (aka x8) to x0 */ emit(A64_MOV(1, A64_R(0), r0), ctx); /* Authenticate lr */ diff --git a/arch/arm64/net/bpf_timed_may_goto.S b/arch/arm64/net/bpf_timed_may_goto.S index 894cfcd7b241..a9a802711a7f 100644 --- a/arch/arm64/net/bpf_timed_may_goto.S +++ b/arch/arm64/net/bpf_timed_may_goto.S @@ -8,8 +8,8 @@ SYM_FUNC_START(arch_bpf_timed_may_goto) stp x29, x30, [sp, #-64]! mov x29, sp - /* Save BPF registers R0 - R5 (x7, x0-x4)*/ - stp x7, x0, [sp, #16] + /* Save BPF registers R0 - R5 (x8, x0-x4)*/ + stp x8, x0, [sp, #16] stp x1, x2, [sp, #32] stp x3, x4, [sp, #48] @@ -28,8 +28,8 @@ SYM_FUNC_START(arch_bpf_timed_may_goto) /* BPF_REG_AX(x9) will be stored into count, so move return value to it. 
*/ mov x9, x0 - /* Restore BPF registers R0 - R5 (x7, x0-x4) */ - ldp x7, x0, [sp, #16] + /* Restore BPF registers R0 - R5 (x8, x0-x4) */ + ldp x8, x0, [sp, #16] ldp x1, x2, [sp, #32] ldp x3, x4, [sp, #48] diff --git a/tools/testing/selftests/bpf/progs/verifier_jit_inline.c b/tools/testing/selftests/bpf/progs/verifier_jit_inline.c index 4ea254063646..885ff69a3a62 100644 --- a/tools/testing/selftests/bpf/progs/verifier_jit_inline.c +++ b/tools/testing/selftests/bpf/progs/verifier_jit_inline.c @@ -9,7 +9,7 @@ __success __retval(0) __arch_x86_64 __jited(" addq %gs:{{.*}}, %rax") __arch_arm64 -__jited(" mrs x7, SP_EL0") +__jited(" mrs x8, SP_EL0") int inline_bpf_get_current_task(void) { bpf_get_current_task(); diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c index 1026524a1983..41340877dc9d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c @@ -274,11 +274,11 @@ __jited("movslq 0x10(%rdi,%r12), %r15") __jited("movswq 0x18(%rdi,%r12), %r15") __jited("movsbq 0x20(%rdi,%r12), %r15") __arch_arm64 -__jited("add x11, x7, x28") +__jited("add x11, x8, x28") __jited("ldrsw x21, [x11, #0x10]") -__jited("add x11, x7, x28") +__jited("add x11, x8, x28") __jited("ldrsh x21, [x11, #0x18]") -__jited("add x11, x7, x28") +__jited("add x11, x8, x28") __jited("ldrsb x21, [x11, #0x20]") __jited("add x11, x0, x28") __jited("ldrsw x22, [x11, #0x10]") diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c index 646e8ef82051..c5078face38d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_private_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c @@ -170,12 +170,12 @@ __jited(" mrs x10, TPIDR_EL{{[0-1]}}") __jited(" add x27, x27, x10") __jited(" add x25, x27, {{.*}}") __jited(" bl 0x{{.*}}") -__jited(" mov x7, x0") +__jited(" mov x8, x0") __jited(" mov x0, 
#0x2a") __jited(" str x0, [x27]") __jited(" bl 0x{{.*}}") -__jited(" mov x7, x0") -__jited(" mov x7, #0x0") +__jited(" mov x8, x0") +__jited(" mov x8, #0x0") __jited(" ldp x25, x27, [sp], {{.*}}") __naked void private_stack_callback(void) { @@ -220,7 +220,7 @@ __jited(" mov x0, #0x2a") __jited(" str x0, [x27]") __jited(" mov x0, #0x0") __jited(" bl 0x{{.*}}") -__jited(" mov x7, x0") +__jited(" mov x8, x0") __jited(" ldp x27, x28, [sp], #0x10") int private_stack_exception_main_prog(void) { @@ -258,7 +258,7 @@ __jited(" add x25, x27, {{.*}}") __jited(" mov x0, #0x2a") __jited(" str x0, [x27]") __jited(" bl 0x{{.*}}") -__jited(" mov x7, x0") +__jited(" mov x8, x0") __jited(" ldp x27, x28, [sp], #0x10") int private_stack_exception_sub_prog(void) { -- cgit v1.2.3 From 90e43f1b47535cc7aceef3add1a61ba3260b7aee Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Tue, 12 May 2026 21:52:04 -0700 Subject: selftests/bpf: Enable stack argument tests for arm64 Now that arm64 supports stack arguments, enable the existing stack_arg, stack_arg_kfunc and verifier_stack_arg tests for __TARGET_ARCH_arm64. 
Signed-off-by: Puranjay Mohan Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20260513045204.2403441-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/btf__stack_arg_precision.c | 3 ++- tools/testing/selftests/bpf/progs/btf__verifier_stack_arg_order.c | 3 ++- tools/testing/selftests/bpf/progs/stack_arg.c | 3 ++- tools/testing/selftests/bpf/progs/stack_arg_kfunc.c | 3 ++- tools/testing/selftests/bpf/progs/stack_arg_precision.c | 3 ++- tools/testing/selftests/bpf/progs/verifier_stack_arg.c | 3 ++- tools/testing/selftests/bpf/progs/verifier_stack_arg_order.c | 3 ++- 7 files changed, 14 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/btf__stack_arg_precision.c b/tools/testing/selftests/bpf/progs/btf__stack_arg_precision.c index 296fddfe6804..8d38aafe66a2 100644 --- a/tools/testing/selftests/bpf/progs/btf__stack_arg_precision.c +++ b/tools/testing/selftests/bpf/progs/btf__stack_arg_precision.c @@ -4,7 +4,8 @@ #include #include "../test_kmods/bpf_testmod_kfunc.h" -#if defined(__TARGET_ARCH_x86) && defined(__BPF_FEATURE_STACK_ARGUMENT) +#if (defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)) && \ + defined(__BPF_FEATURE_STACK_ARGUMENT) long subprog_call_mem_kfunc(long a, long b, long c, long d, long e, long size) { diff --git a/tools/testing/selftests/bpf/progs/btf__verifier_stack_arg_order.c b/tools/testing/selftests/bpf/progs/btf__verifier_stack_arg_order.c index 83692570d5bc..da34e8456b6c 100644 --- a/tools/testing/selftests/bpf/progs/btf__verifier_stack_arg_order.c +++ b/tools/testing/selftests/bpf/progs/btf__verifier_stack_arg_order.c @@ -3,7 +3,8 @@ #include #include -#if defined(__TARGET_ARCH_x86) && defined(__BPF_FEATURE_STACK_ARGUMENT) +#if (defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)) && \ + defined(__BPF_FEATURE_STACK_ARGUMENT) int subprog_bad_order_6args(int a, int b, int c, int d, int e, int f) { diff --git 
a/tools/testing/selftests/bpf/progs/stack_arg.c b/tools/testing/selftests/bpf/progs/stack_arg.c index ab6240b997c5..b5e9929a4d63 100644 --- a/tools/testing/selftests/bpf/progs/stack_arg.c +++ b/tools/testing/selftests/bpf/progs/stack_arg.c @@ -21,7 +21,8 @@ struct { int timer_result; -#if defined(__TARGET_ARCH_x86) && defined(__BPF_FEATURE_STACK_ARGUMENT) +#if (defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)) && \ + defined(__BPF_FEATURE_STACK_ARGUMENT) const volatile bool has_stack_arg = true; diff --git a/tools/testing/selftests/bpf/progs/stack_arg_kfunc.c b/tools/testing/selftests/bpf/progs/stack_arg_kfunc.c index fa9def876ea5..da0d4f91d273 100644 --- a/tools/testing/selftests/bpf/progs/stack_arg_kfunc.c +++ b/tools/testing/selftests/bpf/progs/stack_arg_kfunc.c @@ -6,7 +6,8 @@ #include "bpf_kfuncs.h" #include "../test_kmods/bpf_testmod_kfunc.h" -#if defined(__TARGET_ARCH_x86) && defined(__BPF_FEATURE_STACK_ARGUMENT) +#if (defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)) && \ + defined(__BPF_FEATURE_STACK_ARGUMENT) const volatile bool has_stack_arg = true; diff --git a/tools/testing/selftests/bpf/progs/stack_arg_precision.c b/tools/testing/selftests/bpf/progs/stack_arg_precision.c index 2a0a344c83ca..bee2eeec021d 100644 --- a/tools/testing/selftests/bpf/progs/stack_arg_precision.c +++ b/tools/testing/selftests/bpf/progs/stack_arg_precision.c @@ -6,7 +6,8 @@ #include "../test_kmods/bpf_testmod_kfunc.h" #include "bpf_misc.h" -#if defined(__TARGET_ARCH_x86) && defined(__BPF_FEATURE_STACK_ARGUMENT) +#if (defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)) && \ + defined(__BPF_FEATURE_STACK_ARGUMENT) /* Force kfunc extern BTF generation for inline asm call below. * Uses its own SEC so it's not included as a .text subprog. 
diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_arg.c b/tools/testing/selftests/bpf/progs/verifier_stack_arg.c index 6587bf912bc0..d43a9b42034c 100644 --- a/tools/testing/selftests/bpf/progs/verifier_stack_arg.c +++ b/tools/testing/selftests/bpf/progs/verifier_stack_arg.c @@ -12,7 +12,8 @@ struct { __type(value, long long); } map_hash_8b SEC(".maps"); -#if defined(__TARGET_ARCH_x86) && defined(__BPF_FEATURE_STACK_ARGUMENT) +#if (defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)) && \ + defined(__BPF_FEATURE_STACK_ARGUMENT) __noinline __used static int subprog_6args(int a, int b, int c, int d, int e, int f) diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_arg_order.c b/tools/testing/selftests/bpf/progs/verifier_stack_arg_order.c index 938f4a2f5482..1240cf8a40d6 100644 --- a/tools/testing/selftests/bpf/progs/verifier_stack_arg_order.c +++ b/tools/testing/selftests/bpf/progs/verifier_stack_arg_order.c @@ -5,7 +5,8 @@ #include #include "bpf_misc.h" -#if defined(__TARGET_ARCH_x86) && defined(__BPF_FEATURE_STACK_ARGUMENT) +#if (defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)) && \ + defined(__BPF_FEATURE_STACK_ARGUMENT) __noinline __used __naked static int subprog_bad_order_6args(int a, int b, int c, int d, int e, int f) -- cgit v1.2.3 From 74a9bb761a434ea3be1e0c59cd67b37217eb042c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 12 May 2026 22:08:07 -0700 Subject: libbpf: Use strscpy() in kernel code for skel_map_create() Linux has deprecated[1] strncpy(), and the use in skel_map_create() is best replaced with strscpy(). Since we still need to build this file in userspace, leave the strncpy() in place in that case. This is the last use of strncpy() in the kernel. 
Link: https://github.com/KSPP/linux/issues/90 [1] Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20260513050806.do.620-kees@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/skel_internal.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index 6a8f5c7a02eb..74503d358bc8 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -243,7 +243,12 @@ static inline int skel_map_create(enum bpf_map_type map_type, attr.excl_prog_hash = (unsigned long) excl_prog_hash; attr.excl_prog_hash_size = excl_prog_hash_sz; +#ifdef __KERNEL__ + if (strscpy(attr.map_name, map_name) < 0) + return -EINVAL; +#else strncpy(attr.map_name, map_name, sizeof(attr.map_name)); +#endif attr.key_size = key_size; attr.value_size = value_size; attr.max_entries = max_entries; -- cgit v1.2.3 From 2a5b22e87ba5aeb5cad8acb1c7d9866981c37d1b Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 13 May 2026 21:35:01 +0200 Subject: selftests/bpf: Test reported max stack depth This patch tests the maximum stack depth reporting in verifier logs, with a couple special cases covered: fastcall, private stacks (main subprog & callee), and rounding up to 16 bytes. For that last one, we need to skip the test when JIT compilation is disabled as the rounding is then to 32 bytes. 
Signed-off-by: Paul Chaignon Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/075d22efd4338385a92f13b7817025cc3f04ec60.1778700777.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c | 3 +-- .../testing/selftests/bpf/progs/verifier_private_stack.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c index 0d9e167555b5..8d7ff38e4c06 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c +++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c @@ -799,8 +799,7 @@ __naked int bpf_loop_interaction2(void) SEC("raw_tp") __arch_x86_64 -__log_level(4) -__msg("stack depth 512+0") +__log_level(4) __msg("stack depth 512+0 max 512") /* just to print xlated version when debugging */ __xlated("r0 = &(void __percpu *)(r0)") __success diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c index c5078face38d..046f7445a458 100644 --- a/tools/testing/selftests/bpf/progs/verifier_private_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c @@ -86,6 +86,7 @@ __naked static void cumulative_stack_depth_subprog(void) SEC("kprobe") __description("Private stack, subtree > MAX_BPF_STACK") __success +__log_level(4) __msg("stack depth 512+32 max 512") __arch_x86_64 /* private stack fp for the main prog */ __jited(" movabsq $0x{{.*}}, %r9") @@ -324,6 +325,8 @@ int private_stack_async_callback_1(void) SEC("fentry/bpf_fentry_test9") __description("Private stack, async callback, potential nesting") __success __retval(0) +__load_if_JITed() +__log_level(4) __msg("stack depth 8+0+256+0 max 272") __arch_x86_64 __jited(" subq $0x100, %rsp") __arch_arm64 @@ -344,6 +347,18 @@ int private_stack_async_callback_2(void) return 0; } 
+SEC("fentry/bpf_fentry_test9") +__description("private stack, max stack depth is private stack") +__success +__log_level(4) __msg("stack depth 8+256+0 max 256") +int private_stack_max_depth(void) +{ + int x = 0; + + subprog1(&x); + return 0; +} + #else SEC("kprobe") -- cgit v1.2.3 From f0015ffbf40c7c6db148163bd6f8c53f14933b53 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 13 May 2026 21:35:36 +0200 Subject: veristat: Report max stack depth This patch adds a new "Max stack depth" field to the set of gathered statistics. This field reports the maximum combined stack depth compared to the 512 bytes limit. It is null for rejected programs. Suggested-by: Eduard Zingerman Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/a27ed8f336669152c4b1b05e920aee4438e3e2b3.1778700777.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 5c82950e6978..a7db6f04f7e1 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -48,6 +48,7 @@ enum stat_id { SIZE, JITED_SIZE, STACK, + MAX_STACK, PROG_TYPE, ATTACH_TYPE, MEMORY_PEAK, @@ -789,13 +790,13 @@ cleanup: } static const struct stat_specs default_csv_output_spec = { - .spec_cnt = 15, + .spec_cnt = 16, .ids = { FILE_NAME, PROG_NAME, VERDICT, DURATION, TOTAL_INSNS, TOTAL_STATES, PEAK_STATES, MAX_STATES_PER_INSN, MARK_READ_MAX_LEN, SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE, - STACK, MEMORY_PEAK, + STACK, MAX_STACK, MEMORY_PEAK, }, }; @@ -834,6 +835,7 @@ static struct stat_def { [SIZE] = { "Program size", {"prog_size"}, }, [JITED_SIZE] = { "Jited size", {"prog_size_jited"}, }, [STACK] = {"Stack depth", {"stack_depth", "stack"}, }, + [MAX_STACK] = {"Max stack depth", {"max_stack_depth"}, }, [PROG_TYPE] = { "Program type", {"prog_type"}, }, 
[ATTACH_TYPE] = { "Attach type", {"attach_type", }, }, [MEMORY_PEAK] = { "Peak memory (MiB)", {"mem_peak", }, }, @@ -1023,7 +1025,7 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats * &s->stats[MARK_READ_MAX_LEN])) continue; - if (1 == sscanf(cur, "stack depth %511s", stack)) + if (2 == sscanf(cur, "stack depth %511s max %ld", stack, &s->stats[MAX_STACK])) continue; } while ((token = strtok_r(cnt++ ? NULL : stack, "+", &state))) { @@ -2278,6 +2280,7 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, case SIZE: case JITED_SIZE: case STACK: + case MAX_STACK: case VERDICT: case DURATION: case TOTAL_INSNS: @@ -2512,6 +2515,7 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id, case MAX_STATES_PER_INSN: case MARK_READ_MAX_LEN: case STACK: + case MAX_STACK: case SIZE: case JITED_SIZE: case MEMORY_PEAK: @@ -2602,7 +2606,8 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats case SIZE: case JITED_SIZE: case MEMORY_PEAK: - case STACK: { + case STACK: + case MAX_STACK: { long val; int err, n; -- cgit v1.2.3 From 9ef647114201b50b60a43054506af893f74ae8b8 Mon Sep 17 00:00:00 2001 From: Samuel Wu Date: Mon, 11 May 2026 10:45:57 -0700 Subject: selftests/bpf: Add tests for wakeup_sources kfuncs Introduce a set of BPF selftests to verify the safety and functionality of wakeup_source kfuncs. The suite includes: 1. A functional test (test_wakeup_source.c) that iterates over the global wakeup_sources list. It uses CO-RE to read timing statistics and validates them in user-space via the BPF ring buffer. 2. A negative test suite (wakeup_source_fail.c) ensuring the BPF verifier correctly enforces reference tracking and type safety. 3. Enable CONFIG_PM_WAKELOCKS in the test config, allowing creation of wakeup sources via /sys/power/wake_lock. 
A shared header (wakeup_source.h) is introduced to ensure consistent memory layout for the Ring Buffer data between BPF and user-space. Signed-off-by: Samuel Wu Link: https://lore.kernel.org/r/20260511174559.659782-3-wusamuel@google.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/config | 3 +- .../selftests/bpf/prog_tests/wakeup_source.c | 118 +++++++++++++++++++++ .../selftests/bpf/progs/test_wakeup_source.c | 92 ++++++++++++++++ tools/testing/selftests/bpf/progs/wakeup_source.h | 22 ++++ .../selftests/bpf/progs/wakeup_source_fail.c | 76 +++++++++++++ 5 files changed, 310 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/wakeup_source.c create mode 100644 tools/testing/selftests/bpf/progs/test_wakeup_source.c create mode 100644 tools/testing/selftests/bpf/progs/wakeup_source.h create mode 100644 tools/testing/selftests/bpf/progs/wakeup_source_fail.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 24855381290d..bac60b444551 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -130,4 +130,5 @@ CONFIG_INFINIBAND=y CONFIG_SMC=y CONFIG_SMC_HS_CTRL_BPF=y CONFIG_DIBS=y -CONFIG_DIBS_LO=y \ No newline at end of file +CONFIG_DIBS_LO=y +CONFIG_PM_WAKELOCKS=y diff --git a/tools/testing/selftests/bpf/prog_tests/wakeup_source.c b/tools/testing/selftests/bpf/prog_tests/wakeup_source.c new file mode 100644 index 000000000000..ebfdc03271b9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/wakeup_source.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2026 Google LLC */ + +#include +#include +#include +#include "test_wakeup_source.skel.h" +#include "wakeup_source_fail.skel.h" +#include "progs/wakeup_source.h" + +static int lock_ws(const char *name) +{ + int fd; + ssize_t bytes; + + fd = open("/sys/power/wake_lock", O_WRONLY); + if (!ASSERT_OK_FD(fd, "open /sys/power/wake_lock")) + return -1; 
+ + bytes = write(fd, name, strlen(name)); + close(fd); + if (!ASSERT_EQ(bytes, strlen(name), "write to wake_lock")) + return -1; + + return 0; +} + +static void unlock_ws(const char *name) +{ + int fd; + + fd = open("/sys/power/wake_unlock", O_WRONLY); + if (fd < 0) + return; + + write(fd, name, strlen(name)); + close(fd); +} + +struct rb_ctx { + const char *name; + bool found; + long long active_time_ns; + long long total_time_ns; +}; + +static int process_sample(void *ctx, void *data, size_t len) +{ + struct rb_ctx *rb_ctx = ctx; + struct wakeup_event_t *e = data; + + if (strcmp(e->name, rb_ctx->name) == 0) { + rb_ctx->found = true; + rb_ctx->active_time_ns = e->active_time_ns; + rb_ctx->total_time_ns = e->total_time_ns; + } + return 0; +} + +void test_wakeup_source(void) +{ + struct btf *btf; + int id; + + btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(btf, "btf_vmlinux")) + return; + + id = btf__find_by_name_kind(btf, "bpf_wakeup_sources_get_head", BTF_KIND_FUNC); + btf__free(btf); + + if (id < 0) { + printf("%s:SKIP:bpf_wakeup_sources_get_head kfunc not found in BTF\n", __func__); + test__skip(); + return; + } + + if (test__start_subtest("iterate_and_verify_times")) { + struct test_wakeup_source *skel; + struct ring_buffer *rb = NULL; + struct rb_ctx rb_ctx = { + .name = "bpf_selftest_ws_times", + .found = false, + }; + int err; + + skel = test_wakeup_source__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + rb = ring_buffer__new(bpf_map__fd(skel->maps.rb), process_sample, &rb_ctx, NULL); + if (!ASSERT_OK_PTR(rb, "ring_buffer__new")) + goto destroy; + + /* Create a temporary wakeup source */ + if (!ASSERT_OK(lock_ws(rb_ctx.name), "lock_ws")) + goto unlock; + + err = bpf_prog_test_run_opts(bpf_program__fd( + skel->progs.iterate_wakeupsources), NULL); + ASSERT_OK(err, "bpf_prog_test_run"); + + ring_buffer__consume(rb); + + ASSERT_TRUE(rb_ctx.found, "found_test_ws_in_rb"); + ASSERT_GT(rb_ctx.active_time_ns, 0, 
"active_time_gt_0"); + ASSERT_GT(rb_ctx.total_time_ns, 0, "total_time_gt_0"); + +unlock: + unlock_ws(rb_ctx.name); +destroy: + if (rb) + ring_buffer__free(rb); + test_wakeup_source__destroy(skel); + } + + RUN_TESTS(wakeup_source_fail); +} diff --git a/tools/testing/selftests/bpf/progs/test_wakeup_source.c b/tools/testing/selftests/bpf/progs/test_wakeup_source.c new file mode 100644 index 000000000000..fd2fb6aebd82 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_wakeup_source.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2026 Google LLC */ + +#include "vmlinux.h" +#include +#include +#include "bpf_experimental.h" +#include "bpf_misc.h" +#include "wakeup_source.h" + +#define MAX_LOOP_ITER 1000 +#define RB_SIZE (16384 * 4) + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, RB_SIZE); +} rb SEC(".maps"); + +struct bpf_ws_lock; +struct bpf_ws_lock *bpf_wakeup_sources_read_lock(void) __ksym; +void bpf_wakeup_sources_read_unlock(struct bpf_ws_lock *lock) __ksym; +void *bpf_wakeup_sources_get_head(void) __ksym; + +SEC("syscall") +__success __retval(0) +int iterate_wakeupsources(void *ctx) +{ + struct list_head *head = bpf_wakeup_sources_get_head(); + struct list_head *pos = head; + struct bpf_ws_lock *lock; + int i; + + lock = bpf_wakeup_sources_read_lock(); + if (!lock) + return 0; + + bpf_for(i, 0, MAX_LOOP_ITER) { + if (bpf_core_read(&pos, sizeof(pos), &pos->next) || !pos || pos == head) + break; + + struct wakeup_event_t *e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0); + + if (!e) + break; + + struct wakeup_source *ws = bpf_core_cast( + (void *)pos - bpf_core_field_offset(struct wakeup_source, entry), + struct wakeup_source); + s64 active_time = 0; + bool active = BPF_CORE_READ_BITFIELD(ws, active); + bool autosleep_enable = BPF_CORE_READ_BITFIELD(ws, autosleep_enabled); + s64 last_time = ws->last_time; + s64 max_time = ws->max_time; + s64 prevent_sleep_time = ws->prevent_sleep_time; + s64 total_time = ws->total_time; 
+ + if (active) { + s64 curr_time = bpf_ktime_get_ns(); + s64 prevent_time = ws->start_prevent_time; + + if (curr_time > last_time) + active_time = curr_time - last_time; + + total_time += active_time; + if (active_time > max_time) + max_time = active_time; + if (autosleep_enable && curr_time > prevent_time) + prevent_sleep_time += curr_time - prevent_time; + } + + e->active_count = ws->active_count; + e->active_time_ns = active_time; + e->event_count = ws->event_count; + e->expire_count = ws->expire_count; + e->last_time_ns = last_time; + e->max_time_ns = max_time; + e->prevent_sleep_time_ns = prevent_sleep_time; + e->total_time_ns = total_time; + e->wakeup_count = ws->wakeup_count; + + if (bpf_probe_read_kernel_str( + e->name, WAKEUP_NAME_LEN, ws->name) < 0) + e->name[0] = '\0'; + + bpf_ringbuf_submit(e, 0); + } + + bpf_wakeup_sources_read_unlock(lock); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/wakeup_source.h b/tools/testing/selftests/bpf/progs/wakeup_source.h new file mode 100644 index 000000000000..cd74de92c82f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/wakeup_source.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2026 Google LLC */ + +#ifndef __WAKEUP_SOURCE_H__ +#define __WAKEUP_SOURCE_H__ + +#define WAKEUP_NAME_LEN 128 + +struct wakeup_event_t { + unsigned long active_count; + long long active_time_ns; + unsigned long event_count; + unsigned long expire_count; + long long last_time_ns; + long long max_time_ns; + long long prevent_sleep_time_ns; + long long total_time_ns; + unsigned long wakeup_count; + char name[WAKEUP_NAME_LEN]; +}; + +#endif /* __WAKEUP_SOURCE_H__ */ diff --git a/tools/testing/selftests/bpf/progs/wakeup_source_fail.c b/tools/testing/selftests/bpf/progs/wakeup_source_fail.c new file mode 100644 index 000000000000..b8bbb61d4d4e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/wakeup_source_fail.c @@ -0,0 +1,76 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* Copyright 2026 Google LLC */ + +#include +#include +#include "bpf_misc.h" + +struct bpf_ws_lock; + +struct bpf_ws_lock *bpf_wakeup_sources_read_lock(void) __ksym; +void bpf_wakeup_sources_read_unlock(struct bpf_ws_lock *lock) __ksym; +void *bpf_wakeup_sources_get_head(void) __ksym; + +SEC("syscall") +__failure __msg("BPF_EXIT instruction in main prog would lead to reference leak") +int wakeup_source_lock_no_unlock(void *ctx) +{ + struct bpf_ws_lock *lock; + + lock = bpf_wakeup_sources_read_lock(); + if (!lock) + return 0; + + return 0; +} + +SEC("syscall") +__failure __msg("access beyond struct") +int wakeup_source_access_lock_fields(void *ctx) +{ + struct bpf_ws_lock *lock; + int val; + + lock = bpf_wakeup_sources_read_lock(); + if (!lock) + return 0; + + val = *(int *)lock; + + bpf_wakeup_sources_read_unlock(lock); + return val; +} + +SEC("syscall") +__failure __msg("type=scalar expected=fp") +int wakeup_source_unlock_no_lock(void *ctx) +{ + struct bpf_ws_lock *lock = (void *)0x1; + + bpf_wakeup_sources_read_unlock(lock); + + return 0; +} + +SEC("syscall") +__failure __msg("Possibly NULL pointer passed to trusted") +int wakeup_source_unlock_null(void *ctx) +{ + bpf_wakeup_sources_read_unlock(NULL); + + return 0; +} + +SEC("syscall") +__failure __msg("R0 invalid mem access 'scalar'") +int wakeup_source_unsafe_dereference(void *ctx) +{ + struct list_head *head = bpf_wakeup_sources_get_head(); + + if (head->next) + return 1; + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From ef1b54e0db671a161887475ef70cd570cbb2a6ab Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 15 May 2026 15:50:45 -0700 Subject: selftests/bpf: Add test for stack arg read without caller write Add negative tests for the outgoing stack arg validation. A static subprog with a 'long *' arg causes btf_prepare_func_args() to fail after setting arg_cnt. The validation ensures check_outgoing_stack_args() still runs. 
Also update two existing tests (release_ref, stale_pkt_ptr) whose expected error messages changed: invalidated stack arg slots are now caught by check_outgoing_stack_args() at the call site instead of at the callee's dereference. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20260515225045.822104-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- .../bpf/progs/btf__verifier_stack_arg_order.c | 8 +++ .../selftests/bpf/progs/verifier_stack_arg.c | 4 +- .../selftests/bpf/progs/verifier_stack_arg_order.c | 58 ++++++++++++++++++++++ 3 files changed, 68 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/btf__verifier_stack_arg_order.c b/tools/testing/selftests/bpf/progs/btf__verifier_stack_arg_order.c index da34e8456b6c..99bc115f8380 100644 --- a/tools/testing/selftests/bpf/progs/btf__verifier_stack_arg_order.c +++ b/tools/testing/selftests/bpf/progs/btf__verifier_stack_arg_order.c @@ -21,6 +21,10 @@ int subprog_pruning_call_before_load_6args(int a, int b, int c, int d, int e, in return a + b + c + d + e + f; } +void subprog_bad_ptr_7args(long *a, int b, int c, int d, int e, int f, int g) +{ +} + #else int subprog_bad_order_6args(void) @@ -38,4 +42,8 @@ int subprog_pruning_call_before_load_6args(void) return 0; } +void subprog_bad_ptr_7args(void) +{ +} + #endif diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_arg.c b/tools/testing/selftests/bpf/progs/verifier_stack_arg.c index d43a9b42034c..d45339b83795 100644 --- a/tools/testing/selftests/bpf/progs/verifier_stack_arg.c +++ b/tools/testing/selftests/bpf/progs/verifier_stack_arg.c @@ -152,7 +152,7 @@ __naked void stack_arg_pruning_type_mismatch(void) SEC("tc") __description("stack_arg: release_reference invalidates stack arg slot") __failure -__msg("R{{[0-9]}} !read_ok") +__msg("callee expects 6 args, stack arg1 is not initialized") __naked void stack_arg_release_ref(void) { asm volatile ( @@ -201,7 +201,7 @@ __naked void 
stack_arg_release_ref(void) SEC("tc") __description("stack_arg: pkt pointer in stack arg slot invalidated after pull_data") __failure -__msg("R{{[0-9]}} !read_ok") +__msg("callee expects 6 args, stack arg1 is not initialized") __naked void stack_arg_stale_pkt_ptr(void) { asm volatile ( diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_arg_order.c b/tools/testing/selftests/bpf/progs/verifier_stack_arg_order.c index 1240cf8a40d6..c9fe4857da3f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_stack_arg_order.c +++ b/tools/testing/selftests/bpf/progs/verifier_stack_arg_order.c @@ -112,6 +112,64 @@ __naked void stack_arg_pruning_load_after_call(void) ); } +/* + * "bad_ptr": the first arg is 'long *', which is not a recognized pointer + * type for static subprogs (not ctx, dynptr, or tagged). btf_prepare_func_args() + * sets arg_cnt = 7 / stack_arg_cnt = 2, then fails with -EINVAL. The subprog + * is marked unreliable but the call still proceeds for static subprogs. + */ +__noinline __used __naked +static void subprog_bad_ptr_7args(long *a, int b, int c, int d, int e, int f, int g) +{ + asm volatile ( + "r0 = *(u64 *)(r11 + 8);" + "r1 = *(u64 *)(r11 + 16);" + "exit;" + ::: __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: read without caller write") +__failure +__msg("callee expects 7 args, stack arg1 is not initialized") +__btf_func_path("btf__verifier_stack_arg_order.bpf.o") +__naked void stack_arg_read_without_write_1(void) +{ + asm volatile ( + "r1 = 0;" + "r2 = 0;" + "r3 = 0;" + "r4 = 0;" + "r5 = 0;" + "call subprog_bad_ptr_7args;" + "exit;" + ::: __clobber_all + ); +} + +SEC("tc") +__description("stack_arg: read with not-initialized caller write") +__failure +__msg("R0 !read_ok") +__btf_func_path("btf__verifier_stack_arg_order.bpf.o") +__naked void stack_arg_read_without_write_2(void) +{ + asm volatile ( + "r1 = 0;" + "r2 = 0;" + "r3 = 0;" + "r4 = 0;" + "r5 = 0;" + "*(u64 *)(r11 - 8) = 0;" + "*(u64 *)(r11 - 16) = 0;" + "call 
subprog_bad_ptr_7args;" + "call subprog_bad_ptr_7args;" + "exit;" + ::: __clobber_all + ); +} + #else SEC("socket") -- cgit v1.2.3 From 0e2647792f60df746422d6089daf9d56945d5f91 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 15 May 2026 15:50:51 -0700 Subject: selftests/bpf: Log arg_track_join for stack arg slots in liveness analysis Commit 2af4e792773f ("bpf: Extend liveness analysis to track stack argument slots") added stack arg support. For selftest verifier_stack_arg/stack_arg: pruning with different stack arg types the following are two arg JOIN messages: arg JOIN insn 9 -> 10 r1: fp0-8 + _ => fp0-8|fp0+0 arg JOIN insn 9 -> 10 r11: fp0-8 + _ => fp0-8|fp0+0 Here the "r11:" label for stack arg slot 0 is misleading since r11 is a special register (BPF_REG_PARAMS). The next patch corrects this to "sa0:", properly representing the 'stack arg slot 0'. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20260515225051.822739-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_stack_arg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_arg.c b/tools/testing/selftests/bpf/progs/verifier_stack_arg.c index d45339b83795..df0c3438529e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_stack_arg.c +++ b/tools/testing/selftests/bpf/progs/verifier_stack_arg.c @@ -114,8 +114,10 @@ __naked void stack_arg_gap_at_minus8(void) SEC("tc") __description("stack_arg: pruning with different stack arg types") -__failure +__failure __log_level(2) __flag(BPF_F_TEST_STATE_FREQ) +__msg("arg JOIN insn 9 -> 10 r1: fp0-8 + _ => fp0-8|fp0+0") +__msg("arg JOIN insn 9 -> 10 r11: fp0-8 + _ => fp0-8|fp0+0") __msg("R{{[0-9]}} invalid mem access 'scalar'") __naked void stack_arg_pruning_type_mismatch(void) { -- cgit v1.2.3 From d1dbe443a0abb4ea3ec35a16e36efe6d3bbf72f6 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 15 May 2026 
15:50:56 -0700 Subject: bpf: Fix arg_track_join log to use sa prefix for stack arg slots arg_track_join() logs state transitions at CFG merge points. For stack arg slots (r >= MAX_BPF_REG), it printed "r11:", "r12:", etc., which is misleading since r11 is a special register (BPF_REG_PARAMS) not meaningful to the user. Fix it to print "sa0:", "sa1:", etc., matching the per-instruction transition log in arg_track_log() which already uses the "sa" prefix. Update the existing stack_arg_pruning_type_mismatch selftest to expect the corrected format. Fixes: 2af4e792773f ("bpf: Extend liveness analysis to track stack argument slots") Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20260515225056.823086-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/liveness.c | 4 +++- tools/testing/selftests/bpf/progs/verifier_stack_arg.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c index 7f4a0e4c2c49..0aadfbae0acc 100644 --- a/kernel/bpf/liveness.c +++ b/kernel/bpf/liveness.c @@ -806,7 +806,9 @@ static bool arg_track_join(struct bpf_verifier_env *env, int idx, int target, in return true; verbose(env, "arg JOIN insn %d -> %d ", idx, target); - if (r >= 0) + if (r >= MAX_BPF_REG) + verbose(env, "sa%d: ", r - MAX_BPF_REG); + else if (r >= 0) verbose(env, "r%d: ", r); else verbose(env, "fp%+d: ", r * 8); diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_arg.c b/tools/testing/selftests/bpf/progs/verifier_stack_arg.c index df0c3438529e..7e0ce5db28a0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_stack_arg.c +++ b/tools/testing/selftests/bpf/progs/verifier_stack_arg.c @@ -117,7 +117,7 @@ __description("stack_arg: pruning with different stack arg types") __failure __log_level(2) __flag(BPF_F_TEST_STATE_FREQ) __msg("arg JOIN insn 9 -> 10 r1: fp0-8 + _ => fp0-8|fp0+0") -__msg("arg JOIN insn 9 -> 10 r11: fp0-8 + _ => fp0-8|fp0+0") +__msg("arg JOIN 
insn 9 -> 10 sa0: fp0-8 + _ => fp0-8|fp0+0") __msg("R{{[0-9]}} invalid mem access 'scalar'") __naked void stack_arg_pruning_type_mismatch(void) { -- cgit v1.2.3 From 576482b55c19e7ec00e162a0fde4c4f1a95128c7 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 17 May 2026 08:07:07 -0700 Subject: selftests/bpf: Add exception tests with stack arguments Add tests to verify that bpf_throw() correctly unwinds the stack when the program uses outgoing stack arguments (functions with >5 args). Without the preceding x86 fix, these tests crash the kernel on x86 due to corrupted callee-saved register restore. There is no change for arm64 to support exception with stack arguments. Acked-by: Kumar Kartikeya Dwivedi Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20260517150707.289273-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/exceptions.c | 7 ++ tools/testing/selftests/bpf/progs/exceptions.c | 114 +++++++++++++++++++++ 2 files changed, 121 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/exceptions.c b/tools/testing/selftests/bpf/prog_tests/exceptions.c index e8cbaf2a3e82..3588d6f97fd4 100644 --- a/tools/testing/selftests/bpf/prog_tests/exceptions.c +++ b/tools/testing/selftests/bpf/prog_tests/exceptions.c @@ -85,6 +85,13 @@ static void test_exceptions_success(void) RUN_SUCCESS(exception_bad_assert_range_with, 10); RUN_SUCCESS(exception_throw_from_void_global, 11); + if (skel->rodata->has_stack_arg) { + RUN_SUCCESS(exception_throw_stack_arg, 56); + RUN_SUCCESS(exception_throw_after_stack_arg, 56); + RUN_SUCCESS(exception_throw_subprog_stack_arg, 56); + RUN_SUCCESS(exception_throw_subprog_after_stack_arg, 56); + } + #define RUN_EXT(load_ret, attach_err, expr, msg, after_link) \ { \ LIBBPF_OPTS(bpf_object_open_opts, o, .kernel_log_buf = log_buf, \ diff --git a/tools/testing/selftests/bpf/progs/exceptions.c b/tools/testing/selftests/bpf/progs/exceptions.c index 
4206f59d7b86..c8d716fbd419 100644 --- a/tools/testing/selftests/bpf/progs/exceptions.c +++ b/tools/testing/selftests/bpf/progs/exceptions.c @@ -379,4 +379,118 @@ int exception_bad_assert_range_with(struct __sk_buff *ctx) return 1; } +#if (defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)) \ + && defined(__BPF_FEATURE_STACK_ARGUMENT) + +const volatile bool has_stack_arg = true; + +long arg1 = 1, arg2 = 2, arg3 = 3, arg4 = 4, arg5 = 5; +long arg6 = 6, arg7 = 7, arg8 = 8, arg9 = 9, arg10 = 10; + +__noinline static long throwing_many_args(long a, long b, long c, long d, + long e, long f, long g, long h, + long i, long j) +{ + bpf_throw(a + b + c + d + e + f + g + h + i + j); + return 0; +} + +__noinline int exception_cb_sa(u64 cookie) +{ + return cookie + 1; +} + +SEC("tc") +__exception_cb(exception_cb_sa) +int exception_throw_stack_arg(struct __sk_buff *ctx) +{ + throwing_many_args(arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9, arg10); + return 0; +} + +__noinline static long no_throw_many_args(long a, long b, long c, long d, + long e, long f, long g, long h, + long i, long j) +{ + return a + b + c + d + e + f + g + h + i + j; +} + +SEC("tc") +__exception_cb(exception_cb_sa) +int exception_throw_after_stack_arg(struct __sk_buff *ctx) +{ + long ret; + + ret = no_throw_many_args(arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9, arg10); + if (ret > 0) + bpf_throw(ret); + return 0; +} + +__noinline static long subprog_throw_sa(long val) +{ + throwing_many_args(val, val + 1, val + 2, val + 3, val + 4, + val + 5, val + 6, val + 7, val + 8, val + 9); + return 0; +} + +SEC("tc") +__exception_cb(exception_cb_sa) +int exception_throw_subprog_stack_arg(struct __sk_buff *ctx) +{ + subprog_throw_sa(arg1); + return 0; +} + +__noinline static long subprog_throw_after_sa(long val) +{ + long ret; + + ret = no_throw_many_args(val, val + 1, val + 2, val + 3, val + 4, + val + 5, val + 6, val + 7, val + 8, val + 9); + if (ret > 0) + bpf_throw(ret); + return 0; +} + 
+SEC("tc") +__exception_cb(exception_cb_sa) +int exception_throw_subprog_after_stack_arg(struct __sk_buff *ctx) +{ + subprog_throw_after_sa(arg1); + return 0; +} + +#else + +const volatile bool has_stack_arg = false; + +SEC("tc") +int exception_throw_stack_arg(struct __sk_buff *ctx) +{ + return 0; +} + +SEC("tc") +int exception_throw_after_stack_arg(struct __sk_buff *ctx) +{ + return 0; +} + +SEC("tc") +int exception_throw_subprog_stack_arg(struct __sk_buff *ctx) +{ + return 0; +} + +SEC("tc") +int exception_throw_subprog_after_stack_arg(struct __sk_buff *ctx) +{ + return 0; +} + +#endif + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From b4844cb6d1ecff732c99b70998749973c6f50591 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Mon, 18 May 2026 22:54:44 +0800 Subject: libbpf: Add OPTS_VALID() for log_opts in bpf_map_create There should be an OPTS_VALID() check for log_opts before extracting its fields. Without such an OPTS_VALID() check, if an application compiled against a future libbpf header passes a log_opts with new, non-zero fields to libbpf.so, those fields will be silently ignored. 
Fixes: 702259006f93 ("libbpf: Add syscall common attributes support for map_create") Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20260518145446.6794-4-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 483c02cf21d1..3cd705802330 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -246,6 +246,9 @@ int bpf_map_create(enum bpf_map_type map_type, attr.excl_prog_hash_size = OPTS_GET(opts, excl_prog_hash_size, 0); log_opts = OPTS_GET(opts, log_opts, NULL); + if (!OPTS_VALID(log_opts, bpf_log_opts)) + return libbpf_err(-EINVAL); + if (log_opts && feat_supported(NULL, FEAT_BPF_SYSCALL_COMMON_ATTRS)) { memset(&attr_common, 0, attr_common_sz); attr_common.log_buf = ptr_to_u64(OPTS_GET(log_opts, buf, NULL)); -- cgit v1.2.3 From 652f0c2c999d28d820bbe2e1aa16d8e0fea369ea Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Mon, 18 May 2026 22:54:45 +0800 Subject: selftests/bpf: Use -1 as token_fd in map create failure test Because 0xFF can be an open BPF token fd in the test runner that will fail test_invalid_token_fd(), change token_fd from 0xFF to -1 to avoid such test failure. 
Fixes: f675483cac1d ("selftests/bpf: Add tests to verify map create failure log") Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20260518145446.6794-5-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/map_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c index 5c61c8e37306..b0b902d5783d 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_init.c +++ b/tools/testing/selftests/bpf/prog_tests/map_init.c @@ -306,7 +306,7 @@ static void test_invalid_token_fd(void) const char *msg = "Invalid map_token_fd.\n"; LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_TOKEN_FD, - .token_fd = 0xFF, + .token_fd = -1, ); test_map_create_array(&opts, msg); -- cgit v1.2.3 From 7732ad2412fd402913976e490921f7e792a0a33b Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Mon, 18 May 2026 22:54:46 +0800 Subject: selftests/bpf: Add test to verify checking padding bytes for BPF syscall common attributes Add a test to verify that the 4 trailing padding bytes are checked in syscall.c::__sys_bpf() using bpf_check_uarg_tail_zero(). 
Without the fix, the test fails with: test_common_attr_padding:FAIL:syscall unexpected syscall: actual 4 >= expected 0 #213/12 map_create_failure/common_attr_padding:FAIL Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20260518145446.6794-6-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/map_init.c | 26 +++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c index b0b902d5783d..c804c3ce9be9 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_init.c +++ b/tools/testing/selftests/bpf/prog_tests/map_init.c @@ -353,6 +353,30 @@ static void test_excl_prog_hash_size_2(void) test_map_create_array(&opts, msg); } +static void test_common_attr_padding(void) +{ + struct bpf_common_attr_fake { + __u8 attrs[offsetofend(struct bpf_common_attr, log_true_size)]; + __u32 pad; + } attr_common = { + .pad = 1, + }; + union bpf_attr attr = { + .map_type = BPF_MAP_TYPE_ARRAY, + .key_size = 4, + .value_size = 4, + .max_entries = 1, + }; + int fd; + + fd = syscall(__NR_bpf, BPF_MAP_CREATE | BPF_COMMON_ATTRS, &attr, sizeof(attr), &attr_common, + sizeof(attr_common)); + if (!ASSERT_LT(fd, 0, "syscall")) + close(fd); + else + ASSERT_EQ(errno, E2BIG, "errno"); +} + void test_map_create_failure(void) { if (test__start_subtest("invalid_vmlinux_value_type_id_struct_ops")) @@ -377,4 +401,6 @@ void test_map_create_failure(void) test_excl_prog_hash_size_1(); if (test__start_subtest("invalid_excl_prog_hash_size_2")) test_excl_prog_hash_size_2(); + if (test__start_subtest("common_attr_padding")) + test_common_attr_padding(); } -- cgit v1.2.3 From 879daba303f7d7c3057f4d218921621e751f1912 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 13 May 2026 13:24:37 +0200 Subject: selftests/bpf: Override EXTRA_LDFLAGS for static builds When running vmtest.sh with static linking, the bpftool_map_access 
selftests fail. These selftests are calling the bpftool binary in tools/sbin/ directly, which results in the following error: error while loading shared libraries: libLLVM.so.21.1: cannot open shared object file: No such file or directory To fix this, we need to also build bpftool statically. That can be done by setting EXTRA_LDFLAGS=-static. Fixes: 2d96bbdfd3b5 ("selftests/bpf: convert test_bpftool_map_access.sh into test_progs framework") Signed-off-by: Paul Chaignon Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/r/714556da329c812988010ffe53173d9152570a78.1778669303.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 776fbe3cb8f9..37164322a102 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -77,7 +77,7 @@ In case of linker errors when running selftests, try using static linking: .. code-block:: console - $ LDLIBS=-static PKG_CONFIG='pkg-config --static' vmtest.sh + $ LDLIBS=-static EXTRA_LDFLAGS=-static PKG_CONFIG='pkg-config --static' vmtest.sh .. note:: Some distros may not support static linking. -- cgit v1.2.3 From 6df582112aa9ac9d190169abdb0e42e496659ec9 Mon Sep 17 00:00:00 2001 From: Roman Kvasnytskyi Date: Sat, 16 May 2026 14:06:25 +0200 Subject: selftests/bpf: Reject unsupported -k option in vmtest.sh vmtest.sh does not document a -k option and does not handle it in the getopts case statement. However, the getopts optstring includes k, which causes the script to accept -k silently instead of reporting it as an invalid option. Remove k from the optstring so unsupported options are rejected through the existing invalid-option path. 
Fixes: c9709f52386d ("bpf: Helper script for running BPF presubmit tests") Signed-off-by: Roman Kvasnytskyi Acked-by: Paul Chaignon Link: https://lore.kernel.org/r/20260516120625.80839-1-roman@kvasnytskyi.net Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/vmtest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index 2f869daf8a06..9ca802285393 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -382,7 +382,7 @@ main() local exit_command="poweroff -f" local debug_shell="no" - while getopts ':hskl:id:j:' opt; do + while getopts ':hsl:id:j:' opt; do case ${opt} in l) LOCAL_ROOTFS_IMAGE="$OPTARG" -- cgit v1.2.3 From 523d2f42b406f5be2989f436b03eacebf3679835 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 18 May 2026 18:26:35 +0200 Subject: selftests/bpf: Fix test for refinement of single-value tnum This patch fixes the "bounds refinement with single-value tnum on umin" verifier selftest. This selftest was introduced in commit e6ad477d1bf8 ("selftests/bpf: Test refinement of single-value tnum") to cover the logic from __update_reg64_bounds(), introduced in commit efc11a667878 ("bpf: Improve bounds when tnum has a single possible value"). However, the test still passes if that last commit is reverted. The test is supposed to cover the case when the tnum and u64 range (or cnum64 now) overlap in a single value. __update_reg64_bounds() detects that case and refines the bounds to a known constant. However, the constants for the test were poorly chosen and the bounds get refined to a known constant even without __update_reg64_bounds(). 
The code is as follows: 0: call bpf_get_prandom_u32#7 ; R0=scalar() 1: r0 |= 224 ; R0=scalar(umin=umin32=224,var_off=(0xe0; 0xffffffffffffff1f)) 2: r0 &= 240 ; R0=scalar(smin=umin=smin32=umin32=224,smax=umax=smax32=umax32=240,var_off=(0xe0; 0x10)) 3: if r0 == 0xf0 goto pc+2 ; R0=224 After instruction 3, we have u64=[0xe0; 0xef] and tnum=(0xe0; 0x10). __reg_bound_offset() is able to deduce a new tnum from the u64, tnum=(0xe0; 0x0f), which combined with the existing tnum gives us a constant: 0xe0 or 224. We can easily fix this by choosing different starting bounds. If we make it u64=[0xe1; 0xf0], then __reg_bound_offset() doesn't have any impact. Fixes: e6ad477d1bf8 ("selftests/bpf: Test refinement of single-value tnum") Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/be2dc2c3d85120286e60b3029b3338fff339f942.1779121582.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_bounds.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index a3e4c0945137..bc038ac2df98 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -1892,25 +1892,25 @@ __naked void bounds_refinement_tnum_umax(void *ctx) /* This test covers the bounds deduction when the u64 range and the tnum * overlap only at umin. After instruction 3, the ranges look as follows: * - * 0 umin=0xe00 umax=0xeff U64_MAX + * 0 umin=0xe1 umax=0xf0 U64_MAX * | [xxxxxxxxxxxxxx] | * |----------------------------|------------------------------| * | x x | tnum values * - * The verifier can therefore deduce that the R0=0xe0=224. + * The verifier can therefore deduce that the R0=0xe1=225. 
*/ SEC("socket") __description("bounds refinement with single-value tnum on umin") -__msg("3: (15) if r0 == 0xf0 {{.*}} R0=224") +__msg("3: (15) if r0 == 0xf1 {{.*}} R0=225") __success __log_level(2) __naked void bounds_refinement_tnum_umin(void *ctx) { asm volatile(" \ call %[bpf_get_prandom_u32]; \ - r0 |= 0xe0; \ - r0 &= 0xf0; \ - if r0 == 0xf0 goto +2; \ - if r0 == 0xe0 goto +1; \ + r0 |= 0xe1; \ + r0 &= 0xf1; \ + if r0 == 0xf1 goto +2; \ + if r0 == 0xe1 goto +1; \ r10 = 0; \ exit; \ " : -- cgit v1.2.3 From fa747e9f843ba3a0fa4d3fabaf50c9e11aaf963f Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Wed, 20 May 2026 06:33:30 -0700 Subject: selftests/bpf: Fix cold_lru producing zero batch_hash in XDP LB benchmark batch_hash = (batch_gen ^ cpu_id) * KNUTH_HASH_MULT; When batch_gen == cpu_id the XOR produces zero, batch_hash is zero, and *saddr ^= 0 is a no-op. Every iteration hits the warm LRU entry. During validation batch_gen is 2, so running on CPU 2 triggers: [udp-v4-lru-miss] COUNTER FAIL: LRU misses=0, expected 1 Replace XOR with addition so the multiplier input is always >= 1. This also preserves the per-CPU salt for multi-producer runs. 
Fixes: 4b4f2229104c ("selftests/bpf: Add XDP load-balancer BPF program") Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20260520133338.3392667-2-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/xdp_lb_bench.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/xdp_lb_bench.c b/tools/testing/selftests/bpf/progs/xdp_lb_bench.c index b9fd848c035d..13777b3dcac8 100644 --- a/tools/testing/selftests/bpf/progs/xdp_lb_bench.c +++ b/tools/testing/selftests/bpf/progs/xdp_lb_bench.c @@ -618,7 +618,7 @@ int xdp_lb_bench(struct xdp_md *xdp) __u32 *saddr = data + saddr_off; batch_gen++; - batch_hash = (batch_gen ^ bpf_get_smp_processor_id()) * KNUTH_HASH_MULT; + batch_hash = (batch_gen + bpf_get_smp_processor_id()) * KNUTH_HASH_MULT; if ((void *)(saddr + 1) <= data_end) *saddr ^= batch_hash; } -- cgit v1.2.3 From 12e896b9794bbd88f56aeac2a5807ae8d4bb5ad8 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Wed, 20 May 2026 06:33:31 -0700 Subject: selftests/bpf: Fix expired UDP LRU entries in XDP LB benchmark populate_lru() zero-initializes atime: struct real_pos_lru lru = { .pos = real_idx }; connection_table_lookup() treats UDP entries with cur_time - atime > 30s as expired, so every pre-populated entry expires immediately. Calibration masks this on the CPU it runs on, but if validation migrates to another CPU: [udp-v4-lru-hit] COUNTER FAIL: LRU misses=1, expected 0 Initialize atime from CLOCK_MONOTONIC for UDP flows. 
Fixes: a4b5ba8187cb ("selftests/bpf: Add XDP load-balancer benchmark driver") Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20260520133338.3392667-3-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/benchs/bench_xdp_lb.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/benchs/bench_xdp_lb.c b/tools/testing/selftests/bpf/benchs/bench_xdp_lb.c index 0b6709a2b03c..8e25bccbde92 100644 --- a/tools/testing/selftests/bpf/benchs/bench_xdp_lb.c +++ b/tools/testing/selftests/bpf/benchs/bench_xdp_lb.c @@ -563,12 +563,23 @@ static void create_per_cpu_lru_maps(struct xdp_lb_bench *skel) nr_inner_maps = nr_cpus; } +static __u64 ktime_get_ns(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + return (__u64)ts.tv_sec * 1000000000ULL + ts.tv_nsec; +} + static void populate_lru(const struct test_scenario *sc, __u32 real_idx) { struct real_pos_lru lru = { .pos = real_idx }; struct flow_key fk; int i, err; + if (sc->ip_proto == IPPROTO_UDP) + lru.atime = ktime_get_ns(); + build_flow_key(&fk, sc); /* Insert into every per-CPU inner LRU so the entry is found -- cgit v1.2.3 From abac8acb633a9448369d658889ac2bcfbd96f54b Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Wed, 20 May 2026 06:33:32 -0700 Subject: selftests/bpf: Filter timing outliers with IQR in batch-timing library System noise (timer interrupts, scheduling) can inflate the reported stddev. tcp-v4-syn showed stddev 37.86 ns without filtering vs 0.16 ns with filtering on the same run data. Filter samples outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] before computing statistics. Scenarios with genuinely wide distributions have large IQR so the fences stay wide and the filter has minimal effect. 
Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20260520133338.3392667-4-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/benchs/bench_bpf_timing.c | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_timing.c b/tools/testing/selftests/bpf/benchs/bench_bpf_timing.c index 75a39da69655..e02ad324f7bc 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bpf_timing.c +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_timing.c @@ -65,6 +65,31 @@ static int collect_samples(struct bpf_bench_timing *t, return total; } +static int filter_outliers_iqr(double *sorted, int n) +{ + double q1, q3, iqr, lo, hi; + int start = 0, end = n; + + if (n < 8) + return n; + + q1 = sorted[n / 4]; + q3 = sorted[3 * n / 4]; + iqr = q3 - q1; + lo = q1 - 1.5 * iqr; + hi = q3 + 1.5 * iqr; + + while (start < end && sorted[start] < lo) + start++; + while (end > start && sorted[end - 1] > hi) + end--; + + if (start > 0) + memmove(sorted, sorted + start, (end - start) * sizeof(double)); + + return end - start; +} + static void compute_stats(const double *sorted, int n, struct timing_stats *s) { @@ -150,6 +175,7 @@ void bpf_bench_timing_report(struct bpf_bench_timing *t, const char *name, const return; } + total = filter_outliers_iqr(all, total); compute_stats(all, total, &s); if (t->machine_readable) { -- cgit v1.2.3 From ba3dc064f4065471487a8cc93c47efda4fe358dd Mon Sep 17 00:00:00 2001 From: Kaitao Cheng Date: Thu, 21 May 2026 11:23:06 +0800 Subject: selftests/bpf: Add test cases for bpf_list_del/add/is_first/is_last/empty Extend refcounted_kptr with tests for bpf_list_add (including prev from bpf_list_front and bpf_refcount_acquire), bpf_list_del (including node from bpf_list_front, bpf_rbtree_remove and bpf_refcount_acquire), bpf_list_empty, bpf_list_is_first/last, and push_back on uninit head. 
To verify the validity of bpf_list_del/add, the test also expects the verifier to reject calls to bpf_list_del/add made without holding the spin_lock. Signed-off-by: Kaitao Cheng Link: https://lore.kernel.org/r/20260521032306.97118-9-kaitao.cheng@linux.dev Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/refcounted_kptr.c | 421 +++++++++++++++++++++ 1 file changed, 421 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c index c847398837cc..13de169ad68f 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c @@ -367,6 +367,427 @@ long insert_rbtree_and_stash__del_tree_##rem_tree(void *ctx) \ INSERT_STASH_READ(true, "insert_stash_read: remove from tree"); INSERT_STASH_READ(false, "insert_stash_read: don't remove from tree"); +SEC("tc") +__description("list_empty_test: list empty before add, non-empty after add") +__success __retval(0) +int list_empty_test(void *ctx) +{ + struct node_data *node_new; + + bpf_spin_lock(&lock); + if (!bpf_list_empty(&head)) { + bpf_spin_unlock(&lock); + return -1; + } + bpf_spin_unlock(&lock); + + node_new = bpf_obj_new(typeof(*node_new)); + if (!node_new) + return -2; + + bpf_spin_lock(&lock); + bpf_list_push_front(&head, &node_new->l); + + if (bpf_list_empty(&head)) { + bpf_spin_unlock(&lock); + return -3; + } + bpf_spin_unlock(&lock); + return 0; +} + +static struct node_data *__add_in_list(struct bpf_list_head *head, + struct bpf_spin_lock *lock) +{ + struct node_data *node_new, *node_ref; + + node_new = bpf_obj_new(typeof(*node_new)); + if (!node_new) + return NULL; + + node_ref = bpf_refcount_acquire(node_new); + + bpf_spin_lock(lock); + bpf_list_push_front(head, &node_new->l); + bpf_spin_unlock(lock); + return node_ref; +} + +SEC("tc") +__description("list_is_edge_test1: is_first on first node, is_last on last node") +__success __retval(0) +int 
list_is_edge_test1(void *ctx) +{ + struct node_data *node_first, *node_last; + int err = 0; + + node_last = __add_in_list(&head, &lock); + if (!node_last) + return -1; + + node_first = __add_in_list(&head, &lock); + if (!node_first) { + bpf_obj_drop(node_last); + return -2; + } + + bpf_spin_lock(&lock); + if (!bpf_list_is_first(&head, &node_first->l)) { + err = -3; + goto fail; + } + if (!bpf_list_is_last(&head, &node_last->l)) + err = -4; + +fail: + bpf_spin_unlock(&lock); + bpf_obj_drop(node_first); + bpf_obj_drop(node_last); + return err; +} + +SEC("tc") +__description("list_is_edge_test2: accept list_front/list_back return value") +__success __retval(0) +int list_is_edge_test2(void *ctx) +{ + struct bpf_list_node *front, *back; + struct node_data *a, *b; + long err = 0; + + a = __add_in_list(&head, &lock); + if (!a) + return -1; + + b = __add_in_list(&head, &lock); + if (!b) { + bpf_obj_drop(a); + return -2; + } + + bpf_spin_lock(&lock); + front = bpf_list_front(&head); + back = bpf_list_back(&head); + if (!front || !back) { + err = -3; + goto out_unlock; + } + + if (!bpf_list_is_first(&head, front) || bpf_list_is_last(&head, front)) { + err = -4; + goto out_unlock; + } + + if (!bpf_list_is_last(&head, back) || bpf_list_is_first(&head, back)) { + err = -5; + goto out_unlock; + } + +out_unlock: + bpf_spin_unlock(&lock); + bpf_obj_drop(a); + bpf_obj_drop(b); + return err; +} + +SEC("tc") +__description("list_is_edge_test3: single node is both first and last") +__success __retval(0) +int list_is_edge_test3(void *ctx) +{ + struct node_data *tmp; + struct bpf_list_node *node; + long err = 0; + + tmp = __add_in_list(&head, &lock); + if (!tmp) + return -1; + + bpf_spin_lock(&lock); + node = bpf_list_front(&head); + if (!node) { + bpf_spin_unlock(&lock); + bpf_obj_drop(tmp); + return -2; + } + + if (!bpf_list_is_first(&head, node) || !bpf_list_is_last(&head, node)) + err = -3; + bpf_spin_unlock(&lock); + + bpf_obj_drop(tmp); + return err; +} + +SEC("tc") 
+__description("list_del_test1: del returns removed nodes") +__success __retval(0) +int list_del_test1(void *ctx) +{ + struct node_data *node_first, *node_last; + struct bpf_list_node *bpf_node_first, *bpf_node_last; + int err = 0; + + node_last = __add_in_list(&head, &lock); + if (!node_last) + return -1; + + node_first = __add_in_list(&head, &lock); + if (!node_first) { + bpf_obj_drop(node_last); + return -2; + } + + bpf_spin_lock(&lock); + bpf_node_last = bpf_list_del(&head, &node_last->l); + bpf_node_first = bpf_list_del(&head, &node_first->l); + bpf_spin_unlock(&lock); + + if (bpf_node_first) + bpf_obj_drop(container_of(bpf_node_first, struct node_data, l)); + else + err = -3; + + if (bpf_node_last) + bpf_obj_drop(container_of(bpf_node_last, struct node_data, l)); + else + err = -4; + + bpf_obj_drop(node_first); + bpf_obj_drop(node_last); + return err; +} + +SEC("tc") +__description("list_del_test2: remove an arbitrary node from the list") +__success __retval(0) +int list_del_test2(void *ctx) +{ + struct bpf_rb_node *rb; + struct bpf_list_node *l; + struct node_data *n; + long err; + + err = __insert_in_tree_and_list(&head, &root, &lock); + if (err) + return err; + + bpf_spin_lock(&lock); + rb = bpf_rbtree_first(&root); + if (!rb) { + bpf_spin_unlock(&lock); + return -4; + } + + rb = bpf_rbtree_remove(&root, rb); + if (!rb) { + bpf_spin_unlock(&lock); + return -5; + } + + n = container_of(rb, struct node_data, r); + l = bpf_list_del(&head, &n->l); + bpf_spin_unlock(&lock); + bpf_obj_drop(n); + if (!l) + return -6; + + bpf_obj_drop(container_of(l, struct node_data, l)); + return 0; +} + +SEC("tc") +__description("list_del_test3: list_del accepts list_front return value as node") +__success __retval(0) +int list_del_test3(void *ctx) +{ + struct node_data *tmp; + struct bpf_list_node *bpf_node, *l; + long err = 0; + + tmp = __add_in_list(&head, &lock); + if (!tmp) + return -1; + + bpf_spin_lock(&lock); + bpf_node = bpf_list_front(&head); + if (!bpf_node) { + 
bpf_spin_unlock(&lock); + err = -2; + goto fail; + } + + l = bpf_list_del(&head, bpf_node); + bpf_spin_unlock(&lock); + if (!l) { + err = -3; + goto fail; + } + + bpf_obj_drop(container_of(l, struct node_data, l)); + bpf_obj_drop(tmp); + return 0; + +fail: + bpf_obj_drop(tmp); + return err; +} + +SEC("tc") +__description("list_add_test1: insert new node after prev") +__success __retval(0) +int list_add_test1(void *ctx) +{ + struct node_data *node_first; + struct node_data *new_node; + long err = 0; + + node_first = __add_in_list(&head, &lock); + if (!node_first) + return -1; + + new_node = bpf_obj_new(typeof(*new_node)); + if (!new_node) { + err = -2; + goto fail; + } + + bpf_spin_lock(&lock); + err = bpf_list_add(&head, &new_node->l, &node_first->l); + bpf_spin_unlock(&lock); + if (err) { + err = -3; + goto fail; + } + +fail: + bpf_obj_drop(node_first); + return err; +} + +SEC("tc") +__description("list_add_test2: list_add accepts list_front return value as prev") +__success __retval(0) +int list_add_test2(void *ctx) +{ + struct node_data *new_node, *tmp; + struct bpf_list_node *bpf_node; + long err = 0; + + tmp = __add_in_list(&head, &lock); + if (!tmp) + return -1; + + new_node = bpf_obj_new(typeof(*new_node)); + if (!new_node) { + err = -2; + goto fail; + } + + bpf_spin_lock(&lock); + bpf_node = bpf_list_front(&head); + if (!bpf_node) { + bpf_spin_unlock(&lock); + bpf_obj_drop(new_node); + err = -3; + goto fail; + } + + err = bpf_list_add(&head, &new_node->l, bpf_node); + bpf_spin_unlock(&lock); + if (err) { + err = -4; + goto fail; + } + +fail: + bpf_obj_drop(tmp); + return err; +} + +struct uninit_head_val { + struct bpf_spin_lock lock; + struct bpf_list_head head __contains(node_data, l); +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct uninit_head_val); + __uint(max_entries, 1); +} uninit_head_map SEC(".maps"); + +SEC("tc") +__description("list_push_back_uninit_head: push_back on 0-initialized list head") 
+__success __retval(0) +int list_push_back_uninit_head(void *ctx) +{ + struct uninit_head_val *st; + struct node_data *node; + int ret = -1, key = 0; + + st = bpf_map_lookup_elem(&uninit_head_map, &key); + if (!st) + return -1; + + node = bpf_obj_new(typeof(*node)); + if (!node) + return -1; + + bpf_spin_lock(&st->lock); + ret = bpf_list_push_back(&st->head, &node->l); + bpf_spin_unlock(&st->lock); + + return ret; +} + +SEC("?tc") +__failure __msg("bpf_spin_lock at off=32 must be held for bpf_list_head") +long list_del_without_lock_fail(void *ctx) +{ + struct node_data *n; + struct bpf_list_node *l; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return -1; + + /* Error case: delete list node without holding lock */ + l = bpf_list_del(&head, &n->l); + bpf_obj_drop(n); + if (!l) + return -2; + bpf_obj_drop(container_of(l, struct node_data, l)); + + return 0; +} + +SEC("?tc") +__failure __msg("bpf_spin_lock at off=32 must be held for bpf_list_head") +long list_add_without_lock_fail(void *ctx) +{ + struct node_data *n, *prev; + long err; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return -1; + + prev = bpf_obj_new(typeof(*prev)); + if (!prev) { + bpf_obj_drop(n); + return -1; + } + + /* Error case: add list node without holding lock */ + err = bpf_list_add(&head, &n->l, &prev->l); + bpf_obj_drop(prev); + if (err) + return -2; + + return 0; +} + SEC("tc") __success long rbtree_refcounted_node_ref_escapes(void *ctx) -- cgit v1.2.3 From fee9a38174f4c6454fb1fbaf2b9b5a1cca9070d0 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Fri, 22 May 2026 16:13:53 -0400 Subject: libbpf: Harden parse_vma_segs() path parsing parse_vma_segs() in tools/lib/bpf/usdt.c parses /proc/<pid>/maps with two widthless scansets, "%s" into mode[16] and "%[^\n]" into line[4096]. A VMA name in maps is not limited to that local buffer; a deeply nested backing path can produce a maps record long enough to overflow the stack buffer. 
Bound both scansets to the declared buffer sizes ("%15s" for mode[16] and "%4095[^\n]" for line[4096]) and drain any residue past line[4094] with "%*[^\n]" before the trailing "\n". Without the drain, the residue of an over-long record would stay in the stream and break the next "%zx-%zx" parse, so the loop would exit early and silently skip later maps records. Also stop using sscanf(..., "%s") to peel the /proc/<pid>/root prefix from lib_path. Parse the pid and prefix length with "%n", check for the following slash, and copy the remainder with libbpf_strlcpy(). That removes a second unbounded stack write and preserves paths containing spaces. Fixes: 74cc6311cec9 ("libbpf: Add USDT notes parsing and resolution logic") Signed-off-by: Michael Bommarito Signed-off-by: Andrii Nakryiko Reviewed-by: Emil Tsalapatis Link: https://lore.kernel.org/bpf/20260522201353.1454653-1-michael.bommarito@gmail.com --- tools/lib/bpf/usdt.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index e3710933fd52..57fb82bb81b5 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -468,10 +468,10 @@ static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, siz static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt) { - char path[PATH_MAX], line[PATH_MAX], mode[16]; + char path[PATH_MAX], line[4096], mode[16]; size_t seg_start, seg_end, seg_off; struct elf_seg *seg; - int tmp_pid, i, err; + int tmp_pid, n, i, err; FILE *f; *seg_cnt = 0; @@ -480,8 +480,13 @@ static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs, * /proc/<pid>/root/<path>. They will be reported as just /<path> in * /proc/<pid>/maps. */ - if (sscanf(lib_path, "/proc/%d/root%s", &tmp_pid, path) == 2 && pid == tmp_pid) + /* %n is not counted in sscanf() return value, so initialize it. 
*/ + n = 0; + if (sscanf(lib_path, "/proc/%d/root%n", &tmp_pid, &n) == 1 && + n > 0 && pid == tmp_pid && lib_path[n] == '/') { + libbpf_strlcpy(path, lib_path + n, sizeof(path)); goto proceed; + } if (!realpath(lib_path, path)) { pr_warn("usdt: failed to get absolute path of '%s' (err %s), using path as is...\n", @@ -504,8 +509,11 @@ proceed: * 7f5c6f5d1000-7f5c6f5d3000 rw-p 001c7000 08:04 21238613 /usr/lib64/libc-2.17.so * 7f5c6f5d3000-7f5c6f5d8000 rw-p 00000000 00:00 0 * 7f5c6f5d8000-7f5c6f5d9000 r-xp 00000000 103:01 362990598 /data/users/andriin/linux/tools/bpf/usdt/libhello_usdt.so + * + * Some VMA names can be longer than the local buffer. Bound the + * writes, but still consume the rest of the line. */ - while (fscanf(f, "%zx-%zx %s %zx %*s %*d%[^\n]\n", + while (fscanf(f, "%zx-%zx %15s %zx %*s %*d%4095[^\n]%*[^\n]\n", &seg_start, &seg_end, mode, &seg_off, line) == 5) { void *tmp; -- cgit v1.2.3 From be4c6c7bc42952b71188894933946b410deadcfe Mon Sep 17 00:00:00 2001 From: Siddharth Nayyar Date: Wed, 20 May 2026 09:40:44 +0000 Subject: bpftool: Fix typo in struct_ops map FD generation for light skeleton When generating light skeletons for BPF programs containing struct_ops maps, bpftool incorrectly outputs a stray literal 't' instead of a tab character for the map file descriptor member in the links structure. This causes a compilation error when the generated light skeleton is used. Correct the format string by replacing 't' with '\t'. 
Fixes: 08ac454e258e ("libbpf: Auto-attach struct_ops BPF maps in BPF skeleton") Signed-off-by: Siddharth Nayyar Signed-off-by: Andrii Nakryiko Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20260520-struct_ops_gen_typo_fix-v1-1-4dee3771da46@google.com --- tools/bpf/bpftool/gen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 37159e02f418..6ae7262ebe0c 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -1399,7 +1399,7 @@ static int do_skeleton(int argc, char **argv) continue; if (use_loader) - printf("t\tint %s_fd;\n", ident); + printf("\t\tint %s_fd;\n", ident); else printf("\t\tstruct bpf_link *%s;\n", ident); } -- cgit v1.2.3 From b23705e6afb6ac4ae6d220dcb35975698667dd76 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Sat, 23 May 2026 16:27:21 +0000 Subject: libbpf: Fix UAF in strset__add_str() strset_add_str_mem() might reallocate the strset data buffer in order to accommodate the provided string 's'. However, if 's' points to a string already present in the buffer, it becomes dangling after the realloc. This leads to a use-after-free when attempting to memcpy() the string into the new buffer. 
One scenario that triggers this problematic path is when resolve_btfids attempts to patch kfunc prototypes using existing BTF parameter names: | resolve_btfids: function bpf_list_push_back_impl already exists in BTF | Segmentation fault (core dumped) Compiling resolve_btfids with fsanitize=address generates a detailed report of the UAF: | ================================================================= | ERROR: AddressSanitizer: heap-use-after-free on address 0x7f4c4a500bd4 | ==1507892==ERROR: AddressSanitizer: heap-use-after-free on address 0x7f4c4a500bd4 at pc 0x55d25155a2a8 bp 0x7ffcef879060 sp 0x7ffcef878818 | READ of size 5 at 0x7f4c4a500bd4 thread T0 | #0 0x55d25155a2a7 in memcpy (tools/bpf/resolve_btfids/resolve_btfids+0xcf2a7) | #1 0x55d2515d708e in strset__add_str tools/lib/bpf/strset.c:162:2 | #2 0x55d2515c730b in btf__add_str tools/lib/bpf/btf.c:2109:8 | #3 0x55d2515c9020 in btf__add_func_param tools/lib/bpf/btf.c:3108:14 | #4 0x55d25159f0b5 in process_kfunc_with_implicit_args tools/bpf/resolve_btfids/main.c:1196:9 | #5 0x55d25159e004 in btf2btf tools/bpf/resolve_btfids/main.c:1229:9 | #6 0x55d25159cee7 in main tools/bpf/resolve_btfids/main.c:1535:6 | #7 0x7f4c78e29f76 in __libc_start_call_main csu/../sysdeps/nptl/libc_start_call_main.h:58:16 | #8 0x7f4c78e2a026 in __libc_start_main csu/../csu/libc-start.c:360:3 | #9 0x55d2514bb860 in _start (tools/bpf/resolve_btfids/resolve_btfids+0x30860) | | 0x7f4c4a500bd4 is located 13268 bytes inside of 2829000-byte region [0x7f4c4a4fd800,0x7f4c4a7b02c8) | freed by thread T0 here: | #0 0x55d25155b700 in realloc (tools/bpf/resolve_btfids/resolve_btfids+0xd0700) | #1 0x55d2515c426c in libbpf_reallocarray tools/lib/bpf/./libbpf_internal.h:220:9 | #2 0x55d2515c426c in libbpf_add_mem tools/lib/bpf/btf.c:224:13 | | previously allocated by thread T0 here: | #0 0x55d25155b2e3 in malloc (tools/bpf/resolve_btfids/resolve_btfids+0xd02e3) | #1 0x55d2515d6e7d in strset__new tools/lib/bpf/strset.c:58:20 While resolve_btfids 
could be refactored to avoid this call path, let's instead fix this issue at the source in strset__add_str() and avoid similar scenarios. Let's check if set->strs_data was reallocated and whether 's' points to an internal string within the old strset buffer. In such case, 's' is reconstructed to point to the new buffer. While already here, also fix strset__find_str() which suffers from the same problem by factoring out the common operations into a new helper function strset_str_append(). Fixes: 90d76d3ececc ("libbpf: Extract internal set-of-strings datastructure APIs") Suggested-by: Andrii Nakryiko Suggested-by: Mykyta Yatsenko Signed-off-by: Carlos Llamas Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20260523162722.2718940-1-cmllamas@google.com --- tools/lib/bpf/strset.c | 62 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/strset.c b/tools/lib/bpf/strset.c index 2464bcbd04e0..ace73c6b3d62 100644 --- a/tools/lib/bpf/strset.c +++ b/tools/lib/bpf/strset.c @@ -107,6 +107,41 @@ static void *strset_add_str_mem(struct strset *set, size_t add_sz) set->strs_data_len, set->strs_data_max_len, add_sz); } +static long strset_str_append(struct strset *set, const char *s) +{ + uintptr_t old_data = (uintptr_t)set->strs_data; + size_t old_data_len = set->strs_data_len; + uintptr_t old_s = (uintptr_t)s; + long len = strlen(s) + 1; + void *p; + + /* + * Hashmap keys are always offsets within set->strs_data, so to even + * look up some string from the "outside", we need to first append it + * at the end, so that it can be addressed with an offset. Luckily, + * until set->strs_data_len is incremented, that string is just a piece + * of garbage for the rest of the code, so no harm, no foul. On the + * other hand, if the string is unique, it's already appended and + * ready to be used, only a simple set->strs_data_len increment away. 
+ */ + p = strset_add_str_mem(set, len); + if (!p) + return -ENOMEM; + + /* + * The set->strs_data might have reallocated and if 's' pointed + * to an internal string within the old buffer, then it became + * dangling and needs to be reconstructed before the copy. + */ + if (old_data && old_data != (uintptr_t)set->strs_data && + old_s >= old_data && old_s < old_data + old_data_len) + s = set->strs_data + (old_s - old_data); + + memcpy(p, s, len); + + return len; +} + /* Find string offset that corresponds to a given string *s*. * Returns: * - >0 offset into string data, if string is found; @@ -116,16 +151,12 @@ static void *strset_add_str_mem(struct strset *set, size_t add_sz) int strset__find_str(struct strset *set, const char *s) { long old_off, new_off, len; - void *p; - /* see strset__add_str() for why we do this */ - len = strlen(s) + 1; - p = strset_add_str_mem(set, len); - if (!p) - return -ENOMEM; + len = strset_str_append(set, s); + if (len < 0) + return len; new_off = set->strs_data_len; - memcpy(p, s, len); if (hashmap__find(set->strs_hash, new_off, &old_off)) return old_off; @@ -142,24 +173,13 @@ int strset__find_str(struct strset *set, const char *s) int strset__add_str(struct strset *set, const char *s) { long old_off, new_off, len; - void *p; int err; - /* Hashmap keys are always offsets within set->strs_data, so to even - * look up some string from the "outside", we need to first append it - * at the end, so that it can be addressed with an offset. Luckily, - * until set->strs_data_len is incremented, that string is just a piece - * of garbage for the rest of the code, so no harm, no foul. On the - * other hand, if the string is unique, it's already appended and - * ready to be used, only a simple set->strs_data_len increment away. 
- */ - len = strlen(s) + 1; - p = strset_add_str_mem(set, len); - if (!p) - return -ENOMEM; + len = strset_str_append(set, s); + if (len < 0) + return len; new_off = set->strs_data_len; - memcpy(p, s, len); /* Now attempt to add the string, but only if the string with the same * contents doesn't exist already (HASHMAP_ADD strategy). If such -- cgit v1.2.3 From a4a5d4ee061240a1d39053db0a87f841d43277c0 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 26 May 2026 14:39:36 +0800 Subject: libbpf: Add __NR_bpf definition for LoongArch LoongArch uses the generic syscall table, where __NR_bpf is defined as 280 in include/uapi/asm-generic/unistd.h. To align with other architectures, add the __NR_bpf definition for LoongArch to avoid a potential compilation failure: "error __NR_bpf not defined. libbpf does not support your arch." This is a follow up patch of: commit b0c47807d31d ("bpf: Add sparc support to tools and samples.") commit bad1926dd2f6 ("bpf, s390: fix build for libbpf and selftest suite") commit ca31ca8247e2 ("tools/bpf: fix perf build error with uClibc (seen on ARC)") commit e32cb12ff52a ("bpf, mips: Fix build errors about __NR_bpf undeclared") Signed-off-by: Tiezhu Yang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20260526063936.16769-1-yangtiezhu@loongson.cn --- tools/build/feature/test-bpf.c | 2 ++ tools/lib/bpf/bpf.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c index e7a405f83af6..89d59674f39b 100644 --- a/tools/build/feature/test-bpf.c +++ b/tools/build/feature/test-bpf.c @@ -20,6 +20,8 @@ # define __NR_bpf 6319 # elif defined(__mips__) && defined(_ABI64) # define __NR_bpf 5315 +# elif defined(__loongarch__) +# define __NR_bpf 280 # else # error __NR_bpf not defined. libbpf does not support your arch. 
# endif diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 3cd705802330..bc513aa8f404 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -59,6 +59,8 @@ # define __NR_bpf 6319 # elif defined(__mips__) && defined(_ABI64) # define __NR_bpf 5315 +# elif defined(__loongarch__) +# define __NR_bpf 280 # else # error __NR_bpf not defined. libbpf does not support your arch. # endif -- cgit v1.2.3 From 9a720e090eb5155fbd584a3f7eca18f82610a2b3 Mon Sep 17 00:00:00 2001 From: Suchit Karunakaran Date: Sun, 24 May 2026 08:28:53 +0530 Subject: bpf: replace pop/push emptiness check with bpf_list_empty() Simplify fq_flows_is_empty() by replacing the pop/push based emptiness check with a direct call to bpf_list_empty(). This avoids unnecessary list mutation and simplifies the code while preserving correctness. Signed-off-by: Suchit Karunakaran Changes since v1: - Removed unused variable node Reviewed-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260524025853.13786-1-suchitkarunakaran@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c index 1a3233a275c7..8107f5934d2d 100644 --- a/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c @@ -196,18 +196,13 @@ fq_flows_remove_front(struct bpf_list_head *head, struct bpf_spin_lock *lock, static bool fq_flows_is_empty(struct bpf_list_head *head, struct bpf_spin_lock *lock) { - struct bpf_list_node *node; + bool empty; bpf_spin_lock(lock); - node = bpf_list_pop_front(head); - if (node) { - bpf_list_push_front(head, node); - bpf_spin_unlock(lock); - return false; - } + empty = bpf_list_empty(head); bpf_spin_unlock(lock); - return true; + return empty; } /* flow->age is used to denote the state of the flow (not-detached, 
detached, throttled) -- cgit v1.2.3 From 5add3a4ad1a3bc15404e8bd338813ed0a636f5c9 Mon Sep 17 00:00:00 2001 From: Yuyang Huang Date: Sun, 31 May 2026 15:56:00 +0800 Subject: selftests/bpf: add verification for BPF_PROG_QUERY attr size boundaries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new selftest to verify that the BPF syscall (specifically BPF_PROG_QUERY) correctly handles different user-declared attribute sizes. Specifically, verify that: - For cgroup queries, a query with a size that covers 'prog_cnt' but is smaller than 'revision' (OLD_QUERY_SIZE) succeeds, but does not write to 'revision' (verifying backward compatibility). - A query with full size (FULL_QUERY_SIZE) succeeds and writes both 'prog_cnt' and 'revision'. Fixes: 120933984460 ("bpf: Implement mprog API on top of existing cgroup progs") Cc: Maciej Żenczykowski Cc: Lorenzo Colitti Signed-off-by: Yuyang Huang Link: https://lore.kernel.org/r/20260531075600.4058207-3-yuyanghuang@google.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/bpf_attr_size.c | 69 ++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/bpf_attr_size.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_attr_size.c b/tools/testing/selftests/bpf/prog_tests/bpf_attr_size.c new file mode 100644 index 000000000000..32159dc64da8 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_attr_size.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Google LLC */ +#include +#include +#include +#include +#include +#include "cgroup_skb_direct_packet_access.skel.h" + +#define OLD_QUERY_SIZE offsetofend(union bpf_attr, query.prog_cnt) +#define FULL_QUERY_SIZE offsetofend(union bpf_attr, query.revision) + +static void test_query_size_boundaries(void) +{ + struct cgroup_skb_direct_packet_access *skel; + struct bpf_link *link = NULL; + union bpf_attr attr; 
+ int cg_fd = -1; + int err; + + skel = cgroup_skb_direct_packet_access__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + return; + + cg_fd = test__join_cgroup("/attr_size_cg"); + if (!ASSERT_GE(cg_fd, 0, "join_cgroup")) + goto cleanup; + + link = bpf_program__attach_cgroup(skel->progs.direct_packet_access, + cg_fd); + if (!ASSERT_OK_PTR(link, "cg_attach")) + goto cleanup; + + memset(&attr, 0, sizeof(attr)); + attr.query.target_fd = cg_fd; + attr.query.attach_type = BPF_CGROUP_INET_INGRESS; + attr.query.revision = 0xdeadbeefdeadbeefULL; + + err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, OLD_QUERY_SIZE); + if (ASSERT_OK(err, "query_old_size")) { + ASSERT_EQ(attr.query.prog_cnt, 1, "prog_cnt_written_old"); + ASSERT_EQ(attr.query.revision, 0xdeadbeefdeadbeefULL, + "revision_not_written_old"); + } + + memset(&attr, 0, sizeof(attr)); + attr.query.target_fd = cg_fd; + attr.query.attach_type = BPF_CGROUP_INET_INGRESS; + + err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, FULL_QUERY_SIZE); + if (!ASSERT_OK(err, "query_full_size")) + goto cleanup; + + ASSERT_EQ(attr.query.prog_cnt, 1, "prog_cnt_written"); + ASSERT_GT(attr.query.revision, 0, "revision_written"); + +cleanup: + if (link) + bpf_link__destroy(link); + if (cg_fd >= 0) + close(cg_fd); + cgroup_skb_direct_packet_access__destroy(skel); +} + +void test_bpf_attr_size(void) +{ + if (test__start_subtest("query_size_boundaries")) + test_query_size_boundaries(); +} -- cgit v1.2.3 From e2c88266147ff92ca25e6577158a9a0b3b261a30 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 29 May 2026 11:41:16 +0200 Subject: libbpf: Drop redundant self-loop in emit_check_err When the cleanup-label jump offset does not fit in s16, emit_check_err() sets gen->error = -ERANGE and then emits a BPF_JMP_IMM(BPF_JA, 0, 0, -1) self-loop. 
The latter emit() is dead: gen->error is assigned on the preceding line, and emit() then bails out early in realloc_insn_buf() the moment gen->error is set, so the jump is never written into the instruction stream. gen->error alone already marks the generation as failed. This is a follow-up to 7dd62566e0d1 ("libbpf: fix off-by-one in emit_signature_match jump offset") which removed the jump in emit_signature_match() but not in other locations. Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20260529094119.307264-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/gen_loader.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 9478b8f78f26..7b95ced7bcba 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -293,7 +293,6 @@ static void emit_check_err(struct bpf_gen *gen) emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, off)); } else { gen->error = -ERANGE; - emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, -1)); } } -- cgit v1.2.3 From 3c5e2f1a85844abbb65df4694f5ebad0a13e219c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 29 May 2026 11:41:17 +0200 Subject: libbpf: Skip hash computation when loader generation failed bpf_gen__finish() calls compute_sha_update_offsets() gated only on the gen_hash option, without first consulting gen->error. On a failed generation this is buggy: a failed realloc_data_buf() sets gen->data_start to NULL (leaving gen->data_cur dangling), so compute_sha_update_offsets() runs libbpf_sha256() over a NULL buffer with a bogus length; a failed realloc_insn_buf() likewise sets gen->insn_start to NULL and the hash immediates get patched through that NULL base. The computed program is discarded in either case, since the following "if (!gen->error)" block does not publish opts->insns once an error is set. Thus, skip the hash pass when generation has already failed. 
Fixes: ea923080c145 ("libbpf: Embed and verify the metadata hash in the loader") Reported-by: sashiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20260529094119.307264-2-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/gen_loader.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 7b95ced7bcba..3a6e1d53f287 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -397,13 +397,12 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) blob_fd_array_off(gen, i)); emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0)); emit(gen, BPF_EXIT_INSN()); - if (OPTS_GET(gen->opts, gen_hash, false)) - compute_sha_update_offsets(gen); - - pr_debug("gen: finish %s\n", errstr(gen->error)); if (!gen->error) { struct gen_loader_opts *opts = gen->opts; + if (OPTS_GET(opts, gen_hash, false)) + compute_sha_update_offsets(gen); + opts->insns = gen->insn_start; opts->insns_sz = gen->insn_cur - gen->insn_start; opts->data = gen->data_start; @@ -418,6 +417,7 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) bpf_insn_bswap(insn++); } } + pr_debug("gen: finish %s\n", errstr(gen->error)); return gen->error; } -- cgit v1.2.3 From d2f7bd066ed492aeaf82864fbf1f06770f9d9f9d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 29 May 2026 11:41:18 +0200 Subject: libbpf: Also reset {insn,data}_cur on realloc failure realloc_insn_buf() as well as realloc_data_buf() free and NULL gen->insn_start / gen->data_start on -ENOMEM but leave gen->insn_cur / gen->data_cur pointing into the old, freed buffer. Just reset the cursors to NULL alongside the base pointers so the freed state is coherent. 
Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20260529094119.307264-3-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/gen_loader.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 3a6e1d53f287..492360ca07ea 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -63,6 +63,7 @@ static int realloc_insn_buf(struct bpf_gen *gen, __u32 size) gen->error = -ENOMEM; free(gen->insn_start); gen->insn_start = NULL; + gen->insn_cur = NULL; return -ENOMEM; } gen->insn_start = insn_start; @@ -86,6 +87,7 @@ static int realloc_data_buf(struct bpf_gen *gen, __u32 size) gen->error = -ENOMEM; free(gen->data_start); gen->data_start = NULL; + gen->data_cur = NULL; return -ENOMEM; } gen->data_start = data_start; -- cgit v1.2.3 From 41300d032a1b1d91a3ed996ad21905463e344beb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 29 May 2026 18:28:29 +0200 Subject: libbpf: Skip endianness swap when loader generation failed bpf_gen__prog_load() byte-swaps the program insns and the {func,line}_info and CO-RE relo blobs in place for cross-endian targets. The blob offsets come from add_data(), which returns 0 on failure: realloc_data_buf() either frees and NULLs gen->data_start (realloc OOM) or returns early on an already-latched gen->error, leaving a stale, possibly too-small buffer. Neither bswap site checked for this. With gen->swapped_endian set and a failed generation, "gen->data_start + off" becomes NULL + 0. Guard the same way via !gen->error so they are skipped once generation has failed. 
Fixes: 8ca3323dce43 ("libbpf: Support creating light skeleton of either endianness") Reported-by: sashiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20260529162829.315921-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/gen_loader.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 492360ca07ea..3702c5944bc0 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -1054,7 +1054,7 @@ void bpf_gen__prog_load(struct bpf_gen *gen, prog_idx, prog_type, insns_off, insn_cnt, license_off); /* convert blob insns to target endianness */ - if (gen->swapped_endian) { + if (gen->swapped_endian && !gen->error) { struct bpf_insn *insn = gen->data_start + insns_off; int i; @@ -1092,7 +1092,7 @@ void bpf_gen__prog_load(struct bpf_gen *gen, sizeof(struct bpf_core_relo)); /* convert all info blobs to target endianness */ - if (gen->swapped_endian) + if (gen->swapped_endian && !gen->error) info_blob_bswap(gen, func_info, line_info, core_relos, load_attr); libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); -- cgit v1.2.3 From 157317ba662a7c476320fdb334216154eaa8b856 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 28 May 2026 09:17:48 -0700 Subject: selftests/bpf: Use at least 10 args in stack argument tests On arm64, the first 8 arguments are passed in registers (x0-x7), so tests with 8 or fewer arguments never exercise the native stack argument path in the JIT. Increase argument counts to at least 10 across all BPF-to-BPF subprog and kfunc stack argument tests so that at least 2 arguments land on the arm64 stack. For the two-callees test, bump foo1 from 8 to 10 and foo2 from 10 to 12 args to preserve the different-stack-depth flavor of the test. 
The bpf_kfunc_call_stack_arg_mem kfunc is left unchanged at 7 args to avoid breaking the precision backtracking test which relies on hardcoded verifier log instruction indices. Suggested-by: Will Deacon Signed-off-by: Puranjay Mohan Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20260528161750.1900674-3-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/stack_arg.c | 30 ++++---- tools/testing/selftests/bpf/progs/stack_arg.c | 90 +++++++++++++--------- .../testing/selftests/bpf/progs/stack_arg_kfunc.c | 24 +++--- .../testing/selftests/bpf/test_kmods/bpf_testmod.c | 25 +++--- .../selftests/bpf/test_kmods/bpf_testmod_kfunc.h | 11 ++- 5 files changed, 108 insertions(+), 72 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/stack_arg.c b/tools/testing/selftests/bpf/prog_tests/stack_arg.c index d61bac33f809..57193543f260 100644 --- a/tools/testing/selftests/bpf/prog_tests/stack_arg.c +++ b/tools/testing/selftests/bpf/prog_tests/stack_arg.c @@ -37,7 +37,7 @@ static void test_global_many(void) if (!ASSERT_OK(stack_arg__load(skel), "load")) goto out; - run_subtest(skel->progs.test_global_many_args, 36); + run_subtest(skel->progs.test_global_many_args, 55); out: stack_arg__destroy(skel); @@ -62,10 +62,10 @@ static void test_async_cb_many(void) run_subtest(skel->progs.test_async_cb_many_args, 0); /* Wait for the timer callback to fire and verify the result. 
- * 10+20+30+40+50+60+70+80 = 360 + * 10+20+30+40+50+60+70+80+90+100 = 550 */ usleep(50); - ASSERT_EQ(skel->bss->timer_result, 360, "timer_result"); + ASSERT_EQ(skel->bss->timer_result, 550, "timer_result"); out: stack_arg__destroy(skel); @@ -87,11 +87,11 @@ static void test_bpf2bpf(void) if (!ASSERT_OK(stack_arg__load(skel), "load")) goto out; - run_subtest(skel->progs.test_bpf2bpf_ptr_stack_arg, 45); - run_subtest(skel->progs.test_bpf2bpf_mix_stack_args, 51); - run_subtest(skel->progs.test_bpf2bpf_nesting_stack_arg, 50); - run_subtest(skel->progs.test_bpf2bpf_dynptr_stack_arg, 69); - run_subtest(skel->progs.test_two_callees, 91); + run_subtest(skel->progs.test_bpf2bpf_ptr_stack_arg, 75); + run_subtest(skel->progs.test_bpf2bpf_mix_stack_args, 66); + run_subtest(skel->progs.test_bpf2bpf_nesting_stack_arg, 84); + run_subtest(skel->progs.test_bpf2bpf_dynptr_stack_arg, 99); + run_subtest(skel->progs.test_two_callees, 133); out: stack_arg__destroy(skel); @@ -113,14 +113,14 @@ static void test_kfunc(void) if (!ASSERT_OK(stack_arg_kfunc__load(skel), "load")) goto out; - run_subtest(skel->progs.test_stack_arg_scalar, 36); - run_subtest(skel->progs.test_stack_arg_ptr, 45); - run_subtest(skel->progs.test_stack_arg_mix, 51); - run_subtest(skel->progs.test_stack_arg_dynptr, 69); + run_subtest(skel->progs.test_stack_arg_scalar, 55); + run_subtest(skel->progs.test_stack_arg_ptr, 75); + run_subtest(skel->progs.test_stack_arg_mix, 66); + run_subtest(skel->progs.test_stack_arg_dynptr, 99); run_subtest(skel->progs.test_stack_arg_mem, 151); - run_subtest(skel->progs.test_stack_arg_iter, 115); - run_subtest(skel->progs.test_stack_arg_const_str, 15); - run_subtest(skel->progs.test_stack_arg_timer, 15); + run_subtest(skel->progs.test_stack_arg_iter, 145); + run_subtest(skel->progs.test_stack_arg_const_str, 45); + run_subtest(skel->progs.test_stack_arg_timer, 45); out: stack_arg_kfunc__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/stack_arg.c 
b/tools/testing/selftests/bpf/progs/stack_arg.c index b5e9929a4d63..944e3bb603e7 100644 --- a/tools/testing/selftests/bpf/progs/stack_arg.c +++ b/tools/testing/selftests/bpf/progs/stack_arg.c @@ -27,14 +27,16 @@ int timer_result; const volatile bool has_stack_arg = true; __noinline static int static_func_many_args(int a, int b, int c, int d, - int e, int f, int g, int h) + int e, int f, int g, int h, + int i, int j) { - return a + b + c + d + e + f + g + h; + return a + b + c + d + e + f + g + h + i + j; } __noinline int global_calls_many_args(int a, int b, int c) { - return static_func_many_args(a, b, c, 4, 5, 6, 7, 8); + return static_func_many_args(a, b, c, a + 3, a + 4, a + 5, a + 6, + a + 7, a + 8, a + 9); } SEC("tc") @@ -48,18 +50,20 @@ struct test_data { long y; }; -/* 1 + 2 + 3 + 4 + 5 + 10 + 20 = 45 */ +/* 1+2+3+4+5+6+7+8+9+10+20 = 75 */ __noinline static long func_with_ptr_stack_arg(long a, long b, long c, long d, - long e, struct test_data *p) + long e, long f, long g, long h, + long i, struct test_data *p) { - return a + b + c + d + e + p->x + p->y; + return a + b + c + d + e + f + g + h + i + p->x + p->y; } __noinline long global_ptr_stack_arg(long a, long b, long c, long d, long e) { struct test_data data = { .x = 10, .y = 20 }; - return func_with_ptr_stack_arg(a, b, c, d, e, &data); + return func_with_ptr_stack_arg(a, b, c, d, e, a + 5, a + 6, a + 7, + a + 8, &data); } SEC("tc") @@ -68,12 +72,13 @@ int test_bpf2bpf_ptr_stack_arg(void) return global_ptr_stack_arg(1, 2, 3, 4, 5); } -/* 1 + 2 + 3 + 4 + 5 + 10 + 6 + 20 = 51 */ +/* 1+2+3+4+5+6+7+10+8+20 = 66 */ __noinline static long func_with_mix_stack_args(long a, long b, long c, long d, - long e, struct test_data *p, - long f, struct test_data *q) + long e, long f, long g, + struct test_data *p, + long h, struct test_data *q) { - return a + b + c + d + e + p->x + f + q->y; + return a + b + c + d + e + f + g + p->x + h + q->y; } __noinline long global_mix_stack_args(long a, long b, long c, long d, long 
e) @@ -81,7 +86,8 @@ __noinline long global_mix_stack_args(long a, long b, long c, long d, long e) struct test_data p = { .x = 10 }; struct test_data q = { .y = 20 }; - return func_with_mix_stack_args(a, b, c, d, e, &p, e + 1, &q); + return func_with_mix_stack_args(a, b, c, d, e, e + 1, e + 2, &p, + e + 3, &q); } SEC("tc") @@ -94,26 +100,30 @@ int test_bpf2bpf_mix_stack_args(void) * Nesting test: func_outer calls func_inner, both with struct pointer * as stack arg. * - * func_inner: (a+1) + (b+1) + (c+1) + (d+1) + (e+1) + p->x + p->y - * = 2 + 3 + 4 + 5 + 6 + 10 + 20 = 50 + * func_inner: (a+1)+...+(i+1) + p->x + p->y + * = 2+3+4+5+6+7+8+9+10+10+20 = 84 */ __noinline static long func_inner_ptr(long a, long b, long c, long d, - long e, struct test_data *p) + long e, long f, long g, long h, + long i, struct test_data *p) { - return a + b + c + d + e + p->x + p->y; + return a + b + c + d + e + f + g + h + i + p->x + p->y; } __noinline static long func_outer_ptr(long a, long b, long c, long d, - long e, struct test_data *p) + long e, long f, long g, long h, + long i, struct test_data *p) { - return func_inner_ptr(a + 1, b + 1, c + 1, d + 1, e + 1, p); + return func_inner_ptr(a + 1, b + 1, c + 1, d + 1, e + 1, + f + 1, g + 1, h + 1, i + 1, p); } __noinline long global_nesting_ptr(long a, long b, long c, long d, long e) { struct test_data data = { .x = 10, .y = 20 }; - return func_outer_ptr(a, b, c, d, e, &data); + return func_outer_ptr(a, b, c, d, e, a + 5, a + 6, a + 7, a + 8, + &data); } SEC("tc") @@ -122,11 +132,12 @@ int test_bpf2bpf_nesting_stack_arg(void) return global_nesting_ptr(1, 2, 3, 4, 5); } -/* 1 + 2 + 3 + 4 + 5 + sizeof(pkt_v4) = 15 + 54 = 69 */ +/* 1+2+3+4+5+6+7+8+9+sizeof(pkt_v4) = 45+54 = 99 */ __noinline static long func_with_dynptr(long a, long b, long c, long d, - long e, struct bpf_dynptr *ptr) + long e, long f, long g, long h, + long i, struct bpf_dynptr *ptr) { - return a + b + c + d + e + bpf_dynptr_size(ptr); + return a + b + c + d + e + f + g + 
h + i + bpf_dynptr_size(ptr); } __noinline long global_dynptr_stack_arg(void *ctx __arg_ctx, long a, long b, @@ -135,7 +146,8 @@ __noinline long global_dynptr_stack_arg(void *ctx __arg_ctx, long a, long b, struct bpf_dynptr ptr; bpf_dynptr_from_skb(ctx, 0, &ptr); - return func_with_dynptr(a, b, c, d, d + 1, &ptr); + return func_with_dynptr(a, b, c, d, d + 1, d + 2, d + 3, d + 4, + d + 5, &ptr); } SEC("tc") @@ -144,24 +156,25 @@ int test_bpf2bpf_dynptr_stack_arg(struct __sk_buff *skb) return global_dynptr_stack_arg(skb, 1, 2, 3, 4); } -/* foo1: a+b+c+d+e+f+g+h */ -__noinline static int foo1(int a, int b, int c, int d, - int e, int f, int g, int h) +/* foo1: a+b+c+d+e+f+g+h+i+j */ +__noinline static int foo1(int a, int b, int c, int d, int e, + int f, int g, int h, int i, int j) { - return a + b + c + d + e + f + g + h; + return a + b + c + d + e + f + g + h + i + j; } -/* foo2: a+b+c+d+e+f+g+h+i+j */ +/* foo2: a+b+c+d+e+f+g+h+i+j+k+l */ __noinline static int foo2(int a, int b, int c, int d, int e, - int f, int g, int h, int i, int j) + int f, int g, int h, int i, int j, + int k, int l) { - return a + b + c + d + e + f + g + h + i + j; + return a + b + c + d + e + f + g + h + i + j + k + l; } -/* global_two_callees calls foo1 (3 stack args) and foo2 (5 stack args). +/* global_two_callees calls foo1 (5 stack args) and foo2 (7 stack args). * The outgoing stack arg area is sized for foo2 (the larger callee). * Stores for foo1 are a subset of the area used by foo2. - * Result: foo1(1,2,3,4,5,6,7,8) + foo2(1,2,3,4,5,6,7,8,9,10) = 36 + 55 = 91 + * Result: foo1(1..10) + foo2(1..12) = 55 + 78 = 133 * * Pass a-e through so the compiler can't constant-fold the stack args away. 
*/ @@ -169,8 +182,9 @@ __noinline int global_two_callees(int a, int b, int c, int d, int e) { int ret; - ret = foo1(a, b, c, d, e, a + 5, a + 6, a + 7); - ret += foo2(a, b, c, d, e, a + 5, a + 6, a + 7, a + 8, a + 9); + ret = foo1(a, b, c, d, e, a + 5, a + 6, a + 7, a + 8, a + 9); + ret += foo2(a, b, c, d, e, a + 5, a + 6, a + 7, a + 8, a + 9, + a + 10, a + 11); return ret; } @@ -180,9 +194,15 @@ int test_two_callees(void) return global_two_callees(1, 2, 3, 4, 5); } +const volatile int timer_base = 10; + static int timer_cb_many_args(void *map, int *key, struct bpf_timer *timer) { - timer_result = static_func_many_args(10, 20, 30, 40, 50, 60, 70, 80); + int v = timer_base; + + timer_result = static_func_many_args(v, v * 2, v * 3, v * 4, v * 5, + v * 6, v * 7, v * 8, v * 9, + v * 10); return 0; } diff --git a/tools/testing/selftests/bpf/progs/stack_arg_kfunc.c b/tools/testing/selftests/bpf/progs/stack_arg_kfunc.c index da0d4f91d273..345f2da2e361 100644 --- a/tools/testing/selftests/bpf/progs/stack_arg_kfunc.c +++ b/tools/testing/selftests/bpf/progs/stack_arg_kfunc.c @@ -33,7 +33,7 @@ struct { SEC("tc") int test_stack_arg_scalar(struct __sk_buff *skb) { - return bpf_kfunc_call_stack_arg(1, 2, 3, 4, 5, 6, 7, 8); + return bpf_kfunc_call_stack_arg(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); } SEC("tc") @@ -41,7 +41,7 @@ int test_stack_arg_ptr(struct __sk_buff *skb) { struct prog_test_pass1 p = { .x0 = 10, .x1 = 20 }; - return bpf_kfunc_call_stack_arg_ptr(1, 2, 3, 4, 5, &p); + return bpf_kfunc_call_stack_arg_ptr(1, 2, 3, 4, 5, 6, 7, 8, 9, &p); } SEC("tc") @@ -50,17 +50,17 @@ int test_stack_arg_mix(struct __sk_buff *skb) struct prog_test_pass1 p = { .x0 = 10 }; struct prog_test_pass1 q = { .x1 = 20 }; - return bpf_kfunc_call_stack_arg_mix(1, 2, 3, 4, 5, &p, 6, &q); + return bpf_kfunc_call_stack_arg_mix(1, 2, 3, 4, 5, 6, 7, &p, 8, &q); } -/* 1 + 2 + 3 + 4 + 5 + sizeof(pkt_v4) = 15 + 54 = 69 */ +/* 1+2+3+4+5+6+7+8+9+sizeof(pkt_v4) = 45+54 = 99 */ SEC("tc") int 
test_stack_arg_dynptr(struct __sk_buff *skb) { struct bpf_dynptr ptr; bpf_dynptr_from_skb(skb, 0, &ptr); - return bpf_kfunc_call_stack_arg_dynptr(1, 2, 3, 4, 5, &ptr); + return bpf_kfunc_call_stack_arg_dynptr(1, 2, 3, 4, 5, 6, 7, 8, 9, &ptr); } /* 1 + 2 + 3 + 4 + 5 + (1 + 2 + ... + 16) = 15 + 136 = 151 */ @@ -72,7 +72,7 @@ int test_stack_arg_mem(struct __sk_buff *skb) return bpf_kfunc_call_stack_arg_mem(1, 2, 3, 4, 5, buf, sizeof(buf)); } -/* 1 + 2 + 3 + 4 + 5 + 100 = 115 */ +/* 1+2+3+4+5+6+7+8+9+100 = 145 */ SEC("tc") int test_stack_arg_iter(struct __sk_buff *skb) { @@ -80,21 +80,22 @@ int test_stack_arg_iter(struct __sk_buff *skb) u64 ret; bpf_iter_testmod_seq_new(&it, 100, 10); - ret = bpf_kfunc_call_stack_arg_iter(1, 2, 3, 4, 5, &it); + ret = bpf_kfunc_call_stack_arg_iter(1, 2, 3, 4, 5, 6, 7, 8, 9, &it); bpf_iter_testmod_seq_destroy(&it); return ret; } const char cstr[] = "hello"; -/* 1 + 2 + 3 + 4 + 5 = 15 */ +/* 1+2+3+4+5+6+7+8+9 = 45 */ SEC("tc") int test_stack_arg_const_str(struct __sk_buff *skb) { - return bpf_kfunc_call_stack_arg_const_str(1, 2, 3, 4, 5, cstr); + return bpf_kfunc_call_stack_arg_const_str(1, 2, 3, 4, 5, 6, 7, 8, 9, + cstr); } -/* 1 + 2 + 3 + 4 + 5 = 15 */ +/* 1+2+3+4+5+6+7+8+9 = 45 */ SEC("tc") int test_stack_arg_timer(struct __sk_buff *skb) { @@ -104,7 +105,8 @@ int test_stack_arg_timer(struct __sk_buff *skb) val = bpf_map_lookup_elem(&kfunc_timer_map, &key); if (!val) return 0; - return bpf_kfunc_call_stack_arg_timer(1, 2, 3, 4, 5, &val->timer); + return bpf_kfunc_call_stack_arg_timer(1, 2, 3, 4, 5, 6, 7, 8, 9, + &val->timer); } #else diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index 0be918fe3021..30f1cd23093c 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -826,30 +826,34 @@ __bpf_kfunc int bpf_kfunc_call_test5(u8 a, u16 b, u32 c) } __bpf_kfunc u64 bpf_kfunc_call_stack_arg(u64 a, 
u64 b, u64 c, u64 d, - u64 e, u64 f, u64 g, u64 h) + u64 e, u64 f, u64 g, u64 h, + u64 i, u64 j) { - return a + b + c + d + e + f + g + h; + return a + b + c + d + e + f + g + h + i + j; } __bpf_kfunc u64 bpf_kfunc_call_stack_arg_ptr(u64 a, u64 b, u64 c, u64 d, u64 e, + u64 f, u64 g, u64 h, u64 i, struct prog_test_pass1 *p) { - return a + b + c + d + e + p->x0 + p->x1; + return a + b + c + d + e + f + g + h + i + p->x0 + p->x1; } __bpf_kfunc u64 bpf_kfunc_call_stack_arg_mix(u64 a, u64 b, u64 c, u64 d, u64 e, - struct prog_test_pass1 *p, u64 f, + u64 f, u64 g, + struct prog_test_pass1 *p, u64 h, struct prog_test_pass1 *q) { - return a + b + c + d + e + p->x0 + f + q->x1; + return a + b + c + d + e + f + g + p->x0 + h + q->x1; } __bpf_kfunc u64 bpf_kfunc_call_stack_arg_dynptr(u64 a, u64 b, u64 c, u64 d, u64 e, + u64 f, u64 g, u64 h, u64 i, struct bpf_dynptr *ptr) { const struct bpf_dynptr_kern *kern_ptr = (void *)ptr; - return a + b + c + d + e + (kern_ptr->size & 0xFFFFFF); + return a + b + c + d + e + f + g + h + i + (kern_ptr->size & 0xFFFFFF); } __bpf_kfunc u64 bpf_kfunc_call_stack_arg_mem(u64 a, u64 b, u64 c, u64 d, u64 e, @@ -865,21 +869,24 @@ __bpf_kfunc u64 bpf_kfunc_call_stack_arg_mem(u64 a, u64 b, u64 c, u64 d, u64 e, } __bpf_kfunc u64 bpf_kfunc_call_stack_arg_iter(u64 a, u64 b, u64 c, u64 d, u64 e, + u64 f, u64 g, u64 h, u64 i, struct bpf_iter_testmod_seq *it__iter) { - return a + b + c + d + e + it__iter->value; + return a + b + c + d + e + f + g + h + i + it__iter->value; } __bpf_kfunc u64 bpf_kfunc_call_stack_arg_const_str(u64 a, u64 b, u64 c, u64 d, u64 e, + u64 f, u64 g, u64 h, u64 i, const char *str__str) { - return a + b + c + d + e; + return a + b + c + d + e + f + g + h + i; } __bpf_kfunc u64 bpf_kfunc_call_stack_arg_timer(u64 a, u64 b, u64 c, u64 d, u64 e, + u64 f, u64 g, u64 h, u64 i, struct bpf_timer *timer) { - return a + b + c + d + e; + return a + b + c + d + e + f + g + h + i; } __bpf_kfunc u64 bpf_kfunc_call_stack_arg_big(u64 a, u64 b, u64 
c, u64 d, u64 e, diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h index 2edc36b66de9..c36bb911defa 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h @@ -119,21 +119,28 @@ struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym; int bpf_kfunc_call_test5(__u8 a, __u16 b, __u32 c) __ksym; __u64 bpf_kfunc_call_stack_arg(__u64 a, __u64 b, __u64 c, __u64 d, - __u64 e, __u64 f, __u64 g, __u64 h) __ksym; + __u64 e, __u64 f, __u64 g, __u64 h, + __u64 i, __u64 j) __ksym; __u64 bpf_kfunc_call_stack_arg_ptr(__u64 a, __u64 b, __u64 c, __u64 d, __u64 e, + __u64 f, __u64 g, __u64 h, __u64 i, struct prog_test_pass1 *p) __ksym; __u64 bpf_kfunc_call_stack_arg_mix(__u64 a, __u64 b, __u64 c, __u64 d, __u64 e, - struct prog_test_pass1 *p, __u64 f, + __u64 f, __u64 g, + struct prog_test_pass1 *p, __u64 h, struct prog_test_pass1 *q) __ksym; __u64 bpf_kfunc_call_stack_arg_dynptr(__u64 a, __u64 b, __u64 c, __u64 d, __u64 e, + __u64 f, __u64 g, __u64 h, __u64 i, struct bpf_dynptr *ptr) __ksym; __u64 bpf_kfunc_call_stack_arg_mem(__u64 a, __u64 b, __u64 c, __u64 d, __u64 e, void *mem, int mem__sz) __ksym; __u64 bpf_kfunc_call_stack_arg_iter(__u64 a, __u64 b, __u64 c, __u64 d, __u64 e, + __u64 f, __u64 g, __u64 h, __u64 i, struct bpf_iter_testmod_seq *it__iter) __ksym; __u64 bpf_kfunc_call_stack_arg_const_str(__u64 a, __u64 b, __u64 c, __u64 d, __u64 e, + __u64 f, __u64 g, __u64 h, __u64 i, const char *str__str) __ksym; __u64 bpf_kfunc_call_stack_arg_timer(__u64 a, __u64 b, __u64 c, __u64 d, __u64 e, + __u64 f, __u64 g, __u64 h, __u64 i, struct bpf_timer *timer) __ksym; __u64 bpf_kfunc_call_stack_arg_big(__u64 a, __u64 b, __u64 c, __u64 d, __u64 e, struct prog_test_big_arg s) __ksym; -- cgit v1.2.3 From de36adca634634c205a9eb8b56a28175ab7abf5f Mon Sep 
17 00:00:00 2001 From: Taegu Ha Date: Thu, 28 May 2026 15:21:55 +0900 Subject: bpf: reject overlarge global subprog argument sizes Global subprogram argument checking derives generic pointer sizes from BTF and passes the resolved size to check_mem_reg() as a u32. The access-size validation path then uses a signed int, and stack pointers negate the value before calling check_helper_mem_access(). This creates a wrap when BTF describes a pointee size larger than S32_MAX. For example, a global subprogram argument of type: int (*p)[0x3fffffff] has a BTF-resolved pointee size of 0xfffffffc bytes. At a call site the caller can pass a pointer to a 4-byte stack slot at fp-4. The current PTR_TO_STACK path computes: size = -(int)mem_size so 0xfffffffc becomes -4 as a signed int and the negation validates only a 4-byte stack range. That range is covered by the caller's stack slot, so the call is accepted. The callee is then verified independently with R1 as PTR_TO_MEM and mem_size 0xfffffffc. A small instruction such as: r0 = *(u32 *)(r1 + 4) is accepted as being inside that BTF-described memory region. At run time, however, the actual argument value is still fp-4, so r1 + 4 addresses fp+0, outside the 4-byte object that the caller provided. Reject sizes that cannot be represented by the verifier's signed access-size API before the stack-specific negation. Add a verifier regression test for the oversized BTF argument. 
Fixes: 2cb27158adb3 ("bpf: poison dead stack slots") Signed-off-by: Taegu Ha Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20260528062155.3988156-1-hataegu0826@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 6 ++++++ .../selftests/bpf/progs/verifier_global_subprogs.c | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c8d980fdd709..3a270bc485c2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6927,6 +6927,12 @@ static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg if (bpf_register_is_null(reg)) return 0; + if (mem_size > S32_MAX) { + verbose(env, "%s memory size %u is too large\n", + reg_arg_name(env, argno), mem_size); + return -EACCES; + } + /* Assuming that the register contains a value check if the memory * access is safe. Temporarily save and restore the register's state as * the conversion shouldn't be visible to a caller. diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c index dc09d0e2d8ad..75a2e3f48d0f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c @@ -152,6 +152,23 @@ int anon_user_mem_valid(void *ctx) return subprog_user_anon_mem(&t); } +__noinline __weak int subprog_user_anon_mem_huge(int (*p)[0x3fffffff]) +{ + return p ? 
(*p)[1] : 0; +} + +SEC("?tracepoint") +__failure __log_level(2) +__msg("R1 memory size 4294967292 is too large") +int anon_user_mem_huge_size_invalid(void *ctx) +{ + int (*p)[0x3fffffff]; + int tiny = 42; + + p = (void *)&tiny; + return subprog_user_anon_mem_huge(p) + tiny; +} + __noinline __weak int subprog_nonnull_ptr_good(int *p1 __arg_nonnull, int *p2 __arg_nonnull) { return (*p1) * (*p2); /* good, no need for NULL checks */ -- cgit v1.2.3 From 308c7a0ae8859b34d9d90a3dff953b2d14242145 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Thu, 28 May 2026 18:49:28 -0700 Subject: bpf: Refactor object relationship tracking and fix dynptr UAF bug Refactor object relationship tracking in the verifier and fix a dynptr use-after-free bug where file/skb dynptrs are not invalidated when the parent referenced object is freed. Add parent_id to bpf_reg_state to precisely track child-parent relationships. A child object's parent_id points to the parent object's id. This replaces the PTR_TO_MEM-specific dynptr_id. Remove ref_obj_id from bpf_reg_state by folding its role into the existing id field. Previously, id tracked pointer identity for null checking while ref_obj_id tracked the owning reference for lifetime management. These are now unified: acquire helpers and kfuncs set id to the acquired reference id, and release paths use id directly. Add reg_is_referenced() which checks if a register is referenced by looking up its id in the reference array. This replaces all former ref_obj_id checks. For release_reference(), invalidating an object now also invalidates all descendants by traversing the object tree. This is done using stack-based DFS to avoid recursive call chains of release_reference() -> unmark_stack_slots_dynptr() -> release_reference(). Referenced objects encountered during tree traversal are reported as leaked references. Add parent_id to bpf_reference_state to enable hierarchical reference tracking. 
When acquiring a reference, a parent_id can be specified to link the new reference to an existing one (e.g., referenced dynptrs acquire a reference with parent_id linking to the parent object's reference). Pointer casting: For pointer casting helpers (bpf_sk_fullsock, bpf_tcp_sock), instead of propagating ref_obj_id, the cast result reuses the same reference id as the source pointer. Since the cast may return NULL for a non-NULL input, the NULL case is explored as a separate verifier branch. This allows releasing any of the original or cast pointers to invalidate all others. Referenced dynptrs: When constructing a referenced dynptr, acquire an intermediate reference with parent_id linking to the parent referenced object. The dynptr and all clones share the same parent_id (pointing to the intermediate ref) but get unique ids for independent slice tracking. Releasing a referenced dynptr releases the parent reference, which in turn invalidates all clones and their derived slices. Owning to non-owning reference conversion: After converting owning to non-owning by clearing id (e.g., object(id=1) -> object(id=0)), the verifier releases the reference state via release_reference_nomark(). Note that the error message "reference has not been acquired before" in the helper and kfunc release paths is removed. This message was already unreachable. The verifier only calls release_reference() after confirming the reference is valid, so the condition could never trigger in practice. 
Fixes: 870c28588afa ("bpf: net_sched: Add basic bpf qdisc kfuncs") Signed-off-by: Amery Hung Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20260529014936.2811085-6-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 +- include/linux/bpf_verifier.h | 77 ++- kernel/bpf/btf.c | 2 +- kernel/bpf/fixups.c | 2 +- kernel/bpf/log.c | 18 +- kernel/bpf/states.c | 11 +- kernel/bpf/verifier.c | 560 ++++++++++----------- tools/testing/selftests/bpf/prog_tests/spin_lock.c | 4 +- tools/testing/selftests/bpf/progs/dynptr_fail.c | 4 +- .../selftests/bpf/progs/iters_state_safety.c | 4 +- .../selftests/bpf/progs/iters_testmod_seq.c | 12 +- 11 files changed, 338 insertions(+), 360 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1c6863ce89e0..d1a17c118316 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1062,7 +1062,7 @@ struct bpf_insn_access_aux { struct { struct btf *btf; u32 btf_id; - u32 ref_obj_id; + u32 ref_id; }; }; struct bpf_verifier_log *log; /* for verbose logs */ @@ -1631,7 +1631,7 @@ struct bpf_ctx_arg_aux { enum bpf_reg_type reg_type; struct btf *btf; u32 btf_id; - u32 ref_obj_id; + u32 ref_id; bool refcounted; }; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3a5c226bf1c3..75b287d8d92f 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -66,7 +66,6 @@ struct bpf_reg_state { struct { /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */ u32 mem_size; - u32 dynptr_id; /* for dynptr slices */ }; /* For dynptr stack slots */ @@ -148,46 +147,14 @@ struct bpf_reg_state { #define BPF_ADD_CONST32 (1U << 30) #define BPF_ADD_CONST (BPF_ADD_CONST64 | BPF_ADD_CONST32) u32 id; - /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned - * from a pointer-cast helper, bpf_sk_fullsock() and - * bpf_tcp_sock(). 
- * - * Consider the following where "sk" is a reference counted - * pointer returned from "sk = bpf_sk_lookup_tcp();": - * - * 1: sk = bpf_sk_lookup_tcp(); - * 2: if (!sk) { return 0; } - * 3: fullsock = bpf_sk_fullsock(sk); - * 4: if (!fullsock) { bpf_sk_release(sk); return 0; } - * 5: tp = bpf_tcp_sock(fullsock); - * 6: if (!tp) { bpf_sk_release(sk); return 0; } - * 7: bpf_sk_release(sk); - * 8: snd_cwnd = tp->snd_cwnd; // verifier will complain - * - * After bpf_sk_release(sk) at line 7, both "fullsock" ptr and - * "tp" ptr should be invalidated also. In order to do that, - * the reg holding "fullsock" and "sk" need to remember - * the original refcounted ptr id (i.e. sk_reg->id) in ref_obj_id - * such that the verifier can reset all regs which have - * ref_obj_id matching the sk_reg->id. - * - * sk_reg->ref_obj_id is set to sk_reg->id at line 1. - * sk_reg->id will stay as NULL-marking purpose only. - * After NULL-marking is done, sk_reg->id can be reset to 0. - * - * After "fullsock = bpf_sk_fullsock(sk);" at line 3, - * fullsock_reg->ref_obj_id is set to sk_reg->ref_obj_id. - * - * After "tp = bpf_tcp_sock(fullsock);" at line 5, - * tp_reg->ref_obj_id is set to fullsock_reg->ref_obj_id - * which is the same as sk_reg->ref_obj_id. - * - * From the verifier perspective, if sk, fullsock and tp - * are not NULL, they are the same ptr with different - * reg->type. In particular, bpf_sk_release(tp) is also - * allowed and has the same effect as bpf_sk_release(sk). + /* + * Tracks the parent object this register was derived from. + * Used for cascading invalidation: when the parent object is + * released or invalidated, all registers with matching parent_id + * are also invalidated. For example, a slice from bpf_dynptr_data() + * gets parent_id set to the dynptr's id. 
*/ - u32 ref_obj_id; + u32 parent_id; /* Inside the callee two registers can be both PTR_TO_STACK like * R1=fp-8 and R2=fp-8, but one of them points to this function stack * while another to the caller's stack. To differentiate them 'frameno' @@ -364,10 +331,14 @@ struct bpf_reference_state { * is used purely to inform the user of a reference leak. */ int insn_idx; - /* Use to keep track of the source object of a lock, to ensure - * it matches on unlock. - */ - void *ptr; + union { + /* For REF_TYPE_PTR */ + int parent_id; + /* Use to keep track of the source object of a lock, to ensure + * it matches on unlock. + */ + void *ptr; + }; }; struct bpf_retval_range { @@ -585,7 +556,7 @@ bpf_get_spilled_stack_arg(int slot, struct bpf_func_state *frame) iter < frame->out_stack_arg_cnt; \ iter++, reg = bpf_get_spilled_stack_arg(iter, frame)) -#define bpf_for_each_reg_in_vstate_mask(__vst, __state, __reg, __mask, __expr) \ +#define bpf_for_each_reg_in_vstate_mask(__vst, __state, __reg, __stack, __mask, __expr) \ ({ \ struct bpf_verifier_state *___vstate = __vst; \ int ___i, ___j; \ @@ -593,6 +564,7 @@ bpf_get_spilled_stack_arg(int slot, struct bpf_func_state *frame) struct bpf_reg_state *___regs; \ __state = ___vstate->frame[___i]; \ ___regs = __state->regs; \ + __stack = NULL; \ for (___j = 0; ___j < MAX_BPF_REG; ___j++) { \ __reg = &___regs[___j]; \ (void)(__expr); \ @@ -600,8 +572,10 @@ bpf_get_spilled_stack_arg(int slot, struct bpf_func_state *frame) bpf_for_each_spilled_reg(___j, __state, __reg, __mask) { \ if (!__reg) \ continue; \ + __stack = &__state->stack[___j]; \ (void)(__expr); \ } \ + __stack = NULL; \ bpf_for_each_spilled_stack_arg(___j, __state, __reg) { \ if (!__reg) \ continue; \ @@ -611,8 +585,13 @@ bpf_get_spilled_stack_arg(int slot, struct bpf_func_state *frame) }) /* Invoke __expr over regsiters in __vst, setting __state and __reg */ -#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \ - bpf_for_each_reg_in_vstate_mask(__vst, __state, 
__reg, 1 << STACK_SPILL, __expr) +#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \ + ({ \ + struct bpf_stack_state * ___stack; \ + (void)___stack; \ + bpf_for_each_reg_in_vstate_mask(__vst, __state, __reg, ___stack,\ + 1 << STACK_SPILL, __expr); \ + }) /* linked list of verifier states used to prune search */ struct bpf_verifier_state_list { @@ -1442,7 +1421,7 @@ struct bpf_map_desc { struct bpf_dynptr_desc { enum bpf_dynptr_type type; u32 id; - u32 ref_obj_id; + u32 parent_id; }; struct bpf_kfunc_call_arg_meta { @@ -1453,7 +1432,7 @@ struct bpf_kfunc_call_arg_meta { const struct btf_type *func_proto; const char *func_name; /* Out parameters */ - u32 ref_obj_id; + u32 id; u8 release_regno; bool r0_rdonly; u32 ret_btf_id; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 17d4ab0a8206..f429f6f58cb2 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6957,7 +6957,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, info->reg_type = ctx_arg_info->reg_type; info->btf = ctx_arg_info->btf ? : btf_vmlinux; info->btf_id = ctx_arg_info->btf_id; - info->ref_obj_id = ctx_arg_info->ref_obj_id; + info->ref_id = ctx_arg_info->ref_id; return true; } } diff --git a/kernel/bpf/fixups.c b/kernel/bpf/fixups.c index 12739add2dda..5aa3f7d99ac9 100644 --- a/kernel/bpf/fixups.c +++ b/kernel/bpf/fixups.c @@ -870,7 +870,7 @@ int bpf_convert_ctx_accesses(struct bpf_verifier_env *env) case PTR_TO_BTF_ID: case PTR_TO_BTF_ID | PTR_UNTRUSTED: /* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike - * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot + * PTR_TO_BTF_ID, and an active referenced id, but the same cannot * be said once it is marked PTR_UNTRUSTED, hence we must handle * any faults for loads into such types. BPF_WRITE is disallowed * for this case. 
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 62fe6ed18374..b740fa73ee26 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -665,8 +665,8 @@ static void print_reg_state(struct bpf_verifier_env *env, verbose_a("id=%d", reg->id & ~BPF_ADD_CONST); if (reg->id & BPF_ADD_CONST) verbose(env, "%+d", reg->delta); - if (reg->ref_obj_id) - verbose_a("ref_obj_id=%d", reg->ref_obj_id); + if (reg->parent_id) + verbose_a("parent_id=%d", reg->parent_id); if (type_is_non_owning_ref(reg->type)) verbose_a("%s", "non_own_ref"); if (type_is_map_ptr(t)) { @@ -768,21 +768,19 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie verbose(env, "=dynptr_%s(", dynptr_type_str(reg->dynptr.type)); if (reg->id) verbose_a("id=%d", reg->id); - if (reg->ref_obj_id) - verbose_a("ref_id=%d", reg->ref_obj_id); - if (reg->dynptr_id) - verbose_a("dynptr_id=%d", reg->dynptr_id); + if (reg->parent_id) + verbose_a("parent_id=%d", reg->parent_id); verbose(env, ")"); break; case STACK_ITER: - /* only main slot has ref_obj_id set; skip others */ - if (!reg->ref_obj_id) + /* only main slot has id set; skip others */ + if (!reg->id) continue; - verbose(env, " fp%d=iter_%s(ref_id=%d,state=%s,depth=%u)", + verbose(env, " fp%d=iter_%s(id=%d,state=%s,depth=%u)", (-i - 1) * BPF_REG_SIZE, iter_type_str(reg->iter.btf, reg->iter.btf_id), - reg->ref_obj_id, iter_state_str(reg->iter.state), + reg->id, iter_state_str(reg->iter.state), reg->iter.depth); break; case STACK_MISC: diff --git a/kernel/bpf/states.c b/kernel/bpf/states.c index 877338136009..5945956a7573 100644 --- a/kernel/bpf/states.c +++ b/kernel/bpf/states.c @@ -489,7 +489,7 @@ static bool regs_exact(const struct bpf_reg_state *rold, { return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && check_ids(rold->id, rcur->id, idmap) && - check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap); + check_ids(rold->parent_id, rcur->parent_id, idmap); } enum exact_level { @@ -614,7 +614,7 @@ static bool 
regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off) && check_ids(rold->id, rcur->id, idmap) && - check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap); + check_ids(rold->parent_id, rcur->parent_id, idmap); case PTR_TO_PACKET_META: case PTR_TO_PACKET: /* We must have at least as much range as the old ptr @@ -794,7 +794,8 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, cur_reg = &cur->stack[spi].spilled_ptr; if (old_reg->dynptr.type != cur_reg->dynptr.type || old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot || - !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) + !check_ids(old_reg->id, cur_reg->id, idmap) || + !check_ids(old_reg->parent_id, cur_reg->parent_id, idmap)) return false; break; case STACK_ITER: @@ -810,13 +811,13 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, old_reg->iter.btf_id != cur_reg->iter.btf_id || old_reg->iter.state != cur_reg->iter.state || /* ignore {old_reg,cur_reg}->iter.depth, see above */ - !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) + !check_ids(old_reg->id, cur_reg->id, idmap)) return false; break; case STACK_IRQ_FLAG: old_reg = &old->stack[spi].spilled_ptr; cur_reg = &cur->stack[spi].spilled_ptr; - if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap) || + if (!check_ids(old_reg->id, cur_reg->id, idmap) || old_reg->irq.kfunc_class != cur_reg->irq.kfunc_class) return false; break; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0d8be0b68bd8..6d82ca5acacb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -200,14 +200,14 @@ struct bpf_verifier_stack_elem { #define BPF_PRIV_STACK_MIN_SIZE 64 -static int acquire_reference(struct bpf_verifier_env *env, int insn_idx); -static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id); -static int release_reference(struct bpf_verifier_env *env, int 
ref_obj_id); +static int acquire_reference(struct bpf_verifier_env *env, int insn_idx, int parent_id); +static int release_reference_nomark(struct bpf_verifier_state *state, int id); +static int release_reference(struct bpf_verifier_env *env, int id); static void invalidate_non_owning_refs(struct bpf_verifier_env *env); static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env); static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg); -static bool is_trusted_reg(const struct bpf_reg_state *reg); +static bool is_trusted_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg); static inline bool in_sleepable_context(struct bpf_verifier_env *env); static const char *non_sleepable_context_description(struct bpf_verifier_env *env); static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg); @@ -241,7 +241,7 @@ struct bpf_call_arg_meta { int access_size; int mem_size; u64 msize_max_value; - int ref_obj_id; + u32 id; int func_id; struct btf *btf; u32 btf_id; @@ -339,7 +339,7 @@ static void verbose_invalid_scalar(struct bpf_verifier_env *env, verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval); } -static bool reg_not_null(const struct bpf_reg_state *reg) +static bool reg_not_null(struct bpf_verifier_env *env, const struct bpf_reg_state *reg) { enum bpf_reg_type type; @@ -353,7 +353,7 @@ static bool reg_not_null(const struct bpf_reg_state *reg) type == PTR_TO_MAP_VALUE || type == PTR_TO_MAP_KEY || type == PTR_TO_SOCK_COMMON || - (type == PTR_TO_BTF_ID && is_trusted_reg(reg)) || + (type == PTR_TO_BTF_ID && is_trusted_reg(env, reg)) || (type == PTR_TO_MEM && !(reg->type & PTR_UNTRUSTED)) || type == CONST_PTR_TO_MAP; } @@ -638,43 +638,44 @@ static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type) } } -static bool dynptr_type_refcounted(enum bpf_dynptr_type type) +static bool dynptr_type_referenced(enum bpf_dynptr_type type) { return type == 
BPF_DYNPTR_TYPE_RINGBUF || type == BPF_DYNPTR_TYPE_FILE; } static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type, - bool first_slot, int dynptr_id); + bool first_slot, int id, int parent_id); static void mark_dynptr_stack_regs(struct bpf_verifier_env *env, struct bpf_reg_state *sreg1, struct bpf_reg_state *sreg2, - enum bpf_dynptr_type type) + enum bpf_dynptr_type type, int parent_id) { int id = ++env->id_gen; - __mark_dynptr_reg(sreg1, type, true, id); - __mark_dynptr_reg(sreg2, type, false, id); + __mark_dynptr_reg(sreg1, type, true, id, parent_id); + __mark_dynptr_reg(sreg2, type, false, id, parent_id); } static void mark_dynptr_cb_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, enum bpf_dynptr_type type) { - __mark_dynptr_reg(reg, type, true, ++env->id_gen); + __mark_dynptr_reg(reg, type, true, ++env->id_gen, 0); } static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi); static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id) + enum bpf_arg_type arg_type, int insn_idx, int parent_id, + struct bpf_dynptr_desc *dynptr) { struct bpf_func_state *state = bpf_func(env, reg); - enum bpf_dynptr_type type; int spi, i, err; + enum bpf_dynptr_type type; spi = dynptr_get_spi(env, reg); if (spi < 0) @@ -705,85 +706,62 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_ if (type == BPF_DYNPTR_TYPE_INVALID) return -EINVAL; - mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr, - &state->stack[spi - 1].spilled_ptr, type); - - if (dynptr_type_refcounted(type)) { - /* The id is used to track proper releasing */ - int id; + if (dynptr->type == BPF_DYNPTR_TYPE_INVALID) { /* dynptr constructors */ + if (dynptr_type_referenced(type)) { + int id; - if (clone_ref_obj_id) - id = clone_ref_obj_id; - else - id = acquire_reference(env, insn_idx); - - if (id 
< 0) - return id; + /* + * Create an intermediate reference that tracks the referenced + * object for the referenced dynptr. Freeing a referenced dynptr + * through helpers/kfuncs will invalidate all clones. + */ + id = acquire_reference(env, insn_idx, parent_id); + if (id < 0) + return id; - state->stack[spi].spilled_ptr.ref_obj_id = id; - state->stack[spi - 1].spilled_ptr.ref_obj_id = id; + parent_id = id; + } + } else { /* bpf_dynptr_clone() */ + parent_id = dynptr->parent_id; } + mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr, + &state->stack[spi - 1].spilled_ptr, type, parent_id); + return 0; } -static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi) +static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_stack_state *stack) { int i; for (i = 0; i < BPF_REG_SIZE; i++) { - state->stack[spi].slot_type[i] = STACK_INVALID; - state->stack[spi - 1].slot_type[i] = STACK_INVALID; + stack[0].slot_type[i] = STACK_INVALID; + stack[1].slot_type[i] = STACK_INVALID; } - bpf_mark_reg_not_init(env, &state->stack[spi].spilled_ptr); - bpf_mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr); + bpf_mark_reg_not_init(env, &stack[0].spilled_ptr); + bpf_mark_reg_not_init(env, &stack[1].spilled_ptr); } static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg) { struct bpf_func_state *state = bpf_func(env, reg); - int spi, ref_obj_id, i; + int spi; spi = dynptr_get_spi(env, reg); if (spi < 0) return spi; - if (!dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) { - invalidate_dynptr(env, state, spi); - return 0; - } - - ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id; - - /* If the dynptr has a ref_obj_id, then we need to invalidate - * two things: - * - * 1) Any dynptrs with a matching ref_obj_id (clones) - * 2) Any slices derived from this dynptr. 
+ /* + * For referenced dynptr, release the parent ref which cascades to + * all clones and derived slices. For non-referenced dynptr, only + * the dynptr and slices derived from it will be invalidated. */ - - /* Invalidate any slices associated with this dynptr */ - WARN_ON_ONCE(release_reference(env, ref_obj_id)); - - /* Invalidate any dynptr clones */ - for (i = 1; i < state->allocated_stack / BPF_REG_SIZE; i++) { - if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id) - continue; - - /* it should always be the case that if the ref obj id - * matches then the stack slot also belongs to a - * dynptr - */ - if (state->stack[i].slot_type[0] != STACK_DYNPTR) { - verifier_bug(env, "misconfigured ref_obj_id"); - return -EFAULT; - } - if (state->stack[i].spilled_ptr.dynptr.first_slot) - invalidate_dynptr(env, state, i); - } - - return 0; + reg = &state->stack[spi].spilled_ptr; + return release_reference(env, dynptr_type_referenced(reg->dynptr.type) + ? reg->parent_id + : reg->id); } static void __mark_reg_unknown(const struct bpf_verifier_env *env, @@ -800,9 +778,7 @@ static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi) { - struct bpf_func_state *fstate; - struct bpf_reg_state *dreg; - int i, dynptr_id; + int i, err = 0; /* We always ensure that STACK_DYNPTR is never set partially, * hence just checking for slot_type[0] is enough. 
This is @@ -816,13 +792,13 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env, if (!state->stack[spi].spilled_ptr.dynptr.first_slot) spi = spi + 1; - if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) { - int ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id; + if (dynptr_type_referenced(state->stack[spi].spilled_ptr.dynptr.type)) { + int v_parent_id = state->stack[spi].spilled_ptr.parent_id; int ref_cnt = 0; /* * A referenced dynptr can be overwritten only if there is at - * least one other dynptr sharing the same ref_obj_id, + * least one other dynptr sharing the same virtual ref parent, * ensuring the reference can still be properly released. */ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { @@ -830,7 +806,7 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env, continue; if (!state->stack[i].spilled_ptr.dynptr.first_slot) continue; - if (state->stack[i].spilled_ptr.ref_obj_id == ref_obj_id) + if (state->stack[i].spilled_ptr.parent_id == v_parent_id) ref_cnt++; } @@ -840,32 +816,14 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env, } } - mark_stack_slot_scratched(env, spi); - mark_stack_slot_scratched(env, spi - 1); - - /* Writing partially to one dynptr stack slot destroys both. 
*/ - for (i = 0; i < BPF_REG_SIZE; i++) { - state->stack[spi].slot_type[i] = STACK_INVALID; - state->stack[spi - 1].slot_type[i] = STACK_INVALID; + /* Invalidate the dynptr and any derived slices */ + err = release_reference(env, state->stack[spi].spilled_ptr.id); + if (!err) { + mark_stack_slot_scratched(env, spi); + mark_stack_slot_scratched(env, spi - 1); } - dynptr_id = state->stack[spi].spilled_ptr.id; - /* Invalidate any slices associated with this dynptr */ - bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({ - /* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */ - if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM) - continue; - if (dreg->dynptr_id == dynptr_id) - mark_reg_invalid(env, dreg); - })); - - /* Do not release reference state, we are destroying dynptr on stack, - * not using some helper to release it. Just reset register. - */ - bpf_mark_reg_not_init(env, &state->stack[spi].spilled_ptr); - bpf_mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr); - - return 0; + return err; } static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg) @@ -965,7 +923,7 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env, if (spi < 0) return spi; - id = acquire_reference(env, insn_idx); + id = acquire_reference(env, insn_idx, 0); if (id < 0) return id; @@ -981,7 +939,7 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env, else st->type |= PTR_UNTRUSTED; } - st->ref_obj_id = i == 0 ? id : 0; + st->id = i == 0 ? 
id : 0; st->iter.btf = btf; st->iter.btf_id = btf_id; st->iter.state = BPF_ITER_STATE_ACTIVE; @@ -1011,7 +969,7 @@ static int unmark_stack_slots_iter(struct bpf_verifier_env *env, struct bpf_reg_state *st = &slot->spilled_ptr; if (i == 0) - WARN_ON_ONCE(release_reference(env, st->ref_obj_id)); + WARN_ON_ONCE(release_reference(env, st->id)); bpf_mark_reg_not_init(env, st); @@ -1067,10 +1025,10 @@ static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_s if (st->type & PTR_UNTRUSTED) return -EPROTO; - /* only main (first) slot has ref_obj_id set */ - if (i == 0 && !st->ref_obj_id) + /* only main (first) slot has id set */ + if (i == 0 && !st->id) return -EINVAL; - if (i != 0 && st->ref_obj_id) + if (i != 0 && st->id) return -EINVAL; if (st->iter.btf != btf || st->iter.btf_id != btf_id) return -EINVAL; @@ -1109,7 +1067,7 @@ static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env, __mark_reg_known_zero(st); st->type = PTR_TO_STACK; /* we don't have dedicated reg type */ - st->ref_obj_id = id; + st->id = id; st->irq.kfunc_class = kfunc_class; for (i = 0; i < BPF_REG_SIZE; i++) @@ -1143,7 +1101,7 @@ static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_r return -EINVAL; } - err = release_irq_state(env->cur_state, st->ref_obj_id); + err = release_irq_state(env->cur_state, st->id); WARN_ON_ONCE(err && err != -EACCES); if (err) { int insn_idx = 0; @@ -1207,7 +1165,7 @@ static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_r slot = &state->stack[spi]; st = &slot->spilled_ptr; - if (!st->ref_obj_id) + if (!st->id) return -EINVAL; for (i = 0; i < BPF_REG_SIZE; i++) @@ -1448,7 +1406,7 @@ static struct bpf_reference_state *acquire_reference_state(struct bpf_verifier_e return &state->refs[new_ofs]; } -static int acquire_reference(struct bpf_verifier_env *env, int insn_idx) +static int acquire_reference(struct bpf_verifier_env *env, int insn_idx, int parent_id) { struct bpf_reference_state *s; @@ 
-1457,6 +1415,7 @@ static int acquire_reference(struct bpf_verifier_env *env, int insn_idx) return -ENOMEM; s->type = REF_TYPE_PTR; s->id = ++env->id_gen; + s->parent_id = parent_id; return s->id; } @@ -1513,17 +1472,25 @@ static void release_reference_state(struct bpf_verifier_state *state, int idx) return; } -static bool find_reference_state(struct bpf_verifier_state *state, int ptr_id) +static bool find_reference_state(struct bpf_verifier_state *state, int id) { int i; - for (i = 0; i < state->acquired_refs; i++) - if (state->refs[i].id == ptr_id) + for (i = 0; i < state->acquired_refs; i++) { + if (state->refs[i].type != REF_TYPE_PTR) + continue; + if (state->refs[i].id == id) return true; + } return false; } +static bool reg_is_referenced(struct bpf_verifier_env *env, const struct bpf_reg_state *reg) +{ + return find_reference_state(env->cur_state, reg->id); +} + static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr) { void *prev_ptr = NULL; @@ -1837,7 +1804,7 @@ static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) memset(((u8 *)reg) + sizeof(reg->type), 0, offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type)); reg->id = 0; - reg->ref_obj_id = 0; + reg->parent_id = 0; ___mark_reg_known(reg, imm); } @@ -1872,7 +1839,7 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env, } static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type, - bool first_slot, int dynptr_id) + bool first_slot, int id, int parent_id) { /* reg->type has no meaning for STACK_DYNPTR, but when we set reg for * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply @@ -1881,7 +1848,8 @@ static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type ty __mark_reg_known_zero(reg); reg->type = CONST_PTR_TO_DYNPTR; /* Give each dynptr a unique id to uniquely associate slices to it. 
*/ - reg->id = dynptr_id; + reg->id = id; + reg->parent_id = parent_id; reg->dynptr.type = type; reg->dynptr.first_slot = first_slot; } @@ -2161,17 +2129,12 @@ out: /* Mark a register as having a completely unknown (scalar) value. */ void bpf_mark_reg_unknown_imprecise(struct bpf_reg_state *reg) { - /* - * Clear type, off, and union(map_ptr, range) and - * padding between 'type' and union - */ - memset(reg, 0, offsetof(struct bpf_reg_state, var_off)); + s32 subreg_def = reg->subreg_def; + + memset(reg, 0, sizeof(*reg)); reg->type = SCALAR_VALUE; - reg->id = 0; - reg->ref_obj_id = 0; reg->var_off = tnum_unknown; - reg->frameno = 0; - reg->precise = false; + reg->subreg_def = subreg_def; __mark_reg_unbounded(reg); } @@ -4330,7 +4293,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env, * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the * normal store of unreferenced kptr, we must ensure var_off is zero. * Since ref_ptr cannot be accessed directly by BPF insns, check for - * reg->ref_obj_id is not needed here. + * reg->id is not needed here. */ if (__check_ptr_off_reg(env, reg, argno_from_reg(regno), true)) return -EACCES; @@ -4703,8 +4666,8 @@ static int __check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int of * type of narrower access. */ if (base_type(info->reg_type) == PTR_TO_BTF_ID) { - if (info->ref_obj_id && - !find_reference_state(env->cur_state, info->ref_obj_id)) { + if (info->ref_id && + !find_reference_state(env->cur_state, info->ref_id)) { verbose(env, "invalid bpf_context access off=%d. Reference may already be released\n", off); return -EACCES; @@ -4873,10 +4836,10 @@ static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { [CONST_PTR_TO_MAP] = btf_bpf_map_id, }; -static bool is_trusted_reg(const struct bpf_reg_state *reg) +static bool is_trusted_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg) { /* A referenced register is always trusted. 
*/ - if (reg->ref_obj_id) + if (reg_is_referenced(env, reg)) return true; /* Types listed in the reg2btf_ids are always trusted */ @@ -5790,7 +5753,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, ret = env->ops->btf_struct_access(&env->log, reg, off, size); } else { /* Writes are permitted with default btf_struct_access for - * program allocated objects (which always have ref_obj_id > 0), + * program allocated objects (which always have id > 0), * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC. */ if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) { @@ -5799,8 +5762,8 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, } if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) && - !(reg->type & MEM_RCU) && !reg->ref_obj_id) { - verifier_bug(env, "ref_obj_id for allocated object must be non-zero"); + !(reg->type & MEM_RCU) && !reg_is_referenced(env, reg)) { + verifier_bug(env, "allocated object must have a referenced id"); return -EFAULT; } @@ -5819,7 +5782,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, */ flag = PTR_UNTRUSTED; - } else if (is_trusted_reg(reg) || is_rcu_reg(reg)) { + } else if (is_trusted_reg(env, reg) || is_rcu_reg(reg)) { /* By default any pointer obtained from walking a trusted pointer is no * longer trusted, unless the field being accessed has explicitly been * marked as inheriting its parent's state of trust (either full or RCU). 
@@ -6217,8 +6180,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct b if (base_type(info.reg_type) == PTR_TO_BTF_ID) { regs[value_regno].btf = info.btf; regs[value_regno].btf_id = info.btf_id; - regs[value_regno].id = info.ref_obj_id; - regs[value_regno].ref_obj_id = info.ref_obj_id; + regs[value_regno].id = info.ref_id; } if (type_may_be_null(info.reg_type) && !regs[value_regno].id) regs[value_regno].id = ++env->id_gen; @@ -7201,7 +7163,16 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, return 0; } -/* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK +/* + * Validate dynptr arguments for helper, kfunc and subprog. + * + * @dynptr is both input and output. It is populated when the argument is + * tagged with MEM_UNINIT (i.e., the dynptr argument that will be constructed) + * and consumed when the argument is expecting to be an initialized dynptr. + * @parent_id is used to track the referenced parent object (e.g., file or skb in + * qdisc program) when constructing a dynptr. + * + * There are two register types representing a bpf_dynptr, one is PTR_TO_STACK * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR. 
* * In both cases we deal with the first 8 bytes, but need to mark the next 8 @@ -7217,7 +7188,7 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, */ static int process_dynptr_func(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int insn_idx, enum bpf_arg_type arg_type, - int clone_ref_obj_id, struct bpf_dynptr_desc *dynptr) + int parent_id, struct bpf_dynptr_desc *dynptr) { int spi, err = 0; @@ -7258,7 +7229,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, struct bpf_reg_stat return err; } - err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, clone_ref_obj_id); + err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, parent_id, dynptr); } else /* OBJ_RELEASE and None case from above */ { /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */ if (reg->type == CONST_PTR_TO_DYNPTR && (arg_type & OBJ_RELEASE)) { @@ -7300,17 +7271,17 @@ static int process_dynptr_func(struct bpf_verifier_env *env, struct bpf_reg_stat if (dynptr) { dynptr->type = reg->dynptr.type; dynptr->id = reg->id; - dynptr->ref_obj_id = reg->ref_obj_id; + dynptr->parent_id = reg->parent_id; } } return err; } -static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi) +static u32 iter_ref_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi) { struct bpf_func_state *state = bpf_func(env, reg); - return state->stack[spi].spilled_ptr.ref_obj_id; + return state->stack[spi].spilled_ptr.id; } static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta) @@ -7416,7 +7387,7 @@ static int process_iter_arg(struct bpf_verifier_env *env, struct bpf_reg_state * /* remember meta->iter info for process_iter_next_call() */ meta->iter.spi = spi; meta->iter.frameno = reg->frameno; - meta->ref_obj_id = iter_ref_obj_id(env, reg, spi); + meta->id = iter_ref_id(env, reg, spi); if (is_iter_destroy_kfunc(meta)) { err = unmark_stack_slots_iter(env, reg, nr_slots); @@ -7999,7 
+7970,7 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env, /* When referenced register is passed to release function, its fixed * offset must be 0. * - * We will check arg_type_is_release reg has ref_obj_id when storing + * We will check arg_type_is_release reg has id when storing * meta->release_regno. */ if (arg_type_is_release(arg_type)) { @@ -8260,7 +8231,7 @@ skip_type_check: */ if (reg->type == PTR_TO_STACK) { spi = dynptr_get_spi(env, reg); - if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) { + if (spi < 0 || !state->stack[spi].spilled_ptr.id) { verbose(env, "arg %d is an unacquired reference\n", regno); return -EINVAL; } @@ -8268,7 +8239,7 @@ skip_type_check: verbose(env, "cannot release unowned const bpf_dynptr\n"); return -EINVAL; } - } else if (!reg->ref_obj_id && !bpf_register_is_null(reg)) { + } else if (!reg_is_referenced(env, reg) && !bpf_register_is_null(reg)) { verbose(env, "R%d must be referenced when passed to release function\n", regno); return -EINVAL; @@ -8280,14 +8251,14 @@ skip_type_check: meta->release_regno = regno; } - if (reg->ref_obj_id && base_type(arg_type) != ARG_KPTR_XCHG_DEST) { - if (meta->ref_obj_id) { - verbose(env, "more than one arg with ref_obj_id R%d %u %u", - regno, reg->ref_obj_id, - meta->ref_obj_id); + if (reg_is_referenced(env, reg) && base_type(arg_type) != ARG_KPTR_XCHG_DEST) { + if (meta->id) { + verbose(env, "more than one arg with referenced id R%d %u %u", + regno, reg->id, + meta->id); return -EACCES; } - meta->ref_obj_id = reg->ref_obj_id; + meta->id = reg->id; } switch (base_type(arg_type)) { @@ -8898,14 +8869,14 @@ static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range reg->range = AT_PKT_END; } -static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id) +static int release_reference_nomark(struct bpf_verifier_state *state, int id) { int i; for (i = 0; i < state->acquired_refs; i++) { if (state->refs[i].type != REF_TYPE_PTR) continue; 
- if (state->refs[i].id == ref_obj_id) { + if (state->refs[i].id == id) { release_reference_state(state, i); return 0; } @@ -8913,26 +8884,83 @@ static int release_reference_nomark(struct bpf_verifier_state *state, int ref_ob return -EINVAL; } -/* The pointer with the specified id has released its reference to kernel - * resources. Identify all copies of the same pointer and clear the reference. - * - * This is the release function corresponding to acquire_reference(). Idempotent. - */ -static int release_reference(struct bpf_verifier_env *env, int ref_obj_id) +static int idstack_push(struct bpf_idmap *idmap, u32 id) +{ + int i; + + if (!id) + return 0; + + for (i = 0; i < idmap->cnt; i++) + if (idmap->map[i].old == id) + return 0; + + if (WARN_ON_ONCE(idmap->cnt >= BPF_ID_MAP_SIZE)) + return -EFAULT; + + idmap->map[idmap->cnt++].old = id; + return 0; +} + +static int idstack_pop(struct bpf_idmap *idmap) { + if (!idmap->cnt) + return 0; + + return idmap->map[--idmap->cnt].old; +} + +/* Release id and objects derived from it iteratively in a DFS manner */ +static int release_reference(struct bpf_verifier_env *env, int id) +{ + u32 mask = (1 << STACK_SPILL) | (1 << STACK_DYNPTR); struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_idmap *idstack = &env->idmap_scratch; + struct bpf_stack_state *stack; struct bpf_func_state *state; struct bpf_reg_state *reg; - int err; + int i, err; - err = release_reference_nomark(vstate, ref_obj_id); + idstack->cnt = 0; + err = idstack_push(idstack, id); if (err) return err; - bpf_for_each_reg_in_vstate(vstate, state, reg, ({ - if (reg->ref_obj_id == ref_obj_id) - mark_reg_invalid(env, reg); - })); + if (find_reference_state(vstate, id)) + WARN_ON_ONCE(release_reference_nomark(vstate, id)); + + while ((id = idstack_pop(idstack))) { + /* + * Child references are inaccessible after parent is released, + * any child references that exist at this point are a leak. 
+ */ + for (i = 0; i < vstate->acquired_refs; i++) { + if (vstate->refs[i].type != REF_TYPE_PTR) + continue; + if (vstate->refs[i].parent_id != id) + continue; + verbose(env, "Leaking reference id=%d alloc_insn=%d. Release it first.\n", + vstate->refs[i].id, vstate->refs[i].insn_idx); + return -EINVAL; + } + + bpf_for_each_reg_in_vstate_mask(vstate, state, reg, stack, mask, ({ + if (reg->id != id && reg->parent_id != id) + continue; + + /* Free objects derived from the current object */ + if (reg->parent_id == id) { + err = idstack_push(idstack, reg->id); + if (err) + return err; + } + + if (!stack || stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL) + mark_reg_invalid(env, reg); + else if (stack->slot_type[BPF_REG_SIZE - 1] == STACK_DYNPTR) + invalidate_dynptr(env, stack); + })); + } return 0; } @@ -9833,7 +9861,7 @@ static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exi * kernel. Type checks are performed later in check_return_code. */ if (type == BPF_PROG_TYPE_STRUCT_OPS && !exception_exit && - reg->ref_obj_id == state->refs[i].id) + reg->id == state->refs[i].id) continue; verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", state->refs[i].id, state->refs[i].insn_idx); @@ -10116,18 +10144,18 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn err = -EINVAL; if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) { err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]); - } else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) { - u32 ref_obj_id = meta.ref_obj_id; + } else if (func_id == BPF_FUNC_kptr_xchg && meta.id) { + u32 id = meta.id; bool in_rcu = in_rcu_cs(env); struct bpf_func_state *state; struct bpf_reg_state *reg; - err = release_reference_nomark(env->cur_state, ref_obj_id); + err = release_reference_nomark(env->cur_state, id); if (!err) { bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ - if (reg->ref_obj_id == ref_obj_id) { + if (reg->id == id) { if 
(in_rcu && (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) { - reg->ref_obj_id = 0; + reg->id = 0; reg->type &= ~MEM_ALLOC; reg->type |= MEM_RCU; } else { @@ -10136,19 +10164,16 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } })); } - } else if (meta.ref_obj_id) { - err = release_reference(env, meta.ref_obj_id); + } else if (meta.id) { + err = release_reference(env, meta.id); } else if (bpf_register_is_null(&regs[meta.release_regno])) { - /* meta.ref_obj_id can only be 0 if register that is meant to be + /* meta.id can only be 0 if register that is meant to be * released is NULL, which must be > R0. */ err = 0; } - if (err) { - verbose(env, "func %s#%d reference has not been acquired before\n", - func_id_name(func_id), func_id); + if (err) return err; - } } switch (func_id) { @@ -10413,24 +10438,40 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EFAULT; } - if (is_ptr_cast_function(func_id)) { - /* For release_reference() */ - regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; + if (is_ptr_cast_function(func_id) && + find_reference_state(env->cur_state, meta.id)) { + struct bpf_verifier_state *branch; + struct bpf_reg_state *r0; + + /* + * In order for a release of any of the original or cast pointers + * to invalidate all other pointers, reuse the same reference id for + * the cast result. + * This reference id can't be used for nullness propagation, + * as cast might return NULL for a non-NULL input. + * Hence, explore the NULL case as a separate branch. 
+ */ + branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false); + if (IS_ERR(branch)) + return PTR_ERR(branch); + + r0 = &branch->frame[branch->curframe]->regs[BPF_REG_0]; + __mark_reg_known_zero(r0); + r0->type = SCALAR_VALUE; + + regs[BPF_REG_0].type &= ~PTR_MAYBE_NULL; + regs[BPF_REG_0].id = meta.id; } else if (is_acquire_function(func_id, meta.map.ptr)) { - int id = acquire_reference(env, insn_idx); + int id = acquire_reference(env, insn_idx, 0); if (id < 0) return id; - /* For mark_ptr_or_null_reg() */ + regs[BPF_REG_0].id = id; - /* For release_reference() */ - regs[BPF_REG_0].ref_obj_id = id; } - if (func_id == BPF_FUNC_dynptr_data) { - regs[BPF_REG_0].dynptr_id = meta.dynptr.id; - regs[BPF_REG_0].ref_obj_id = meta.dynptr.ref_obj_id; - } + if (func_id == BPF_FUNC_dynptr_data) + regs[BPF_REG_0].parent_id = meta.dynptr.id; err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta); if (err) @@ -11242,7 +11283,7 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, * btf_struct_ids_match() to walk the struct at the 0th offset, and * resolve types. 
*/ - if ((is_kfunc_release(meta) && reg->ref_obj_id) || + if ((is_kfunc_release(meta) && reg_is_referenced(env, reg)) || btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id)) strict_type_match = true; @@ -11346,36 +11387,21 @@ static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state return 0; } -static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id) +static void ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 id) { - struct bpf_verifier_state *state = env->cur_state; struct bpf_func_state *unused; struct bpf_reg_state *reg; - int i; - if (!ref_obj_id) { - verifier_bug(env, "ref_obj_id is zero for owning -> non-owning conversion"); - return -EFAULT; - } + WARN_ON_ONCE(release_reference_nomark(env->cur_state, id)); - for (i = 0; i < state->acquired_refs; i++) { - if (state->refs[i].id != ref_obj_id) - continue; - - /* Clear ref_obj_id here so release_reference doesn't clobber - * the whole reg - */ - bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({ - if (reg->ref_obj_id == ref_obj_id) { - reg->ref_obj_id = 0; - ref_set_non_owning(env, reg); - } - })); - return 0; - } + bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({ + if (reg->id == id) { + reg->id = 0; + ref_set_non_owning(env, reg); + } + })); - verifier_bug(env, "ref state missing for ref_obj_id"); - return -EFAULT; + return; } /* Implementation details: @@ -11907,14 +11933,14 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return -EACCES; } - if (reg->ref_obj_id) { - if (is_kfunc_release(meta) && meta->ref_obj_id) { - verifier_bug(env, "more than one arg with ref_obj_id %s %u %u", - reg_arg_name(env, argno), reg->ref_obj_id, - meta->ref_obj_id); + if (reg_is_referenced(env, reg)) { + if (is_kfunc_release(meta) && meta->id) { + verifier_bug(env, "more than one arg with referenced id %s %u %u", + reg_arg_name(env, argno), reg->id, + meta->id); return -EFAULT; } - 
meta->ref_obj_id = reg->ref_obj_id; + meta->id = reg->id; if (is_kfunc_release(meta)) { if (regno < 0) { verbose(env, "%s release arg cannot be a stack argument\n", @@ -11975,7 +12001,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ fallthrough; case KF_ARG_PTR_TO_ALLOC_BTF_ID: case KF_ARG_PTR_TO_BTF_ID: - if (!is_trusted_reg(reg)) { + if (!is_trusted_reg(env, reg)) { if (!is_kfunc_rcu(meta)) { verbose(env, "%s must be referenced or trusted\n", reg_arg_name(env, argno)); @@ -12013,7 +12039,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return -EFAULT; } - if (is_kfunc_release(meta) && reg->ref_obj_id) + if (is_kfunc_release(meta) && reg_is_referenced(env, reg)) arg_type |= OBJ_RELEASE; ret = check_func_arg_reg_off(env, reg, argno, arg_type); if (ret < 0) @@ -12052,7 +12078,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ reg_arg_name(env, argno)); return -EINVAL; } - if (!reg->ref_obj_id) { + if (!reg_is_referenced(env, reg)) { verbose(env, "allocated object must be referenced\n"); return -EINVAL; } @@ -12064,7 +12090,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_DYNPTR: { enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR; - int clone_ref_obj_id = 0; if (is_kfunc_arg_uninit(btf, &args[i])) dynptr_arg_type |= MEM_UNINIT; @@ -12095,15 +12120,10 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type); - clone_ref_obj_id = meta->dynptr.ref_obj_id; - if (dynptr_type_refcounted(parent_type) && !clone_ref_obj_id) { - verifier_bug(env, "missing ref obj id for parent of clone"); - return -EFAULT; - } } ret = process_dynptr_func(env, reg, argno, insn_idx, dynptr_arg_type, - clone_ref_obj_id, &meta->dynptr); + meta->id, &meta->dynptr); if (ret < 0) return ret; break; @@ -12126,7 +12146,8 @@ static int 
check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ reg_arg_name(env, argno)); return -EINVAL; } - if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) { + if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && + !reg_is_referenced(env, reg)) { verbose(env, "allocated object must be referenced\n"); return -EINVAL; } @@ -12141,7 +12162,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ reg_arg_name(env, argno)); return -EINVAL; } - if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) { + if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && + !reg_is_referenced(env, reg)) { verbose(env, "allocated object must be referenced\n"); return -EINVAL; } @@ -12151,7 +12173,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ break; case KF_ARG_PTR_TO_LIST_NODE: if (is_kfunc_arg_nonown_allowed(btf, &args[i]) && - type_is_non_owning_ref(reg->type) && !reg->ref_obj_id) { + type_is_non_owning_ref(reg->type) && !reg_is_referenced(env, reg)) { /* Allow bpf_list_front/back return value for * __nonown_allowed list-node arguments. 
*/ @@ -12162,7 +12184,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ reg_arg_name(env, argno)); return -EINVAL; } - if (!reg->ref_obj_id) { + if (!reg_is_referenced(env, reg)) { verbose(env, "allocated object must be referenced\n"); return -EINVAL; } @@ -12178,12 +12200,13 @@ check_ok: reg_arg_name(env, argno)); return -EINVAL; } - if (!reg->ref_obj_id) { + if (!reg_is_referenced(env, reg)) { verbose(env, "allocated object must be referenced\n"); return -EINVAL; } } else { - if (!type_is_non_owning_ref(reg->type) && !reg->ref_obj_id) { + if (!type_is_non_owning_ref(reg->type) && + !reg_is_referenced(env, reg)) { verbose(env, "%s can only take non-owning or refcounted bpf_rb_node pointer\n", func_name); return -EINVAL; } @@ -12764,12 +12787,7 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca verifier_bug(env, "no dynptr id"); return -EFAULT; } - regs[BPF_REG_0].dynptr_id = meta->dynptr.id; - - /* we don't need to set BPF_REG_0's ref obj id - * because packet slices are not refcounted (see - * dynptr_type_refcounted) - */ + regs[BPF_REG_0].parent_id = meta->dynptr.id; } else { return 0; } @@ -12783,13 +12801,13 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable; - u32 i, nargs, ptr_type_id, release_ref_obj_id; struct bpf_reg_state *regs = cur_regs(env); const char *func_name, *ptr_type_name; const struct btf_type *t, *ptr_type; struct bpf_kfunc_call_arg_meta meta; struct bpf_insn_aux_data *insn_aux; int err, insn_idx = *insn_idx_p; + u32 i, nargs, ptr_type_id, id; const struct btf_param *args; struct btf *desc_btf; @@ -12902,6 +12920,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (rcu_lock) { env->cur_state->active_rcu_locks++; } else if (rcu_unlock) { + struct bpf_stack_state *stack; struct bpf_func_state *state; struct bpf_reg_state 
*reg; u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER); @@ -12911,7 +12930,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EINVAL; } if (--env->cur_state->active_rcu_locks == 0) { - bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({ + bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, stack, clear_mask, ({ if (reg->type & MEM_RCU) { reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL); reg->type |= PTR_UNTRUSTED; @@ -12950,35 +12969,20 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (meta.release_regno) { struct bpf_reg_state *reg = &regs[meta.release_regno]; - if (meta.dynptr.ref_obj_id) { + if (meta.dynptr.id) { err = unmark_stack_slots_dynptr(env, reg); } else { - err = release_reference(env, reg->ref_obj_id); - if (err) - verbose(env, "kfunc %s#%d reference has not been acquired before\n", - func_name, meta.func_id); + err = release_reference(env, reg->id); } if (err) return err; } if (is_bpf_list_push_kfunc(meta.func_id) || is_bpf_rbtree_add_kfunc(meta.func_id)) { - release_ref_obj_id = regs[BPF_REG_2].ref_obj_id; + id = regs[BPF_REG_2].id; insn_aux->insert_off = regs[BPF_REG_2].var_off.value; insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id); - err = ref_convert_owning_non_owning(env, release_ref_obj_id); - if (err) { - verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n", - func_name, meta.func_id); - return err; - } - - err = release_reference(env, release_ref_obj_id); - if (err) { - verbose(env, "kfunc %s#%d reference has not been acquired before\n", - func_name, meta.func_id); - return err; - } + ref_convert_owning_non_owning(env, id); } if (meta.func_id == special_kfunc_list[KF_bpf_throw]) { @@ -13063,8 +13067,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].type |= MEM_RDONLY; /* Ensures we don't access the memory after a release_reference() 
*/ - if (meta.ref_obj_id) - regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; + if (meta.id) + regs[BPF_REG_0].parent_id = meta.id; if (is_kfunc_rcu_protected(&meta)) regs[BPF_REG_0].type |= MEM_RCU; @@ -13110,13 +13114,10 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); if (is_kfunc_acquire(&meta)) { - int id = acquire_reference(env, insn_idx); - + id = acquire_reference(env, insn_idx, 0); if (id < 0) return id; - if (is_kfunc_ret_null(&meta)) - regs[BPF_REG_0].id = id; - regs[BPF_REG_0].ref_obj_id = id; + regs[BPF_REG_0].id = id; } else if (is_rbtree_node_type(ptr_type) || is_list_node_type(ptr_type)) { ref_set_non_owning(env, &regs[BPF_REG_0]); } @@ -15347,7 +15348,7 @@ static int is_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *r if (!is_reg_const(reg2, is_jmp32)) return -1; - if (!reg_not_null(reg1)) + if (!reg_not_null(env, reg1)) return -1; /* If pointer is valid tests against zero will fail so we can @@ -15564,7 +15565,7 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, WARN_ON_ONCE(!tnum_equals_const(reg->var_off, 0))) return; if (is_null) { - /* We don't need id and ref_obj_id from this point + /* We don't need id from this point * onwards anymore, thus we should better reset it, * so that state pruning has chances to take effect. */ @@ -15591,10 +15592,9 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, { struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_reg_state *regs = state->regs, *reg; - u32 ref_obj_id = regs[regno].ref_obj_id; u32 id = regs[regno].id; - if (ref_obj_id && ref_obj_id == id && is_null) + if (is_null && find_reference_state(vstate, id)) /* regs[regno] is in the " == NULL" branch. * No one could have freed the reference state before * doing the NULL check. 
@@ -16433,7 +16433,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char ret_type = btf_type_resolve_ptr(prog->aux->attach_btf, prog->aux->attach_func_proto->type, NULL); - if (ret_type && ret_type == reg_type && reg->ref_obj_id) + if (ret_type && ret_type == reg_type && reg_is_referenced(env, reg)) return __check_ptr_off_reg(env, reg, argno_from_reg(regno), false); } @@ -18302,7 +18302,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) mark_reg_unknown(env, regs, i); } else if (arg->arg_type == ARG_PTR_TO_DYNPTR) { /* assume unspecial LOCAL dynptr type */ - __mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen); + __mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen, 0); } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) { reg->type = PTR_TO_MEM; reg->type |= arg->arg_type & @@ -18361,8 +18361,8 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) /* Acquire references for struct_ops program arguments tagged with "__ref" */ if (!subprog && env->prog->type == BPF_PROG_TYPE_STRUCT_OPS) { for (i = 0; i < aux->ctx_arg_info_size; i++) - aux->ctx_arg_info[i].ref_obj_id = aux->ctx_arg_info[i].refcounted ? - acquire_reference(env, 0) : 0; + aux->ctx_arg_info[i].ref_id = aux->ctx_arg_info[i].refcounted ? 
+ acquire_reference(env, 0, 0) : 0; } ret = do_check(env); diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index bbe476f4c47d..5c3579438427 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -13,8 +13,8 @@ static struct { const char *err_msg; } spin_lock_fail_tests[] = { { "lock_id_kptr_preserve", - "[0-9]\\+: (bf) r1 = r0 ; R0=ptr_foo(id=2,ref_obj_id=2)" - " R1=ptr_foo(id=2,ref_obj_id=2) refs=2\n" + "[0-9]\\+: (bf) r1 = r0 ; R0=ptr_foo(id=2)" + " R1=ptr_foo(id=2) refs=2\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=ptr_ expected=percpu_ptr_" }, { "lock_id_global_zero", diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index dbd97add5a5a..fa0beeaad1be 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -78,7 +78,7 @@ static int get_map_val_dynptr(struct bpf_dynptr *ptr) * bpf_ringbuf_submit/discard_dynptr call */ SEC("?raw_tp") -__failure __msg("Unreleased reference id=2") +__failure __msg("Unreleased reference id=1") int ringbuf_missing_release1(void *ctx) { struct bpf_dynptr ptr = {}; @@ -91,7 +91,7 @@ int ringbuf_missing_release1(void *ctx) } SEC("?raw_tp") -__failure __msg("Unreleased reference id=4") +__failure __msg("Unreleased reference id=3") int ringbuf_missing_release2(void *ctx) { struct bpf_dynptr ptr1, ptr2; diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c index af8f9ec1ea98..646026430e9b 100644 --- a/tools/testing/selftests/bpf/progs/iters_state_safety.c +++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c @@ -30,7 +30,7 @@ int force_clang_to_emit_btf_for_externs(void *ctx) SEC("?raw_tp") __success __log_level(2) -__msg("fp-8=iter_num(ref_id=1,state=active,depth=0)") 
+__msg("fp-8=iter_num(id=1,state=active,depth=0)") int create_and_destroy(void *ctx) { struct bpf_iter_num iter; @@ -196,7 +196,7 @@ int leak_iter_from_subprog_fail(void *ctx) SEC("?raw_tp") __success __log_level(2) -__msg("fp-8=iter_num(ref_id=1,state=active,depth=0)") +__msg("fp-8=iter_num(id=1,state=active,depth=0)") int valid_stack_reuse(void *ctx) { struct bpf_iter_num iter; diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c index 9b760dac333e..d00888f6687a 100644 --- a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c +++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c @@ -20,8 +20,8 @@ __s64 res_empty; SEC("raw_tp/sys_enter") __success __log_level(2) -__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)") -__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") +__msg("fp-16=iter_testmod_seq(id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(id=1,state=drained,depth=0)") __msg("call bpf_iter_testmod_seq_destroy") int testmod_seq_empty(const void *ctx) { @@ -38,8 +38,8 @@ __s64 res_full; SEC("raw_tp/sys_enter") __success __log_level(2) -__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)") -__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") +__msg("fp-16=iter_testmod_seq(id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(id=1,state=drained,depth=0)") __msg("call bpf_iter_testmod_seq_destroy") int testmod_seq_full(const void *ctx) { @@ -58,8 +58,8 @@ static volatile int zero = 0; SEC("raw_tp/sys_enter") __success __log_level(2) -__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)") -__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") +__msg("fp-16=iter_testmod_seq(id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(id=1,state=drained,depth=0)") __msg("call bpf_iter_testmod_seq_destroy") int testmod_seq_truncated(const void *ctx) { -- cgit v1.2.3 From 92d681b42746d4497dcc8afb45edd4af5737542f Mon 
Sep 17 00:00:00 2001 From: Amery Hung Date: Thu, 28 May 2026 18:49:29 -0700 Subject: bpf: Remove redundant dynptr arg check for helper unmark_stack_slots_dynptr() already makes sure that CONST_PTR_TO_DYNPTR cannot be released. process_dynptr_func() also prevents passing uninitialized dynptr to helpers expecting initialized dynptr. Now that unmark_stack_slots_dynptr() also reports error returned from release_reference(), there should be no reason to keep these redundant checks. Acked-by: Eduard Zingerman Signed-off-by: Amery Hung Link: https://lore.kernel.org/r/20260529014936.2811085-7-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 21 +-------------------- tools/testing/selftests/bpf/progs/dynptr_fail.c | 6 +++--- .../testing/selftests/bpf/progs/user_ringbuf_fail.c | 4 ++-- 3 files changed, 6 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6d82ca5acacb..4f75e5f95d27 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8220,26 +8220,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, skip_type_check: if (arg_type_is_release(arg_type)) { - if (arg_type_is_dynptr(arg_type)) { - struct bpf_func_state *state = bpf_func(env, reg); - int spi; - - /* Only dynptr created on stack can be released, thus - * the get_spi and stack state checks for spilled_ptr - * should only be done before process_dynptr_func for - * PTR_TO_STACK. 
- */ - if (reg->type == PTR_TO_STACK) { - spi = dynptr_get_spi(env, reg); - if (spi < 0 || !state->stack[spi].spilled_ptr.id) { - verbose(env, "arg %d is an unacquired reference\n", regno); - return -EINVAL; - } - } else { - verbose(env, "cannot release unowned const bpf_dynptr\n"); - return -EINVAL; - } - } else if (!reg_is_referenced(env, reg) && !bpf_register_is_null(reg)) { + if (!arg_type_is_dynptr(arg_type) && !reg_is_referenced(env, reg) && !bpf_register_is_null(reg)) { verbose(env, "R%d must be referenced when passed to release function\n", regno); return -EINVAL; diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index fa0beeaad1be..40a14a5174a5 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -136,7 +136,7 @@ int ringbuf_missing_release_callback(void *ctx) /* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */ SEC("?raw_tp") -__failure __msg("arg 1 is an unacquired reference") +__failure __msg("Expected an initialized dynptr as R1") int ringbuf_release_uninit_dynptr(void *ctx) { struct bpf_dynptr ptr; @@ -650,7 +650,7 @@ int invalid_offset(void *ctx) /* Can't release a dynptr twice */ SEC("?raw_tp") -__failure __msg("arg 1 is an unacquired reference") +__failure __msg("Expected an initialized dynptr as R1") int release_twice(void *ctx) { struct bpf_dynptr ptr; @@ -677,7 +677,7 @@ static int release_twice_callback_fn(__u32 index, void *data) * within a callback function, fails */ SEC("?raw_tp") -__failure __msg("arg 1 is an unacquired reference") +__failure __msg("Expected an initialized dynptr as R1") int release_twice_callback(void *ctx) { struct bpf_dynptr ptr; diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c index 54de0389f878..c0d0422b8030 100644 --- a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c +++ 
b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c @@ -146,7 +146,7 @@ try_discard_dynptr(struct bpf_dynptr *dynptr, void *context) * not be able to read past the end of the pointer. */ SEC("?raw_tp") -__failure __msg("cannot release unowned const bpf_dynptr") +__failure __msg("CONST_PTR_TO_DYNPTR cannot be released") int user_ringbuf_callback_discard_dynptr(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, try_discard_dynptr, NULL, 0); @@ -166,7 +166,7 @@ try_submit_dynptr(struct bpf_dynptr *dynptr, void *context) * not be able to read past the end of the pointer. */ SEC("?raw_tp") -__failure __msg("cannot release unowned const bpf_dynptr") +__failure __msg("CONST_PTR_TO_DYNPTR cannot be released") int user_ringbuf_callback_submit_dynptr(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, try_submit_dynptr, NULL, 0); -- cgit v1.2.3 From b7dd2b388657d99689161e82ed13515505838232 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Thu, 28 May 2026 18:49:30 -0700 Subject: bpf: Unify referenced object tracking in verifier Helpers and kfuncs independently tracked referenced object metadata using standalone id fields in their respective arg_meta structs. This led to duplicated logic and inconsistent error handling between the two paths. Introduce struct ref_obj_desc to consolidate id and parent_id along with a count of how many arguments carry a reference. Add update_ref_obj() to populate it from a bpf_reg_state, replacing open-coded assignments in check_func_arg(), check_kfunc_args(), and process_iter_arg(). Add validate_ref_obj() to check for ambiguous ref_obj before using it. For ref_obj releasing helpers and kfuncs, keep checking it before calling update_ref_obj() for now. A later patch will make these functions not depending on ref_obj. For other users of ref_obj, move the checks to the use locations. For helper, this means moving the checks inside helper_multiple_ref_obj_use() to use locations. is_acquire_function() is dropped as ref_obj is never used. 
Pass ref_obj_desc into process_dynptr_func()/mark_stack_slots_dynptr() instead of a bare parent_id to make it less confusing. Drop the selftest introduced in 7ec899ac90a2 ("selftests/bpf: Negative test case for ref_obj_id in args") since the verifier no longer complains about ambiguous ref_obj if it is not used. Acked-by: Eduard Zingerman Signed-off-by: Amery Hung Link: https://lore.kernel.org/r/20260529014936.2811085-8-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 14 ++- kernel/bpf/verifier.c | 122 +++++++++++---------- .../selftests/bpf/progs/test_ringbuf_map_key.c | 11 +- tools/testing/selftests/bpf/verifier/calls.c | 24 ---- 4 files changed, 78 insertions(+), 93 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 75b287d8d92f..b0521ba7787a 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -1424,6 +1424,18 @@ struct bpf_dynptr_desc { u32 parent_id; }; +/* + * The last seen rereferenced object; Updated by update_ref_obj() when a register refers to a + * referenced object. Used when the helper or kfunc is releasing a referenced object, casting + * a referenced object, returning allocated memory derived from referenced object or creating + * a dynptr with a referenced object as parent. 
+ */ +struct ref_obj_desc { + u32 id; + u32 parent_id; + u8 cnt; +}; + struct bpf_kfunc_call_arg_meta { /* In parameters */ struct btf *btf; @@ -1432,7 +1444,6 @@ struct bpf_kfunc_call_arg_meta { const struct btf_type *func_proto; const char *func_name; /* Out parameters */ - u32 id; u8 release_regno; bool r0_rdonly; u32 ret_btf_id; @@ -1470,6 +1481,7 @@ struct bpf_kfunc_call_arg_meta { } iter; struct bpf_map_desc map; struct bpf_dynptr_desc dynptr; + struct ref_obj_desc ref_obj; u64 mem_size; }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4f75e5f95d27..bc8a09c858d8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -231,9 +231,28 @@ static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state) (poisoned ? BPF_MAP_KEY_POISON : 0ULL); } +static void update_ref_obj(struct ref_obj_desc *ref_obj, struct bpf_reg_state *reg) +{ + ref_obj->id = reg->id; + ref_obj->parent_id = reg->parent_id; + ref_obj->cnt++; +} + +static int validate_ref_obj(struct bpf_verifier_env *env, struct ref_obj_desc *ref_obj) +{ + if (ref_obj->cnt > 1) { + verifier_bug(env, "function expects only one referenced object but got %d\n", + ref_obj->cnt); + return -EFAULT; + } + + return 0; +} + struct bpf_call_arg_meta { struct bpf_map_desc map; struct bpf_dynptr_desc dynptr; + struct ref_obj_desc ref_obj; bool raw_mode; bool pkt_access; u8 release_regno; @@ -241,7 +260,6 @@ struct bpf_call_arg_meta { int access_size; int mem_size; u64 msize_max_value; - u32 id; int func_id; struct btf *btf; u32 btf_id; @@ -528,20 +546,6 @@ bool bpf_is_may_goto_insn(struct bpf_insn *insn) return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO; } -static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id, - const struct bpf_map *map) -{ - int ref_obj_uses = 0; - - if (is_ptr_cast_function(func_id)) - ref_obj_uses++; - if (is_acquire_function(func_id, map)) - ref_obj_uses++; - - return ref_obj_uses > 1; -} - - static bool 
is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots) { int allocated_slots = state->allocated_stack / BPF_REG_SIZE; @@ -670,11 +674,11 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi); static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - enum bpf_arg_type arg_type, int insn_idx, int parent_id, - struct bpf_dynptr_desc *dynptr) + enum bpf_arg_type arg_type, int insn_idx, + struct ref_obj_desc *ref_obj, struct bpf_dynptr_desc *dynptr) { struct bpf_func_state *state = bpf_func(env, reg); - int spi, i, err; + int spi, i, err, parent_id = 0; enum bpf_dynptr_type type; spi = dynptr_get_spi(env, reg); @@ -707,6 +711,13 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_ return -EINVAL; if (dynptr->type == BPF_DYNPTR_TYPE_INVALID) { /* dynptr constructors */ + err = validate_ref_obj(env, ref_obj); + if (err) + return err; + + /* Track parent's id if the parent is a referenced object */ + parent_id = ref_obj->id; + if (dynptr_type_referenced(type)) { int id; @@ -7188,7 +7199,7 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, */ static int process_dynptr_func(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int insn_idx, enum bpf_arg_type arg_type, - int parent_id, struct bpf_dynptr_desc *dynptr) + struct ref_obj_desc *ref_obj, struct bpf_dynptr_desc *dynptr) { int spi, err = 0; @@ -7229,7 +7240,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, struct bpf_reg_stat return err; } - err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, parent_id, dynptr); + err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, ref_obj, dynptr); } else /* OBJ_RELEASE and None case from above */ { /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */ if (reg->type == CONST_PTR_TO_DYNPTR && (arg_type & OBJ_RELEASE)) { @@ -7277,13 +7288,6 @@ static int 
process_dynptr_func(struct bpf_verifier_env *env, struct bpf_reg_stat return err; } -static u32 iter_ref_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi) -{ - struct bpf_func_state *state = bpf_func(env, reg); - - return state->stack[spi].spilled_ptr.id; -} - static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta) { return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY); @@ -7316,6 +7320,7 @@ static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx, static int process_iter_arg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int insn_idx, struct bpf_kfunc_call_arg_meta *meta) { + struct bpf_func_state *state = bpf_func(env, reg); const struct btf_type *t; u32 arg_idx = arg_idx_from_argno(argno); int spi, err, i, nr_slots, btf_id; @@ -7387,7 +7392,7 @@ static int process_iter_arg(struct bpf_verifier_env *env, struct bpf_reg_state * /* remember meta->iter info for process_iter_next_call() */ meta->iter.spi = spi; meta->iter.frameno = reg->frameno; - meta->id = iter_ref_id(env, reg, spi); + update_ref_obj(&meta->ref_obj, &state->stack[spi].spilled_ptr); if (is_iter_destroy_kfunc(meta)) { err = unmark_stack_slots_iter(env, reg, nr_slots); @@ -8166,6 +8171,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, u32 regno = BPF_REG_1 + arg; struct bpf_reg_state *reg = reg_state(env, regno); enum bpf_arg_type arg_type = fn->arg_type[arg]; + argno_t argno = argno_from_arg(arg + 1); enum bpf_reg_type type = reg->type; u32 *arg_btf_id = NULL; u32 key_size; @@ -8232,15 +8238,8 @@ skip_type_check: meta->release_regno = regno; } - if (reg_is_referenced(env, reg) && base_type(arg_type) != ARG_KPTR_XCHG_DEST) { - if (meta->id) { - verbose(env, "more than one arg with referenced id R%d %u %u", - regno, reg->id, - meta->id); - return -EACCES; - } - meta->id = reg->id; - } + if (reg_is_referenced(env, reg)) + update_ref_obj(&meta->ref_obj, reg); switch (base_type(arg_type)) { 
case ARG_CONST_MAP_PTR: @@ -8379,7 +8378,7 @@ skip_type_check: true, meta); break; case ARG_PTR_TO_DYNPTR: - err = process_dynptr_func(env, reg, argno_from_reg(regno), insn_idx, arg_type, 0, + err = process_dynptr_func(env, reg, argno_from_reg(regno), insn_idx, arg_type, &meta->ref_obj, &meta->dynptr); if (err) return err; @@ -9042,6 +9041,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, struct bpf_subprog_info *sub = subprog_info(env, subprog); struct bpf_func_state *caller = cur_func(env); struct bpf_verifier_log *log = &env->log; + struct ref_obj_desc ref_obj = {}; u32 i; int ret, err; @@ -9119,7 +9119,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, if (ret) return ret; - ret = process_dynptr_func(env, reg, argno, -1, arg->arg_type, 0, NULL); + ret = process_dynptr_func(env, reg, argno, -1, arg->arg_type, &ref_obj, NULL); if (ret) return ret; } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) { @@ -10125,8 +10125,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn err = -EINVAL; if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) { err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]); - } else if (func_id == BPF_FUNC_kptr_xchg && meta.id) { - u32 id = meta.id; + } else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj.id) { + u32 id = meta.ref_obj.id; bool in_rcu = in_rcu_cs(env); struct bpf_func_state *state; struct bpf_reg_state *reg; @@ -10145,10 +10145,10 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } })); } - } else if (meta.id) { - err = release_reference(env, meta.id); + } else if (meta.ref_obj.id) { + err = release_reference(env, meta.ref_obj.id); } else if (bpf_register_is_null(&regs[meta.release_regno])) { - /* meta.id can only be 0 if register that is meant to be + /* meta.ref_obj.id can only be 0 if register that is meant to be * released is NULL, which must be > R0. 
*/ err = 0; @@ -10413,17 +10413,15 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn if (type_may_be_null(regs[BPF_REG_0].type)) regs[BPF_REG_0].id = ++env->id_gen; - if (helper_multiple_ref_obj_use(func_id, meta.map.ptr)) { - verifier_bug(env, "func %s#%d sets ref_obj_id more than once", - func_id_name(func_id), func_id); - return -EFAULT; - } - if (is_ptr_cast_function(func_id) && - find_reference_state(env->cur_state, meta.id)) { + find_reference_state(env->cur_state, meta.ref_obj.id)) { struct bpf_verifier_state *branch; struct bpf_reg_state *r0; + err = validate_ref_obj(env, &meta.ref_obj); + if (err) + return err; + /* * In order for a release of any of the original or cast pointers * to invalidate all other pointers, reuse the same reference id for @@ -10441,7 +10439,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn r0->type = SCALAR_VALUE; regs[BPF_REG_0].type &= ~PTR_MAYBE_NULL; - regs[BPF_REG_0].id = meta.id; + regs[BPF_REG_0].id = meta.ref_obj.id; } else if (is_acquire_function(func_id, meta.map.ptr)) { int id = acquire_reference(env, insn_idx, 0); @@ -11915,13 +11913,13 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } if (reg_is_referenced(env, reg)) { - if (is_kfunc_release(meta) && meta->id) { - verifier_bug(env, "more than one arg with referenced id %s %u %u", - reg_arg_name(env, argno), reg->id, - meta->id); + if (is_kfunc_release(meta) && meta->ref_obj.cnt) { + verbose(env, "more than one arg with referenced id %s %u %u", + reg_arg_name(env, argno), reg->id, + meta->ref_obj.id); return -EFAULT; } - meta->id = reg->id; + update_ref_obj(&meta->ref_obj, reg); if (is_kfunc_release(meta)) { if (regno < 0) { verbose(env, "%s release arg cannot be a stack argument\n", @@ -12104,7 +12102,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } ret = process_dynptr_func(env, reg, argno, insn_idx, dynptr_arg_type, - 
meta->id, &meta->dynptr); + &meta->ref_obj, &meta->dynptr); if (ret < 0) return ret; break; @@ -13048,8 +13046,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].type |= MEM_RDONLY; /* Ensures we don't access the memory after a release_reference() */ - if (meta.id) - regs[BPF_REG_0].parent_id = meta.id; + if (meta.ref_obj.id) { + err = validate_ref_obj(env, &meta.ref_obj); + if (err) + return err; + regs[BPF_REG_0].parent_id = meta.ref_obj.id; + } if (is_kfunc_rcu_protected(&meta)) regs[BPF_REG_0].type |= MEM_RCU; diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c index 21bb7da90ea5..0efafa927a3d 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c @@ -35,7 +35,7 @@ SEC("fentry/" SYS_PREFIX "sys_getpgid") int test_ringbuf_mem_map_key(void *ctx) { int cur_pid = bpf_get_current_pid_tgid() >> 32; - struct sample *sample, sample_copy; + struct sample *sample; int *lookup_val; if (cur_pid != pid) @@ -55,16 +55,11 @@ int test_ringbuf_mem_map_key(void *ctx) lookup_val = (int *)bpf_map_lookup_elem(&hash_map, sample); __sink(lookup_val); - /* workaround - memcpy is necessary so that verifier doesn't - * complain with: - * verifier internal error: more than one arg with ref_obj_id R3 - * when trying to do bpf_map_update_elem(&hash_map, sample, &sample->seq, BPF_ANY); - * + /* * Since bpf_map_lookup_elem above uses 'sample' as key, test using * sample field as value below */ - __builtin_memcpy(&sample_copy, sample, sizeof(struct sample)); - bpf_map_update_elem(&hash_map, &sample_copy, &sample->seq, BPF_ANY); + bpf_map_update_elem(&hash_map, sample, &sample->seq, BPF_ANY); bpf_ringbuf_submit(sample, 0); return 0; diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 0bb4337552c8..42d523a21a43 100644 --- 
a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -2410,27 +2410,3 @@ .errstr_unpriv = "", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, -{ - "calls: several args with ref_obj_id", - .insns = { - /* Reserve at least sizeof(struct iphdr) bytes in the ring buffer. - * With a smaller size, the verifier would reject the call to - * bpf_tcp_raw_gen_syncookie_ipv4 before we can reach the - * ref_obj_id error. - */ - BPF_MOV64_IMM(BPF_REG_2, 20), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), - /* if r0 == 0 goto */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tcp_raw_gen_syncookie_ipv4), - BPF_EXIT_INSN(), - }, - .fixup_map_ringbuf = { 2 }, - .result = REJECT, - .errstr = "more than one arg with ref_obj_id", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -- cgit v1.2.3 From bcfcb15fde94ed39068eb1d6e4b9b37d27111965 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Thu, 28 May 2026 18:49:31 -0700 Subject: bpf: Unify release handling for helpers and kfuncs Introduce release_reg() to consolidate the release logic shared by both helpers and kfuncs: dynptr release, kptr_xchg percpu-to-RCU conversion, regular reference release, and NULL pass-through. NULL pass-through is only allowed if the prototype indicates the argument may be null. Determine release_regno from the function prototype/metadata before argument checking, rather than discovering it dynamically during argument processing. For helpers, scan the arg_type array in check_func_proto() via check_proto_release_reg(). For kfuncs, set release_regno to BPF_REG_1 in bpf_fetch_kfunc_arg_meta() when KF_RELEASE is set. In the future when we start adding decl_tag to kfunc arguments, we can just look at the function prototype instead of a release_regno. 
Extract ref_convert_alloc_rcu_protected() and invalidate_rcu_protected_refs() to make it more clear what the code is doing. For ref_convert_alloc_rcu_protected(), it pre-converts MEM_ALLOC | MEM_PERCPU registers to MEM_RCU (clearing id so they survive), then calls release_reference() to invalidate the remaining registers and release the reference state. Add KF_RELEASE to bpf_dynptr_file_discard() so its release_regno is set via fetch_kfunc_meta rather than being assigned manually in the dynptr argument processing. Set arg_type to ARG_PTR_TO_DYNPTR for KF_ARG_PTR_TO_DYNPTR so that check_func_arg_reg_off() correctly allows non-zero stack offsets for dynptr release arguments same as helper. Acked-by: Eduard Zingerman Signed-off-by: Amery Hung Link: https://lore.kernel.org/r/20260529014936.2811085-9-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 6 +- kernel/bpf/helpers.c | 2 +- kernel/bpf/verifier.c | 198 +++++++++++---------- tools/testing/selftests/bpf/prog_tests/cb_refs.c | 2 +- .../selftests/bpf/progs/cgrp_kfunc_failure.c | 6 +- tools/testing/selftests/bpf/progs/map_kptr_fail.c | 2 +- .../selftests/bpf/progs/task_kfunc_failure.c | 6 +- .../selftests/bpf/progs/verifier_global_ptr_args.c | 2 +- .../selftests/bpf/progs/verifier_ref_tracking.c | 2 +- tools/testing/selftests/bpf/progs/verifier_sock.c | 6 +- .../selftests/bpf/progs/verifier_vfs_reject.c | 2 +- .../selftests/bpf/progs/wakeup_source_fail.c | 2 +- 12 files changed, 122 insertions(+), 114 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b0521ba7787a..3dd2d21230af 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -1426,9 +1426,9 @@ struct bpf_dynptr_desc { /* * The last seen rereferenced object; Updated by update_ref_obj() when a register refers to a - * referenced object. 
Used when the helper or kfunc is releasing a referenced object, casting - * a referenced object, returning allocated memory derived from referenced object or creating - * a dynptr with a referenced object as parent. + * referenced object. Used when the helper or kfunc is casting a referenced object, returning + * allocated memory derived from referenced object or creating a dynptr with a referenced + * object as parent. */ struct ref_obj_desc { u32 id; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 9ca195104667..03004e4451f5 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -4957,7 +4957,7 @@ BTF_ID_FLAGS(func, bpf_stream_print_stack, KF_IMPLICIT_ARGS) BTF_ID_FLAGS(func, bpf_task_work_schedule_signal, KF_IMPLICIT_ARGS) BTF_ID_FLAGS(func, bpf_task_work_schedule_resume, KF_IMPLICIT_ARGS) BTF_ID_FLAGS(func, bpf_dynptr_from_file) -BTF_ID_FLAGS(func, bpf_dynptr_file_discard) +BTF_ID_FLAGS(func, bpf_dynptr_file_discard, KF_RELEASE) BTF_ID_FLAGS(func, bpf_timer_cancel_async) BTF_KFUNCS_END(common_btf_ids) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bc8a09c858d8..caa455fad877 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8225,17 +8225,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, return err; skip_type_check: - if (arg_type_is_release(arg_type)) { - if (!arg_type_is_dynptr(arg_type) && !reg_is_referenced(env, reg) && !bpf_register_is_null(reg)) { - verbose(env, "R%d must be referenced when passed to release function\n", - regno); - return -EINVAL; - } - if (meta->release_regno) { - verifier_bug(env, "more than one release argument"); - return -EFAULT; - } - meta->release_regno = regno; + if (arg_type_is_release(arg_type) && !arg_type_is_dynptr(arg_type) && + !reg_is_referenced(env, reg) && !bpf_register_is_null(reg)) { + verbose(env, "release helper %s expects referenced PTR_TO_BTF_ID passed to %s\n", + func_id_name(meta->func_id), reg_arg_name(env, argno)); + return -EINVAL; } 
if (reg_is_referenced(env, reg)) @@ -8798,11 +8792,29 @@ static bool check_mem_arg_rw_flag_ok(const struct bpf_func_proto *fn) return true; } -static int check_func_proto(const struct bpf_func_proto *fn) +static bool check_proto_release_reg(const struct bpf_func_proto *fn, struct bpf_call_arg_meta *meta) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) { + enum bpf_arg_type arg_type = fn->arg_type[i]; + + if (arg_type_is_release(arg_type)) { + if (meta->release_regno) + return false; + meta->release_regno = i + 1; + } + } + + return true; +} + +static int check_func_proto(const struct bpf_func_proto *fn, struct bpf_call_arg_meta *meta) { return check_raw_mode_ok(fn) && check_arg_pair_ok(fn) && check_mem_arg_rw_flag_ok(fn) && + check_proto_release_reg(fn, meta) && check_btf_id_ok(fn) ? 0 : -EINVAL; } @@ -8956,6 +8968,42 @@ static void invalidate_non_owning_refs(struct bpf_verifier_env *env) })); } +static void invalidate_rcu_protected_refs(struct bpf_verifier_env *env) +{ + struct bpf_stack_state *stack; + struct bpf_func_state *state; + struct bpf_reg_state *reg; + u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER); + + bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, stack, clear_mask, ({ + if (reg->type & MEM_RCU) { + reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL); + reg->type |= PTR_UNTRUSTED; + } + })); +} + +static int ref_convert_alloc_rcu_protected(struct bpf_verifier_env *env, u32 id) +{ + struct bpf_func_state *state; + struct bpf_reg_state *reg; + int err; + + err = release_reference_nomark(env->cur_state, id); + + bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ + if (reg->id != id) + continue; + if ((reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) { + reg->id = 0; + reg->type &= ~MEM_ALLOC; + reg->type |= MEM_RCU; + } + })); + + return err; +} + static void clear_caller_saved_regs(struct bpf_verifier_env *env, struct bpf_reg_state *regs) { @@ -10028,6 +10076,24 @@ static const char 
*non_sleepable_context_description(struct bpf_verifier_env *en return "non-sleepable prog"; } +static int release_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + bool convert_rcu, bool release_dynptr) +{ + int err = -EINVAL; + + if (bpf_register_is_null(reg)) + return 0; + + if (release_dynptr) + err = unmark_stack_slots_dynptr(env, reg); + else if (convert_rcu) + err = ref_convert_alloc_rcu_protected(env, reg->id); + else if (reg_is_referenced(env, reg)) + err = release_reference(env, reg->id); + + return err; +} + static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { @@ -10077,7 +10143,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn memset(&meta, 0, sizeof(meta)); meta.pkt_access = fn->pkt_access; - err = check_func_proto(fn); + err = check_func_proto(fn, &meta); if (err) { verifier_bug(env, "incorrect func proto %s#%d", func_id_name(func_id), func_id); return err; @@ -10122,37 +10188,11 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } if (meta.release_regno) { - err = -EINVAL; - if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) { - err = unmark_stack_slots_dynptr(env, ®s[meta.release_regno]); - } else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj.id) { - u32 id = meta.ref_obj.id; - bool in_rcu = in_rcu_cs(env); - struct bpf_func_state *state; - struct bpf_reg_state *reg; - - err = release_reference_nomark(env->cur_state, id); - if (!err) { - bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ - if (reg->id == id) { - if (in_rcu && (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) { - reg->id = 0; - reg->type &= ~MEM_ALLOC; - reg->type |= MEM_RCU; - } else { - mark_reg_invalid(env, reg); - } - } - })); - } - } else if (meta.ref_obj.id) { - err = release_reference(env, meta.ref_obj.id); - } else if (bpf_register_is_null(®s[meta.release_regno])) { - /* meta.ref_obj.id can only be 0 if register that 
is meant to be - * released is NULL, which must be > R0. - */ - err = 0; - } + struct bpf_reg_state *reg = ®s[meta.release_regno]; + bool convert_rcu = (func_id == BPF_FUNC_kptr_xchg) && in_rcu_cs(env) && + (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU); + + err = release_reg(env, reg, convert_rcu, !!meta.dynptr.id); if (err) return err; } @@ -10547,7 +10587,6 @@ static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta) return meta->kfunc_flags & KF_RELEASE; } - static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta) { return meta->kfunc_flags & KF_DESTRUCTIVE; @@ -11912,24 +11951,16 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return -EACCES; } - if (reg_is_referenced(env, reg)) { - if (is_kfunc_release(meta) && meta->ref_obj.cnt) { - verbose(env, "more than one arg with referenced id %s %u %u", - reg_arg_name(env, argno), reg->id, - meta->ref_obj.id); - return -EFAULT; - } - update_ref_obj(&meta->ref_obj, reg); - if (is_kfunc_release(meta)) { - if (regno < 0) { - verbose(env, "%s release arg cannot be a stack argument\n", - reg_arg_name(env, argno)); - return -EINVAL; - } - meta->release_regno = regno; - } + if (regno == meta->release_regno && !is_kfunc_arg_dynptr(meta->btf, &args[i]) && + !reg_is_referenced(env, reg) && !bpf_register_is_null(reg)) { + verbose(env, "release kfunc %s expects referenced PTR_TO_BTF_ID passed to %s\n", + func_name, reg_arg_name(env, argno)); + return -EINVAL; } + if (reg_is_referenced(env, reg)) + update_ref_obj(&meta->ref_obj, reg); + ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); ref_tname = btf_name_by_offset(btf, ref_t->name_off); @@ -11993,7 +12024,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } } fallthrough; - case KF_ARG_PTR_TO_DYNPTR: case KF_ARG_PTR_TO_ITER: case KF_ARG_PTR_TO_LIST_HEAD: case KF_ARG_PTR_TO_LIST_NODE: @@ -12010,6 +12040,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct 
bpf_kfunc_call_ case KF_ARG_PTR_TO_IRQ_FLAG: case KF_ARG_PTR_TO_RES_SPIN_LOCK: break; + case KF_ARG_PTR_TO_DYNPTR: + arg_type = ARG_PTR_TO_DYNPTR; + break; case KF_ARG_PTR_TO_CTX: arg_type = ARG_PTR_TO_CTX; break; @@ -12018,7 +12051,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return -EFAULT; } - if (is_kfunc_release(meta) && reg_is_referenced(env, reg)) + if (regno == meta->release_regno) arg_type |= OBJ_RELEASE; ret = check_func_arg_reg_off(env, reg, argno, arg_type); if (ret < 0) @@ -12083,12 +12116,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ dynptr_arg_type |= DYNPTR_TYPE_FILE; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) { dynptr_arg_type |= DYNPTR_TYPE_FILE | OBJ_RELEASE; - if (regno < 0) { - verbose(env, "%s release arg cannot be a stack argument\n", - reg_arg_name(env, argno)); - return -EINVAL; - } - meta->release_regno = regno; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] && (dynptr_arg_type & MEM_UNINIT)) { enum bpf_dynptr_type parent_type = meta->dynptr.type; @@ -12377,12 +12404,6 @@ check_ok: } } - if (is_kfunc_release(meta) && !meta->release_regno) { - verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n", - func_name); - return -EINVAL; - } - return 0; } @@ -12409,6 +12430,10 @@ int bpf_fetch_kfunc_arg_meta(struct bpf_verifier_env *env, meta->kfunc_flags = *kfunc.flags; + /* Only support release referenced argument passed by register */ + if (is_kfunc_release(meta)) + meta->release_regno = BPF_REG_1; + return 0; } @@ -12899,23 +12924,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (rcu_lock) { env->cur_state->active_rcu_locks++; } else if (rcu_unlock) { - struct bpf_stack_state *stack; - struct bpf_func_state *state; - struct bpf_reg_state *reg; - u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER); - if (env->cur_state->active_rcu_locks == 0) { 
verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name); return -EINVAL; } - if (--env->cur_state->active_rcu_locks == 0) { - bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, stack, clear_mask, ({ - if (reg->type & MEM_RCU) { - reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL); - reg->type |= PTR_UNTRUSTED; - } - })); - } + if (--env->cur_state->active_rcu_locks == 0) + invalidate_rcu_protected_refs(env); } else if (preempt_disable) { env->cur_state->active_preempt_locks++; } else if (preempt_enable) { @@ -12946,13 +12960,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. */ if (meta.release_regno) { - struct bpf_reg_state *reg = ®s[meta.release_regno]; - - if (meta.dynptr.id) { - err = unmark_stack_slots_dynptr(env, reg); - } else { - err = release_reference(env, reg->id); - } + err = release_reg(env, ®s[meta.release_regno], false, !!meta.dynptr.id); if (err) return err; } diff --git a/tools/testing/selftests/bpf/prog_tests/cb_refs.c b/tools/testing/selftests/bpf/prog_tests/cb_refs.c index 6300b67a3a84..78566b817fd7 100644 --- a/tools/testing/selftests/bpf/prog_tests/cb_refs.c +++ b/tools/testing/selftests/bpf/prog_tests/cb_refs.c @@ -11,7 +11,7 @@ struct { const char *prog_name; const char *err_msg; } cb_refs_tests[] = { - { "underflow_prog", "must point to scalar, or struct with scalar" }, + { "underflow_prog", "release kfunc bpf_kfunc_call_test_release expects referenced PTR_TO_BTF_ID passed to R1" }, { "leak_prog", "Possibly NULL pointer passed to helper R2" }, { "nested_cb", "Unreleased reference id=4 alloc_insn=2" }, /* alloc_insn=2{4,5} */ { "non_cb_transfer_ref", "Unreleased reference id=4 alloc_insn=1" }, /* alloc_insn=1{1,2} */ diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c index a875ba8e5007..d0d65d6d450c 100644 --- 
a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -154,7 +154,7 @@ int BPF_PROG(cgrp_kfunc_xchg_unreleased, struct cgroup *cgrp, const char *path) } SEC("tp_btf/cgroup_mkdir") -__failure __msg("must be referenced or trusted") +__failure __msg("release kfunc bpf_cgroup_release expects referenced PTR_TO_BTF_ID passed to R1") int BPF_PROG(cgrp_kfunc_rcu_get_release, struct cgroup *cgrp, const char *path) { struct cgroup *kptr; @@ -191,7 +191,7 @@ int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path } SEC("tp_btf/cgroup_mkdir") -__failure __msg("R1 pointer type STRUCT cgroup must point") +__failure __msg("release kfunc bpf_cgroup_release expects referenced PTR_TO_BTF_ID passed to R1") int BPF_PROG(cgrp_kfunc_release_fp, struct cgroup *cgrp, const char *path) { struct cgroup *acquired = (struct cgroup *)&path; @@ -237,7 +237,7 @@ int BPF_PROG(cgrp_kfunc_release_null, struct cgroup *cgrp, const char *path) } SEC("tp_btf/cgroup_mkdir") -__failure __msg("release kernel function bpf_cgroup_release expects") +__failure __msg("release kfunc bpf_cgroup_release expects referenced PTR_TO_BTF_ID passed to R1") int BPF_PROG(cgrp_kfunc_release_unacquired, struct cgroup *cgrp, const char *path) { /* Cannot release trusted cgroup pointer which was not acquired. 
*/ diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c index 8f36e74fd8f9..f11848dfa78f 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -252,7 +252,7 @@ int reject_untrusted_store_to_ref(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("R2 must be referenced") +__failure __msg("release helper bpf_kptr_xchg expects referenced PTR_TO_BTF_ID passed to R2") int reject_untrusted_xchg(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index 41047d81ec42..8e947d445f8e 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -178,7 +178,7 @@ int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_f } SEC("tp_btf/task_newtask") -__failure __msg("R1 pointer type STRUCT task_struct must point") +__failure __msg("release kfunc bpf_task_release expects referenced PTR_TO_BTF_ID passed to R1") int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags) { struct task_struct *acquired = (struct task_struct *)&clone_flags; @@ -224,7 +224,7 @@ int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags) } SEC("tp_btf/task_newtask") -__failure __msg("release kernel function bpf_task_release expects") +__failure __msg("release kfunc bpf_task_release expects referenced PTR_TO_BTF_ID passed to R1") int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_flags) { /* Cannot release trusted task pointer which was not acquired. 
*/ @@ -313,7 +313,7 @@ int BPF_PROG(task_access_comm4, struct task_struct *task, const char *buf, bool } SEC("tp_btf/task_newtask") -__failure __msg("R1 must be referenced or trusted") +__failure __msg("release kfunc bpf_task_release expects referenced PTR_TO_BTF_ID passed to R1") int BPF_PROG(task_kfunc_release_in_map, struct task_struct *task, u64 clone_flags) { struct task_struct *local; diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c index e7dae0cf9c17..ea273e152209 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c @@ -153,7 +153,7 @@ __weak int subprog_trusted_destroy(struct task_struct *task __arg_trusted) SEC("?tp_btf/task_newtask") __failure __log_level(2) -__msg("release kernel function bpf_task_release expects refcounted PTR_TO_BTF_ID") +__msg("release kfunc bpf_task_release expects referenced PTR_TO_BTF_ID passed to R1") int BPF_PROG(trusted_destroy_fail, struct task_struct *task, u64 clone_flags) { return subprog_trusted_destroy(task); diff --git a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c index 139f70bb3595..199ad18f8eb5 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c +++ b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c @@ -1288,7 +1288,7 @@ l1_%=: r1 = r6; \ SEC("tc") __description("reference tracking: bpf_sk_release(listen_sk)") -__failure __msg("R1 must be referenced when passed to release function") +__failure __msg("release helper bpf_sk_release expects referenced PTR_TO_BTF_ID passed to R1") __naked void bpf_sk_release_listen_sk(void) { asm volatile ( diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c index a2132c72d3b8..9f680cf44512 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sock.c +++ 
b/tools/testing/selftests/bpf/progs/verifier_sock.c @@ -603,7 +603,7 @@ l2_%=: r0 = *(u32*)(r0 + %[bpf_tcp_sock_snd_cwnd]); \ SEC("tc") __description("bpf_sk_release(skb->sk)") -__failure __msg("R1 must be referenced when passed to release function") +__failure __msg("release helper bpf_sk_release expects referenced PTR_TO_BTF_ID passed to R1") __naked void bpf_sk_release_skb_sk(void) { asm volatile (" \ @@ -620,7 +620,7 @@ l0_%=: r0 = 0; \ SEC("tc") __description("bpf_sk_release(bpf_sk_fullsock(skb->sk))") -__failure __msg("R1 must be referenced when passed to release function") +__failure __msg("release helper bpf_sk_release expects referenced PTR_TO_BTF_ID passed to R1") __naked void bpf_sk_fullsock_skb_sk(void) { asm volatile (" \ @@ -644,7 +644,7 @@ l1_%=: r1 = r0; \ SEC("tc") __description("bpf_sk_release(bpf_tcp_sock(skb->sk))") -__failure __msg("R1 must be referenced when passed to release function") +__failure __msg("release helper bpf_sk_release expects referenced PTR_TO_BTF_ID passed to R1") __naked void bpf_tcp_sock_skb_sk(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c index 0990de076844..2870738d93f7 100644 --- a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c +++ b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c @@ -80,7 +80,7 @@ int BPF_PROG(get_task_exe_file_kfunc_unreleased) } SEC("lsm.s/file_open") -__failure __msg("release kernel function bpf_put_file expects") +__failure __msg("release kfunc bpf_put_file expects referenced PTR_TO_BTF_ID passed to R1") int BPF_PROG(put_file_kfunc_unacquired, struct file *file) { /* Can't release an unacquired pointer. 
*/ diff --git a/tools/testing/selftests/bpf/progs/wakeup_source_fail.c b/tools/testing/selftests/bpf/progs/wakeup_source_fail.c index b8bbb61d4d4e..d4d0f1610853 100644 --- a/tools/testing/selftests/bpf/progs/wakeup_source_fail.c +++ b/tools/testing/selftests/bpf/progs/wakeup_source_fail.c @@ -42,7 +42,7 @@ int wakeup_source_access_lock_fields(void *ctx) } SEC("syscall") -__failure __msg("type=scalar expected=fp") +__failure __msg("release kfunc bpf_wakeup_sources_read_unlock expects referenced PTR_TO_BTF_ID passed to R1") int wakeup_source_unlock_no_lock(void *ctx) { struct bpf_ws_lock *lock = (void *)0x1; -- cgit v1.2.3 From fbcc68af60479c4beebe411c1ee5e3c873e3adcf Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Thu, 28 May 2026 18:49:33 -0700 Subject: selftests/bpf: Test creating dynptr from dynptr data and slice The verifier currently does not allow creating dynptr from dynptr data or slice. Add a selftest to test this explicitly. Signed-off-by: Amery Hung Link: https://lore.kernel.org/r/20260529014936.2811085-11-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/dynptr_fail.c | 42 +++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index 40a14a5174a5..344fb2aa0813 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -705,6 +705,48 @@ int dynptr_from_mem_invalid_api(void *ctx) return 0; } +/* Cannot create dynptr from dynptr data */ +SEC("?raw_tp") +__failure __msg("Unsupported reg type mem for bpf_dynptr_from_mem data") +int dynptr_from_dynptr_data(void *ctx) +{ + struct bpf_dynptr ptr, ptr2; + __u8 *data; + + if (get_map_val_dynptr(&ptr)) + return 0; + + data = bpf_dynptr_data(&ptr, 0, sizeof(__u32)); + if (!data) + return 0; + + /* this should fail */ + bpf_dynptr_from_mem(data, sizeof(__u32), 0, &ptr2); + + return 0; 
+} + +/* Cannot create dynptr from dynptr slice */ +SEC("?tc") +__failure __msg("Unsupported reg type mem for bpf_dynptr_from_mem data") +int dynptr_from_dynptr_slice(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr, ptr2; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + /* this should fail */ + bpf_dynptr_from_mem(hdr, sizeof(*hdr), 0, &ptr2); + + return SK_PASS; +} + SEC("?tc") __failure __msg("cannot overwrite referenced dynptr") __log_level(2) int dynptr_pruning_overwrite(struct __sk_buff *ctx) -- cgit v1.2.3 From 925320666e0644c2e884a0dc49ab2dc22b061891 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Thu, 28 May 2026 18:49:34 -0700 Subject: selftests/bpf: Test using slice after invalidating dynptr clone The parent object of a cloned dynptr is the skb, not the original dynptr. Invalidating the original dynptr should not prevent the program from using the slice derived from the cloned dynptr. 
Signed-off-by: Amery Hung Link: https://lore.kernel.org/r/20260529014936.2811085-12-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c | 8 +++ .../bpf_qdisc_dynptr_use_after_invalidate_clone.c | 74 ++++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_qdisc_dynptr_use_after_invalidate_clone.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c index 730357cd0c9a..77f1c0550c9b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c @@ -8,6 +8,10 @@ #include "bpf_qdisc_fifo.skel.h" #include "bpf_qdisc_fq.skel.h" #include "bpf_qdisc_fail__incompl_ops.skel.h" +#include "bpf_qdisc_fail__invalid_dynptr.skel.h" +#include "bpf_qdisc_fail__invalid_dynptr_slice.skel.h" +#include "bpf_qdisc_fail__invalid_dynptr_cross_frame.skel.h" +#include "bpf_qdisc_dynptr_use_after_invalidate_clone.skel.h" #define LO_IFINDEX 1 @@ -223,6 +227,10 @@ void test_ns_bpf_qdisc(void) test_qdisc_attach_to_non_root(); if (test__start_subtest("incompl_ops")) test_incompl_ops(); + RUN_TESTS(bpf_qdisc_fail__invalid_dynptr); + RUN_TESTS(bpf_qdisc_fail__invalid_dynptr_cross_frame); + RUN_TESTS(bpf_qdisc_fail__invalid_dynptr_slice); + RUN_TESTS(bpf_qdisc_dynptr_use_after_invalidate_clone); } void serial_test_bpf_qdisc_default(void) diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_dynptr_use_after_invalidate_clone.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_dynptr_use_after_invalidate_clone.c new file mode 100644 index 000000000000..ac626cfa2a98 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_dynptr_use_after_invalidate_clone.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "bpf_experimental.h" +#include "bpf_qdisc_common.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = 
"GPL"; + +int proto; + +SEC("struct_ops") +__success +int BPF_PROG(dynptr_use_after_invalidate_clone, struct sk_buff *skb, struct Qdisc *sch, + struct bpf_sk_buff_ptr *to_free) +{ + struct bpf_dynptr ptr, ptr_clone; + struct ethhdr *hdr; + + bpf_dynptr_from_skb((struct __sk_buff *)skb, 0, &ptr); + + bpf_dynptr_clone(&ptr, &ptr_clone); + + hdr = bpf_dynptr_slice(&ptr_clone, 0, NULL, sizeof(*hdr)); + if (!hdr) { + bpf_qdisc_skb_drop(skb, to_free); + return NET_XMIT_DROP; + } + + *(int *)&ptr = 0; + + proto = hdr->h_proto; + + bpf_qdisc_skb_drop(skb, to_free); + + return NET_XMIT_DROP; +} + +SEC("struct_ops") +__auxiliary +struct sk_buff *BPF_PROG(bpf_qdisc_test_dequeue, struct Qdisc *sch) +{ + return NULL; +} + +SEC("struct_ops") +__auxiliary +int BPF_PROG(bpf_qdisc_test_init, struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + return 0; +} + +SEC("struct_ops") +__auxiliary +void BPF_PROG(bpf_qdisc_test_reset, struct Qdisc *sch) +{ +} + +SEC("struct_ops") +__auxiliary +void BPF_PROG(bpf_qdisc_test_destroy, struct Qdisc *sch) +{ +} + +SEC(".struct_ops") +struct Qdisc_ops test = { + .enqueue = (void *)dynptr_use_after_invalidate_clone, + .dequeue = (void *)bpf_qdisc_test_dequeue, + .init = (void *)bpf_qdisc_test_init, + .reset = (void *)bpf_qdisc_test_reset, + .destroy = (void *)bpf_qdisc_test_destroy, + .id = "bpf_qdisc_test", +}; -- cgit v1.2.3 From 3f75a757a3afa9c0d5a2637910659e92d236e7f2 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Thu, 28 May 2026 18:49:35 -0700 Subject: selftests/bpf: Test using file dynptr after the reference on file is dropped File dynptr and slice should be invalidated when the parent file's reference is dropped in the program. Without the verifier tracking dynptr's parent referenced object, the dynptr would continue to be incorrectly used even if the underlying file is being torn down or gone. 
Signed-off-by: Amery Hung Link: https://lore.kernel.org/r/20260529014936.2811085-13-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/file_reader_fail.c | 60 ++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/file_reader_fail.c b/tools/testing/selftests/bpf/progs/file_reader_fail.c index 0739620dea8a..d5fae5e4cf9a 100644 --- a/tools/testing/selftests/bpf/progs/file_reader_fail.c +++ b/tools/testing/selftests/bpf/progs/file_reader_fail.c @@ -50,3 +50,63 @@ int xdp_no_dynptr_type(struct xdp_md *xdp) bpf_dynptr_file_discard(&dynptr); return 0; } + +SEC("lsm/file_open") +__failure +__msg("Leaking reference id={{[0-9]+}} alloc_insn={{[0-9]+}}. Release it first.") +int use_file_dynptr_after_put_file(void *ctx) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct file *file = bpf_get_task_exe_file(task); + struct bpf_dynptr dynptr; + char buf[64]; + + if (!file) + return 0; + + if (bpf_dynptr_from_file(file, 0, &dynptr)) + goto out; + + /* this should fail - file dynptr should be discarded first to prevent resource leak */ + bpf_put_file(file); + + bpf_dynptr_read(buf, sizeof(buf), &dynptr, 0, 0); + return 0; + +out: + bpf_dynptr_file_discard(&dynptr); + bpf_put_file(file); + return 0; +} + +SEC("lsm/file_open") +__failure +__msg("Leaking reference id={{[0-9]+}} alloc_insn={{[0-9]+}}. 
Release it first.") +int use_file_dynptr_slice_after_put_file(void *ctx) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct file *file = bpf_get_task_exe_file(task); + struct bpf_dynptr dynptr; + char *data; + + if (!file) + return 0; + + if (bpf_dynptr_from_file(file, 0, &dynptr)) + goto out; + + data = bpf_dynptr_data(&dynptr, 0, 1); + if (!data) + goto out; + + /* this should fail - file dynptr should be discarded first to prevent resource leak */ + bpf_put_file(file); + + *data = 'x'; + return 0; + +out: + bpf_dynptr_file_discard(&dynptr); + bpf_put_file(file); + return 0; +} -- cgit v1.2.3 From 60c7c3b880c8b3ad7fe025bb68b13bfbc440ceaf Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Thu, 28 May 2026 18:49:36 -0700 Subject: selftests/bpf: Test using dynptr after freeing the underlying object Make sure the verifier invalidates the dynptr and dynptr slice derived from an skb after the skb is freed. Signed-off-by: Amery Hung Link: https://lore.kernel.org/r/20260529014936.2811085-14-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- .../bpf/progs/bpf_qdisc_fail__invalid_dynptr.c | 68 ++++++++++++++++++++ .../bpf_qdisc_fail__invalid_dynptr_cross_frame.c | 74 ++++++++++++++++++++++ .../progs/bpf_qdisc_fail__invalid_dynptr_slice.c | 70 ++++++++++++++++++++ 3 files changed, 212 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_qdisc_fail__invalid_dynptr.c create mode 100644 tools/testing/selftests/bpf/progs/bpf_qdisc_fail__invalid_dynptr_cross_frame.c create mode 100644 tools/testing/selftests/bpf/progs/bpf_qdisc_fail__invalid_dynptr_slice.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__invalid_dynptr.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__invalid_dynptr.c new file mode 100644 index 000000000000..1d96f7987a3f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__invalid_dynptr.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include
"bpf_experimental.h" +#include "bpf_qdisc_common.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +int proto; + +SEC("struct_ops") +__failure __msg("Expected an initialized dynptr as R1") +int BPF_PROG(invalid_dynptr, struct sk_buff *skb, struct Qdisc *sch, + struct bpf_sk_buff_ptr *to_free) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + + bpf_dynptr_from_skb((struct __sk_buff *)skb, 0, &ptr); + + bpf_qdisc_skb_drop(skb, to_free); + + hdr = bpf_dynptr_slice(&ptr, 0, NULL, sizeof(*hdr)); + if (!hdr) + return NET_XMIT_DROP; + + proto = hdr->h_proto; + + return NET_XMIT_DROP; +} + +SEC("struct_ops") +__auxiliary +struct sk_buff *BPF_PROG(bpf_qdisc_test_dequeue, struct Qdisc *sch) +{ + return NULL; +} + +SEC("struct_ops") +__auxiliary +int BPF_PROG(bpf_qdisc_test_init, struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + return 0; +} + +SEC("struct_ops") +__auxiliary +void BPF_PROG(bpf_qdisc_test_reset, struct Qdisc *sch) +{ +} + +SEC("struct_ops") +__auxiliary +void BPF_PROG(bpf_qdisc_test_destroy, struct Qdisc *sch) +{ +} + +SEC(".struct_ops") +struct Qdisc_ops test = { + .enqueue = (void *)invalid_dynptr, + .dequeue = (void *)bpf_qdisc_test_dequeue, + .init = (void *)bpf_qdisc_test_init, + .reset = (void *)bpf_qdisc_test_reset, + .destroy = (void *)bpf_qdisc_test_destroy, + .id = "bpf_qdisc_test", +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__invalid_dynptr_cross_frame.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__invalid_dynptr_cross_frame.c new file mode 100644 index 000000000000..2e23b8593af9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__invalid_dynptr_cross_frame.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include "bpf_experimental.h" +#include "bpf_qdisc_common.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +int proto; + +static __noinline int free_skb(struct sk_buff *skb) +{ + bpf_kfree_skb(skb); + return 0;
+} + +SEC("struct_ops") +__failure __msg("invalid mem access 'scalar'") +int BPF_PROG(invalid_dynptr_cross_frame, struct sk_buff *skb, struct Qdisc *sch, + struct bpf_sk_buff_ptr *to_free) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + + bpf_dynptr_from_skb((struct __sk_buff *)skb, 0, &ptr); + + hdr = bpf_dynptr_slice(&ptr, 0, NULL, sizeof(*hdr)); + if (!hdr) + return NET_XMIT_DROP; + + free_skb(skb); + + proto = hdr->h_proto; + + return NET_XMIT_DROP; +} + +SEC("struct_ops") +__auxiliary +struct sk_buff *BPF_PROG(bpf_qdisc_test_dequeue, struct Qdisc *sch) +{ + return NULL; +} + +SEC("struct_ops") +__auxiliary +int BPF_PROG(bpf_qdisc_test_init, struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + return 0; +} + +SEC("struct_ops") +__auxiliary +void BPF_PROG(bpf_qdisc_test_reset, struct Qdisc *sch) +{ +} + +SEC("struct_ops") +__auxiliary +void BPF_PROG(bpf_qdisc_test_destroy, struct Qdisc *sch) +{ +} + +SEC(".struct_ops") +struct Qdisc_ops test = { + .enqueue = (void *)invalid_dynptr_cross_frame, + .dequeue = (void *)bpf_qdisc_test_dequeue, + .init = (void *)bpf_qdisc_test_init, + .reset = (void *)bpf_qdisc_test_reset, + .destroy = (void *)bpf_qdisc_test_destroy, + .id = "bpf_qdisc_test", +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__invalid_dynptr_slice.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__invalid_dynptr_slice.c new file mode 100644 index 000000000000..731216c4e45a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__invalid_dynptr_slice.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include "bpf_experimental.h" +#include "bpf_qdisc_common.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +int proto; + +SEC("struct_ops") +__failure __msg("invalid mem access 'scalar'") +int BPF_PROG(invalid_dynptr_slice, struct sk_buff *skb, struct Qdisc *sch, + struct bpf_sk_buff_ptr *to_free) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + +
bpf_dynptr_from_skb((struct __sk_buff *)skb, 0, &ptr); + + hdr = bpf_dynptr_slice(&ptr, 0, NULL, sizeof(*hdr)); + if (!hdr) { + bpf_qdisc_skb_drop(skb, to_free); + return NET_XMIT_DROP; + } + + bpf_qdisc_skb_drop(skb, to_free); + + proto = hdr->h_proto; + + return NET_XMIT_DROP; +} + +SEC("struct_ops") +__auxiliary +struct sk_buff *BPF_PROG(bpf_qdisc_test_dequeue, struct Qdisc *sch) +{ + return NULL; +} + +SEC("struct_ops") +__auxiliary +int BPF_PROG(bpf_qdisc_test_init, struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + return 0; +} + +SEC("struct_ops") +__auxiliary +void BPF_PROG(bpf_qdisc_test_reset, struct Qdisc *sch) +{ +} + +SEC("struct_ops") +__auxiliary +void BPF_PROG(bpf_qdisc_test_destroy, struct Qdisc *sch) +{ +} + +SEC(".struct_ops") +struct Qdisc_ops test = { + .enqueue = (void *)invalid_dynptr_slice, + .dequeue = (void *)bpf_qdisc_test_dequeue, + .init = (void *)bpf_qdisc_test_init, + .reset = (void *)bpf_qdisc_test_reset, + .destroy = (void *)bpf_qdisc_test_destroy, + .id = "bpf_qdisc_test", +}; -- cgit v1.2.3 From 0fb6c9ed6493b4af01be8bb0a384574eba7df636 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Mon, 1 Jun 2026 17:02:44 +0200 Subject: libbpf: Reject non-exclusive metadata maps in the signed loader The loader verifies map->sha against the metadata hash in its instructions. map->sha is calculated when BPF_OBJ_GET_INFO_BY_FD is called on the frozen map. While the map is frozen, the /signed loader/ must also ensure the map is exclusive, as, without exclusivity (which a hostile host could just omit when loading the loader), another BPF program with map access can mutate the contents afterwards, so the check passes on stale data. With the extra check as part of the signed loader, it now refuses to move on with map->sha validation if the host set it up wrongly. 
Fixes: fb2b0e290147 ("libbpf: Update light skeleton for signing") Signed-off-by: KP Singh Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20260601150248.394863-4-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + kernel/bpf/syscall.c | 7 +++++++ tools/lib/bpf/gen_loader.c | 17 +++++++++++++++++ 3 files changed, 25 insertions(+) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c0510d223685..8599b451dd7a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -296,6 +296,7 @@ struct bpf_map_owner { struct bpf_map { u8 sha[SHA256_DIGEST_SIZE]; + u32 excl; const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; #ifdef CONFIG_SECURITY diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a27fa2b9b405..625a4366fe6d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1588,6 +1588,13 @@ static int map_create_alloc(union bpf_attr *attr, bpfptr_t uattr, struct bpf_ver err = -EFAULT; goto free_map; } + + /* See libbpf: emit_signature_match() */ + BUILD_BUG_ON(offsetof(struct bpf_map, excl) != SHA256_DIGEST_SIZE); + BUILD_BUG_ON(!__same_type(map->excl, u32)); + BUILD_BUG_ON(offsetof(struct bpf_map, sha) != 0); + BUILD_BUG_ON(!__same_type(map->sha, u8[SHA256_DIGEST_SIZE])); + map->excl = 1; } else if (attr->excl_prog_hash_size) { bpf_log(log, "Invalid excl_prog_hash_size.\n"); err = -EINVAL; diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 3702c5944bc0..66a02039da8c 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -586,6 +586,23 @@ static void emit_signature_match(struct bpf_gen *gen) __s64 off; int i; + /* + * Reject if the metadata map is not exclusive. Without exclusivity + * the cached map->sha[] verified above can be stale: another BPF + * program with map access could have mutated the contents between + * BPF_OBJ_GET_INFO_BY_FD and loader execution. 
+ */ + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX, + 0, 0, 0, 0)); + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, SHA256_DIGEST_LENGTH)); + off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 2; + if (is_simm16(off)) { + emit(gen, BPF_MOV64_IMM(BPF_REG_7, -EINVAL)); + emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, off)); + } else { + gen->error = -ERANGE; + } + for (i = 0; i < SHA256_DWORD_SIZE; i++) { emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX, 0, 0, 0, 0)); -- cgit v1.2.3 From 61e084152328867fe2279cc790573aae39959cd5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 1 Jun 2026 17:02:45 +0200 Subject: libbpf: Skip initial_value override on signed loaders bpf_gen__map_update_elem() emits code that, when the host-supplied loader ctx provides a non-NULL map_desc[idx].initial_value, overwrites the blob value with bytes read from the host (bpf_copy_from_user / bpf_probe_read_kernel) before the BPF_MAP_UPDATE_ELEM that populates the program's .data/.rodata/.bss maps. This override runs after emit_signature_match() has validated map->sha[], and initial_value is part of neither the signed loader instructions nor the hashed data blob. For a signed loader this lets an untrusted host substitute global-variable contents into a program whose code carries a valid signature, thus weakening what the signature attests to. The blob already contains the signer-provided value (added via add_data() and covered by the embedded, signed hash), so simply skip emitting the override for signed loaders (gen_hash). Runtime initialization stays available for the unsigned light-skeleton path as before. The jump offsets within the override block are internal to it, so guarding the whole block leaves them unchanged. 
Fixes: ea923080c145 ("libbpf: Embed and verify the metadata hash in the loader") Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20260601150248.394863-5-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/gen_loader.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 66a02039da8c..a5d9c7a5261b 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -1187,27 +1187,36 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, value = add_data(gen, pvalue, value_size); key = add_data(gen, &zero, sizeof(zero)); - /* if (map_desc[map_idx].initial_value) { + /* + * if (map_desc[map_idx].initial_value) { * if (ctx->flags & BPF_SKEL_KERNEL) * bpf_probe_read_kernel(value, value_size, initial_value); * else * bpf_copy_from_user(value, value_size, initial_value); * } + * + * The runtime initial_value comes from the host-supplied loader + * ctx and would overwrite the blob value after emit_signature_match() + * has already validated map->sha[]. For a signed loader (gen_hash) + * the attested blob value must be authoritative, so skip the override + * and leave the hashed value in place. 
*/ - emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, - sizeof(struct bpf_loader_ctx) + - sizeof(struct bpf_map_desc) * map_idx + - offsetof(struct bpf_map_desc, initial_value))); - emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 8)); - emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, - 0, 0, 0, value)); - emit(gen, BPF_MOV64_IMM(BPF_REG_2, value_size)); - emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, - offsetof(struct bpf_loader_ctx, flags))); - emit(gen, BPF_JMP_IMM(BPF_JSET, BPF_REG_0, BPF_SKEL_KERNEL, 2)); - emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user)); - emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1)); - emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); + if (!OPTS_GET(gen->opts, gen_hash, false)) { + emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, + sizeof(struct bpf_loader_ctx) + + sizeof(struct bpf_map_desc) * map_idx + + offsetof(struct bpf_map_desc, initial_value))); + emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 8)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, value)); + emit(gen, BPF_MOV64_IMM(BPF_REG_2, value_size)); + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, + offsetof(struct bpf_loader_ctx, flags))); + emit(gen, BPF_JMP_IMM(BPF_JSET, BPF_REG_0, BPF_SKEL_KERNEL, 2)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user)); + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); + } map_update_attr = add_data(gen, &attr, attr_size); pr_debug("gen: map_update_elem: idx %d, value: off %d size %d, attr: off %d size %d\n", -- cgit v1.2.3 From 60214435b365ecdd40b2f96d4e54564b5c927645 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 1 Jun 2026 17:02:46 +0200 Subject: libbpf: Skip max_entries override on signed loaders bpf_gen__map_create() lets the host-supplied loader ctx override a map's max_entries at runtime (map_desc[idx].max_entries, when non-zero). 
This is how the light skeleton sizes maps to the target machine, but it happens after emit_signature_match() and is covered by neither the signed loader instructions nor the hashed blob. For a signed loader this means an untrusted host can re-dimension the program's maps, outside what the signature attests to. Gate the override on gen_hash so signed loaders use the signer-provided max_entries baked into the blob. Fixes: ea923080c145 ("libbpf: Embed and verify the metadata hash in the loader") Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20260601150248.394863-6-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/gen_loader.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index a5d9c7a5261b..66e13566bc31 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -546,13 +546,22 @@ void bpf_gen__map_create(struct bpf_gen *gen, default: break; } - /* conditionally update max_entries */ - if (map_idx >= 0) + + /* + * Conditionally update max_entries from the host-supplied loader + * ctx. This sizes the map at runtime, but for a signed loader + * (gen_hash) it would let an untrusted host re-dimension the + * program's maps after emit_signature_match(), outside what the + * signature attests to. Keep the signer-provided max_entries + * baked into the blob in that case. 
+ */ + if (map_idx >= 0 && !OPTS_GET(gen->opts, gen_hash, false)) move_ctx2blob(gen, attr_field(map_create_attr, max_entries), 4, sizeof(struct bpf_loader_ctx) + sizeof(struct bpf_map_desc) * map_idx + offsetof(struct bpf_map_desc, max_entries), true /* check that max_entries != 0 */); + /* emit MAP_CREATE command */ emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size); debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d", -- cgit v1.2.3 From 38498c0ebacd54dbaac3513a548a13f1a8455c4e Mon Sep 17 00:00:00 2001 From: KP Singh Date: Mon, 1 Jun 2026 17:02:47 +0200 Subject: selftests/bpf: Adjust verifier_map_ptr for the map's excl field Adding the u32 excl field at offset 32 of struct bpf_map right after the sha[SHA256_DIGEST_SIZE] hash shifts the ops pointer from offset 32 to 40. Therefore, fix up the test case. # LDLIBS=-static PKG_CONFIG='pkg-config --static' ./vmtest.sh -- ./test_progs -t verifier_map_ptr [...] #637/1 verifier_map_ptr/bpf_map_ptr: read with negative offset rejected:OK #637/2 verifier_map_ptr/bpf_map_ptr: read with negative offset rejected @unpriv:OK #637/3 verifier_map_ptr/bpf_map_ptr: write rejected:OK #637/4 verifier_map_ptr/bpf_map_ptr: write rejected @unpriv:OK #637/5 verifier_map_ptr/bpf_map_ptr: read non-existent field rejected:OK #637/6 verifier_map_ptr/bpf_map_ptr: read non-existent field rejected @unpriv:OK #637/7 verifier_map_ptr/bpf_map_ptr: read ops field accepted:OK #637/8 verifier_map_ptr/bpf_map_ptr: read ops field accepted @unpriv:OK [...] 
Summary: 2/18 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20260601150248.394863-7-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_map_ptr.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c index e2767d27d8aa..d8e822d1a8ba 100644 --- a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c @@ -70,13 +70,15 @@ __naked void bpf_map_ptr_write_rejected(void) : __clobber_all); } -/* The first element of struct bpf_map is a SHA256 hash of 32 bytes, accessing - * into this array is valid. The opts field is now at offset 33. +/* + * struct bpf_map starts with the SHA256 hash sha[32] at offset 0 (a readable + * byte array), followed by the u32 excl field at offset 32. Reading a u32 at + * offset 33 runs past the end of excl and is rejected. */ SEC("socket") __description("bpf_map_ptr: read non-existent field rejected") __failure -__msg("cannot access ptr member ops with moff 32 in struct bpf_map with off 33 size 4") +__msg("access beyond the end of member excl (mend:36) in struct bpf_map with off 33 size 4") __failure_unpriv __msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN") __flag(BPF_F_ANY_ALIGNMENT) -- cgit v1.2.3 From 32f725458a1ab5973c64e4636659ca2c0db42f48 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 1 Jun 2026 17:02:48 +0200 Subject: selftests/bpf: Test that exclusive maps are rejected in map-in-map Add a subtest to map_excl that verifies an exclusive map (created with excl_prog_hash) cannot be used in a map-of-maps, covering both kernel enforcement points: i) the inner-map template at map-of-maps creation and, ii) the element inserted into an existing map-of-maps. 
# LDLIBS=-static PKG_CONFIG='pkg-config --static' ./vmtest.sh -- ./test_progs -t map_excl ./test_progs -t map_excl [ 1.728106] bpf_testmod: loading out-of-tree module taints kernel. [ 1.730473] bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel #215/1 map_excl/map_excl_allowed:OK #215/2 map_excl/map_excl_denied:OK #215/3 map_excl/map_excl_no_map_in_map:OK #215 map_excl:OK Summary: 1/3 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20260601150248.394863-8-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/map_excl.c | 46 +++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/map_excl.c b/tools/testing/selftests/bpf/prog_tests/map_excl.c index 6bdc6d6de0da..a213dd559aae 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_excl.c +++ b/tools/testing/selftests/bpf/prog_tests/map_excl.c @@ -8,6 +8,10 @@ #include "map_excl.skel.h" +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + static void test_map_excl_allowed(void) { struct map_excl *skel = map_excl__open(); @@ -45,10 +49,52 @@ out: } +static void test_map_excl_no_map_in_map(void) +{ + __u8 hash[SHA256_DIGEST_SIZE] = {}; + LIBBPF_OPTS(bpf_map_create_opts, excl_opts, + .excl_prog_hash = hash, + .excl_prog_hash_size = sizeof(hash)); + LIBBPF_OPTS(bpf_map_create_opts, outer_opts); + int excl_fd, tmpl_fd = -1, outer_fd = -1, err; + __u32 key = 0; + + excl_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "excl_inner", 4, 4, 1, &excl_opts); + if (!ASSERT_OK_FD(excl_fd, "create exclusive map")) + return; + + outer_opts.inner_map_fd = excl_fd; + err = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, "outer_from_excl", + 4, 4, 1, &outer_opts); + if (err >= 0) + close(err); + ASSERT_EQ(err, -ENOTSUPP, "reject exclusive map as map-in-map template"); + + tmpl_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "tmpl", 4, 4, 
1, NULL); + if (!ASSERT_OK_FD(tmpl_fd, "create inner template")) + goto out; + + outer_opts.inner_map_fd = tmpl_fd; + outer_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, "outer", 4, 4, 1, &outer_opts); + if (!ASSERT_OK_FD(outer_fd, "create map-of-maps")) + goto out; + + err = bpf_map_update_elem(outer_fd, &key, &excl_fd, 0); + ASSERT_EQ(err, -ENOTSUPP, "reject exclusive map as map-in-map element"); +out: + if (outer_fd >= 0) + close(outer_fd); + if (tmpl_fd >= 0) + close(tmpl_fd); + close(excl_fd); +} + void test_map_excl(void) { if (test__start_subtest("map_excl_allowed")) test_map_excl_allowed(); if (test__start_subtest("map_excl_denied")) test_map_excl_denied(); + if (test__start_subtest("map_excl_no_map_in_map")) + test_map_excl_no_map_in_map(); } -- cgit v1.2.3 From a0fa68d8ce759dbf6aaf19a043ddd77a2128c26c Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Mon, 1 Jun 2026 20:41:15 -0400 Subject: selftests/bpf: libarena: Add "arena" BTF type tag to __arena qualifier The arena qualifier currently designates its associated type as belonging to address space 1. This property affects code generation, but is not reflected in the BTF information of the function. This lack of information at the BTF level prevents us from returning arena pointers from global subprograms. Subprogs cannot return any data structure more complex than a scalar, so pointers to structs are rejected as a return type. We have no way of marking the return type as a pointer to an arena, which is safe provided the two subprogs have the same arena. Expand the __arena qualifier to also attach a BTF type tag to the type. This lets us determine whether a variable belongs to an arena from its type alone through BTF parsing. 
Signed-off-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260602004120.17087-2-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/libarena/include/bpf_arena_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/libarena/include/bpf_arena_common.h b/tools/testing/selftests/bpf/libarena/include/bpf_arena_common.h index 16f8ce832004..445be3c4edec 100644 --- a/tools/testing/selftests/bpf/libarena/include/bpf_arena_common.h +++ b/tools/testing/selftests/bpf/libarena/include/bpf_arena_common.h @@ -33,7 +33,7 @@ #endif #if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM) -#define __arena __attribute__((address_space(1))) +#define __arena __attribute__((address_space(1))) __attribute__((btf_type_tag("arena"))) #define __arena_global __attribute__((address_space(1))) #define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */ #define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */ -- cgit v1.2.3 From b9b23fe1761117f4a0109a25d16d337c900437ad Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Mon, 1 Jun 2026 20:41:18 -0400 Subject: selftests/bpf: Remove __arg_arena from the codebase Now that BPF __arg_arena has been subsumed by __arena, remove __arg_arena from the codebase. This way the user has one fewer annotation to worry about. To remove __arg_arena we remove the typedefs we were previously using to minimize __arena annotations. This is because __arena now also includes a BTF type tag, which is ignored for non-pointer types. As a result, we cannot capture the whole __arena annotation inside a typedef and need to directly annotate the pointer type when declaring the variable. The extra verbosity is worth it because the use of the __arena tag is intuitive to the programmer and removes the __arg_arena tag that has been a consistent source of confusion for users. 
The typedefs can be reintroduced later (without __arg_arena) once compilers start supporting BTF type tags for non-pointer types. Acked-by: Eduard Zingerman Signed-off-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260602004120.17087-5-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_arena_htab.h | 11 ++- tools/testing/selftests/bpf/bpf_arena_strsearch.h | 4 +- .../bpf/libarena/include/bpf_arena_common.h | 3 +- .../bpf/libarena/include/bpf_arena_spin_lock.h | 6 +- .../selftests/bpf/libarena/include/libarena/asan.h | 6 +- .../bpf/libarena/include/libarena/buddy.h | 22 ++---- .../bpf/libarena/selftests/st_asan_buddy.bpf.c | 4 +- .../bpf/libarena/selftests/st_asan_common.h | 2 +- .../bpf/libarena/selftests/st_buddy.bpf.c | 2 +- .../testing/selftests/bpf/libarena/src/asan.bpf.c | 38 +++++----- .../testing/selftests/bpf/libarena/src/buddy.bpf.c | 80 +++++++++++----------- .../selftests/bpf/libarena/src/common.bpf.c | 6 +- .../testing/selftests/bpf/progs/arena_spin_lock.c | 1 + 13 files changed, 84 insertions(+), 101 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_arena_htab.h b/tools/testing/selftests/bpf/bpf_arena_htab.h index acc01a876668..d7ba86362d86 100644 --- a/tools/testing/selftests/bpf/bpf_arena_htab.h +++ b/tools/testing/selftests/bpf/bpf_arena_htab.h @@ -14,9 +14,8 @@ struct htab { htab_bucket_t *buckets; int n_buckets; }; -typedef struct htab __arena htab_t; -static inline htab_bucket_t *__select_bucket(htab_t *htab, __u32 hash) +static inline htab_bucket_t *__select_bucket(struct htab __arena *htab, __u32 hash) { htab_bucket_t *b = htab->buckets; @@ -24,7 +23,7 @@ static inline htab_bucket_t *__select_bucket(htab_t *htab, __u32 hash) return &b[hash & (htab->n_buckets - 1)]; } -static inline arena_list_head_t *select_bucket(htab_t *htab, __u32 hash) +static inline arena_list_head_t *select_bucket(struct htab __arena *htab, __u32 hash) { return &__select_bucket(htab, hash)->head; 
} @@ -53,7 +52,7 @@ static int htab_hash(int key) return key; } -__weak int htab_lookup_elem(htab_t *htab __arg_arena, int key) +__weak int htab_lookup_elem(struct htab __arena *htab, int key) { hashtab_elem_t *l_old; arena_list_head_t *head; @@ -66,7 +65,7 @@ __weak int htab_lookup_elem(htab_t *htab __arg_arena, int key) return 0; } -__weak int htab_update_elem(htab_t *htab __arg_arena, int key, int value) +__weak int htab_update_elem(struct htab __arena *htab, int key, int value) { hashtab_elem_t *l_new = NULL, *l_old; arena_list_head_t *head; @@ -90,7 +89,7 @@ __weak int htab_update_elem(htab_t *htab __arg_arena, int key, int value) return 0; } -void htab_init(htab_t *htab) +void htab_init(struct htab __arena *htab) { void __arena *buckets = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0); diff --git a/tools/testing/selftests/bpf/bpf_arena_strsearch.h b/tools/testing/selftests/bpf/bpf_arena_strsearch.h index f0d575daef5a..10a70667c8bf 100644 --- a/tools/testing/selftests/bpf/bpf_arena_strsearch.h +++ b/tools/testing/selftests/bpf/bpf_arena_strsearch.h @@ -3,7 +3,7 @@ #pragma once #include -__noinline int bpf_arena_strlen(const char __arena *s __arg_arena) +__noinline int bpf_arena_strlen(const char __arena *s) { const char __arena *sc; @@ -40,7 +40,7 @@ __noinline int bpf_arena_strlen(const char __arena *s __arg_arena) * * An opening bracket without a matching close is matched literally. 
*/ -__noinline bool glob_match(char const __arena *pat __arg_arena, char const __arena *str __arg_arena) +__noinline bool glob_match(char const __arena *pat, char const __arena *str) { /* * Backtrack to previous * on mismatch and retry starting one diff --git a/tools/testing/selftests/bpf/libarena/include/bpf_arena_common.h b/tools/testing/selftests/bpf/libarena/include/bpf_arena_common.h index 445be3c4edec..82aafe879fae 100644 --- a/tools/testing/selftests/bpf/libarena/include/bpf_arena_common.h +++ b/tools/testing/selftests/bpf/libarena/include/bpf_arena_common.h @@ -38,7 +38,7 @@ #define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */ #define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */ #else -#define __arena +#define __arena __attribute__((btf_type_tag("arena"))) #define __arena_global SEC(".addr_space.1") #define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1) #define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0) @@ -54,7 +54,6 @@ void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym _ #else /* when compiled as user space code */ #define __arena -#define __arg_arena #define cast_kern(ptr) /* nop for user space */ #define cast_user(ptr) /* nop for user space */ __weak char arena[1]; diff --git a/tools/testing/selftests/bpf/libarena/include/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/libarena/include/bpf_arena_spin_lock.h index 164638690a4d..ae6b72d15bb6 100644 --- a/tools/testing/selftests/bpf/libarena/include/bpf_arena_spin_lock.h +++ b/tools/testing/selftests/bpf/libarena/include/bpf_arena_spin_lock.h @@ -16,10 +16,6 @@ #define EOPNOTSUPP 95 #define ETIMEDOUT 110 -#ifndef __arena -#define __arena __attribute__((address_space(1))) -#endif - extern unsigned long CONFIG_NR_CPUS __kconfig; /* @@ -246,7 +242,7 @@ static __always_inline int arena_spin_trylock(arena_spinlock_t __arena *lock) } __noinline __weak -int arena_spin_lock_slowpath(arena_spinlock_t __arena __arg_arena *lock, u32 val) +int 
arena_spin_lock_slowpath(arena_spinlock_t __arena *lock, u32 val) { struct arena_mcs_spinlock __arena *prev, *next, *node0, *node; int ret = -ETIMEDOUT; diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/asan.h b/tools/testing/selftests/bpf/libarena/include/libarena/asan.h index eb9fc69d9eb0..900267159292 100644 --- a/tools/testing/selftests/bpf/libarena/include/libarena/asan.h +++ b/tools/testing/selftests/bpf/libarena/include/libarena/asan.h @@ -25,12 +25,10 @@ extern volatile bool asan_report_once; #ifdef BPF_ARENA_ASAN -typedef s8 __arena s8a; - static inline -s8a *mem_to_shadow(void __arena __arg_arena *addr) +s8 __arena *mem_to_shadow(void __arena *addr) { - return (s8a *)(((u32)(u64)addr >> ASAN_SHADOW_SHIFT) + + return (s8 __arena *)(((u32)(u64)addr >> ASAN_SHADOW_SHIFT) + __asan_shadow_memory_dynamic_address); } diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/buddy.h b/tools/testing/selftests/bpf/libarena/include/libarena/buddy.h index 00e2437128ef..4d57fc1b5c26 100644 --- a/tools/testing/selftests/bpf/libarena/include/libarena/buddy.h +++ b/tools/testing/selftests/bpf/libarena/include/libarena/buddy.h @@ -2,12 +2,6 @@ /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ #pragma once -struct buddy_chunk; -typedef struct buddy_chunk __arena buddy_chunk_t; - -struct buddy_header; -typedef struct buddy_header __arena buddy_header_t; - enum buddy_consts { /* * Minimum allocation is 1 << BUDDY_MIN_ALLOC_SHIFT. @@ -68,25 +62,21 @@ struct buddy_chunk { u8 allocated[BUDDY_CHUNK_ITEMS / 8]; /* Freelists for O(1) allocation. */ u64 freelists[BUDDY_CHUNK_NUM_ORDERS]; - buddy_chunk_t *next; + struct buddy_chunk __arena *next; }; struct buddy { - buddy_chunk_t *first_chunk; /* Pointer to the chunk linked list. */ + struct buddy_chunk __arena *first_chunk; /* Pointer to the chunk linked list. 
*/ arena_spinlock_t lock; /* Allocator lock */ u64 vaddr; /* Allocation into reserved vaddr */ }; -typedef struct buddy __arena buddy_t; - #ifdef __BPF__ -int buddy_init(buddy_t *buddy); -int buddy_destroy(buddy_t *buddy); -int buddy_free_internal(buddy_t *buddy, u64 free); -#define buddy_free(buddy, ptr) buddy_free_internal((buddy), (u64)(ptr)) -u64 buddy_alloc_internal(buddy_t *buddy, size_t size); +int buddy_init(struct buddy __arena *buddy); +int buddy_destroy(struct buddy __arena *buddy); +int buddy_free(struct buddy __arena *buddy, void __arena *free); +u64 buddy_alloc_internal(struct buddy __arena *buddy, size_t size); #define buddy_alloc(alloc, size) ((void __arena *)buddy_alloc_internal((alloc), (size))) - #endif /* __BPF__ */ diff --git a/tools/testing/selftests/bpf/libarena/selftests/st_asan_buddy.bpf.c b/tools/testing/selftests/bpf/libarena/selftests/st_asan_buddy.bpf.c index 97acd50ffa5c..686caba2c643 100644 --- a/tools/testing/selftests/bpf/libarena/selftests/st_asan_buddy.bpf.c +++ b/tools/testing/selftests/bpf/libarena/selftests/st_asan_buddy.bpf.c @@ -8,7 +8,7 @@ /* Required for parsing the ASAN call stacks. */ #include "test_progs_compat.h" -extern buddy_t buddy; +extern struct buddy __arena buddy; #ifdef BPF_ARENA_ASAN @@ -54,7 +54,7 @@ static __always_inline int asan_test_buddy_oob_single(size_t alloc_size) * Factored out because asan_validate_addr is complex enough to cause * verification failures if verified with the rest of asan_test_buddy_uaf_single. 
*/ -__weak int asan_test_buddy_byte(u8 __arena __arg_arena *mem, int i, bool freed) +__weak int asan_test_buddy_byte(u8 __arena *mem, int i, bool freed) { int ret; diff --git a/tools/testing/selftests/bpf/libarena/selftests/st_asan_common.h b/tools/testing/selftests/bpf/libarena/selftests/st_asan_common.h index 1d3edc4372ac..34a7918cb4cf 100644 --- a/tools/testing/selftests/bpf/libarena/selftests/st_asan_common.h +++ b/tools/testing/selftests/bpf/libarena/selftests/st_asan_common.h @@ -9,7 +9,7 @@ static inline void print_asan_map_state(void __arena *addr) { arena_stdout("%s:%d ASAN %p -> (val: %x gran: %x set: [%s])", __func__, __LINE__, addr, - *(s8a *)(addr), ASAN_GRANULE(addr), + *(s8 __arena *)(addr), ASAN_GRANULE(addr), asan_shadow_set(addr) ? "yes" : "no"); } diff --git a/tools/testing/selftests/bpf/libarena/selftests/st_buddy.bpf.c b/tools/testing/selftests/bpf/libarena/selftests/st_buddy.bpf.c index 79e6f0baabfe..b45a306816c0 100644 --- a/tools/testing/selftests/bpf/libarena/selftests/st_buddy.bpf.c +++ b/tools/testing/selftests/bpf/libarena/selftests/st_buddy.bpf.c @@ -6,7 +6,7 @@ #include #include -extern buddy_t buddy; +extern struct buddy __arena buddy; struct segarr_entry { u8 __arena *block; diff --git a/tools/testing/selftests/bpf/libarena/src/asan.bpf.c b/tools/testing/selftests/bpf/libarena/src/asan.bpf.c index 64c5b990086c..5135d5c72a46 100644 --- a/tools/testing/selftests/bpf/libarena/src/asan.bpf.c +++ b/tools/testing/selftests/bpf/libarena/src/asan.bpf.c @@ -110,7 +110,7 @@ volatile bool asan_report_once = false; * to exit due to a missing implementation. Provide a simple implementation * just for memset to use it for poisoning/unpoisoning the map. */ -__weak int asan_memset(s8a __arg_arena *dst, s8 val, size_t size) +__weak int asan_memset(s8 __arena *dst, s8 val, size_t size) { size_t i; @@ -121,9 +121,9 @@ __weak int asan_memset(s8a __arg_arena *dst, s8 val, size_t size) } /* Validate a 1-byte access, always within a single byte. 
*/ -static __always_inline bool memory_is_poisoned_1(s8a *addr) +static __always_inline bool memory_is_poisoned_1(s8 __arena *addr) { - s8 shadow_value = *(s8a *)mem_to_shadow(addr); + s8 shadow_value = *(s8 __arena *)mem_to_shadow(addr); /* Byte is 0, access is valid. */ if (likely(!shadow_value)) @@ -139,7 +139,7 @@ static __always_inline bool memory_is_poisoned_1(s8a *addr) } /* Validate a 2- 4-, 8-byte access, shadow spans up to 2 bytes. */ -static __always_inline bool memory_is_poisoned_2_4_8(s8a *addr, u64 size) +static __always_inline bool memory_is_poisoned_2_4_8(s8 __arena *addr, u64 size) { u64 end = (u64)addr + size - 1; @@ -148,17 +148,17 @@ static __always_inline bool memory_is_poisoned_2_4_8(s8a *addr, u64 size) * overflow above ASAN_GRANULE). */ if (likely(ASAN_GRANULE(end) >= size - 1)) - return memory_is_poisoned_1((s8a *)end); + return memory_is_poisoned_1((s8 __arena *)end); /* * Otherwise first byte must be fully unpoisoned, and second byte * must be unpoisoned up to the end of the accessed region. 
*/ - return *(s8a *)mem_to_shadow(addr) || memory_is_poisoned_1((s8a *)end); + return *(s8 __arena *)mem_to_shadow(addr) || memory_is_poisoned_1((s8 __arena *)end); } -__weak bool asan_shadow_set(void __arena __arg_arena *addr) +__weak bool asan_shadow_set(void __arena *addr) { return memory_is_poisoned_1(addr); } @@ -166,7 +166,7 @@ __weak bool asan_shadow_set(void __arena __arg_arena *addr) static __always_inline u64 first_nonzero_byte(u64 addr, size_t size) { while (size && can_loop) { - if (unlikely(*(s8a *)addr)) + if (unlikely(*(s8 __arena *)addr)) return addr; addr += 1; size -= 1; @@ -175,7 +175,7 @@ static __always_inline u64 first_nonzero_byte(u64 addr, size_t size) return SHADOW_ALL_ZEROES; } -static __always_inline bool memory_is_poisoned_n(s8a *addr, u64 size) +static __always_inline bool memory_is_poisoned_n(s8 __arena *addr, u64 size) { u64 ret; u64 start; @@ -189,10 +189,10 @@ static __always_inline bool memory_is_poisoned_n(s8a *addr, u64 size) if (likely(ret == SHADOW_ALL_ZEROES)) return false; - return unlikely(ret != end || ASAN_GRANULE(addr + size - 1) >= *(s8a *)end); + return unlikely(ret != end || ASAN_GRANULE(addr + size - 1) >= *(s8 __arena *)end); } -__weak int asan_report(s8a __arg_arena *addr, size_t sz, u32 flags) +__weak int asan_report(s8 __arena *addr, size_t sz, u32 flags) { u32 reported = __sync_val_compare_and_swap(&asan_reported, false, true); @@ -211,7 +211,7 @@ __weak int asan_report(s8a __arg_arena *addr, size_t sz, u32 flags) return 0; } -static __always_inline bool check_asan_args(s8a *addr, size_t size, +static __always_inline bool check_asan_args(s8 __arena *addr, size_t size, bool *result) { bool valid = true; @@ -253,7 +253,7 @@ confirmed_valid: static __always_inline bool check_region_inline(intptr_t ptr, size_t size, u32 flags) { - s8a *addr = (s8a *)(u64)ptr; + s8 __arena *addr = (s8 __arena *)(u64)ptr; bool is_poisoned, is_valid; if (check_asan_args(addr, size, &is_valid)) { @@ -305,19 +305,19 @@ static 
__always_inline bool check_region_inline(intptr_t ptr, size_t size, } \ __hidden void __asan_report_store##size(intptr_t addr) \ { \ - asan_report((s8a *)addr, size, ASAN_WRITE); \ + asan_report((s8 __arena *)addr, size, ASAN_WRITE); \ } \ __hidden void __asan_report_store##size##_noabort(intptr_t addr) \ { \ - asan_report((s8a *)addr, size, ASAN_WRITE); \ + asan_report((s8 __arena *)addr, size, ASAN_WRITE); \ } \ __hidden void __asan_report_load##size(intptr_t addr) \ { \ - asan_report((s8a *)addr, size, ASAN_READ); \ + asan_report((s8 __arena *)addr, size, ASAN_READ); \ } \ __hidden void __asan_report_load##size##_noabort(intptr_t addr) \ { \ - asan_report((s8a *)addr, size, ASAN_READ); \ + asan_report((s8 __arena *)addr, size, ASAN_READ); \ } DEFINE_ASAN_LOAD_STORE(1); @@ -385,7 +385,7 @@ void *__asan_memset(void *p, int c, size_t n) */ __hidden __noasan int asan_poison(void __arena *addr, s8 val, size_t size) { - s8a *shadow; + s8 __arena *shadow; size_t len; /* @@ -443,7 +443,7 @@ __hidden __noasan int asan_poison(void __arena *addr, s8 val, size_t size) __hidden __noasan int asan_unpoison(void __arena *addr, size_t size) { size_t partial = size & ASAN_GRANULE_MASK; - s8a *shadow; + s8 __arena *shadow; size_t len; /* diff --git a/tools/testing/selftests/bpf/libarena/src/buddy.bpf.c b/tools/testing/selftests/bpf/libarena/src/buddy.bpf.c index 865e00803daa..f4ed4c3abb4b 100644 --- a/tools/testing/selftests/bpf/libarena/src/buddy.bpf.c +++ b/tools/testing/selftests/bpf/libarena/src/buddy.bpf.c @@ -45,12 +45,12 @@ enum { BUDDY_CHUNK_PAGES = BUDDY_CHUNK_BYTES / __PAGE_SIZE }; -static inline int buddy_lock(buddy_t *buddy) +static inline int buddy_lock(struct buddy __arena *buddy) { return arena_spin_lock(&buddy->lock); } -static inline void buddy_unlock(buddy_t *buddy) +static inline void buddy_unlock(struct buddy __arena *buddy) { arena_spin_unlock(&buddy->lock); } @@ -61,7 +61,7 @@ static inline void buddy_unlock(buddy_t *buddy) * page alloc kfuncs do not support 
aligning to a boundary (in this * case 1 MiB, see buddy.h on how this is derived). */ -static int buddy_reserve_arena_vaddr(buddy_t *buddy) +static int buddy_reserve_arena_vaddr(struct buddy __arena *buddy) { buddy->vaddr = 0; @@ -73,7 +73,7 @@ static int buddy_reserve_arena_vaddr(buddy_t *buddy) /* * Free up any unused address space. Used only during teardown. */ -static void buddy_unreserve_arena_vaddr(buddy_t *buddy) +static void buddy_unreserve_arena_vaddr(struct buddy __arena *buddy) { bpf_arena_free_pages( &arena, (void __arena *)(BUDDY_VADDR_OFFSET + buddy->vaddr), @@ -94,7 +94,7 @@ static void buddy_unreserve_arena_vaddr(buddy_t *buddy) * However, bump allocation must still be atomic because this function * is called without the buddy lock from multiple threads concurrently. */ -__weak int buddy_alloc_arena_vaddr(buddy_t __arg_arena *buddy, u64 *vaddrp) +__weak int buddy_alloc_arena_vaddr(struct buddy __arena *buddy, u64 *vaddrp) { u64 vaddr, old, new; @@ -134,7 +134,7 @@ static u64 arena_next_pow2(__u64 n) } __weak -int idx_set_allocated(buddy_chunk_t __arg_arena *chunk, u64 idx, bool allocated) +int idx_set_allocated(struct buddy_chunk __arena *chunk, u64 idx, bool allocated) { bool already_allocated; @@ -160,7 +160,7 @@ int idx_set_allocated(buddy_chunk_t __arg_arena *chunk, u64 idx, bool allocated) return 0; } -static int idx_is_allocated(buddy_chunk_t *chunk, u64 idx, bool *allocated) +static int idx_is_allocated(struct buddy_chunk __arena *chunk, u64 idx, bool *allocated) { if (unlikely(idx >= BUDDY_CHUNK_ITEMS)) { arena_stderr("getting state of invalid idx (%llu, max %d)\n", idx, @@ -173,7 +173,7 @@ static int idx_is_allocated(buddy_chunk_t *chunk, u64 idx, bool *allocated) } __weak -int idx_set_order(buddy_chunk_t __arg_arena *chunk, u64 idx, u8 order) +int idx_set_order(struct buddy_chunk __arena *chunk, u64 idx, u8 order) { u8 prev_order; @@ -206,7 +206,7 @@ int idx_set_order(buddy_chunk_t __arg_arena *chunk, u64 idx, u8 order) return 0; } -static 
u8 idx_get_order(buddy_chunk_t *chunk, u64 idx) +static u8 idx_get_order(struct buddy_chunk __arena *chunk, u64 idx) { u8 result; @@ -223,7 +223,7 @@ static u8 idx_get_order(buddy_chunk_t *chunk, u64 idx) return (idx & 0x1) ? (result & 0xf) : (result >> 4); } -static void __arena *idx_to_addr(buddy_chunk_t *chunk, size_t idx) +static void __arena *idx_to_addr(struct buddy_chunk __arena *chunk, size_t idx) { u64 address; @@ -246,7 +246,7 @@ static void __arena *idx_to_addr(buddy_chunk_t *chunk, size_t idx) return (void __arena *)address; } -static buddy_header_t *idx_to_header(buddy_chunk_t *chunk, size_t idx) +static struct buddy_header __arena *idx_to_header(struct buddy_chunk __arena *chunk, size_t idx) { bool allocated; u64 address; @@ -283,13 +283,13 @@ static buddy_header_t *idx_to_header(buddy_chunk_t *chunk, size_t idx) * less probable. */ - return (buddy_header_t *)(address + BUDDY_HEADER_OFF); + return (struct buddy_header __arena *)(address + BUDDY_HEADER_OFF); } -static void header_add_freelist(buddy_chunk_t *chunk, buddy_header_t *header, +static void header_add_freelist(struct buddy_chunk __arena *chunk, struct buddy_header __arena *header, u64 idx, u8 order) { - buddy_header_t *tmp_header; + struct buddy_header __arena *tmp_header; idx_set_order(chunk, idx, order); @@ -304,10 +304,10 @@ static void header_add_freelist(buddy_chunk_t *chunk, buddy_header_t *header, chunk->freelists[order] = idx; } -static void header_remove_freelist(buddy_chunk_t *chunk, - buddy_header_t *header, u8 order) +static void header_remove_freelist(struct buddy_chunk __arena *chunk, + struct buddy_header __arena *header, u8 order) { - buddy_header_t *tmp_header; + struct buddy_header __arena *tmp_header; if (header->prev_index != BUDDY_CHUNK_ITEMS) { tmp_header = idx_to_header(chunk, header->prev_index); @@ -356,10 +356,10 @@ static u64 size_to_order(size_t size) } __weak -int add_leftovers_to_freelist(buddy_chunk_t __arg_arena *chunk, u32 cur_idx, +int 
add_leftovers_to_freelist(struct buddy_chunk __arena *chunk, u32 cur_idx, u64 min_order, u64 max_order) { - buddy_header_t *header; + struct buddy_header __arena *header; u64 ord; u32 idx; @@ -381,10 +381,10 @@ int add_leftovers_to_freelist(buddy_chunk_t __arg_arena *chunk, u32 cur_idx, return 0; } -static buddy_chunk_t *buddy_chunk_get(buddy_t *buddy) +static struct buddy_chunk __arena *buddy_chunk_get(struct buddy __arena *buddy) { u64 order, ord, min_order, max_order; - buddy_chunk_t *chunk; + struct buddy_chunk __arena *chunk; size_t left; int power2; u64 vaddr; @@ -561,9 +561,9 @@ static buddy_chunk_t *buddy_chunk_get(buddy_t *buddy) return chunk; } -__weak int buddy_init(buddy_t __arg_arena *buddy) +__weak int buddy_init(struct buddy __arena *buddy) { - buddy_chunk_t *chunk; + struct buddy_chunk __arena *chunk; int ret; if (!asan_ready()) @@ -602,9 +602,9 @@ __weak int buddy_init(buddy_t __arg_arena *buddy) * We do not take a lock because we are freeing arena pages, and nobody should * be using the allocator at that point in the execution. */ -__weak int buddy_destroy(buddy_t __arg_arena *buddy) +__weak int buddy_destroy(struct buddy __arena *buddy) { - buddy_chunk_t *chunk, *next; + struct buddy_chunk __arena *chunk, *next; if (!buddy) return -EINVAL; @@ -631,9 +631,9 @@ __weak int buddy_destroy(buddy_t __arg_arena *buddy) return 0; } -__weak u64 buddy_chunk_alloc(buddy_chunk_t __arg_arena *chunk, int order_req) +__weak u64 buddy_chunk_alloc(struct buddy_chunk __arena *chunk, int order_req) { - buddy_header_t *header, *tmp_header, *next_header; + struct buddy_header __arena *header, *tmp_header, *next_header; u32 idx, tmpidx, retidx; u64 address; u64 order = 0; @@ -709,9 +709,9 @@ __weak u64 buddy_chunk_alloc(buddy_chunk_t __arg_arena *chunk, int order_req) } /* Scan the existing chunks for available memory. 
*/ -static u64 buddy_alloc_from_existing_chunks(buddy_t *buddy, int order) +static u64 buddy_alloc_from_existing_chunks(struct buddy __arena *buddy, int order) { - buddy_chunk_t *chunk; + struct buddy_chunk __arena *chunk; u64 address; for (chunk = buddy->first_chunk; chunk != NULL && can_loop; @@ -728,7 +728,7 @@ static u64 buddy_alloc_from_existing_chunks(buddy_t *buddy, int order) * Try an allocation from a newly allocated chunk. Also * incorporate the chunk into the linked list. */ -static u64 buddy_alloc_from_new_chunk(buddy_t *buddy, buddy_chunk_t *chunk, int order) +static u64 buddy_alloc_from_new_chunk(struct buddy __arena *buddy, struct buddy_chunk __arena *chunk, int order) { u64 address; @@ -750,10 +750,10 @@ static u64 buddy_alloc_from_new_chunk(buddy_t *buddy, buddy_chunk_t *chunk, int return (u64)address; } __weak -u64 buddy_alloc_internal(buddy_t __arg_arena *buddy, size_t size) +u64 buddy_alloc_internal(struct buddy __arena *buddy, size_t size) { - buddy_chunk_t *chunk; u64 address = (u64)NULL; + struct buddy_chunk __arena *chunk; int order; if (!buddy) @@ -788,20 +788,20 @@ done: * data is smaller than the header, we must poison any * unused bytes that were part of the header. 
*/ - if (size < BUDDY_HEADER_OFF + sizeof(buddy_header_t)) - asan_poison((u8 __arena *)address + BUDDY_HEADER_OFF, - BUDDY_POISONED, sizeof(buddy_header_t)); + if (size < BUDDY_HEADER_OFF + sizeof(struct buddy_header __arena)) + asan_poison((u8 __arena *)address + BUDDY_HEADER_OFF, BUDDY_POISONED, + sizeof(struct buddy_header __arena)); asan_unpoison((u8 __arena *)address, size); return address; } -static __always_inline int buddy_free_unlocked(buddy_t *buddy, u64 addr) +static __always_inline int buddy_free_unlocked(struct buddy __arena *buddy, u64 addr) { - buddy_header_t *header, *buddy_header; + struct buddy_header __arena *header, *buddy_header; u64 idx, buddy_idx, tmp_idx; - buddy_chunk_t *chunk; + struct buddy_chunk __arena *chunk; bool allocated; u8 order; int ret; @@ -878,7 +878,7 @@ static __always_inline int buddy_free_unlocked(buddy_t *buddy, u64 addr) return 0; } -__weak int buddy_free_internal(buddy_t __arg_arena *buddy, u64 addr) +__weak int buddy_free(struct buddy __arena *buddy, void __arena *addr) { int ret; @@ -893,7 +893,7 @@ __weak int buddy_free_internal(buddy_t __arg_arena *buddy, u64 addr) if (ret) return ret; - ret = buddy_free_unlocked(buddy, addr); + ret = buddy_free_unlocked(buddy, (u64)addr); buddy_unlock(buddy); diff --git a/tools/testing/selftests/bpf/libarena/src/common.bpf.c b/tools/testing/selftests/bpf/libarena/src/common.bpf.c index 544bf9e1cb38..ec9de29e6f3e 100644 --- a/tools/testing/selftests/bpf/libarena/src/common.bpf.c +++ b/tools/testing/selftests/bpf/libarena/src/common.bpf.c @@ -6,7 +6,7 @@ const volatile u32 zero = 0; -buddy_t buddy; +struct buddy __arena buddy; int arena_fls(__u64 word) { @@ -43,9 +43,9 @@ __weak u64 arena_malloc_internal(size_t size) return buddy_alloc_internal(&buddy, size); } -__weak void arena_free(void __arg_arena __arena *ptr) +__weak void arena_free(void __arena *ptr) { - buddy_free_internal(&buddy, (u64)ptr); + buddy_free(&buddy, ptr); } diff --git 
a/tools/testing/selftests/bpf/progs/arena_spin_lock.c b/tools/testing/selftests/bpf/progs/arena_spin_lock.c index 7236d92d382f..cf7cda79c16c 100644 --- a/tools/testing/selftests/bpf/progs/arena_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/arena_spin_lock.c @@ -4,6 +4,7 @@ #include #include #include "bpf_misc.h" +#include #include struct { -- cgit v1.2.3 From 367e6e4a8173d47b4c57181cdd9dcbfc291755f0 Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Mon, 1 Jun 2026 20:41:19 -0400 Subject: selftests/bpf: libarena: Directly return arena pointers from functions Now that the __arena annotation includes a BTF type tag, and the verifier can identify arena pointers at BTF loading time, return arena pointers as their true type instead of casting to u64. Remove the preprocessor typecast wrappers used to hide this from the caller. Acked-by: Eduard Zingerman Signed-off-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260602004120.17087-6-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/libarena/include/libarena/buddy.h | 3 +-- .../selftests/bpf/libarena/include/libarena/common.h | 3 +-- tools/testing/selftests/bpf/libarena/src/buddy.bpf.c | 20 ++++++++++---------- .../testing/selftests/bpf/libarena/src/common.bpf.c | 4 ++-- 4 files changed, 14 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/buddy.h b/tools/testing/selftests/bpf/libarena/include/libarena/buddy.h index 4d57fc1b5c26..528c69a1f38e 100644 --- a/tools/testing/selftests/bpf/libarena/include/libarena/buddy.h +++ b/tools/testing/selftests/bpf/libarena/include/libarena/buddy.h @@ -76,7 +76,6 @@ struct buddy { int buddy_init(struct buddy __arena *buddy); int buddy_destroy(struct buddy __arena *buddy); int buddy_free(struct buddy __arena *buddy, void __arena *free); -u64 buddy_alloc_internal(struct buddy __arena *buddy, size_t size); -#define buddy_alloc(alloc, size) ((void __arena 
*)buddy_alloc_internal((alloc), (size))) +void __arena *buddy_alloc(struct buddy __arena *buddy, size_t size); #endif /* __BPF__ */ diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/common.h b/tools/testing/selftests/bpf/libarena/include/libarena/common.h index ca1a6c1d6477..a3eb1641ac36 100644 --- a/tools/testing/selftests/bpf/libarena/include/libarena/common.h +++ b/tools/testing/selftests/bpf/libarena/include/libarena/common.h @@ -48,8 +48,7 @@ extern volatile u64 asan_violated; int arena_fls(__u64 word); -u64 arena_malloc_internal(size_t size); -#define arena_malloc(size) ((void __arena *)arena_malloc_internal((size))) +void __arena *arena_malloc(size_t size); void arena_free(void __arena *ptr); /* diff --git a/tools/testing/selftests/bpf/libarena/src/buddy.bpf.c b/tools/testing/selftests/bpf/libarena/src/buddy.bpf.c index f4ed4c3abb4b..c674ee5cfcc1 100644 --- a/tools/testing/selftests/bpf/libarena/src/buddy.bpf.c +++ b/tools/testing/selftests/bpf/libarena/src/buddy.bpf.c @@ -750,25 +750,25 @@ static u64 buddy_alloc_from_new_chunk(struct buddy __arena *buddy, struct buddy_ return (u64)address; } __weak -u64 buddy_alloc_internal(struct buddy __arena *buddy, size_t size) +void __arena *buddy_alloc(struct buddy __arena *buddy, size_t size) { - u64 address = (u64)NULL; + void __arena *address = NULL; struct buddy_chunk __arena *chunk; int order; if (!buddy) - return (u64)NULL; + return NULL; order = size_to_order(size); if (order >= BUDDY_CHUNK_NUM_ORDERS || order < 0) { arena_stderr("invalid order %d (sz %lu)\n", order, size); - return (u64)NULL; + return NULL; } if (buddy_lock(buddy)) - return (u64)NULL; + return NULL; - address = buddy_alloc_from_existing_chunks(buddy, order); + address = (u8 __arena *)buddy_alloc_from_existing_chunks(buddy, order); buddy_unlock(buddy); if (address) goto done; @@ -776,12 +776,12 @@ u64 buddy_alloc_internal(struct buddy __arena *buddy, size_t size) /* Get a new chunk. 
*/ chunk = buddy_chunk_get(buddy); if (chunk) - address = buddy_alloc_from_new_chunk(buddy, chunk, order); + address = (u8 __arena *)buddy_alloc_from_new_chunk(buddy, chunk, order); done: /* If we failed to allocate memory, return NULL. */ if (!address) - return (u64)NULL; + return NULL; /* * Unpoison exactly the amount of bytes requested. If the @@ -789,10 +789,10 @@ done: * unused bytes that were part of the header. */ if (size < BUDDY_HEADER_OFF + sizeof(struct buddy_header __arena)) - asan_poison((u8 __arena *)address + BUDDY_HEADER_OFF, BUDDY_POISONED, + asan_poison(address + BUDDY_HEADER_OFF, BUDDY_POISONED, sizeof(struct buddy_header __arena)); - asan_unpoison((u8 __arena *)address, size); + asan_unpoison(address, size); return address; } diff --git a/tools/testing/selftests/bpf/libarena/src/common.bpf.c b/tools/testing/selftests/bpf/libarena/src/common.bpf.c index ec9de29e6f3e..50be57213dfb 100644 --- a/tools/testing/selftests/bpf/libarena/src/common.bpf.c +++ b/tools/testing/selftests/bpf/libarena/src/common.bpf.c @@ -38,9 +38,9 @@ __weak int arena_buddy_reset(void) return buddy_init(&buddy); } -__weak u64 arena_malloc_internal(size_t size) +__weak void __arena *arena_malloc(size_t size) { - return buddy_alloc_internal(&buddy, size); + return buddy_alloc(&buddy, size); } __weak void arena_free(void __arena *ptr) -- cgit v1.2.3 From 9fd5bf96ac4be2ec784598c818f672422182042c Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Mon, 1 Jun 2026 20:41:20 -0400 Subject: selftests/bpf: Add tests for the new type-tag based __arena identifier Add selftests that combine the new type-based __arena identifier with the volatile qualifier both in functions' arguments and return values. This way we test both that they are recognized as arena arguments and that they are not sensitive to the position they are placed in the type compared to other qualifiers. 
Signed-off-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260602004120.17087-7-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_arena.c | 67 ++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c index 89d72c8d756a..df0e22d1a29b 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena.c @@ -607,4 +607,71 @@ int non_arena_ptr_add_to_arena_ptr(void *ctx) #endif +static __noinline +u32 __arena *check_arena_arg_nonglobal(u32 __arena *arg) +{ + volatile u32 val = *arg; + + *arg = val + 1; + + return arg; +} + +__weak +u32 __arena *check_arena_arg_global(u32 __arena *arg) +{ + volatile u32 val = *arg; + + *arg = val + 1; + + return arg; +} + +__weak +u32 volatile __arena *check_arena_arg_quals1(u32 volatile __arena *arg1, u32 __arena volatile *arg2) +{ + *arg1 = *arg1 + 1; + *arg2 = *arg1 + 1; + + return arg2; +} + +__weak +u32 __arena volatile *check_arena_arg_quals2(u32 volatile __arena *arg1, u32 __arena volatile *arg2) +{ + *arg1 = *arg1 + 1; + *arg2 = *arg2 + 1; + + return arg2; +} + +SEC("syscall") +__success __retval(0) +int check_arena_arg_ret(void *ctx) +{ + u32 __arena *page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + u32 __arena *arg = page; + u32 __arena volatile *arg1; + u32 __arena volatile *ret1; + u32 volatile __arena *arg2; + u32 volatile __arena *ret2; + + if (!arg) + return 1; + + /* Make sure we use {arg, ret}{1, 2}. 
*/ + + arg = check_arena_arg_nonglobal(page); + arg = check_arena_arg_global(arg); + + arg1 = arg2 = page; + ret1 = check_arena_arg_quals1(arg1, arg2); + ret2 = check_arena_arg_quals2(arg1, arg2); + + if (!(*ret1 ||*ret2)) + return -EINVAL; + + return 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 7fef1796ec4d8c4cce70c374efafdbbc8d6d6cbc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 2 Jun 2026 15:30:50 +0200 Subject: libbpf: Guard add_data() against size overflow add_data() computes size8 = roundup(size, 8) and then hands size8 to realloc_data_buf() before doing memcpy(gen->data_cur, data, size) with the original size. A wrapped size8 passes through the realloc_data_buf() INT32_MAX check. Harden this against overflow, even though it is unlikely to happen in practice. Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20260602133052.423725-3-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/gen_loader.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 66e13566bc31..d79695f01c87 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -160,10 +160,16 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps static int add_data(struct bpf_gen *gen, const void *data, __u32 size) { - __u32 size8 = roundup(size, 8); __u64 zero = 0; + __u32 size8; void *prev; + if (size > INT32_MAX) { + gen->error = -ERANGE; + return 0; + } + size8 = roundup(size, 8); + if (realloc_data_buf(gen, size8)) return 0; prev = gen->data_cur; -- cgit v1.2.3 From 082c412097716b93ff1365689fc4ddcd1ce8296f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 2 Jun 2026 15:30:51 +0200 Subject: selftests/bpf: Keep verifier_map_ptr exercising ops pointer access sashiko complained that 38498c0ebacd ("selftests/bpf: Adjust verifier_map_ptr for the map's excl field") would slightly decrease 
the test coverage, given that the test previously checked that the verifier rejects the access to the ops pointer. Recover the old test with the right offsets and add the existing one as an additional test case. # LDLIBS=-static PKG_CONFIG='pkg-config --static' ./vmtest.sh -- ./test_progs -t verifier_map_ptr [ 1.672932] bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel #637/1 verifier_map_ptr/bpf_map_ptr: read with negative offset rejected:OK #637/2 verifier_map_ptr/bpf_map_ptr: read with negative offset rejected @unpriv:OK #637/3 verifier_map_ptr/bpf_map_ptr: write rejected:OK #637/4 verifier_map_ptr/bpf_map_ptr: write rejected @unpriv:OK #637/5 verifier_map_ptr/bpf_map_ptr: read non-existent field rejected:OK #637/6 verifier_map_ptr/bpf_map_ptr: read non-existent field rejected @unpriv:OK #637/7 verifier_map_ptr/bpf_map_ptr: read beyond excl field rejected:OK #637/8 verifier_map_ptr/bpf_map_ptr: read beyond excl field rejected @unpriv:OK #637/9 verifier_map_ptr/bpf_map_ptr: read ops field accepted:OK #637/10 verifier_map_ptr/bpf_map_ptr: read ops field accepted @unpriv:OK #637/11 verifier_map_ptr/bpf_map_ptr: r = 0, map_ptr = map_ptr + r:OK #637/12 verifier_map_ptr/bpf_map_ptr: r = 0, map_ptr = map_ptr + r @unpriv:OK #637/13 verifier_map_ptr/bpf_map_ptr: r = 0, r = r + map_ptr:OK #637/14 verifier_map_ptr/bpf_map_ptr: r = 0, r = r + map_ptr @unpriv:OK #637 verifier_map_ptr:OK [...] 
Summary: 2/20 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20260602133052.423725-4-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/verifier_map_ptr.c | 34 +++++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c index d8e822d1a8ba..166193659870 100644 --- a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c @@ -72,17 +72,43 @@ __naked void bpf_map_ptr_write_rejected(void) /* * struct bpf_map starts with the SHA256 hash sha[32] at offset 0 (a readable - * byte array), followed by the u32 excl field at offset 32. Reading a u32 at - * offset 33 runs past the end of excl and is rejected. + * byte array), the u32 excl field at offset 32, and the ops pointer at offset + * 40. Reading a u32 at offset 41 reaches into the middle of the ops pointer, + * i.e. a partial pointer access, which is rejected. */ SEC("socket") __description("bpf_map_ptr: read non-existent field rejected") __failure -__msg("access beyond the end of member excl (mend:36) in struct bpf_map with off 33 size 4") +__msg("cannot access ptr member ops with moff 40 in struct bpf_map with off 41 size 4") __failure_unpriv __msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN") __flag(BPF_F_ANY_ALIGNMENT) __naked void read_non_existent_field_rejected(void) +{ + asm volatile (" \ + r6 = 0; \ + r1 = %[map_array_48b] ll; \ + r6 = *(u32*)(r1 + 41); \ + r0 = 1; \ + exit; \ +" : + : __imm_addr(map_array_48b) + : __clobber_all); +} + +/* + * The u32 excl field spans offsets 32..35 (mend 36). Reading a u32 at offset + * 33 starts inside excl but extends past its end, which the verifier rejects + * as an out-of-bounds scalar access. 
+ */ +SEC("socket") +__description("bpf_map_ptr: read beyond excl field rejected") +__failure +__msg("access beyond the end of member excl (mend:36) in struct bpf_map with off 33 size 4") +__failure_unpriv +__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void read_beyond_excl_field_rejected(void) { asm volatile (" \ r6 = 0; \ @@ -105,7 +131,7 @@ __naked void ptr_read_ops_field_accepted(void) asm volatile (" \ r6 = 0; \ r1 = %[map_array_48b] ll; \ - r6 = *(u64*)(r1 + 0); \ + r6 = *(u64*)(r1 + 40); \ r0 = 1; \ exit; \ " : -- cgit v1.2.3 From 8dedd34122d0950c6b69785db0fa740fdbbf5b2c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 2 Jun 2026 15:30:52 +0200 Subject: selftests/bpf: Test that exclusive maps are rejected as iter targets Add a subtest to map_excl that creates an exclusive map and verifies a bpf_map_elem iterator cannot be attached to it, which would otherwise let an unrelated program read and overwrite the map's contents through the iterator's writable value buffer. # LDLIBS=-static PKG_CONFIG='pkg-config --static' ./vmtest.sh -- ./test_progs -t map_excl [...] ./test_progs -t map_excl [ 1.704382] bpf_testmod: loading out-of-tree module taints kernel. 
[ 1.706068] bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel #215/1 map_excl/map_excl_allowed:OK #215/2 map_excl/map_excl_denied:OK #215/3 map_excl/map_excl_no_map_in_map:OK #215/4 map_excl/map_excl_no_map_iter:OK #215 map_excl:OK Summary: 1/4 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20260602133052.423725-5-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/map_excl.c | 39 +++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/map_excl.c b/tools/testing/selftests/bpf/prog_tests/map_excl.c index a213dd559aae..3088668e2e45 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_excl.c +++ b/tools/testing/selftests/bpf/prog_tests/map_excl.c @@ -7,6 +7,7 @@ #include #include "map_excl.skel.h" +#include "bpf_iter_bpf_array_map.skel.h" #ifndef SHA256_DIGEST_SIZE #define SHA256_DIGEST_SIZE 32 @@ -89,6 +90,42 @@ out: close(excl_fd); } +static void test_map_excl_no_map_iter(void) +{ + __u8 hash[SHA256_DIGEST_SIZE] = {}; + LIBBPF_OPTS(bpf_map_create_opts, excl_opts, + .excl_prog_hash = hash, + .excl_prog_hash_size = sizeof(hash)); + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + struct bpf_iter_bpf_array_map *skel = NULL; + union bpf_iter_link_info linfo; + struct bpf_link *link; + int excl_fd; + + excl_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "excl_iter", 4, 8, 3, &excl_opts); + if (!ASSERT_OK_FD(excl_fd, "create exclusive map")) + return; + + skel = bpf_iter_bpf_array_map__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_array_map__open_and_load")) + goto out; + + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = excl_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts); + if (!ASSERT_ERR_PTR(link, "reject exclusive map as iter target")) { + 
bpf_link__destroy(link); + goto out; + } + ASSERT_EQ(libbpf_get_error(link), -EPERM, "iter attach errno"); +out: + bpf_iter_bpf_array_map__destroy(skel); + close(excl_fd); +} + void test_map_excl(void) { if (test__start_subtest("map_excl_allowed")) @@ -97,4 +134,6 @@ void test_map_excl(void) test_map_excl_denied(); if (test__start_subtest("map_excl_no_map_in_map")) test_map_excl_no_map_in_map(); + if (test__start_subtest("map_excl_no_map_iter")) + test_map_excl_no_map_iter(); } -- cgit v1.2.3 From 8a7f2bff2165e53595d1e91c160b340f978c0ab7 Mon Sep 17 00:00:00 2001 From: Woojin Ji Date: Wed, 3 Jun 2026 09:33:39 +0900 Subject: bpftool: Use libbpf error code for flow dissector query bpf_prog_query() returns a negative errno on failure. query_flow_dissector() currently closes the namespace fd and then reads errno to decide whether -EINVAL means that the running kernel does not support flow dissector queries. That errno check controls behavior, not just diagnostics: -EINVAL is handled as a non-fatal old-kernel case, while any other error makes bpftool net fail. The namespace fd is opened read-only, so close() is not expected to commonly fail in normal use. Still, the BPF_PROG_QUERY error is already available in err, and reading errno after an intervening close() is fragile. If close() does change errno, the compatibility branch may be based on close()'s error instead of the BPF_PROG_QUERY result. This was reproduced with an LD_PRELOAD fault injector that forced BPF_PROG_QUERY for BPF_FLOW_DISSECTOR to fail with EINVAL and then forced close() on the netns fd to fail with EIO. The unpatched bpftool reported "can't query prog: Input/output error". With this change, the same injected failure is handled as the intended non-fatal EINVAL compatibility case. Use the libbpf-returned error code instead. Keep the existing errno reset in the non-fatal path to preserve batch mode behavior. The success path is unchanged. 
Fixes: 7f0c57fec80f ("bpftool: show flow_dissector attachment status") Signed-off-by: Woojin Ji Signed-off-by: Andrii Nakryiko Acked-by: Leon Hwang Acked-by: Yonghong Song Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20260603003339.33791-1-random6.xyz@gmail.com Assisted-by: ChatGPT:gpt-5.5 --- tools/bpf/bpftool/net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 974189da8a91..dba28755d284 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -603,14 +603,14 @@ static int query_flow_dissector(struct bpf_attach_info *attach_info) &attach_flags, prog_ids, &prog_cnt); close(fd); if (err) { - if (errno == EINVAL) { + if (err == -EINVAL) { /* Older kernel's don't support querying * flow dissector programs. */ errno = 0; return 0; } - p_err("can't query prog: %s", strerror(errno)); + p_err("can't query prog: %s", strerror(-err)); return -1; } -- cgit v1.2.3 From e87d898bc766a6dc3cec63478b4cdf4e6286aff1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 Jun 2026 23:16:57 +0200 Subject: selftests/bpf: Cover exclusive map create-time validation map_excl exercises exclusive-map binding (allowed/denied), map-in-map and map iterator rejection. It does not cover the create-time validation of excl_prog_hash: the kernel only accepts a SHA-256-sized hash and requires the pointer and size to be consistent. Add map_excl_create_validation to check the rejected combinations: # LDLIBS=-static PKG_CONFIG='pkg-config --static' ./vmtest.sh -- ./test_progs -t map_excl [...] 
[ 1.780305] clocksource: Switched to clocksource tsc #215/1 map_excl/map_excl_allowed:OK #215/2 map_excl/map_excl_denied:OK #215/3 map_excl/map_excl_no_map_in_map:OK #215/4 map_excl/map_excl_no_map_iter:OK #215/5 map_excl/map_excl_create_validation:OK #215 map_excl:OK Summary: 1/5 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20260603211658.471212-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/map_excl.c | 37 +++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/map_excl.c b/tools/testing/selftests/bpf/prog_tests/map_excl.c index 3088668e2e45..3f4422b9ffa6 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_excl.c +++ b/tools/testing/selftests/bpf/prog_tests/map_excl.c @@ -126,6 +126,41 @@ out: close(excl_fd); } +static void test_map_excl_create_validation(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, o); + __u8 hash[SHA256_DIGEST_SIZE] = {}; + int fd; + + o.excl_prog_hash = hash; + o.excl_prog_hash_size = SHA256_DIGEST_SIZE / 2; + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "excl", 4, 4, 1, &o); + if (fd >= 0) + close(fd); + ASSERT_EQ(fd, -EINVAL, "reject short excl_prog_hash_size"); + + o.excl_prog_hash = hash; + o.excl_prog_hash_size = SHA256_DIGEST_SIZE * 2; + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "excl", 4, 4, 1, &o); + if (fd >= 0) + close(fd); + ASSERT_EQ(fd, -EINVAL, "reject long excl_prog_hash_size"); + + o.excl_prog_hash = hash; + o.excl_prog_hash_size = 0; + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "excl", 4, 4, 1, &o); + if (fd >= 0) + close(fd); + ASSERT_EQ(fd, -EINVAL, "reject hash pointer with zero size"); + + o.excl_prog_hash = NULL; + o.excl_prog_hash_size = SHA256_DIGEST_SIZE; + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "excl", 4, 4, 1, &o); + if (fd >= 0) + close(fd); + ASSERT_EQ(fd, -EINVAL, "reject size with NULL hash pointer"); +} + void test_map_excl(void) { if 
(test__start_subtest("map_excl_allowed")) @@ -136,4 +171,6 @@ void test_map_excl(void) test_map_excl_no_map_in_map(); if (test__start_subtest("map_excl_no_map_iter")) test_map_excl_no_map_iter(); + if (test__start_subtest("map_excl_create_validation")) + test_map_excl_create_validation(); } -- cgit v1.2.3 From 5b88319e4775ee1924d5b709084b25f72e6fe78d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 Jun 2026 23:16:58 +0200 Subject: selftests/bpf: Test signed loader error paths The positive path for signed BPF loaders is covered today by the signed lskels (fentry_test, fexit_test, atomics). But the runtime metadata check the generated loader performs (libbpf gen_loader's emit_signature_match), the map content hash it relies on, the load-time signature, and the immutability invariants of its metadata map are not yet covered. Thus, add a new, extensive test suite which drives libbpf's gen_loader (bpf_object__gen_loader, gen_hash=true), the same machinery which bpftool uses for signed light skeletons, and exercise corner cases so that we can assert this in BPF CI: # LDLIBS=-static PKG_CONFIG='pkg-config --static' ./vmtest.sh -- ./test_progs -t signed_loader [...] 
[ 1.840842] clocksource: Switched to clocksource tsc #405/1 signed_loader/metadata_check_shape:OK #405/2 signed_loader/metadata_match:OK #405/3 signed_loader/metadata_sha_mismatch:OK #405/4 signed_loader/metadata_not_exclusive:OK #405/5 signed_loader/metadata_hash_not_computed:OK #405/6 signed_loader/signature_enforced:OK #405/7 signed_loader/signature_too_large:OK #405/8 signed_loader/signature_bad_keyring:OK #405/9 signed_loader/metadata_ctx_max_entries_ignored:OK #405/10 signed_loader/metadata_ctx_initial_value_ignored:OK #405/11 signed_loader/signature_authenticates_insns:OK #405/12 signed_loader/hash_requires_frozen:OK #405/13 signed_loader/no_update_after_freeze:OK #405/14 signed_loader/freeze_writable_mmap:OK #405/15 signed_loader/no_writable_mmap_frozen:OK #405/16 signed_loader/map_hash_matches_libbpf:OK #405/17 signed_loader/map_hash_multi_element:OK #405/18 signed_loader/map_hash_bad_size:OK #405/19 signed_loader/map_hash_unsupported_type:OK #405 signed_loader:OK Summary: 1/19 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20260603211658.471212-2-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/signed_loader.c | 1013 ++++++++++++++++++++ .../selftests/bpf/progs/test_signed_loader.c | 18 + .../selftests/bpf/progs/test_signed_loader_data.c | 20 + .../selftests/bpf/progs/test_signed_loader_map.c | 28 + 4 files changed, 1079 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/signed_loader.c create mode 100644 tools/testing/selftests/bpf/progs/test_signed_loader.c create mode 100644 tools/testing/selftests/bpf/progs/test_signed_loader_data.c create mode 100644 tools/testing/selftests/bpf/progs/test_signed_loader_map.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/signed_loader.c b/tools/testing/selftests/bpf/prog_tests/signed_loader.c new file mode 100644 index 000000000000..dcfdd2d96b05 --- /dev/null +++ 
b/tools/testing/selftests/bpf/prog_tests/signed_loader.c @@ -0,0 +1,1013 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Isovalent */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf/libbpf_internal.h" /* for libbpf_sha256() */ +#include "bpf/skel_internal.h" /* for loader ctx layout (bpf_loader_ctx etc) */ + +#include "test_signed_loader.skel.h" +#include "test_signed_loader_map.skel.h" +#include "test_signed_loader_data.skel.h" + +#define SIG_MATCH_INSNS 33 /* excl (5) + 4 * sha-dword (7) */ + +static int load_loader(const void *insns, __u32 insns_sz, int map_fd, + const void *sig, __u32 sig_sz, __s32 keyring_id) +{ + union bpf_attr attr; + int fd; + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SYSCALL; + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = insns_sz / sizeof(struct bpf_insn); + attr.license = ptr_to_u64("Dual BSD/GPL"); + attr.prog_flags = BPF_F_SLEEPABLE; + attr.fd_array = ptr_to_u64(&map_fd); + if (sig) { + attr.signature = ptr_to_u64(sig); + attr.signature_size = sig_sz; + attr.keyring_id = keyring_id; + } + memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog")); + fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, + offsetofend(union bpf_attr, keyring_id)); + return fd < 0 ? 
-errno : fd; +} + +static int run_gen_loader(const void *insns, __u32 insns_sz, + const void *data, __u32 data_sz, + const void *excl, __u32 excl_sz, + const void *sig, __u32 sig_sz, + bool get_hash, void *ctx, __u32 ctx_sz, bool *loader_ran) +{ + LIBBPF_OPTS(bpf_map_create_opts, mopts, + .excl_prog_hash = excl, + .excl_prog_hash_size = excl_sz); + __u8 hbuf[SHA256_DIGEST_LENGTH]; + struct bpf_map_info info; + __u32 ilen = sizeof(info), key = 0; + union bpf_attr attr; + int map_fd, prog_fd, ret; + + *loader_ran = false; + + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", + 4, data_sz, 1, &mopts); + if (map_fd < 0) + return -errno; + if (bpf_map_update_elem(map_fd, &key, data, 0)) { + ret = -errno; + goto out_map; + } + if (bpf_map_freeze(map_fd)) { + ret = -errno; + goto out_map; + } + if (get_hash) { + memset(&info, 0, sizeof(info)); + info.hash = ptr_to_u64(hbuf); + info.hash_size = sizeof(hbuf); + if (bpf_map_get_info_by_fd(map_fd, &info, &ilen)) { + ret = -errno; + goto out_map; + } + } + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SYSCALL; + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = insns_sz / sizeof(struct bpf_insn); + attr.license = ptr_to_u64("Dual BSD/GPL"); + attr.prog_flags = BPF_F_SLEEPABLE; + attr.fd_array = ptr_to_u64(&map_fd); + if (sig) { + attr.signature = ptr_to_u64(sig); + attr.signature_size = sig_sz; + attr.keyring_id = KEY_SPEC_SESSION_KEYRING; + } + memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog")); + prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, + offsetofend(union bpf_attr, keyring_id)); + if (prog_fd < 0) { + ret = -errno; + goto out_map; + } + + memset(&attr, 0, sizeof(attr)); + attr.test.prog_fd = prog_fd; + attr.test.ctx_in = ptr_to_u64(ctx); + attr.test.ctx_size_in = ctx_sz; + if (syscall(__NR_bpf, BPF_PROG_RUN, &attr, + offsetofend(union bpf_attr, test)) < 0) { + ret = -errno; + goto out_prog; + } + *loader_ran = true; + ret = (int)attr.test.retval; +out_prog: + 
close(prog_fd); +out_map: + close(map_fd); + return ret; +} + +static void close_loader_ctx_fds(void *ctx, int nr_maps, int nr_progs) +{ + struct bpf_map_desc *md = (struct bpf_map_desc *)((char *)ctx + + sizeof(struct bpf_loader_ctx)); + struct bpf_prog_desc *pd = (struct bpf_prog_desc *)(md + nr_maps); + int i; + + for (i = 0; i < nr_maps; i++) + if (md[i].map_fd > 0) + close(md[i].map_fd); + for (i = 0; i < nr_progs; i++) + if (pd[i].prog_fd > 0) + close(pd[i].prog_fd); +} + +static int run_setup(const char *cmd, const char *dir) +{ + int pid, status; + + pid = fork(); + if (pid < 0) + return -errno; + if (pid == 0) { + execlp("./verify_sig_setup.sh", "./verify_sig_setup.sh", + cmd, dir, NULL); + exit(1); + } + if (waitpid(pid, &status, 0) < 0) + return -errno; + return (WIFEXITED(status) && + WEXITSTATUS(status) == 0) ? 0 : -EINVAL; +} + +static int sign_buf(const char *dir, const void *buf, __u32 len, + void *sig, __u32 *sig_sz) +{ + char data_tmpl[PATH_MAX], key[PATH_MAX]; + char sigpath[PATH_MAX + sizeof(".p7s")]; + int fd, pid, status, ret; + struct stat st; + + ret = snprintf(data_tmpl, sizeof(data_tmpl), "%s/dataXXXXXX", dir); + if (ret < 0 || ret >= (int)sizeof(data_tmpl)) + return -ENAMETOOLONG; + ret = 0; + + fd = mkstemp(data_tmpl); + if (fd < 0) + return -errno; + if (write(fd, buf, len) != (ssize_t)len) { + close(fd); + ret = -EIO; + goto out; + } + close(fd); + + pid = fork(); + if (pid < 0) { + ret = -errno; + goto out; + } + if (pid == 0) { + snprintf(key, sizeof(key), "%s/signing_key.pem", dir); + execlp("./sign-file", "./sign-file", "-d", "sha256", + key, key, data_tmpl, NULL); + exit(1); + } + if (waitpid(pid, &status, 0) < 0 || + !WIFEXITED(status) || WEXITSTATUS(status)) { + ret = -EINVAL; + goto out; + } + + snprintf(sigpath, sizeof(sigpath), "%s.p7s", data_tmpl); + if (stat(sigpath, &st) < 0) { + ret = -errno; + goto out; + } + if (st.st_size > (off_t)*sig_sz) { + ret = -E2BIG; + goto out_sig; + } + fd = open(sigpath, O_RDONLY); + if (fd < 
0) { + ret = -errno; + goto out_sig; + } + if (read(fd, sig, st.st_size) != st.st_size) { + close(fd); + ret = -EIO; + goto out_sig; + } + close(fd); + *sig_sz = st.st_size; +out_sig: + unlink(sigpath); +out: + unlink(data_tmpl); + return ret; +} + +static void check_sig_match_shape(const struct bpf_insn *in, int n) +{ + int a = -1, cleanup = -1, i, base, t, br[5], nb = 0; + + /* BPF_PSEUDO_MAP_IDX (the struct bpf_map * form) is used only here. */ + for (i = 0; i + 1 < n; i++) { + if (in[i].code == (BPF_LD | BPF_IMM | BPF_DW) && + in[i].src_reg == BPF_PSEUDO_MAP_IDX) { + a = i; + break; + } + } + if (!ASSERT_GE(a, 0, "emit_signature_match present")) + return; + if (!ASSERT_LE(a + SIG_MATCH_INSNS, n, "block fits in program")) + return; + + /* excl check: r2 = *(u32 *)(map + 32); if r2 != 1 goto cleanup */ + ASSERT_EQ(in[a + 2].code, (BPF_LDX | BPF_MEM | BPF_W), "excl load width"); + ASSERT_EQ(in[a + 2].off, SHA256_DIGEST_LENGTH, "excl field offset"); + ASSERT_EQ(in[a + 4].code, (BPF_JMP | BPF_JNE | BPF_K), "excl branch op"); + ASSERT_EQ(in[a + 4].imm, 1, "excl compared to 1"); + br[nb++] = a + 4; + + /* 4 sha-dword checks: r2 = *(u64 *)(map + i*8); if r2 != r3 goto cleanup */ + for (i = 0; i < 4; i++) { + base = a + 5 + i * 7; + ASSERT_EQ(in[base + 2].code, (BPF_LDX | BPF_MEM | BPF_DW), "sha load width"); + ASSERT_EQ(in[base + 2].off, i * 8, "sha dword offset"); + ASSERT_EQ(in[base + 3].code, (BPF_LD | BPF_IMM | BPF_DW), "sha imm64 (H_meta)"); + ASSERT_EQ(in[base + 6].code, (BPF_JMP | BPF_JNE | BPF_X), "sha branch op"); + br[nb++] = base + 6; + } + + /* + * Locate the real cleanup label so we can pin the exact jump target, + * not just "some backward label". bpf_gen__init() emits the cleanup + * block as a prog-fd close loop whose first instruction is the label + * every error branch jumps to. 
+ */ + for (i = 0; i + 2 < a; i++) { + if (in[i].code == (BPF_LDX | BPF_MEM | BPF_W) && + in[i].dst_reg == BPF_REG_1 && in[i].src_reg == BPF_REG_10 && + in[i + 1].code == (BPF_JMP | BPF_JSLE | BPF_K) && + in[i + 1].dst_reg == BPF_REG_1 && in[i + 1].imm == 0 && + in[i + 1].off == 1 && + in[i + 2].code == (BPF_JMP | BPF_CALL) && + in[i + 2].imm == BPF_FUNC_sys_close) { + cleanup = i; + break; + } + } + if (!ASSERT_GE(cleanup, 0, "cleanup label located")) + return; + for (i = 0; i < nb; i++) { + t = br[i] + 1 + in[br[i]].off; + ASSERT_EQ(t, cleanup, "sig-match lands on cleanup"); + } + /* + * Same invariant for every other cleanup-bound jump in the program: + * emit_check_err() is the only source of "if (r7 < 0) goto cleanup", + * so each of those must also resolve exactly to cleanup. + */ + for (i = 0, t = 0; i < n; i++) { + if (in[i].code != (BPF_JMP | BPF_JSLT | BPF_K) || + in[i].dst_reg != BPF_REG_7 || in[i].imm != 0 || in[i].off >= 0) + continue; + ASSERT_EQ(i + 1 + in[i].off, cleanup, "err-check lands on cleanup"); + t++; + } + ASSERT_GT(t, 0, "found emit_check_err jumps"); +} + +struct gen_loader_fixture { + struct test_signed_loader *skel; + struct gen_loader_opts gopts; + unsigned char *blob; + void *ctx; + __u32 data_sz; + __u32 ctx_sz; + int nr_maps; + int nr_progs; + __u8 excl[SHA256_DIGEST_LENGTH]; +}; + +static int gen_loader_fixture_init(struct gen_loader_fixture *f) +{ + LIBBPF_OPTS(gen_loader_opts, gopts, .gen_hash = true); + int nr_maps = 0, nr_progs = 0; + struct bpf_program *p; + struct bpf_map *m; + + memset(f, 0, sizeof(*f)); + f->skel = test_signed_loader__open(); + if (!ASSERT_OK_PTR(f->skel, "skel_open")) + return -1; + if (!ASSERT_OK(bpf_object__gen_loader(f->skel->obj, &gopts), "gen_loader")) + return -1; + if (!ASSERT_OK(bpf_object__load(f->skel->obj), "gen_load")) + return -1; + f->gopts = gopts; + + bpf_object__for_each_program(p, f->skel->obj) + nr_progs++; + bpf_object__for_each_map(m, f->skel->obj) + nr_maps++; + f->nr_maps = nr_maps; 
+ f->nr_progs = nr_progs; + f->ctx_sz = sizeof(struct bpf_loader_ctx) + + nr_maps * sizeof(struct bpf_map_desc) + + nr_progs * sizeof(struct bpf_prog_desc); + f->ctx = calloc(1, f->ctx_sz); + if (!ASSERT_OK_PTR(f->ctx, "ctx_alloc")) + return -1; + ((struct bpf_loader_ctx *)f->ctx)->sz = f->ctx_sz; + + f->data_sz = gopts.data_sz; + f->blob = malloc(f->data_sz); + if (!ASSERT_OK_PTR(f->blob, "blob_alloc")) + return -1; + memcpy(f->blob, gopts.data, f->data_sz); + + /* excl_prog_hash = SHA256(loader insns) == the loader's prog->digest. */ + libbpf_sha256(gopts.insns, gopts.insns_sz, f->excl); + return 0; +} + +static void gen_loader_fixture_fini(struct gen_loader_fixture *f) +{ + if (f->ctx) + close_loader_ctx_fds(f->ctx, f->nr_maps, f->nr_progs); + free(f->blob); + free(f->ctx); + test_signed_loader__destroy(f->skel); +} + +static void metadata_check_shape(void) +{ + struct gen_loader_fixture f; + + if (gen_loader_fixture_init(&f) == 0) + check_sig_match_shape((const struct bpf_insn *)f.gopts.insns, + f.gopts.insns_sz / sizeof(struct bpf_insn)); + gen_loader_fixture_fini(&f); +} + +static void metadata_match(void) +{ + struct gen_loader_fixture f; + bool ran; + int r; + + if (gen_loader_fixture_init(&f) == 0) { + r = run_gen_loader(f.gopts.insns, f.gopts.insns_sz, f.blob, + f.data_sz, f.excl, sizeof(f.excl), NULL, 0, + true, f.ctx, f.ctx_sz, &ran); + ASSERT_TRUE(ran, "loader ran"); + ASSERT_EQ(r, 0, "honest loader retval"); + } + gen_loader_fixture_fini(&f); +} + +static void metadata_sha_mismatch(void) +{ + struct gen_loader_fixture f; + bool ran; + int r; + + if (gen_loader_fixture_init(&f) == 0) { + /* + * blob[0] lives in the loader's fd_array scratch (first add_data in + * bpf_gen__init); a 0-map program never reads it, so flipping it + * changes only map->sha. The metadata check is the only thing that + * can notice -> isolates emit_signature_match. 
+ */ + f.blob[0] ^= 0xff; + r = run_gen_loader(f.gopts.insns, f.gopts.insns_sz, f.blob, + f.data_sz, f.excl, sizeof(f.excl), NULL, 0, + true, f.ctx, f.ctx_sz, &ran); + ASSERT_TRUE(ran, "loader ran"); + ASSERT_EQ(r, -EINVAL, "tampered blob rejected by emit_signature_match"); + } + gen_loader_fixture_fini(&f); +} + +static void metadata_not_exclusive(void) +{ + struct gen_loader_fixture f; + bool ran; + int r; + + if (gen_loader_fixture_init(&f) == 0) { + /* + * Correct blob but a non-exclusive metadata map: the verifier does + * not reject (excl_prog_sha unset), so the runtime map->excl == 1 + * check in the loader must. + */ + r = run_gen_loader(f.gopts.insns, f.gopts.insns_sz, f.blob, + f.data_sz, NULL, 0, NULL, 0, true, f.ctx, + f.ctx_sz, &ran); + ASSERT_TRUE(ran, "loader ran"); + ASSERT_EQ(r, -EINVAL, "non-exclusive metadata map rejected"); + } + gen_loader_fixture_fini(&f); +} + +static void metadata_hash_not_computed(void) +{ + struct gen_loader_fixture f; + bool ran; + int r; + + if (gen_loader_fixture_init(&f) == 0) { + /* + * Correct, exclusive, frozen map, but its hash was never computed + * (no OBJ_GET_INFO_BY_FD), so map->sha stays zero. The loader must + * fail closed rather than treat an unset hash as a match. + */ + r = run_gen_loader(f.gopts.insns, f.gopts.insns_sz, f.blob, + f.data_sz, f.excl, sizeof(f.excl), NULL, 0, + false, f.ctx, f.ctx_sz, &ran); + ASSERT_TRUE(ran, "loader ran"); + ASSERT_EQ(r, -EINVAL, "uncomputed metadata hash rejected"); + } + gen_loader_fixture_fini(&f); +} + +static void signature_enforced(void) +{ + static const __u8 junk[64] = { 0x30, 0x42, 0x13, 0x37, }; + struct gen_loader_fixture f; + int fd; + + if (gen_loader_fixture_init(&f) == 0) { + /* + * A present-but-invalid signature (the cert bytes are not a + * PKCS#7 signature) must be rejected at load: the signature + * path is honored, not ignored. (The valid path is covered by + * the signed lskels.) 
+ */ + fd = load_loader(f.gopts.insns, f.gopts.insns_sz, -1, junk, + sizeof(junk), KEY_SPEC_SESSION_KEYRING); + ASSERT_LT(fd, 0, "invalid signature rejected at load"); + } + gen_loader_fixture_fini(&f); +} + +static void signature_too_large(void) +{ + static const __u8 junk[64] = {}; + struct gen_loader_fixture f; + int fd; + + if (gen_loader_fixture_init(&f) == 0) { + /* + * signature_size beyond the kernel's bound (KMALLOC_MAX_CACHE_SIZE) + * is rejected before the buffer is read. + */ + fd = load_loader(f.gopts.insns, f.gopts.insns_sz, -1, junk, + 64 << 20, KEY_SPEC_SESSION_KEYRING); + ASSERT_EQ(fd, -EINVAL, "oversized signature rejected"); + } + gen_loader_fixture_fini(&f); +} + +static void signature_bad_keyring(void) +{ + static const __u8 junk[64] = {}; + struct gen_loader_fixture f; + int fd; + + if (gen_loader_fixture_init(&f) == 0) { + /* + * A present signature with a keyring_id that resolves to no key is + * rejected up front: bpf_prog_verify_signature() fails the keyring + * lookup (-EINVAL) before it ever looks at the signature bytes. A + * large positive serial takes the user-keyring path and won't exist. + */ + fd = load_loader(f.gopts.insns, f.gopts.insns_sz, -1, junk, + sizeof(junk), INT_MAX); + ASSERT_EQ(fd, -EINVAL, "signature with bad keyring_id rejected"); + } + gen_loader_fixture_fini(&f); +} + +/* + * A signed loader must ignore ctx-supplied map dimensions: the host cannot + * resize a signed program's maps via the loader ctx. Drive a one-map program + * through gen_loader, ask (via ctx) for every map to be resized to a bogus + * value, and confirm the created maps keep their attested size. 
+ */ +#define GATING_BOGUS_MAX 0x4000 + +static void metadata_ctx_max_entries_ignored(void) +{ + LIBBPF_OPTS(gen_loader_opts, gopts, .gen_hash = true); + struct test_signed_loader_map *skel; + __u8 excl[SHA256_DIGEST_LENGTH]; + int nr_maps = 0, nr_progs = 0, i, checked = 0, r; + struct bpf_program *p; + struct bpf_map *m; + struct bpf_map_desc *md; + unsigned char *blob; + __u32 ctx_sz, data_sz; + void *ctx; + bool ran; + + skel = test_signed_loader_map__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + if (!ASSERT_OK(bpf_object__gen_loader(skel->obj, &gopts), "gen_loader")) + goto destroy; + if (!ASSERT_OK(bpf_object__load(skel->obj), "gen_load")) + goto destroy; + + bpf_object__for_each_program(p, skel->obj) + nr_progs++; + bpf_object__for_each_map(m, skel->obj) + nr_maps++; + ctx_sz = sizeof(struct bpf_loader_ctx) + + nr_maps * sizeof(struct bpf_map_desc) + + nr_progs * sizeof(struct bpf_prog_desc); + ctx = calloc(1, ctx_sz); + if (!ASSERT_OK_PTR(ctx, "ctx_alloc")) + goto destroy; + ((struct bpf_loader_ctx *)ctx)->sz = ctx_sz; + + md = (struct bpf_map_desc *)((char *)ctx + sizeof(struct bpf_loader_ctx)); + for (i = 0; i < nr_maps; i++) + md[i].max_entries = GATING_BOGUS_MAX; + + libbpf_sha256(gopts.insns, gopts.insns_sz, excl); + data_sz = gopts.data_sz; + blob = malloc(data_sz); + if (!ASSERT_OK_PTR(blob, "blob_alloc")) + goto free_ctx; + memcpy(blob, gopts.data, data_sz); + + r = run_gen_loader(gopts.insns, gopts.insns_sz, blob, data_sz, + excl, sizeof(excl), NULL, 0, true, ctx, ctx_sz, &ran); + if (!ASSERT_TRUE(ran, "loader ran") || + !ASSERT_EQ(r, 0, "loader retval")) + goto free_blob; + + for (i = 0; i < nr_maps; i++) { + struct bpf_map_info info; + __u32 ilen = sizeof(info); + int fd = md[i].map_fd; + + if (fd <= 0) + continue; + memset(&info, 0, sizeof(info)); + if (ASSERT_OK(bpf_map_get_info_by_fd(fd, &info, &ilen), "map_info")) { + ASSERT_NEQ(info.max_entries, GATING_BOGUS_MAX, + "ctx max_entries ignored for signed loader"); + checked++; + } 
+ } + ASSERT_GT(checked, 0, "inspected a created map"); + +free_blob: + free(blob); +free_ctx: + close_loader_ctx_fds(ctx, nr_maps, nr_progs); + free(ctx); +destroy: + test_signed_loader_map__destroy(skel); +} + +/* + * A signed loader must also ignore ctx-supplied initial_value: the host cannot + * re-seed a signed program's map contents through the loader ctx. Drive a + * program with one initialized global (a .data map) through gen_loader, point + * every map's ctx initial_value at an adversarial buffer, and confirm the + * created map still holds the attested value, never the ctx bytes. + */ +#define DATA_MAGIC 0x5eed1234abad1deaULL + +static void metadata_ctx_initial_value_ignored(void) +{ + LIBBPF_OPTS(gen_loader_opts, gopts, .gen_hash = true); + struct test_signed_loader_data *skel; + __u8 excl[SHA256_DIGEST_LENGTH], evil[64]; + int nr_maps = 0, nr_progs = 0, i, found = 0, r; + struct bpf_program *p; + struct bpf_map *m; + struct bpf_map_desc *md; + unsigned char *blob; + __u32 ctx_sz, data_sz; + void *ctx; + bool ran; + + skel = test_signed_loader_data__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + if (!ASSERT_OK(bpf_object__gen_loader(skel->obj, &gopts), "gen_loader")) + goto destroy; + if (!ASSERT_OK(bpf_object__load(skel->obj), "gen_load")) + goto destroy; + + bpf_object__for_each_program(p, skel->obj) + nr_progs++; + bpf_object__for_each_map(m, skel->obj) + nr_maps++; + ctx_sz = sizeof(struct bpf_loader_ctx) + + nr_maps * sizeof(struct bpf_map_desc) + + nr_progs * sizeof(struct bpf_prog_desc); + ctx = calloc(1, ctx_sz); + if (!ASSERT_OK_PTR(ctx, "ctx_alloc")) + goto destroy; + ((struct bpf_loader_ctx *)ctx)->sz = ctx_sz; + + memset(evil, 0xAA, sizeof(evil)); + md = (struct bpf_map_desc *)((char *)ctx + sizeof(struct bpf_loader_ctx)); + for (i = 0; i < nr_maps; i++) + md[i].initial_value = ptr_to_u64(evil); + + libbpf_sha256(gopts.insns, gopts.insns_sz, excl); + data_sz = gopts.data_sz; + blob = malloc(data_sz); + if (!ASSERT_OK_PTR(blob, 
"blob_alloc")) + goto free_ctx; + memcpy(blob, gopts.data, data_sz); + + r = run_gen_loader(gopts.insns, gopts.insns_sz, blob, data_sz, + excl, sizeof(excl), NULL, 0, true, ctx, ctx_sz, &ran); + if (!ASSERT_TRUE(ran, "loader ran") || + !ASSERT_EQ(r, 0, "loader retval")) + goto free_blob; + + for (i = 0; i < nr_maps; i++) { + struct bpf_map_info info; + __u32 ilen = sizeof(info), key = 0; + __u8 value[64] = {}; + __u64 got; + int fd = md[i].map_fd; + + if (fd <= 0) + continue; + memset(&info, 0, sizeof(info)); + if (!ASSERT_OK(bpf_map_get_info_by_fd(fd, &info, &ilen), "map_info")) + continue; + if (info.value_size <= sizeof(value) && + bpf_map_lookup_elem(fd, &key, value) == 0) { + memcpy(&got, value, sizeof(got)); + /* attested .data survives; ctx bytes (0xAA..) ignored */ + if (got == DATA_MAGIC) + found = 1; + ASSERT_NEQ(got, 0xAAAAAAAAAAAAAAAAULL, + "ctx initial_value ignored for signed loader"); + } + } + ASSERT_EQ(found, 1, "attested .data value preserved"); + +free_blob: + free(blob); +free_ctx: + close_loader_ctx_fds(ctx, nr_maps, nr_progs); + free(ctx); +destroy: + test_signed_loader_data__destroy(skel); +} + +/* + * The load-time signature must authenticate the loader instructions: a valid + * signature loads, and the very same signature over one-byte-tampered insns is + * rejected. Uses ./verify_sig_setup.sh + ./sign-file at runtime, like + * verify_pkcs7_sig, and verifies against the session keyring the key was added + * to. (signature_enforced/_too_large only cover a malformed signature.) 
+ */ +static void signature_authenticates_insns(void) +{ + LIBBPF_OPTS(gen_loader_opts, gopts, .gen_hash = true); + char dir_tmpl[] = "/tmp/signed_loaderXXXXXX", *dir; + struct test_signed_loader *skel = NULL; + __u8 excl[SHA256_DIGEST_LENGTH], sig[8192]; + __u32 sig_sz = sizeof(sig), insns_sz, data_sz, ctx_sz; + unsigned char *insns = NULL, *tampered = NULL, *blob = NULL; + int nr_maps = 0, nr_progs = 0, r; + struct bpf_program *p; + struct bpf_map *m; + void *ctx = NULL; + bool ran; + + syscall(__NR_request_key, "keyring", "_uid.0", NULL, + KEY_SPEC_SESSION_KEYRING); + dir = mkdtemp(dir_tmpl); + if (!ASSERT_OK_PTR(dir, "mkdtemp")) + return; + if (!ASSERT_OK(run_setup("setup", dir), "verify_sig_setup")) { + rmdir(dir); + return; + } + + skel = test_signed_loader__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + if (!ASSERT_OK(bpf_object__gen_loader(skel->obj, &gopts), "gen_loader")) + goto cleanup; + if (!ASSERT_OK(bpf_object__load(skel->obj), "gen_load")) + goto cleanup; + + bpf_object__for_each_program(p, skel->obj) + nr_progs++; + bpf_object__for_each_map(m, skel->obj) + nr_maps++; + ctx_sz = sizeof(struct bpf_loader_ctx) + + nr_maps * sizeof(struct bpf_map_desc) + + nr_progs * sizeof(struct bpf_prog_desc); + insns_sz = gopts.insns_sz; + data_sz = gopts.data_sz; + ctx = calloc(1, ctx_sz); + insns = malloc(insns_sz); + tampered = malloc(insns_sz); + blob = malloc(data_sz); + if (!ASSERT_OK_PTR(ctx, "ctx") || + !ASSERT_OK_PTR(insns, "insns") || + !ASSERT_OK_PTR(tampered, "tampered") || + !ASSERT_OK_PTR(blob, "blob")) + goto cleanup; + memcpy(insns, gopts.insns, insns_sz); + memcpy(blob, gopts.data, data_sz); + libbpf_sha256(insns, insns_sz, excl); + + if (!ASSERT_OK(sign_buf(dir, insns, insns_sz, sig, &sig_sz), "sign-file")) + goto cleanup; + + memset(ctx, 0, ctx_sz); + ((struct bpf_loader_ctx *)ctx)->sz = ctx_sz; + r = run_gen_loader(insns, insns_sz, blob, data_sz, excl, sizeof(excl), + sig, sig_sz, true, ctx, ctx_sz, &ran); + ASSERT_TRUE(ran, 
"valid signature: loader loaded and ran"); + ASSERT_EQ(r, 0, "valid signature accepted"); + close_loader_ctx_fds(ctx, nr_maps, nr_progs); + + memcpy(tampered, insns, insns_sz); + tampered[insns_sz / 2] ^= 0xff; + memset(ctx, 0, ctx_sz); + ((struct bpf_loader_ctx *)ctx)->sz = ctx_sz; + r = run_gen_loader(tampered, insns_sz, blob, data_sz, excl, sizeof(excl), + sig, sig_sz, true, ctx, ctx_sz, &ran); + ASSERT_FALSE(ran, "tampered loader rejected before run"); + ASSERT_EQ(r, -EKEYREJECTED, "signature is bound to the instructions"); +cleanup: + free(insns); + free(tampered); + free(blob); + free(ctx); + test_signed_loader__destroy(skel); + run_setup("cleanup", dir); +} + +static int make_excl_map(__u32 flags, __u32 value_size) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts); + __u8 hash[SHA256_DIGEST_LENGTH] = { 1 }; /* any 32-byte value */ + + opts.excl_prog_hash = hash; + opts.excl_prog_hash_size = sizeof(hash); + opts.map_flags = flags; + return bpf_map_create(BPF_MAP_TYPE_ARRAY, "md", 4, value_size, 1, &opts); +} + +static void hash_requires_frozen(void) +{ + __u8 hbuf[SHA256_DIGEST_LENGTH], val[64] = {}; + struct bpf_map_info info; + __u32 ilen, key = 0; + int fd; + + fd = make_excl_map(0, sizeof(val)); + if (!ASSERT_OK_FD(fd, "excl_map")) + return; + ASSERT_OK(bpf_map_update_elem(fd, &key, val, 0), "update"); + + memset(&info, 0, sizeof(info)); + info.hash = ptr_to_u64(hbuf); + info.hash_size = sizeof(hbuf); + ilen = sizeof(info); + ASSERT_EQ(bpf_map_get_info_by_fd(fd, &info, &ilen), -EPERM, + "hash of unfrozen map rejected"); + close(fd); +} + +static void no_update_after_freeze(void) +{ + __u8 val[64] = {}; + __u32 key = 0; + int fd; + + fd = make_excl_map(0, sizeof(val)); + if (!ASSERT_OK_FD(fd, "excl_map")) + return; + ASSERT_OK(bpf_map_update_elem(fd, &key, val, 0), "update"); + ASSERT_OK(bpf_map_freeze(fd), "freeze"); + ASSERT_EQ(bpf_map_update_elem(fd, &key, val, 0), -EPERM, + "update after freeze rejected"); + close(fd); +} + +static void 
freeze_writable_mmap(void) +{ + void *w; + int fd; + + fd = make_excl_map(BPF_F_MMAPABLE, 4096); + if (!ASSERT_OK_FD(fd, "excl_mmapable_map")) + return; + w = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (ASSERT_OK_PTR(w, "writable_mmap")) { + ASSERT_EQ(bpf_map_freeze(fd), -EBUSY, + "freeze rejected while writable mmap held"); + munmap(w, 4096); + } + close(fd); +} + +static void no_writable_mmap_frozen(void) +{ + void *w; + int fd; + + fd = make_excl_map(BPF_F_MMAPABLE, 4096); + if (!ASSERT_OK_FD(fd, "excl_mmapable_map")) + return; + ASSERT_OK(bpf_map_freeze(fd), "freeze"); + w = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + ASSERT_EQ(w, MAP_FAILED, "writable mmap of frozen map rejected"); + if (w != MAP_FAILED) + munmap(w, 4096); + close(fd); +} + +static void map_hash_matches_libbpf(void) +{ + __u8 kbuf[SHA256_DIGEST_LENGTH], lbuf[SHA256_DIGEST_LENGTH], val[64] = {}; + struct bpf_map_info info; + __u32 ilen, key = 0; + int fd, i; + + /* + * The signing scheme assumes the kernel's map hash equals what libbpf + * computes over the same bytes (gen_loader bakes libbpf_sha256(blob); + * the kernel recomputes via array_map_get_hash). Pin that they agree. 
+ */ + for (i = 0; i < (int)sizeof(val); i++) + val[i] = i * 7 + 1; + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "h", 4, sizeof(val), 1, NULL); + if (!ASSERT_OK_FD(fd, "array_map")) + return; + ASSERT_OK(bpf_map_update_elem(fd, &key, val, 0), "update"); + ASSERT_OK(bpf_map_freeze(fd), "freeze"); + memset(&info, 0, sizeof(info)); + info.hash = ptr_to_u64(kbuf); + info.hash_size = sizeof(kbuf); + ilen = sizeof(info); + if (ASSERT_OK(bpf_map_get_info_by_fd(fd, &info, &ilen), "get_hash")) { + libbpf_sha256(val, sizeof(val), lbuf); + ASSERT_EQ(memcmp(kbuf, lbuf, sizeof(kbuf)), 0, + "kernel map hash matches libbpf_sha256"); + } + close(fd); +} + +static void map_hash_multi_element(void) +{ + const __u32 nr = 8, value_size = 64; + __u8 kbuf[SHA256_DIGEST_LENGTH], lbuf[SHA256_DIGEST_LENGTH]; + struct bpf_map_info info; + __u32 ilen, i, j; + __u8 *full; + int fd; + + /* + * array_map_get_hash() hashes elem_size * max_entries (the whole value + * area), not just element 0. With an 8-aligned value_size elem_size has + * no padding, so pin that a >1-entry array's kernel hash equals + * libbpf_sha256() over the full, concatenated element contents. 
+ */ + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "h", 4, value_size, nr, NULL); + if (!ASSERT_OK_FD(fd, "array_map")) + return; + full = calloc(nr, value_size); + if (!ASSERT_OK_PTR(full, "buf")) + goto close_fd; + for (i = 0; i < nr; i++) { + __u8 *v = full + i * value_size; + + for (j = 0; j < value_size; j++) + v[j] = i * 31 + j * 7 + 1; + ASSERT_OK(bpf_map_update_elem(fd, &i, v, 0), "update"); + } + ASSERT_OK(bpf_map_freeze(fd), "freeze"); + memset(&info, 0, sizeof(info)); + info.hash = ptr_to_u64(kbuf); + info.hash_size = sizeof(kbuf); + ilen = sizeof(info); + if (ASSERT_OK(bpf_map_get_info_by_fd(fd, &info, &ilen), "get_hash")) { + libbpf_sha256(full, (size_t)nr * value_size, lbuf); + ASSERT_EQ(memcmp(kbuf, lbuf, sizeof(kbuf)), 0, + "kernel hash covers full multi-element value area"); + } + free(full); +close_fd: + close(fd); +} + +static void map_hash_bad_size(void) +{ + __u8 kbuf[SHA256_DIGEST_LENGTH], val[64] = {}; + struct bpf_map_info info; + __u32 ilen, key = 0; + int fd; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "h", 4, sizeof(val), 1, NULL); + if (!ASSERT_OK_FD(fd, "array_map")) + return; + ASSERT_OK(bpf_map_update_elem(fd, &key, val, 0), "update"); + ASSERT_OK(bpf_map_freeze(fd), "freeze"); + memset(&info, 0, sizeof(info)); + info.hash = ptr_to_u64(kbuf); + info.hash_size = sizeof(kbuf) / 2; + ilen = sizeof(info); + ASSERT_EQ(bpf_map_get_info_by_fd(fd, &info, &ilen), -EINVAL, + "wrong hash_size rejected"); + close(fd); +} + +static void map_hash_unsupported_type(void) +{ + __u8 kbuf[SHA256_DIGEST_LENGTH]; + struct bpf_map_info info; + __u32 ilen; + int fd; + + /* Only arrays implement map_get_hash; a hash map must be refused. 
*/ + fd = bpf_map_create(BPF_MAP_TYPE_HASH, "h", 4, 8, 4, NULL); + if (!ASSERT_OK_FD(fd, "hash_map")) + return; + memset(&info, 0, sizeof(info)); + info.hash = ptr_to_u64(kbuf); + info.hash_size = sizeof(kbuf); + ilen = sizeof(info); + ASSERT_EQ(bpf_map_get_info_by_fd(fd, &info, &ilen), -EINVAL, + "hash unsupported for non-array map"); + close(fd); +} + +void test_signed_loader(void) +{ + if (test__start_subtest("metadata_check_shape")) + metadata_check_shape(); + if (test__start_subtest("metadata_match")) + metadata_match(); + if (test__start_subtest("metadata_sha_mismatch")) + metadata_sha_mismatch(); + if (test__start_subtest("metadata_not_exclusive")) + metadata_not_exclusive(); + if (test__start_subtest("metadata_hash_not_computed")) + metadata_hash_not_computed(); + if (test__start_subtest("signature_enforced")) + signature_enforced(); + if (test__start_subtest("signature_too_large")) + signature_too_large(); + if (test__start_subtest("signature_bad_keyring")) + signature_bad_keyring(); + if (test__start_subtest("metadata_ctx_max_entries_ignored")) + metadata_ctx_max_entries_ignored(); + if (test__start_subtest("metadata_ctx_initial_value_ignored")) + metadata_ctx_initial_value_ignored(); + if (test__start_subtest("signature_authenticates_insns")) + signature_authenticates_insns(); + if (test__start_subtest("hash_requires_frozen")) + hash_requires_frozen(); + if (test__start_subtest("no_update_after_freeze")) + no_update_after_freeze(); + if (test__start_subtest("freeze_writable_mmap")) + freeze_writable_mmap(); + if (test__start_subtest("no_writable_mmap_frozen")) + no_writable_mmap_frozen(); + if (test__start_subtest("map_hash_matches_libbpf")) + map_hash_matches_libbpf(); + if (test__start_subtest("map_hash_multi_element")) + map_hash_multi_element(); + if (test__start_subtest("map_hash_bad_size")) + map_hash_bad_size(); + if (test__start_subtest("map_hash_unsupported_type")) + map_hash_unsupported_type(); +} diff --git 
a/tools/testing/selftests/bpf/progs/test_signed_loader.c b/tools/testing/selftests/bpf/progs/test_signed_loader.c new file mode 100644 index 000000000000..d9a4b85f9391 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_signed_loader.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include + +/* + * Minimal, map-less program. Driven through libbpf's gen_loader (gen_hash) + * by prog_tests/signed_loader.c so the generated light-skeleton loader (with + * the emit_signature_match metadata check) can be exercised against good + * and tampered metadata. A socket filter needs no load-time attach resolution, + * and having no maps keeps the generated loader's ctx trivial (0 maps, 1 prog). + */ +SEC("socket") +int probe(void *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_signed_loader_data.c b/tools/testing/selftests/bpf/progs/test_signed_loader_data.c new file mode 100644 index 000000000000..43e2074d0042 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_signed_loader_data.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include + +/* + * A single initialized global, so the generated loader has one internal + * (.data) map that it seeds with an initial value while loading. + * prog_tests/signed_loader.c uses this to check that a signed loader + * keeps the attested contents and ignores a ctx-supplied initial_value: + * the host cannot re-seed a signed program's maps through the loader ctx. 
+ */ +__u64 magic = 0x5eed1234abad1deaULL; + +SEC("socket") +int probe(void *ctx) +{ + return (int)magic; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_signed_loader_map.c b/tools/testing/selftests/bpf/progs/test_signed_loader_map.c new file mode 100644 index 000000000000..4478ce6f1fd9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_signed_loader_map.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include + +/* + * One explicit array map and no global variables, so the generated loader + * has exactly one map to create (no .rodata/.bss). prog_tests/signed_loader.c + * uses this to check that a signed loader ignores ctx-supplied max_entries: + * the map must keep its attested size (4), not whatever the host puts in + * the loader ctx. + */ +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 4); + __type(key, __u32); + __type(value, __u64); +} amap SEC(".maps"); + +SEC("socket") +int probe(void *ctx) +{ + __u32 key = 0; + __u64 *val = bpf_map_lookup_elem(&amap, &key); + + return val ? (int)*val : 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From bf29346fc39355cc57118e4e825109f66ac3542d Mon Sep 17 00:00:00 2001 From: "Alexis Lothoré (eBPF Foundation)" Date: Thu, 28 May 2026 15:27:14 +0200 Subject: selftests/bpf: ignore call depth accounting for retbleed in verifier tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running the selftests on a retbleed-affected platform (eg: Skylake), with call depth accounting enabled (CONFIG_CALL_DEPTH_TRACKING=y) _and_ with retbleed=stuff, some verifier selftests fail to validate the jited instructions. 
For example: MATCHED SUBSTR: ' endbr64' MATCHED SUBSTR: ' nopl (%rax,%rax)' MATCHED SUBSTR: ' xorq %rax, %rax' MATCHED SUBSTR: ' pushq %rbp' MATCHED SUBSTR: ' movq %rsp, %rbp' MATCHED SUBSTR: ' endbr64' MATCHED SUBSTR: ' cmpq $0x21, %rax' MATCHED SUBSTR: ' ja L0' MATCHED SUBSTR: ' pushq %rax' MATCHED SUBSTR: ' movq %rsp, %rax' MATCHED SUBSTR: ' jmp L1' MATCHED SUBSTR: 'L0: pushq %rax' MATCHED SUBSTR: 'L1: pushq %rax' MATCHED SUBSTR: ' movq -0x10(%rbp), %rax' WRONG LINE REGEX: ' callq 0x{{.*}}' Those affected selftests always fail on some call instruction: this failure is due to the JIT compiler emitting call depth accounting for retbleed mitigation (see x86_call_depth_emit_accounting calls in bpf_jit_comp.c), resulting in an additional instruction being inserted in front of every call instruction, similar to this one: sarq $0x5, %gs:-0x39882741(%rip) Fix those selftests by allowing them to ignore this possibly present call depth accounting instruction. Signed-off-by: Alexis Lothoré (eBPF Foundation) Reviewed-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260528-fix_tests_for_retbleed_stuff-v1-1-c2022a1f3bee@bootlin.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_private_stack.c | 5 +++++ tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c | 1 + 2 files changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c index 046f7445a458..bb8206e10880 100644 --- a/tools/testing/selftests/bpf/progs/verifier_private_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c @@ -94,6 +94,7 @@ __jited(" addq %gs:{{.*}}, %r9") __jited(" movl $0x2a, %edi") __jited(" movq %rdi, -0x200(%r9)") __jited(" pushq %r9") +__jited("...") __jited(" callq 0x{{.*}}") __jited(" popq %r9") __jited(" xorl %eax, %eax") @@ -153,11 +154,13 @@ __jited(" endbr64") __jited(" movabsq $0x{{.*}}, %r9") __jited(" addq
%gs:{{.*}}, %r9") __jited(" pushq %r9") +__jited("...") __jited(" callq") __jited(" popq %r9") __jited(" movl $0x2a, %edi") __jited(" movq %rdi, -0x200(%r9)") __jited(" pushq %r9") +__jited("...") __jited(" callq") __jited(" popq %r9") __arch_arm64 @@ -199,6 +202,7 @@ __description("Private stack, exception in main prog") __success __retval(0) __arch_x86_64 __jited(" pushq %r9") +__jited("...") __jited(" callq") __jited(" popq %r9") __arch_arm64 @@ -246,6 +250,7 @@ __success __retval(0) __arch_x86_64 __jited(" movq %rdi, -0x200(%r9)") __jited(" pushq %r9") +__jited("...") __jited(" callq") __jited(" popq %r9") __arch_arm64 diff --git a/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c b/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c index 8d60c634a114..48fa34d2959f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c +++ b/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c @@ -56,6 +56,7 @@ __jited("L1: pushq %rax") /* rbp[-16] = rax */ * (cause original rax might be clobbered by this point) */ __jited(" movq -0x10(%rbp), %rax") +__jited("...") __jited(" callq 0x{{.*}}") /* call to sub() */ __jited(" xorl %eax, %eax") __jited(" leave") -- cgit v1.2.3 From 16b4d3e2fb24aac3e68a8d86e3bc5e302e1b5cb7 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Fri, 5 Jun 2026 04:41:21 -0700 Subject: bpf: Implement resizable hashmap basic functions Use rhashtable_lookup_likely() for lookups, rhashtable_remove_fast() for deletes, and rhashtable_lookup_get_insert_fast() for inserts. Updates modify values in place under RCU rather than allocating a new element and swapping the pointer (as regular htab does). This trades read consistency for performance: concurrent readers may see partial updates. BPF_F_LOCK support and special-field handling (timers, kptrs, etc.) follow in a later commit. Initialize rhashtable with bpf_mem_alloc element cache. Require BPF_F_NO_PREALLOC. Limit max_entries to 2^31. 
Free elements via rhashtable_free_and_destroy(). Signed-off-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20260605-rhash-v7-4-5b8e05f8630d@meta.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_types.h | 1 + include/uapi/linux/bpf.h | 6 + kernel/bpf/hashtab.c | 311 +++++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 3 + kernel/bpf/verifier.c | 1 + tools/include/uapi/linux/bpf.h | 6 + 6 files changed, 328 insertions(+) (limited to 'tools') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index b13de31e163f..56e4c3f983d3 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -134,6 +134,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_ARENA, arena_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_INSN_ARRAY, insn_array_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_RHASH, rhtab_map_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index aec171ccb6ef..bed9b1b4d5ef 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1047,6 +1047,7 @@ enum bpf_map_type { BPF_MAP_TYPE_CGRP_STORAGE, BPF_MAP_TYPE_ARENA, BPF_MAP_TYPE_INSN_ARRAY, + BPF_MAP_TYPE_RHASH, __MAX_BPF_MAP_TYPE }; @@ -1545,6 +1546,11 @@ union bpf_attr { * * BPF_MAP_TYPE_ARENA - contains the address where user space * is going to mmap() the arena. It has to be page aligned. + * + * BPF_MAP_TYPE_RHASH - initial table size hint + * (nelem_hint). 0 = use rhashtable default. Must be + * <= min(max_entries, U16_MAX). Upper 32 bits reserved, + * must be zero. 
*/ __u64 map_extra; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 3dd9b4924ae4..10f3a058747b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -2739,3 +2740,313 @@ const struct bpf_map_ops htab_of_maps_map_ops = { BATCH_OPS(htab), .map_btf_id = &htab_map_btf_ids[0], }; + +struct rhtab_elem { + struct rhash_head node; + /* key bytes, then value bytes follow */ + u8 data[] __aligned(8); +}; + +struct bpf_rhtab { + struct bpf_map map; + struct rhashtable ht; + struct bpf_mem_alloc ma; + u32 elem_size; +}; + +static const struct rhashtable_params rhtab_params = { + .head_offset = offsetof(struct rhtab_elem, node), + .key_offset = offsetof(struct rhtab_elem, data), +}; + +static inline void *rhtab_elem_value(struct rhtab_elem *l, u32 key_size) +{ + return l->data + round_up(key_size, 8); +} + +static struct bpf_map *rhtab_map_alloc(union bpf_attr *attr) +{ + struct rhashtable_params params; + struct bpf_rhtab *rhtab; + int err = 0; + + rhtab = bpf_map_area_alloc(sizeof(*rhtab), NUMA_NO_NODE); + if (!rhtab) + return ERR_PTR(-ENOMEM); + + bpf_map_init_from_attr(&rhtab->map, attr); + + if (rhtab->map.max_entries > 1UL << 31) { + err = -E2BIG; + goto free_rhtab; + } + + rhtab->elem_size = sizeof(struct rhtab_elem) + round_up(rhtab->map.key_size, 8) + + round_up(rhtab->map.value_size, 8); + + params = rhtab_params; + params.key_len = rhtab->map.key_size; + params.nelem_hint = (u32)attr->map_extra; + params.automatic_shrinking = true; + + err = rhashtable_init(&rhtab->ht, ¶ms); + if (err) + goto free_rhtab; + + /* Set max_elems after rhashtable_init() since init zeroes the struct */ + rhtab->ht.max_elems = rhtab->map.max_entries; + + err = bpf_mem_alloc_init(&rhtab->ma, rhtab->elem_size, false); + if (err) + goto destroy_rhtab; + + return &rhtab->map; + +destroy_rhtab: + rhashtable_destroy(&rhtab->ht); +free_rhtab: + bpf_map_area_free(rhtab); + return ERR_PTR(err); 
+} + +static int rhtab_map_alloc_check(union bpf_attr *attr) +{ + if (!(attr->map_flags & BPF_F_NO_PREALLOC)) + return -EINVAL; + + if (attr->map_flags & BPF_F_ZERO_SEED) + return -EINVAL; + + if (attr->key_size > U16_MAX) + return -E2BIG; + + if (attr->map_extra >> 32) + return -EINVAL; + + if ((u32)attr->map_extra > U16_MAX) + return -E2BIG; + + if ((u32)attr->map_extra > attr->max_entries) + return -EINVAL; + + return htab_map_alloc_check(attr); +} + +static void rhtab_free_elem(void *ptr, void *arg) +{ + struct bpf_rhtab *rhtab = arg; + struct rhtab_elem *elem = ptr; + + bpf_mem_cache_free_rcu(&rhtab->ma, elem); +} + +static void rhtab_map_free(struct bpf_map *map) +{ + struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map); + + rhashtable_free_and_destroy(&rhtab->ht, rhtab_free_elem, rhtab); + bpf_mem_alloc_destroy(&rhtab->ma); + bpf_map_area_free(rhtab); +} + +static void *rhtab_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map); + + /* Hold RCU lock in case sleepable program calls via gen_lookup */ + guard(rcu)(); + + return rhashtable_lookup_likely(&rhtab->ht, key, rhtab_params); +} + +static void *rhtab_map_lookup_elem(struct bpf_map *map, void *key) __must_hold(RCU) +{ + struct rhtab_elem *l; + + l = rhtab_lookup_elem(map, key); + return l ? rhtab_elem_value(l, map->key_size) : NULL; +} + +static void rhtab_read_elem_value(struct bpf_map *map, void *dst, struct rhtab_elem *elem, + u64 flags) +{ + void *src = rhtab_elem_value(elem, map->key_size); + + if (flags & BPF_F_LOCK) + copy_map_value_locked(map, dst, src, true); + else + copy_map_value(map, dst, src); +} + +static int rhtab_delete_elem(struct bpf_rhtab *rhtab, struct rhtab_elem *elem, void *copy, + u64 flags) +{ + int err; + + /* + * disable_instrumentation() mitigates the deadlock for programs running in NMI context. + * rhashtable locks bucket with local_irq_save(). 
Only NMI programs may reenter + * rhashtable code, bpf_disable_instrumentation() disables programs running in NMI, except + * raw tracepoints, which we don't have in rhashtable. + */ + bpf_disable_instrumentation(); + err = rhashtable_remove_fast(&rhtab->ht, &elem->node, rhtab_params); + bpf_enable_instrumentation(); + + if (err) + return err; + + if (copy) { + rhtab_read_elem_value(&rhtab->map, copy, elem, flags); + check_and_init_map_value(&rhtab->map, copy); + } + + bpf_mem_cache_free_rcu(&rhtab->ma, elem); + return 0; +} + + +static long rhtab_map_delete_elem(struct bpf_map *map, void *key) +{ + struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map); + struct rhtab_elem *elem; + + guard(rcu)(); + + elem = rhtab_lookup_elem(map, key); + if (!elem) + return -ENOENT; + + return rhtab_delete_elem(rhtab, elem, NULL, 0); +} + +static int rhtab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, void *value, u64 flags) +{ + struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map); + struct rhtab_elem *elem; + int err; + + err = bpf_map_check_op_flags(map, flags, BPF_F_LOCK); + if (err) + return err; + + guard(rcu)(); + + elem = rhtab_lookup_elem(map, key); + if (!elem) + return -ENOENT; + + return rhtab_delete_elem(rhtab, elem, value, flags); +} + +static long rhtab_map_update_existing(struct bpf_map *map, struct rhtab_elem *elem, void *value, + u64 map_flags) +{ + void *old_val = rhtab_elem_value(elem, map->key_size); + + if (map_flags & BPF_NOEXIST) + return -EEXIST; + + if (map_flags & BPF_F_LOCK) + copy_map_value_locked(map, old_val, value, false); + else + copy_map_value(map, old_val, value); + return 0; +} + +static long rhtab_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) +{ + struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map); + struct rhtab_elem *elem, *tmp; + + if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) + return -EINVAL; + + if ((map_flags & BPF_F_LOCK) && 
!btf_record_has_field(map->record, BPF_SPIN_LOCK)) + return -EINVAL; + + guard(rcu)(); + elem = rhtab_lookup_elem(map, key); + if (elem) + return rhtab_map_update_existing(map, elem, value, map_flags); + + if (map_flags & BPF_EXIST) + return -ENOENT; + + /* Check max_entries limit before inserting new element */ + if (atomic_read(&rhtab->ht.nelems) >= map->max_entries) + return -E2BIG; + + elem = bpf_mem_cache_alloc(&rhtab->ma); + if (!elem) + return -ENOMEM; + + memcpy(elem->data, key, map->key_size); + copy_map_value(map, rhtab_elem_value(elem, map->key_size), value); + + /* Prevent deadlock for NMI programs attempting to take bucket lock */ + bpf_disable_instrumentation(); + tmp = rhashtable_lookup_get_insert_fast(&rhtab->ht, &elem->node, rhtab_params); + bpf_enable_instrumentation(); + + if (tmp) { + bpf_mem_cache_free(&rhtab->ma, elem); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + return rhtab_map_update_existing(map, tmp, value, map_flags); + } + + return 0; +} + +static int rhtab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) +{ + struct bpf_insn *insn = insn_buf; + const int ret = BPF_REG_0; + + BUILD_BUG_ON(!__same_type(&rhtab_lookup_elem, + (void *(*)(struct bpf_map *map, void *key)) NULL)); + *insn++ = BPF_EMIT_CALL(rhtab_lookup_elem); + *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); + *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, + offsetof(struct rhtab_elem, data) + round_up(map->key_size, 8)); + + return insn - insn_buf; +} + +static void rhtab_map_free_internal_structs(struct bpf_map *map) +{ +} + +static int rhtab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + return -EOPNOTSUPP; +} + +static u64 rhtab_map_mem_usage(const struct bpf_map *map) +{ + struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map); + u64 num_entries; + + /* Excludes rhashtable bucket overhead (~ nelems * sizeof(void *) at 75% load). 
*/ + num_entries = atomic_read(&rhtab->ht.nelems); + return sizeof(struct bpf_rhtab) + rhtab->elem_size * num_entries; +} + +BTF_ID_LIST_SINGLE(rhtab_map_btf_ids, struct, bpf_rhtab) +const struct bpf_map_ops rhtab_map_ops = { + .map_meta_equal = bpf_map_meta_equal, + .map_alloc_check = rhtab_map_alloc_check, + .map_alloc = rhtab_map_alloc, + .map_free = rhtab_map_free, + .map_get_next_key = rhtab_map_get_next_key, + .map_release_uref = rhtab_map_free_internal_structs, + .map_lookup_elem = rhtab_map_lookup_elem, + .map_lookup_and_delete_elem = rhtab_map_lookup_and_delete_elem, + .map_update_elem = rhtab_map_update_elem, + .map_delete_elem = rhtab_map_delete_elem, + .map_gen_lookup = rhtab_map_gen_lookup, + .map_mem_usage = rhtab_map_mem_usage, + .map_btf_id = &rhtab_map_btf_ids[0], +}; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 625a4366fe6d..1faae184de48 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1398,6 +1398,7 @@ static int map_create_alloc(union bpf_attr *attr, bpfptr_t uattr, struct bpf_ver if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && attr->map_type != BPF_MAP_TYPE_ARENA && + attr->map_type != BPF_MAP_TYPE_RHASH && attr->map_extra != 0) { bpf_log(log, "Invalid map_extra.\n"); return -EINVAL; @@ -1469,6 +1470,7 @@ static int map_create_alloc(union bpf_attr *attr, bpfptr_t uattr, struct bpf_ver case BPF_MAP_TYPE_CGROUP_ARRAY: case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH: + case BPF_MAP_TYPE_RHASH: case BPF_MAP_TYPE_PERCPU_HASH: case BPF_MAP_TYPE_HASH_OF_MAPS: case BPF_MAP_TYPE_RINGBUF: @@ -2259,6 +2261,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_RHASH || map->map_type == BPF_MAP_TYPE_STACK_TRACE) { if (!bpf_map_is_offloaded(map)) { bpf_disable_instrumentation(); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c 
index 8ed484cb1a8a..7d27ba396d32 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -17657,6 +17657,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, if (prog->sleepable) switch (map->map_type) { case BPF_MAP_TYPE_HASH: + case BPF_MAP_TYPE_RHASH: case BPF_MAP_TYPE_LRU_HASH: case BPF_MAP_TYPE_ARRAY: case BPF_MAP_TYPE_PERCPU_HASH: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 37142e6d911a..7d0b282ba674 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1047,6 +1047,7 @@ enum bpf_map_type { BPF_MAP_TYPE_CGRP_STORAGE, BPF_MAP_TYPE_ARENA, BPF_MAP_TYPE_INSN_ARRAY, + BPF_MAP_TYPE_RHASH, __MAX_BPF_MAP_TYPE }; @@ -1545,6 +1546,11 @@ union bpf_attr { * * BPF_MAP_TYPE_ARENA - contains the address where user space * is going to mmap() the arena. It has to be page aligned. + * + * BPF_MAP_TYPE_RHASH - initial table size hint + * (nelem_hint). 0 = use rhashtable default. Must be + * <= min(max_entries, U16_MAX). Upper 32 bits reserved, + * must be zero. */ __u64 map_extra; -- cgit v1.2.3 From 6e46ff0abefde32c2341ca2c61ab1f8855e8cac9 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Fri, 5 Jun 2026 04:41:25 -0700 Subject: libbpf: Support resizable hashtable Add BPF_MAP_TYPE_RHASH to libbpf's map type name table and feature probing so that libbpf-based tools can create and identify resizable hash maps. 
Signed-off-by: Mykyta Yatsenko Reviewed-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260605-rhash-v7-8-5b8e05f8630d@meta.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 1 + tools/lib/bpf/libbpf_probes.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ab2071fdd3e8..1354bcbc8b30 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -192,6 +192,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage", [BPF_MAP_TYPE_ARENA] = "arena", [BPF_MAP_TYPE_INSN_ARRAY] = "insn_array", + [BPF_MAP_TYPE_RHASH] = "rhash", }; static const char * const prog_type_name[] = { diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index b70d9637ecf5..e40819465ddc 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -309,6 +309,9 @@ static int probe_map_create(enum bpf_map_type map_type) value_size = sizeof(__u64); opts.map_flags = BPF_F_NO_PREALLOC; break; + case BPF_MAP_TYPE_RHASH: + opts.map_flags = BPF_F_NO_PREALLOC; + break; case BPF_MAP_TYPE_CGROUP_STORAGE: case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: key_size = sizeof(struct bpf_cgroup_storage_key); -- cgit v1.2.3 From 249996365b66d09db31bbf3a86c07715f47ea133 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Fri, 5 Jun 2026 04:41:26 -0700 Subject: selftests/bpf: Add basic tests for resizable hash map Test basic map operations (lookup, update, delete) for BPF_MAP_TYPE_RHASH including boundary conditions like duplicate key insertion and deletion of nonexistent keys. 
Signed-off-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20260605-rhash-v7-9-5b8e05f8630d@meta.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/rhash.c | 120 ++++++++++++ tools/testing/selftests/bpf/progs/rhash.c | 248 +++++++++++++++++++++++++ 2 files changed, 368 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/rhash.c create mode 100644 tools/testing/selftests/bpf/progs/rhash.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/rhash.c b/tools/testing/selftests/bpf/prog_tests/rhash.c new file mode 100644 index 000000000000..69686bf69ba5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/rhash.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include +#include "rhash.skel.h" +#include +#include +#include + +static void rhash_run(const char *prog_name) +{ + struct rhash *skel; + struct bpf_program *prog; + LIBBPF_OPTS(bpf_test_run_opts, opts); + int err; + + skel = rhash__open(); + if (!ASSERT_OK_PTR(skel, "rhash__open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + bpf_program__set_autoload(prog, true); + + err = rhash__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + if (!ASSERT_OK(err, "prog run")) + goto cleanup; + + if (!ASSERT_OK(opts.retval, "prog retval")) + goto cleanup; + + if (!ASSERT_OK(skel->bss->err, "bss->err")) + goto cleanup; + +cleanup: + rhash__destroy(skel); +} + +static int rhash_map_create(__u32 max_entries, __u64 map_extra) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_NO_PREALLOC, + .map_extra = map_extra); + + return bpf_map_create(BPF_MAP_TYPE_RHASH, "rhash_extra", + sizeof(__u32), sizeof(__u64), max_entries, &opts); +} + +static void 
rhash_map_extra_presize(void) +{ + const __u32 max_entries = 1024; + const __u32 nelem_hint = 256; + struct bpf_map_info info = {}; + __u32 info_len = sizeof(info); + __u64 val = 0; + __u32 key; + int fd, i; + + fd = rhash_map_create(max_entries, nelem_hint); + if (!ASSERT_GE(fd, 0, "rhash_map_create presize")) + return; + + if (!ASSERT_OK(bpf_map_get_info_by_fd(fd, &info, &info_len), "info")) + goto close; + ASSERT_EQ(info.map_extra, nelem_hint, "info.map_extra"); + + for (i = 0; i < (int)nelem_hint; i++) { + key = i; + if (!ASSERT_OK(bpf_map_update_elem(fd, &key, &val, BPF_NOEXIST), + "update")) + goto close; + } +close: + close(fd); +} + +static void rhash_map_extra_too_big(void) +{ + int fd; + + fd = rhash_map_create(1U << 20, 0x10000); + if (!ASSERT_LT(fd, 0, "rhash_map_create hint > U16_MAX")) + close(fd); +} + +void test_rhash(void) +{ + if (test__start_subtest("test_rhash_lookup_update")) + rhash_run("test_rhash_lookup_update"); + + if (test__start_subtest("test_rhash_update_delete")) + rhash_run("test_rhash_update_delete"); + + if (test__start_subtest("test_rhash_update_elements")) + rhash_run("test_rhash_update_elements"); + + if (test__start_subtest("test_rhash_update_exist")) + rhash_run("test_rhash_update_exist"); + + if (test__start_subtest("test_rhash_update_any")) + rhash_run("test_rhash_update_any"); + + if (test__start_subtest("test_rhash_noexist_duplicate")) + rhash_run("test_rhash_noexist_duplicate"); + + if (test__start_subtest("test_rhash_delete_nonexistent")) + rhash_run("test_rhash_delete_nonexistent"); + + if (test__start_subtest("test_rhash_map_extra_presize")) + rhash_map_extra_presize(); + + if (test__start_subtest("test_rhash_map_extra_too_big")) + rhash_map_extra_too_big(); +} diff --git a/tools/testing/selftests/bpf/progs/rhash.c b/tools/testing/selftests/bpf/progs/rhash.c new file mode 100644 index 000000000000..fc2dac3a719e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/rhash.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: 
GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include "bpf_misc.h" + +#define ENOENT 2 +#define EEXIST 17 + +char _license[] SEC("license") = "GPL"; + +int err; + +struct elem { + char arr[128]; + int val; +}; + +struct { + __uint(type, BPF_MAP_TYPE_RHASH); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, 128); + __type(key, int); + __type(value, struct elem); +} rhmap SEC(".maps"); + +SEC("syscall") +int test_rhash_lookup_update(void *ctx) +{ + int key = 5; + struct elem empty = {.val = 3, .arr = {0}}; + struct elem *e; + + err = 1; + e = bpf_map_lookup_elem(&rhmap, &key); + if (e) + return 1; + + err = bpf_map_update_elem(&rhmap, &key, &empty, BPF_NOEXIST); + if (err) + return 1; + + e = bpf_map_lookup_elem(&rhmap, &key); + if (!e || e->val != empty.val) { + err = 2; + return 2; + } + + err = 0; + return 0; +} + +SEC("syscall") +int test_rhash_update_delete(void *ctx) +{ + int key = 6; + struct elem empty = {.val = 4, .arr = {0}}; + struct elem *e; + + err = 1; + e = bpf_map_lookup_elem(&rhmap, &key); + if (e) + return 1; + + err = bpf_map_update_elem(&rhmap, &key, &empty, BPF_NOEXIST); + if (err) + return 2; + + err = bpf_map_delete_elem(&rhmap, &key); + if (err) + return 3; + + e = bpf_map_lookup_elem(&rhmap, &key); + if (e) { + err = 4; + return 4; + } + + err = 0; + return 0; +} + +SEC("syscall") +int test_rhash_update_elements(void *ctx) +{ + int key = 0; + struct elem empty = {.val = 4, .arr = {0}}; + struct elem *e; + int i; + + err = 1; + + for (i = 0; i < 128; ++i) { + key = i; + e = bpf_map_lookup_elem(&rhmap, &key); + if (e) + return 1; + + empty.val = key; + err = bpf_map_update_elem(&rhmap, &key, &empty, BPF_NOEXIST); + if (err) + return 2; + + e = bpf_map_lookup_elem(&rhmap, &key); + if (!e || e->val != key) { + err = 4; + return 4; + } + } + + for (i = 0; i < 128; ++i) { + key = i; + err = bpf_map_delete_elem(&rhmap, &key); + if (err) + return 3; + + e = 
bpf_map_lookup_elem(&rhmap, &key); + if (e) { + err = 5; + return 5; + } + } + + err = 0; + return 0; +} + +SEC("syscall") +int test_rhash_update_exist(void *ctx) +{ + int key = 10; + struct elem val1 = {.val = 100, .arr = {0}}; + struct elem val2 = {.val = 200, .arr = {0}}; + struct elem *e; + int ret; + + err = 1; + + /* BPF_EXIST on non-existent key should fail with -ENOENT */ + ret = bpf_map_update_elem(&rhmap, &key, &val1, BPF_EXIST); + if (ret != -ENOENT) + return 1; + + /* Insert element first */ + ret = bpf_map_update_elem(&rhmap, &key, &val1, BPF_NOEXIST); + if (ret) + return 2; + + /* Verify initial value */ + e = bpf_map_lookup_elem(&rhmap, &key); + if (!e || e->val != 100) + return 3; + + /* BPF_EXIST on existing key should succeed and update value */ + ret = bpf_map_update_elem(&rhmap, &key, &val2, BPF_EXIST); + if (ret) + return 4; + + /* Verify value was updated */ + e = bpf_map_lookup_elem(&rhmap, &key); + if (!e || e->val != 200) + return 5; + + /* Cleanup */ + bpf_map_delete_elem(&rhmap, &key); + err = 0; + return 0; +} + +SEC("syscall") +int test_rhash_update_any(void *ctx) +{ + int key = 11; + struct elem val1 = {.val = 111, .arr = {0}}; + struct elem val2 = {.val = 222, .arr = {0}}; + struct elem *e; + int ret; + + err = 1; + + /* BPF_ANY on non-existent key should insert */ + ret = bpf_map_update_elem(&rhmap, &key, &val1, BPF_ANY); + if (ret) + return 1; + + e = bpf_map_lookup_elem(&rhmap, &key); + if (!e || e->val != 111) + return 2; + + /* BPF_ANY on existing key should update */ + ret = bpf_map_update_elem(&rhmap, &key, &val2, BPF_ANY); + if (ret) + return 3; + + e = bpf_map_lookup_elem(&rhmap, &key); + if (!e || e->val != 222) + return 4; + + /* Cleanup */ + bpf_map_delete_elem(&rhmap, &key); + err = 0; + return 0; +} + +SEC("syscall") +int test_rhash_noexist_duplicate(void *ctx) +{ + int key = 12; + struct elem val = {.val = 600, .arr = {0}}; + int ret; + + err = 1; + + /* Insert element */ + ret = bpf_map_update_elem(&rhmap, &key, &val, 
BPF_NOEXIST); + if (ret) + return 1; + + /* Try to insert again with BPF_NOEXIST - should fail with -EEXIST */ + ret = bpf_map_update_elem(&rhmap, &key, &val, BPF_NOEXIST); + if (ret != -EEXIST) + return 2; + + /* Cleanup */ + bpf_map_delete_elem(&rhmap, &key); + err = 0; + return 0; +} + +SEC("syscall") +int test_rhash_delete_nonexistent(void *ctx) +{ + int key = 99999; + int ret; + + err = 1; + + /* Delete non-existent key should return -ENOENT */ + ret = bpf_map_delete_elem(&rhmap, &key); + if (ret != -ENOENT) + return 1; + + err = 0; + return 0; +} -- cgit v1.2.3 From a996794fda8463afbc2bc70fbc7f6a2a9c1547ef Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Fri, 5 Jun 2026 04:41:27 -0700 Subject: selftests/bpf: Add BPF iterator tests for resizable hash map Test basic BPF iterator functionality for BPF_MAP_TYPE_RHASH, verifying all elements are visited. Signed-off-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20260605-rhash-v7-10-5b8e05f8630d@meta.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/rhash.c | 63 ++++++++++++++++++++++ .../selftests/bpf/progs/bpf_iter_bpf_rhash_map.c | 34 ++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_bpf_rhash_map.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/rhash.c b/tools/testing/selftests/bpf/prog_tests/rhash.c index 69686bf69ba5..98bb66907b7f 100644 --- a/tools/testing/selftests/bpf/prog_tests/rhash.c +++ b/tools/testing/selftests/bpf/prog_tests/rhash.c @@ -4,6 +4,7 @@ #include #include #include "rhash.skel.h" +#include "bpf_iter_bpf_rhash_map.skel.h" #include #include #include @@ -89,6 +90,65 @@ static void rhash_map_extra_too_big(void) close(fd); } +static void rhash_iter_test(void) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + struct bpf_iter_bpf_rhash_map *skel; + int err, i, len, map_fd, iter_fd; + union bpf_iter_link_info linfo; + u32 expected_key_sum = 0, key; + struct bpf_link 
*link; + u64 val = 0; + char buf[64]; + + skel = bpf_iter_bpf_rhash_map__open(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_rhash_map__open")) + return; + + err = bpf_iter_bpf_rhash_map__load(skel); + if (!ASSERT_OK(err, "bpf_iter_bpf_rhash_map__load")) + goto out; + + map_fd = bpf_map__fd(skel->maps.rhashmap); + + /* Populate map with test data */ + for (i = 0; i < 64; i++) { + key = i + 1; + expected_key_sum += key; + + err = bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST); + if (!ASSERT_OK(err, "map_update")) + goto out; + } + + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + link = bpf_program__attach_iter(skel->progs.dump_bpf_rhash_map, &opts); + if (!ASSERT_OK_PTR(link, "attach_iter")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GE(iter_fd, 0, "create_iter")) + goto free_link; + + do { + len = read(iter_fd, buf, sizeof(buf)); + } while (len > 0); + + ASSERT_EQ(skel->bss->key_sum, expected_key_sum, "key_sum"); + ASSERT_EQ(skel->bss->elem_count, 64, "elem_count"); + + close(iter_fd); + +free_link: + bpf_link__destroy(link); +out: + bpf_iter_bpf_rhash_map__destroy(skel); +} + void test_rhash(void) { if (test__start_subtest("test_rhash_lookup_update")) @@ -117,4 +177,7 @@ void test_rhash(void) if (test__start_subtest("test_rhash_map_extra_too_big")) rhash_map_extra_too_big(); + + if (test__start_subtest("test_rhash_iter")) + rhash_iter_test(); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_rhash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_rhash_map.c new file mode 100644 index 000000000000..86f6c0d5eadb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_rhash_map.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_RHASH); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, 64); + __type(key, __u32); + __type(value, __u64); +} rhashmap SEC(".maps"); + +__u32 key_sum = 0; +__u64 val_sum = 0; +__u32 elem_count = 0; +__u32 err = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_rhash_map(struct bpf_iter__bpf_map_elem *ctx) +{ + __u32 *key = ctx->key; + __u64 *val = ctx->value; + + if (!key || !val) + return 0; + + key_sum += *key; + val_sum += *val; + elem_count++; + return 0; +} -- cgit v1.2.3 From 2bea44ea3c4ef6cee3a7c8b6bd74ace093632bef Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Fri, 5 Jun 2026 04:41:28 -0700 Subject: bpftool: Add rhash map documentation Make bpftool documentation aware of the resizable hash map. Signed-off-by: Mykyta Yatsenko Reviewed-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260605-rhash-v7-11-5b8e05f8630d@meta.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Documentation/bpftool-map.rst | 2 +- tools/bpf/bpftool/map.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 1af3305ea2b2..5daf3de5c744 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -56,7 +56,7 @@ MAP COMMANDS | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** | | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** | | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** | **arena** -| | **insn_array** } +| | **insn_array** | **rhash** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 7ebf7dbcfba4..71a45d96617e 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1478,7 +1478,7 @@ static int 
do_help(int argc, char **argv) " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" " task_storage | bloom_filter | user_ringbuf | cgrp_storage | arena |\n" - " insn_array }\n" + " insn_array | rhash }\n" " " HELP_SPEC_OPTIONS " |\n" " {-f|--bpffs} | {-n|--nomount} }\n" "", -- cgit v1.2.3 From 84f7a49e76ec8e0a1e18f3758e89800f8cf8cfc6 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Fri, 5 Jun 2026 04:41:29 -0700 Subject: selftests/bpf: Add resizable hashmap to benchmarks Support resizable hashmap in BPF map benchmarks. 1. LOOKUP (single producer, M events/sec) key | max | nr | htab | rhtab | ratio | delta ----+-----+-------+---------+---------+-------+------- 8 | 1K | 750 | 99.85 | 81.92 | 0.82x | -18 % 8 | 1K | 1K | 100.71 | 80.19 | 0.80x | -20 % 8 | 1M | 750K | 23.37 | 72.09 | 3.08x | +208 % 8 | 1M | 1M | 13.39 | 53.72 | 4.01x | +301 % 32 | 1K | 750 | 51.57 | 42.78 | 0.83x | -17 % 32 | 1K | 1K | 50.81 | 45.83 | 0.90x | -10 % 32 | 1M | 750K | 11.27 | 15.29 | 1.36x | +36 % 32 | 1M | 1M | 7.32 | 8.75 | 1.19x | +19 % 256 | 1K | 750 | 7.58 | 7.88 | 1.04x | +4 % 256 | 1K | 1K | 7.43 | 7.81 | 1.05x | +5 % 256 | 1M | 750K | 3.69 | 4.27 | 1.16x | +16 % 256 | 1M | 1M | 2.60 | 3.12 | 1.20x | +20 % Pattern: * Small map (1K): htab wins for 8 / 32 byte keys by 10-20% * Large map (1M): rhtab wins everywhere, up to 4x at high load factor with 8 byte keys. * Higher load factor amplifies rhtab's lead: rhtab grows the bucket array; htab stays at user-declared max. 2. FULL UPDATE (M events/sec per producer) htab per-producer: 20.33 22.02 19.27 23.61 24.18 23.17 21.07 mean 21.94 range 19.27 - 24.18 rhtab per-producer: 133.51 129.47 74.52 129.29 102.26 129.98 107.64 mean 115.24 range 74.52 - 133.51 speedup (mean): 5.25x (+425 %) In-place memcpy avoids the per-update alloc + RCU pointer swap that htab pays. 3. 
MEMORY value_size | htab ops/s | rhtab ops/s | htab mem | rhtab mem -----------+-------------+-------------+----------+---------- 32 B | 122.87 k/s | 133.04 k/s | 2.47 MiB | 2.49 MiB 4096 B | 64.43 k/s | 65.38 k/s | 6.74 MiB | 6.44 MiB rhtab/htab : +8 % ops, +0.8 % mem (32 B) +1 % ops, -4 % mem (4096 B) Throughput effectively tied SUMMARY * Small / well-fitting map: htab is faster (cache-friendly fixed bucket array), but only by ~10-20 %. * Large / high-load-factor map: rhtab is dramatically faster (1.2x to 4x) because rhashtable resizes to keep the load factor sane while htab stays stuck at user-declared max. * Update-heavy workloads: rhtab is ~5x faster per producer via in-place memcpy. * Memory benchmark: effectively on par. Signed-off-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20260605-rhash-v7-12-5b8e05f8630d@meta.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bench.c | 6 ++++ .../bpf/benchs/bench_bpf_hashmap_full_update.c | 34 +++++++++++++++++++-- .../bpf/benchs/bench_bpf_hashmap_lookup.c | 31 +++++++++++++++++-- .../testing/selftests/bpf/benchs/bench_htab_mem.c | 35 ++++++++++++++++++++-- 4 files changed, 100 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 6155ce455c27..3d9d2cd7764b 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -560,13 +560,16 @@ extern const struct bench bench_bpf_loop; extern const struct bench bench_strncmp_no_helper; extern const struct bench bench_strncmp_helper; extern const struct bench bench_bpf_hashmap_full_update; +extern const struct bench bench_bpf_rhashmap_full_update; extern const struct bench bench_local_storage_cache_seq_get; extern const struct bench bench_local_storage_cache_interleaved_get; extern const struct bench bench_local_storage_cache_hashmap_control; extern const struct bench bench_local_storage_tasks_trace; extern const struct bench 
bench_bpf_hashmap_lookup; +extern const struct bench bench_bpf_rhashmap_lookup; extern const struct bench bench_local_storage_create; extern const struct bench bench_htab_mem; +extern const struct bench bench_rhtab_mem; extern const struct bench bench_crypto_encrypt; extern const struct bench bench_crypto_decrypt; extern const struct bench bench_sockmap; @@ -640,13 +643,16 @@ static const struct bench *benchs[] = { &bench_strncmp_no_helper, &bench_strncmp_helper, &bench_bpf_hashmap_full_update, + &bench_bpf_rhashmap_full_update, &bench_local_storage_cache_seq_get, &bench_local_storage_cache_interleaved_get, &bench_local_storage_cache_hashmap_control, &bench_local_storage_tasks_trace, &bench_bpf_hashmap_lookup, + &bench_bpf_rhashmap_lookup, &bench_local_storage_create, &bench_htab_mem, + &bench_rhtab_mem, &bench_crypto_encrypt, &bench_crypto_decrypt, &bench_sockmap, diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c index ee1dc12c5e5e..7278fa860397 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c @@ -34,19 +34,29 @@ static void measure(struct bench_res *res) { } -static void setup(void) +static void hashmap_full_update_setup(enum bpf_map_type map_type) { struct bpf_link *link; int map_fd, i, max_entries; setup_libbpf(); - ctx.skel = bpf_hashmap_full_update_bench__open_and_load(); + ctx.skel = bpf_hashmap_full_update_bench__open(); if (!ctx.skel) { fprintf(stderr, "failed to open skeleton\n"); exit(1); } + bpf_map__set_type(ctx.skel->maps.hash_map_bench, map_type); + if (map_type == BPF_MAP_TYPE_RHASH) + bpf_map__set_map_flags(ctx.skel->maps.hash_map_bench, + BPF_F_NO_PREALLOC); + + if (bpf_hashmap_full_update_bench__load(ctx.skel)) { + fprintf(stderr, "failed to load skeleton\n"); + exit(1); + } + ctx.skel->bss->nr_loops = MAX_LOOP_NUM; link = 
bpf_program__attach(ctx.skel->progs.benchmark); @@ -62,6 +72,16 @@ static void setup(void) bpf_map_update_elem(map_fd, &i, &i, BPF_ANY); } +static void setup(void) +{ + hashmap_full_update_setup(BPF_MAP_TYPE_HASH); +} + +static void rhash_setup(void) +{ + hashmap_full_update_setup(BPF_MAP_TYPE_RHASH); +} + static void hashmap_report_final(struct bench_res res[], int res_cnt) { unsigned int nr_cpus = bpf_num_possible_cpus(); @@ -87,3 +107,13 @@ const struct bench bench_bpf_hashmap_full_update = { .report_progress = NULL, .report_final = hashmap_report_final, }; + +const struct bench bench_bpf_rhashmap_full_update = { + .name = "bpf-rhashmap-full-update", + .validate = validate, + .setup = rhash_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = NULL, + .report_final = hashmap_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c index 279ff1b8b5b2..5264b7b20e39 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c @@ -148,9 +148,10 @@ static inline void patch_key(u32 i, u32 *key) /* the rest of key is random */ } -static void setup(void) +static void hashmap_lookup_setup(enum bpf_map_type map_type) { struct bpf_link *link; + __u32 map_flags; int map_fd; int ret; int i; @@ -163,10 +164,15 @@ static void setup(void) exit(1); } + map_flags = args.map_flags; + if (map_type == BPF_MAP_TYPE_RHASH) + map_flags |= BPF_F_NO_PREALLOC; + + bpf_map__set_type(ctx.skel->maps.hash_map_bench, map_type); bpf_map__set_max_entries(ctx.skel->maps.hash_map_bench, args.max_entries); bpf_map__set_key_size(ctx.skel->maps.hash_map_bench, args.key_size); bpf_map__set_value_size(ctx.skel->maps.hash_map_bench, 8); - bpf_map__set_map_flags(ctx.skel->maps.hash_map_bench, args.map_flags); + bpf_map__set_map_flags(ctx.skel->maps.hash_map_bench, map_flags); 
ctx.skel->bss->nr_entries = args.nr_entries; ctx.skel->bss->nr_loops = args.nr_loops / args.nr_entries; @@ -197,6 +203,16 @@ static void setup(void) } } +static void setup(void) +{ + hashmap_lookup_setup(BPF_MAP_TYPE_HASH); +} + +static void rhash_setup(void) +{ + hashmap_lookup_setup(BPF_MAP_TYPE_RHASH); +} + static inline double events_from_time(u64 time) { if (time) @@ -275,3 +291,14 @@ const struct bench bench_bpf_hashmap_lookup = { .report_progress = NULL, .report_final = hashmap_report_final, }; + +const struct bench bench_bpf_rhashmap_lookup = { + .name = "bpf-rhashmap-lookup", + .argp = &bench_hashmap_lookup_argp, + .validate = validate, + .setup = rhash_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = NULL, + .report_final = hashmap_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c index 297e32390cd1..1ee217d97434 100644 --- a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c +++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c @@ -152,7 +152,7 @@ static const struct htab_mem_use_case *htab_mem_find_use_case_or_exit(const char exit(1); } -static void htab_mem_setup(void) +static void htab_mem_setup_impl(enum bpf_map_type map_type) { struct bpf_map *map; const char **names; @@ -178,10 +178,11 @@ static void htab_mem_setup(void) } map = ctx.skel->maps.htab; + bpf_map__set_type(map, map_type); bpf_map__set_value_size(map, args.value_size); /* Ensure that different CPUs can operate on different subset */ bpf_map__set_max_entries(map, MAX(8192, 64 * env.nr_cpus)); - if (args.preallocated) + if (map_type != BPF_MAP_TYPE_RHASH && args.preallocated) bpf_map__set_map_flags(map, bpf_map__map_flags(map) & ~BPF_F_NO_PREALLOC); names = ctx.uc->progs; @@ -220,6 +221,16 @@ cleanup: exit(1); } +static void htab_mem_setup(void) +{ + htab_mem_setup_impl(BPF_MAP_TYPE_HASH); +} + +static void rhtab_mem_setup(void) +{ + 
htab_mem_setup_impl(BPF_MAP_TYPE_RHASH); +} + static void htab_mem_add_fn(pthread_barrier_t *notify) { while (true) { @@ -338,6 +349,15 @@ static void htab_mem_report_final(struct bench_res res[], int res_cnt) cleanup_cgroup_environment(); } +static void rhtab_mem_validate(void) +{ + if (args.preallocated) { + fprintf(stderr, "rhash map does not support preallocation\n"); + exit(1); + } + htab_mem_validate(); +} + const struct bench bench_htab_mem = { .name = "htab-mem", .argp = &bench_htab_mem_argp, @@ -348,3 +368,14 @@ const struct bench bench_htab_mem = { .report_progress = htab_mem_report_progress, .report_final = htab_mem_report_final, }; + +const struct bench bench_rhtab_mem = { + .name = "rhtab-mem", + .argp = &bench_htab_mem_argp, + .validate = rhtab_mem_validate, + .setup = rhtab_mem_setup, + .producer_thread = htab_mem_producer, + .measure = htab_mem_measure, + .report_progress = htab_mem_report_progress, + .report_final = htab_mem_report_final, +}; -- cgit v1.2.3 From aa22d619ba22177f430693cf5e9495052d996644 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Wed, 3 Jun 2026 07:39:15 -0700 Subject: selftests/bpf: Fix flaky file_reader test file_reader/on_open_expect_fault test expects page fault when reading pages from the test harness executable. It is not guaranteed that those are paged out, even after madvise(MADV_PAGEOUT). Relax the condition in the test to succeed with both 0 and -EFAULT returned. 
Fixes: 784cdf931543 ("selftests/bpf: add file dynptr tests") Reported-by: Shung-Hsi Yu Closes: https://lore.kernel.org/all/ah6g7JSYOWGp2oAG@u94a/ Signed-off-by: Mykyta Yatsenko Tested-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260603-file_reader_flake-v1-1-7f3f52d1e388@meta.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/file_reader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/file_reader.c b/tools/testing/selftests/bpf/progs/file_reader.c index 462712ff3b8a..aa2c05cce2b3 100644 --- a/tools/testing/selftests/bpf/progs/file_reader.c +++ b/tools/testing/selftests/bpf/progs/file_reader.c @@ -50,7 +50,7 @@ int on_open_expect_fault(void *c) goto out; local_err = bpf_dynptr_read(tmp_buf, user_buf_sz, &dynptr, user_buf_sz, 0); - if (local_err == -EFAULT) { /* Expect page fault */ + if (local_err == -EFAULT || local_err == 0) { /* Expect page fault or success */ local_err = 0; run_success = 1; } -- cgit v1.2.3 From 231fc9bc27fd03db171cab4e75116923250af7a7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sat, 30 May 2026 18:07:50 -0700 Subject: bpftool: Restrict feature tests during bootstrap compilation When the perf build executes 'make -C ../bpf/bpftool bootstrap', bpftool's Makefile unconditionally evaluated feature checks for llvm, libcap, libbfd, and disassembler libraries because the bootstrap target was not exempted. Since the bootstrap bpftool strictly compiles minimal AST parsing and C code generation logic without linking LLVM or disassembler libraries, these feature check sub-makes are completely redundant. Exempt the bootstrap target from non-essential feature tests to eliminate unneeded sub-make fork overhead during Kbuild startup. 
Tested-by: James Clark Assisted-by: Gemini:gemini-3.1-pro-preview Signed-off-by: Ian Rogers Acked-by: Quentin Monnet Link: https://lore.kernel.org/r/20260531010750.525160-1-irogers@google.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Makefile | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 0febf60e1b64..8f50bc163bb2 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -106,6 +106,10 @@ ifneq ($(SKIP_CRYPTO),1) CRYPTO_LIBS := -lcrypto endif +ifeq ($(MAKECMDGOALS),bootstrap) +FEATURE_TESTS := libelf-zstd +FEATURE_DISPLAY := +else FEATURE_TESTS := clang-bpf-co-re FEATURE_TESTS += llvm FEATURE_TESTS += libcap @@ -122,6 +126,7 @@ FEATURE_DISPLAY += libcap FEATURE_DISPLAY += libbfd FEATURE_DISPLAY += libbfd-liberty FEATURE_DISPLAY += libbfd-liberty-z +endif check_feat := 1 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall -- cgit v1.2.3 From d83d4f63cb8f92aa6254dfc001eac0e41f5b2c35 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Fri, 5 Jun 2026 13:20:56 -0700 Subject: selftests/bpf: Use bpf_dynptr_slice() to read file dynptr in leak test use_file_dynptr_slice_after_put_file() reads the dynptr via bpf_dynptr_data(), which always returns NULL for a read-only file dynptr, making the example confusing. Switch to bpf_dynptr_slice(), the correct read API for file dynptrs, and read (rather than write) the slice since it is read-only. The test still fails as expected. 
Acked-by: Eduard Zingerman Signed-off-by: Amery Hung Link: https://lore.kernel.org/r/20260605202056.1780352-6-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/file_reader_fail.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/file_reader_fail.c b/tools/testing/selftests/bpf/progs/file_reader_fail.c index d5fae5e4cf9a..3bb9e2612f8f 100644 --- a/tools/testing/selftests/bpf/progs/file_reader_fail.c +++ b/tools/testing/selftests/bpf/progs/file_reader_fail.c @@ -87,7 +87,8 @@ int use_file_dynptr_slice_after_put_file(void *ctx) struct task_struct *task = bpf_get_current_task_btf(); struct file *file = bpf_get_task_exe_file(task); struct bpf_dynptr dynptr; - char *data; + char buf[1]; + const char *data; if (!file) return 0; @@ -95,15 +96,14 @@ int use_file_dynptr_slice_after_put_file(void *ctx) if (bpf_dynptr_from_file(file, 0, &dynptr)) goto out; - data = bpf_dynptr_data(&dynptr, 0, 1); + data = bpf_dynptr_slice(&dynptr, 0, buf, sizeof(buf)); if (!data) goto out; /* this should fail - file dynptr should be discarded first to prevent resource leak */ bpf_put_file(file); - *data = 'x'; - return 0; + return data[0]; out: bpf_dynptr_file_discard(&dynptr); -- cgit v1.2.3 From a6850fa388f6f6ff365b3b72cb71e6d9a8a614ed Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marlière" Date: Tue, 2 Jun 2026 10:02:50 -0300 Subject: selftests/bpf: Add BPF_STRICT_BUILD toggle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Distro kernels often lack BTF types or kernel features required by some BPF selftests, causing the build to abort on the first failure and preventing the remaining tests from running. Add BPF_STRICT_BUILD (default 1) to control build failure tolerance. When set to 0, the PERMISSIVE make variable is assigned a non-empty value that subsequent Makefile rules use to make individual build steps non-fatal. 
When set to 1 (the default), the build fails on any error, preserving the existing behavior for CI and direct builds. Users can opt in to permissive mode on the command line: make -C tools/testing/selftests \ TARGETS=bpf SKIP_TARGETS= BPF_STRICT_BUILD=0 Suggested-by: Alan Maguire Signed-off-by: Ricardo B. Marlière Link: https://lore.kernel.org/r/20260602-selftests-bpf_misconfig-v12-1-27f898b3ba26@suse.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index bc049620c774..75036c1b5c4f 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -44,6 +44,12 @@ SKIP_LLVM ?= SKIP_LIBBFD ?= SKIP_CRYPTO ?= +# When BPF_STRICT_BUILD is 1, any BPF object, skeleton, test object, or +# benchmark compilation failure is fatal. Set to 0 to tolerate failures +# and continue building the remaining tests. +BPF_STRICT_BUILD ?= 1 +PERMISSIVE := $(filter 0,$(BPF_STRICT_BUILD)) + ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) -- cgit v1.2.3 From 9779193e871b144e34ec4a3e50109b3778a51a69 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marlière" Date: Tue, 2 Jun 2026 10:02:51 -0300 Subject: selftests/bpf: Fix test_kmods KDIR to honor O= and distro kernels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit test_kmods/Makefile always pointed KDIR at the kernel source tree root, ignoring O= and KBUILD_OUTPUT. On distro kernels where the source tree has not been built, the Makefile had no fallback and would fail unconditionally. When O= or KBUILD_OUTPUT is set and points at a prepared kernel build directory (one containing Module.symvers), pass it through so kbuild can locate the correct build infrastructure (scripts, Kconfig, etc.). 
Note that the module artifacts themselves still land in the M= directory, which is test_kmods/; O= only controls where kbuild finds its build infrastructure. Fall back to /lib/modules/$(uname -r)/build when neither an explicit valid build directory nor an in-tree Module.symvers is present. A selftests-only O= value (one that does not contain Module.symvers, e.g. a private output directory) is intentionally not treated as a kernel build directory. Without this guard, a user invoking "make -C tools/testing/selftests/bpf O=/tmp/out" would have test_kmods try to use /tmp/out as the kernel build dir and fail. The parent bpf/Makefile resolves O= and KBUILD_OUTPUT to absolute paths before invoking the test_kmods sub-make. Without this, $(abspath ...) inside test_kmods/Makefile would resolve relative paths against the sub-make's CWD (test_kmods/) rather than the user's invocation directory. When O= is passed to kbuild, also pass KBUILD_OUTPUT=$(KMOD_O_VALID) explicitly. The parent invocation lifts KBUILD_OUTPUT into MAKEFLAGS as a command-line variable, which would otherwise suppress kbuild's own "KBUILD_OUTPUT := $(O)" assignment and cause it to use the inherited KBUILD_OUTPUT instead of the validated O=. Guard both all and clean against a missing KDIR so the step is silently skipped rather than fatal. Make the parent Makefile's cp conditional so it does not abort when modules were not built. Signed-off-by: Ricardo B. 
Marlière Link: https://lore.kernel.org/r/20260602-selftests-bpf_misconfig-v12-2-27f898b3ba26@suse.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 10 +++++---- tools/testing/selftests/bpf/test_kmods/Makefile | 30 ++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 75036c1b5c4f..e912526a65dd 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -296,13 +296,15 @@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c # subst() turns the rule into a pattern matching rule $(addprefix test_kmods/,$(subst .ko,%ko,$(TEST_KMODS))): $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard test_kmods/Makefile test_kmods/*.[ch]) $(Q)$(RM) test_kmods/*.ko test_kmods/*.mod.o # force re-compilation - $(Q)$(MAKE) $(submake_extras) -C test_kmods \ - RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ + $(Q)$(MAKE) $(submake_extras) -C test_kmods \ + $(if $(O),O=$(abspath $(O))) \ + $(if $(KBUILD_OUTPUT),KBUILD_OUTPUT=$(abspath $(KBUILD_OUTPUT)))\ + RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ EXTRA_CFLAGS='' EXTRA_LDFLAGS='' $(TEST_KMOD_TARGETS): $(addprefix test_kmods/,$(TEST_KMODS)) $(call msg,MOD,,$@) - $(Q)cp test_kmods/$(@F) $@ + $(Q)$(if $(PERMISSIVE),if [ -f test_kmods/$(@F) ]; then )cp test_kmods/$(@F) $@$(if $(PERMISSIVE),; fi) DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool @@ -718,7 +720,7 @@ $(TRUNNER_LIB_OBJS): $(TRUNNER_OUTPUT)/%.o:$(TOOLSDIR)/lib/%.c $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) ifneq ($2:$(OUTPUT),:$(shell pwd)) $$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) - $(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/ + $(Q)rsync -aq $(if $(PERMISSIVE),--ignore-missing-args) $$^ $(TRUNNER_OUTPUT)/ endif # some X.test.o files have runtime dependencies on Y.bpf.o files diff --git a/tools/testing/selftests/bpf/test_kmods/Makefile 
b/tools/testing/selftests/bpf/test_kmods/Makefile index 63c4d3f6a12f..031c7454ce65 100644 --- a/tools/testing/selftests/bpf/test_kmods/Makefile +++ b/tools/testing/selftests/bpf/test_kmods/Makefile @@ -1,5 +1,16 @@ TEST_KMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) -KDIR ?= $(abspath $(TEST_KMOD_DIR)/../../../../..) +SRCTREE_KDIR := $(abspath $(TEST_KMOD_DIR)/../../../../..) +# Honor O=/KBUILD_OUTPUT only if they point at a prepared kernel build +# directory (one containing Module.symvers); otherwise treat the value as a +# selftests-only output directory and fall back to in-tree or distro headers. +# The parent bpf/Makefile resolves O=/KBUILD_OUTPUT to absolute paths before +# invoking this sub-make so relative paths still anchor to the user's +# invocation directory. +KMOD_O := $(or $(O),$(KBUILD_OUTPUT)) +KMOD_O_VALID := $(if $(KMOD_O),$(if $(wildcard $(KMOD_O)/Module.symvers),$(KMOD_O))) +KDIR ?= $(if $(KMOD_O_VALID),$(SRCTREE_KDIR), \ + $(if $(wildcard $(SRCTREE_KDIR)/Module.symvers),$(SRCTREE_KDIR), \ + /lib/modules/$(shell uname -r)/build)) ifeq ($(V),1) Q = @@ -14,8 +25,21 @@ $(foreach m,$(MODULES),$(eval obj-m += $(m:.ko=.o))) CFLAGS_bpf_testmod.o = -I$(src) +# When BPF_STRICT_BUILD != 0, a missing KDIR is fatal (the default). +# When permissive, skip silently. 
+PERMISSIVE := $(filter 0,$(BPF_STRICT_BUILD)) + all: - $(Q)$(MAKE) -C $(KDIR) M=$(TEST_KMOD_DIR) modules +ifeq ($(PERMISSIVE),) + $(Q)$(MAKE) -C $(KDIR) $(if $(KMOD_O_VALID),O=$(KMOD_O_VALID) KBUILD_OUTPUT=$(KMOD_O_VALID),KBUILD_OUTPUT=) \ + M=$(TEST_KMOD_DIR) modules +else ifneq ("$(wildcard $(KDIR))", "") + $(Q)$(MAKE) -C $(KDIR) $(if $(KMOD_O_VALID),O=$(KMOD_O_VALID) KBUILD_OUTPUT=$(KMOD_O_VALID),KBUILD_OUTPUT=) \ + M=$(TEST_KMOD_DIR) modules +endif clean: - $(Q)$(MAKE) -C $(KDIR) M=$(TEST_KMOD_DIR) clean +ifneq ("$(wildcard $(KDIR))", "") + $(Q)$(MAKE) -C $(KDIR) $(if $(KMOD_O_VALID),O=$(KMOD_O_VALID) KBUILD_OUTPUT=$(KMOD_O_VALID),KBUILD_OUTPUT=) \ + M=$(TEST_KMOD_DIR) clean +endif -- cgit v1.2.3 From c476bdf27657c6ea4a447c18de169c7bdcdd419d Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marlière" Date: Tue, 2 Jun 2026 10:02:52 -0300 Subject: selftests/bpf: Tolerate BPF and skeleton generation failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some BPF programs cannot be built on distro kernels because required BTF types or features are missing. A single failure currently aborts the selftests/bpf build. Make BPF object and skeleton generation best effort in permissive mode: emit SKIP-BPF or SKIP-SKEL to stderr, remove failed outputs so downstream rules can detect absence, and continue with remaining tests. Apply the same tolerance to linked skeletons (TRUNNER_BPF_SKELS_LINKED), which depend on multiple .bpf.o files and abort the build when any dependency is missing. Note that progress messages (GEN-SKEL, LINK-BPF) are also redirected to stderr as a side effect of rewriting the recipes into single-shell pipelines; the $(call msg,...) macro is a make-recipe construct that cannot be used inside an &&-chained shell command sequence. Signed-off-by: Ricardo B. 
Marlière Link: https://lore.kernel.org/r/20260602-selftests-bpf_misconfig-v12-3-27f898b3ba26@suse.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 108 +++++++++++++++++++++++------------ 1 file changed, 73 insertions(+), 35 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e912526a65dd..dc1f4a4a3582 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -485,22 +485,26 @@ $(OUTPUT)/cgroup_getset_retval_hooks.o: cgroup_getset_retval_hooks.h # $4 - binary name define CLANG_BPF_BUILD_RULE $(call msg,CLNG-BPF,$4,$2) - $(Q)$(CLANG) $3 -O2 $(BPF_TARGET_ENDIAN) -c $1 -mcpu=v3 -o $2 + $(Q)$(CLANG) $3 -O2 $(BPF_TARGET_ENDIAN) -c $1 -mcpu=v3 -o $2 $(if $(PERMISSIVE),|| \ + ($(RM) $2; printf ' %-12s %s\n' 'SKIP-BPF' '$(notdir $2)' 1>&2)) endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE $(call msg,CLNG-BPF,$4,$2) - $(Q)$(CLANG) $3 -O2 $(BPF_TARGET_ENDIAN) -c $1 -mcpu=v2 -o $2 + $(Q)$(CLANG) $3 -O2 $(BPF_TARGET_ENDIAN) -c $1 -mcpu=v2 -o $2 $(if $(PERMISSIVE),|| \ + ($(RM) $2; printf ' %-12s %s\n' 'SKIP-BPF' '$(notdir $2)' 1>&2)) endef # Similar to CLANG_BPF_BUILD_RULE, but with cpu-v4 define CLANG_CPUV4_BPF_BUILD_RULE $(call msg,CLNG-BPF,$4,$2) - $(Q)$(CLANG) $3 -O2 $(BPF_TARGET_ENDIAN) -c $1 -mcpu=v4 -o $2 + $(Q)$(CLANG) $3 -O2 $(BPF_TARGET_ENDIAN) -c $1 -mcpu=v4 -o $2 $(if $(PERMISSIVE),|| \ + ($(RM) $2; printf ' %-12s %s\n' 'SKIP-BPF' '$(notdir $2)' 1>&2)) endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE $(call msg,GCC-BPF,$4,$2) - $(Q)$(BPF_GCC) $3 -DBPF_NO_PRESERVE_ACCESS_INDEX -Wno-attributes -O2 -c $1 -o $2 + $(Q)$(BPF_GCC) $3 -DBPF_NO_PRESERVE_ACCESS_INDEX -Wno-attributes -O2 -c $1 -o $2 $(if $(PERMISSIVE),|| \ + ($(RM) $2; printf ' %-12s %s\n' 'SKIP-BPF' '$(notdir $2)' 1>&2)) endef SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c @@ -607,47 
+611,81 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.bpf.o: \ $$($$<-$2-CFLAGS),$(TRUNNER_BINARY)) $(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) - $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) - $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$< - $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o) - $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o) - $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) - $(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$@ - $(Q)$$(BPFTOOL) gen subskeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$(@:.skel.h=.subskel.h) - $(Q)rm -f $$(<:.o=.linked1.o) $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) + $(Q)$(if $(PERMISSIVE),if [ ! -f $$< ]; then \ + $$(RM) $$@ $$(@:.skel.h=.subskel.h); \ + printf ' %-12s %s\n' 'SKIP-SKEL' '$$(notdir $$@)' 1>&2; \ + exit 0; \ + fi;) \ + printf ' %-12s %s\n' 'GEN-SKEL' '[$(TRUNNER_BINARY)] $$(notdir $$@)' 1>&2; \ + $$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$< && \ + $$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o) && \ + $$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o) && \ + diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) && \ + $$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$@ && \ + $$(BPFTOOL) gen subskeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$(@:.skel.h=.subskel.h) $(if $(PERMISSIVE),|| { \ + $$(RM) $$@ $$(@:.skel.h=.subskel.h); \ + printf ' %-12s %s\n' 'SKIP-SKEL' '$$(notdir $$@)' 1>&2; \ + }) && \ + rm -f $$(<:.o=.linked1.o) $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) $(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) - $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) - $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$< - $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o) - $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o) - $(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) - 
$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@ - $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) + $(Q)$(if $(PERMISSIVE),if [ ! -f $$< ]; then \ + $$(RM) $$@; \ + printf ' %-12s %s\n' 'SKIP-SKEL' '$$(notdir $$@)' 1>&2; \ + exit 0; \ + fi;) \ + printf ' %-12s %s\n' 'GEN-SKEL' '[$(TRUNNER_BINARY)] $$(notdir $$@)' 1>&2; \ + $$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$< && \ + $$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o) && \ + $$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o) && \ + diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) && \ + $$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@ $(if $(PERMISSIVE),|| { \ + $$(RM) $$@; \ + printf ' %-12s %s\n' 'SKIP-SKEL' '$$(notdir $$@)' 1>&2; \ + }) && \ + rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) $(TRUNNER_BPF_LSKELS_SIGNED): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) - $$(call msg,GEN-SKEL,$(TRUNNER_BINARY) (signed),$$@) - $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$< - $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o) - $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o) - $(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) - $(Q)$$(BPFTOOL) gen skeleton $(LSKEL_SIGN) $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@ - $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) + $(Q)$(if $(PERMISSIVE),if [ ! 
-f $$< ]; then \ + $$(RM) $$@; \ + printf ' %-12s %s\n' 'SKIP-SKEL' '$$(notdir $$@)' 1>&2; \ + exit 0; \ + fi;) \ + printf ' %-12s %s\n' 'GEN-SKEL' '[$(TRUNNER_BINARY) (signed)] $$(notdir $$@)' 1>&2; \ + $$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$< && \ + $$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o) && \ + $$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o) && \ + diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) && \ + $$(BPFTOOL) gen skeleton $(LSKEL_SIGN) $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@ $(if $(PERMISSIVE),|| { \ + $$(RM) $$@; \ + printf ' %-12s %s\n' 'SKIP-SKEL' '$$(notdir $$@)' 1>&2; \ + }) && \ + rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) $(LINKED_BPF_OBJS): %: $(TRUNNER_OUTPUT)/% # .SECONDEXPANSION here allows to correctly expand %-deps variables as prerequisites .SECONDEXPANSION: $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_OUTPUT)/%: $$$$(%-deps) $(BPFTOOL) | $(TRUNNER_OUTPUT) - $$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.bpf.o)) - $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked1.o) $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps)) - $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked1.o) - $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked3.o) $$(@:.skel.h=.linked2.o) - $(Q)diff $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o) - $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) - $(Q)$$(BPFTOOL) gen skeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$@ - $(Q)$$(BPFTOOL) gen subskeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$(@:.skel.h=.subskel.h) - $(Q)rm -f $$(@:.skel.h=.linked1.o) $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o) + $(Q)$(if $(PERMISSIVE),for f in $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps)); do \ + if [ ! 
-f $$$$f ]; then \ + $$(RM) $$@ $$(@:.skel.h=.subskel.h); \ + printf ' %-12s %s\n' 'SKIP-SKEL' '$$(notdir $$@)' 1>&2; \ + exit 0; \ + fi; \ + done;) \ + printf ' %-12s %s\n' 'LINK-BPF' '[$(TRUNNER_BINARY)] $$(notdir $$(@:.skel.h=.bpf.o))' 1>&2; \ + $$(BPFTOOL) gen object $$(@:.skel.h=.linked1.o) $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps)) && \ + $$(BPFTOOL) gen object $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked1.o) && \ + $$(BPFTOOL) gen object $$(@:.skel.h=.linked3.o) $$(@:.skel.h=.linked2.o) && \ + diff $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o) && \ + printf ' %-12s %s\n' 'GEN-SKEL' '[$(TRUNNER_BINARY)] $$(notdir $$@)' 1>&2 && \ + $$(BPFTOOL) gen skeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$@ && \ + $$(BPFTOOL) gen subskeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$(@:.skel.h=.subskel.h) $(if $(PERMISSIVE),|| { \ + $$(RM) $$@ $$(@:.skel.h=.subskel.h); \ + printf ' %-12s %s\n' 'SKIP-SKEL' '$$(notdir $$@)' 1>&2; \ + }) && \ + rm -f $$(@:.skel.h=.linked1.o) $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o) # When the compiler generates a %.d file, only skel basenames (not # full paths) are specified as prerequisites for corresponding %.o -- cgit v1.2.3 From a97bfc9aae076f49f0bcad713bde02b87553b995 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marlière" Date: Tue, 2 Jun 2026 10:02:53 -0300 Subject: selftests/bpf: Avoid rebuilds when running emit_tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit emit_tests is used while installing selftests to generate the kselftest list. Pulling in .d files for this goal can trigger BPF rebuild rules and mix build output into list generation. Skip dependency file inclusion for emit_tests, like clean goals, so list generation stays side-effect free. 
Also add emit_tests to NON_CHECK_FEAT_TARGETS so that feature detection is skipped; without this, Makefile.feature's $(info) output leaks into stdout and corrupts the test list captured by the top-level selftests Makefile. Signed-off-by: Ricardo B. Marlière Link: https://lore.kernel.org/r/20260602-selftests-bpf_misconfig-v12-4-27f898b3ba26@suse.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index dc1f4a4a3582..49f6a5503e84 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -170,7 +170,7 @@ endef include ../lib.mk -NON_CHECK_FEAT_TARGETS := clean docs-clean +NON_CHECK_FEAT_TARGETS := clean docs-clean emit_tests CHECK_FEAT := $(filter-out $(NON_CHECK_FEAT_TARGETS),$(or $(MAKECMDGOALS), "none")) ifneq ($(CHECK_FEAT),) FEATURE_USER := .selftests @@ -732,7 +732,7 @@ $(TRUNNER_TEST_OBJS:.o=.d): $(TRUNNER_OUTPUT)/%.test.d: \ $(TRUNNER_BPF_SKELS_LINKED) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) -ifeq ($(filter clean docs-clean,$(MAKECMDGOALS)),) +ifeq ($(filter clean docs-clean emit_tests,$(MAKECMDGOALS)),) include $(wildcard $(TRUNNER_TEST_OBJS:.o=.d)) endif -- cgit v1.2.3 From 5498e47741c8a742f730bf9996234bdae1c08ccc Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marlière" Date: Tue, 2 Jun 2026 10:02:54 -0300 Subject: selftests/bpf: Make skeleton headers order-only prerequisites of .test.d MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .test.d dependency files are generated by the C preprocessor and list the headers each test file actually #includes. Skeleton headers appear in those generated lists, so the .test.o -> .skel.h dependency is already tracked by the .d file content. 
Making skeletons order-only prerequisites of .test.d means that a missing or skipped skeleton does not prevent .test.d generation, and regenerating a skeleton does not force .test.d to be recreated. This avoids unnecessary recompilation and, more importantly, avoids build errors when a skeleton was intentionally skipped due to a BPF compilation failure. $$(BPFOBJ) is intentionally kept as a normal prerequisite: a libbpf rebuild legitimately invalidates .test.d, since libbpf header changes can affect the headers .test.o sees. Only the skeleton headers are moved to order-only. Note that adding a new BPF skeleton via a modified existing local header still works correctly: GNU make builds order-only prerequisites that do not exist (the order-only qualifier only suppresses timestamp-driven rebuilds, not existence-driven builds), so a brand-new .skel.h listed in TRUNNER_BPF_SKELS is generated even when .test.d is otherwise up to date. The modified local header invalidates .test.o through the previously included .d content, forcing a recompile that regenerates .test.d with the new .skel.h dependency captured by gcc -MMD. Signed-off-by: Ricardo B. 
Marlière Link: https://lore.kernel.org/r/20260602-selftests-bpf_misconfig-v12-5-27f898b3ba26@suse.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 49f6a5503e84..09de69a81112 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -726,11 +726,11 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_TEST_OBJS:.o=.d): $(TRUNNER_OUTPUT)/%.test.d: \ $(TRUNNER_TESTS_DIR)/%.c \ $(TRUNNER_EXTRA_HDRS) \ + $$(BPFOBJ) | $(TRUNNER_OUTPUT) \ $(TRUNNER_BPF_SKELS) \ $(TRUNNER_BPF_LSKELS) \ $(TRUNNER_BPF_LSKELS_SIGNED) \ - $(TRUNNER_BPF_SKELS_LINKED) \ - $$(BPFOBJ) | $(TRUNNER_OUTPUT) + $(TRUNNER_BPF_SKELS_LINKED) ifeq ($(filter clean docs-clean emit_tests,$(MAKECMDGOALS)),) include $(wildcard $(TRUNNER_TEST_OBJS:.o=.d)) -- cgit v1.2.3 From 9c4de137a9a5280c95515e83e97838826603ea93 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marlière" Date: Tue, 2 Jun 2026 10:02:55 -0300 Subject: selftests/bpf: Tolerate test file compilation failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Individual test files may fail to compile when headers or kernel features required by that test are absent. Currently this aborts the entire build. Make the per-test compilation non-fatal: remove the output object on failure and print a SKIP-TEST marker to stderr. Guard the BTFIDS post-processing step so it is skipped when the object file is absent. The linker step will later ignore absent objects, allowing the remaining tests to build and run. Group cd and CC in a sub-shell so a cd failure cannot leak into the error-handling branch and operate in the original working directory; use $@ (absolute path) for $(RM) so it cannot match an unrelated file there. Replace the $(call msg,...) 
in the BTFIDS block with a plain printf (the msg macro expands to @printf, which is a make-recipe construct and is invalid inside a shell if-then-fi body) and gate the printf on $(filter 1,$(V)) so verbose mode (V=1) does not double-print the line that the recipe shell already echoes; non-verbose modes (V unset, V=0, V=2, ...) still print the BTFIDS marker, matching the convention of the shared msg macro. Restrict tolerance to test_progs and its flavors via an inlined $(if $(filter test_progs%,$1),$(if $(PERMISSIVE),...)) check: runners with strong cross-object references (e.g. test_maps) would link-fail with a partial object set, so they keep strict semantics even when BPF_STRICT_BUILD=0. The check is inlined rather than stored in a helper variable so $1 is substituted at $(call) time and the per-runner result is baked into each recipe. Note on bisectability: this change is gated entirely behind PERMISSIVE for test_progs%, so default builds (BPF_STRICT_BUILD!=0) compile and run identically at every commit in the series. Bisecting in PERMISSIVE mode at this commit still requires the next two patches ("selftests/bpf: Skip tests whose objects were not built" and "selftests/bpf: Allow test_progs to link with a partial object set") to avoid the linker rejecting missing objects and the runtime aborting on NULL function pointers. Signed-off-by: Ricardo B. 
Marlière Link: https://lore.kernel.org/r/20260602-selftests-bpf_misconfig-v12-6-27f898b3ba26@suse.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 09de69a81112..7739799c2566 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -588,6 +588,12 @@ endef # $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc) define DEFINE_TEST_RUNNER_RULES +# Permissive build behaviour (skip-on-failure compile, partial-link) only +# applies to test_progs and its flavors; runners that use strong cross-object +# references (e.g. test_maps) keep strict semantics even when permissive. +# The check is inlined per-runner so $1 is substituted at $(call) time and +# the result is baked into each rule's recipe. + ifeq ($($(TRUNNER_OUTPUT)-dir),) $(TRUNNER_OUTPUT)-dir := y $(TRUNNER_OUTPUT): @@ -717,11 +723,14 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_TESTS_DIR)/%.c \ | $(TRUNNER_OUTPUT)/%.test.d $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) - $(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -MMD -MT $$@ -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) + $(Q)(cd $$(@D) && $$(CC) -I. 
$$(CFLAGS) -MMD -MT $$@ -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)) $(if $(filter test_progs%,$1),$(if $(PERMISSIVE),|| \ + ($(RM) $$@; printf ' %-12s %s\n' 'SKIP-TEST' '$$(notdir $$@)' 1>&2))) $$(if $$(TEST_NEEDS_BTFIDS), \ - $$(call msg,BTFIDS,$(TRUNNER_BINARY),$$@) \ + $(Q)if [ -f $$@ ]; then \ + $(if $(filter 1,$(V)),true,printf ' %-8s%s %s\n' "BTFIDS" " [$(TRUNNER_BINARY)]" "$$(notdir $$@)"); \ $(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@; \ - $(RESOLVE_BTFIDS) --patch_btfids $$@.BTF_ids $$@) + $(RESOLVE_BTFIDS) --patch_btfids $$@.BTF_ids $$@; \ + fi) $(TRUNNER_TEST_OBJS:.o=.d): $(TRUNNER_OUTPUT)/%.test.d: \ $(TRUNNER_TESTS_DIR)/%.c \ -- cgit v1.2.3 From aeb73a9f301de4f0df7c858ea465a7a9f5d09fd7 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marlière" Date: Tue, 2 Jun 2026 10:02:56 -0300 Subject: selftests/bpf: Skip tests whose objects were not built MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When both run_test and run_serial_test are NULL (because the corresponding .test.o was not compiled), mark the test as not built instead of fatally aborting. Report these tests as "SKIP (not built)" in per-test output and include them in the skip count so they remain visible in CI results and JSON output. The summary line shows the not-built count when nonzero: Summary: 50/55 PASSED, 5 SKIPPED (3 not built), 0 FAILED Tests filtered out by -t/-n remain invisible as before; only genuinely unbuilt tests are surfaced. Signed-off-by: Ricardo B. 
Marlière Link: https://lore.kernel.org/r/20260602-selftests-bpf_misconfig-v12-7-27f898b3ba26@suse.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_progs.c | 53 +++++++++++++++++++++++++++----- tools/testing/selftests/bpf/test_progs.h | 1 + 2 files changed, 46 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index cc14b13e23fe..7ba82974ee78 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -165,6 +165,8 @@ struct prog_test_def { void (*run_test)(void); void (*run_serial_test)(void); bool should_run; + bool not_built; + bool selected; bool need_cgroup_cleanup; bool should_tmon; }; @@ -372,6 +374,8 @@ static void print_test_result(const struct prog_test_def *test, const struct tes fprintf(env.stdout_saved, "#%-*d %s:", TEST_NUM_WIDTH, test->test_num, test->test_name); if (test_state->error_cnt) fprintf(env.stdout_saved, "FAIL"); + else if (test->not_built) + fprintf(env.stdout_saved, "SKIP (not built)"); else if (!skipped_cnt) fprintf(env.stdout_saved, "OK"); else if (skipped_cnt == subtests_cnt || !subtests_cnt) @@ -1641,6 +1645,7 @@ static void calculate_summary_and_print_errors(struct test_env *env) json_writer_t *w = NULL; for (i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *test = &prog_test_defs[i]; struct test_state *state = &test_states[i]; if (!state->tested) @@ -1651,7 +1656,7 @@ static void calculate_summary_and_print_errors(struct test_env *env) if (state->error_cnt) fail_cnt++; - else + else if (!test->not_built) succ_cnt++; } @@ -1700,8 +1705,13 @@ static void calculate_summary_and_print_errors(struct test_env *env) if (env->json) fclose(env->json); - printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", - succ_cnt, sub_succ_cnt, skip_cnt, fail_cnt); + if (env->not_built_cnt) + printf("Summary: %d/%d PASSED, %d SKIPPED (%d not built), %d FAILED\n", + succ_cnt, 
sub_succ_cnt, skip_cnt, env->not_built_cnt, + fail_cnt); + else + printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", + succ_cnt, sub_succ_cnt, skip_cnt, fail_cnt); env->succ_cnt = succ_cnt; env->sub_succ_cnt = sub_succ_cnt; @@ -1772,6 +1782,19 @@ static void server_main(void) run_one_test(i); } + /* mark not-built tests as skipped */ + for (int i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *test = &prog_test_defs[i]; + struct test_state *state = &test_states[i]; + + if (test->not_built && test->selected) { + state->tested = true; + state->skip_cnt = 1; + env.not_built_cnt++; + print_test_result(test, state); + } + } + /* generate summary */ fflush(stderr); fflush(stdout); @@ -2046,15 +2069,20 @@ int main(int argc, char **argv) struct prog_test_def *test = &prog_test_defs[i]; test->test_num = i + 1; - test->should_run = should_run(&env.test_selector, - test->test_num, test->test_name); + test->selected = should_run(&env.test_selector, + test->test_num, test->test_name); + test->should_run = test->selected; - if ((test->run_test == NULL && test->run_serial_test == NULL) || - (test->run_test != NULL && test->run_serial_test != NULL)) { + if (test->run_test && test->run_serial_test) { fprintf(stderr, "Test %d:%s must have either test_%s() or serial_test_%sl() defined.\n", test->test_num, test->test_name, test->test_name, test->test_name); exit(EXIT_ERR_SETUP_INFRA); } + if (!test->run_test && !test->run_serial_test) { + test->not_built = true; + test->should_run = false; + continue; + } if (test->should_run) test->should_tmon = should_tmon(&env.tmon_selector, test->test_name); } @@ -2106,9 +2134,18 @@ int main(int argc, char **argv) for (i = 0; i < prog_test_cnt; i++) { struct prog_test_def *test = &prog_test_defs[i]; + struct test_state *state = &test_states[i]; - if (!test->should_run) + if (!test->should_run) { + if (test->not_built && test->selected && + !env.get_test_cnt && !env.list_test_names) { + state->tested = true; + state->skip_cnt = 1; + 
env.not_built_cnt++; + print_test_result(test, state); + } continue; + } if (env.get_test_cnt) { env.succ_cnt++; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 37955a8ad385..2cf950afcd85 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -125,6 +125,7 @@ struct test_env { int sub_succ_cnt; /* successful sub-tests */ int fail_cnt; /* total failed tests + sub-tests */ int skip_cnt; /* skipped tests */ + int not_built_cnt; /* tests not built */ int saved_netns_fd; int workers; /* number of worker process */ -- cgit v1.2.3 From af490669fd339988765d87de9dd1b25e62ec64cf Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marlière" Date: Tue, 2 Jun 2026 10:02:57 -0300 Subject: selftests/bpf: Allow test_progs to link with a partial object set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When individual test files are skipped due to compilation failures, their .test.o files are absent. The linker step currently lists all expected .test.o files as explicit prerequisites, so make considers any missing one an error. In permissive mode, declare the test objects that already exist on disk (via parse-time $(wildcard ...)) as normal prerequisites of the binary so that modifications to a test source still trigger a relink, and keep the full TRUNNER_TEST_OBJS list as order-only prerequisites so that initial fresh builds still produce them and missing objects do not abort the link. The recipe filter is split per mode: in permissive mode it combines a recipe-time $(wildcard ...) (which catches objects freshly produced via the order-only path on a fresh build) with $(filter-out $(TRUNNER_TEST_OBJS),$^) (which keeps the non-test inputs from $^ but drops the parse-time wildcard duplicates). 
This avoids passing the same .test.o twice to the linker while still presenting test objects before libbpf.a so that GNU ld, which scans static archives left-to-right, pulls in archive members referenced exclusively by test objects (e.g. ring_buffer__new from ringbuf.c). In default (strict) mode the recipe remains the simple $(filter %.a %.o,$^) since TRUNNER_TEST_OBJS is part of $^ exactly once. Gate the partial-link behavior on $(if $(filter test_progs%,$1),...) so it only applies to test_progs and its flavors. test_maps and similar runners using strong cross-object references would link-fail with a partial set and intentionally retain strict link semantics. Note: adding a brand-new test_*.c file in permissive mode requires removing the binary (or a clean rebuild) before the new test is linked in, because the parse-time $(wildcard ...) is evaluated when the Makefile is read and will not yet see the new .test.o. This is acceptable since permissive mode targets tolerant CI builds rather than incremental development. Signed-off-by: Ricardo B. 
Marlière Link: https://lore.kernel.org/r/20260602-selftests-bpf_misconfig-v12-8-27f898b3ba26@suse.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 7739799c2566..bc845022a7ef 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -773,14 +773,15 @@ endif # some X.test.o files have runtime dependencies on Y.bpf.o files $(OUTPUT)/$(TRUNNER_BINARY): | $(TRUNNER_BPF_OBJS) -$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ +$(OUTPUT)/$(TRUNNER_BINARY): $(if $(filter test_progs%,$1),$(if $(PERMISSIVE),$$(wildcard $(TRUNNER_TEST_OBJS)),$(TRUNNER_TEST_OBJS)),$(TRUNNER_TEST_OBJS)) \ $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \ $(TRUNNER_LIB_OBJS) \ $(TRUNNER_BPFTOOL) \ $(OUTPUT)/veristat \ - | $(TRUNNER_BINARY)-extras + | $(TRUNNER_BINARY)-extras \ + $(if $(filter test_progs%,$1),$(if $(PERMISSIVE),$(TRUNNER_TEST_OBJS))) $$(call msg,BINARY,,$$@) - $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LLVM_LDLIBS) $$(LDFLAGS) $$(LLVM_LDFLAGS) -o $$@ + $(Q)$$(CC) $$(CFLAGS) $(if $(filter test_progs%,$1),$(if $(PERMISSIVE),$$(filter %.a %.o,$$(wildcard $(TRUNNER_TEST_OBJS)) $$(filter-out $(TRUNNER_TEST_OBJS),$$^)),$$(filter %.a %.o,$$^)),$$(filter %.a %.o,$$^)) $$(LDLIBS) $$(LLVM_LDLIBS) $$(LDFLAGS) $$(LLVM_LDFLAGS) -o $$@ $(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/$(USE_BOOTSTRAP)bpftool \ $(OUTPUT)/$(if $2,$2/)bpftool -- cgit v1.2.3 From f813a4d6877e9197f6e85120c144738e3c1c3b80 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marlière" Date: Tue, 2 Jun 2026 10:02:58 -0300 Subject: selftests/bpf: Tolerate benchmark build failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benchmark objects depend on skeletons that may be missing when some BPF programs fail to build. 
In that case, benchmark object compilation or final bench linking should not abort the full selftests/bpf build. Keep both steps non-fatal, emit SKIP-BENCH or SKIP-LINK, and remove failed outputs so stale objects or binaries are not reused by later incremental builds. Note that because bench.c statically references every benchmark via extern symbols, partial linking is not possible: if any single benchmark object fails, the entire bench binary is skipped. This is by design -- the error handler catches all compilation failures including genuine ones, but those are caught by full-config CI runs. Signed-off-by: Ricardo B. Marlière Link: https://lore.kernel.org/r/20260602-selftests-bpf_misconfig-v12-9-27f898b3ba26@suse.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index bc845022a7ef..4eebc15670e3 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -945,7 +945,8 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) $(call msg,CC,,$@) - $(Q)$(CC) $(CFLAGS) -O2 -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) -O2 -c $(filter %.c,$^) $(LDLIBS) -o $@ $(if $(PERMISSIVE),|| \ + ($(RM) $@; printf ' %-12s %s\n' 'SKIP-BENCH' '$(notdir $@)' 1>&2)) $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ @@ -994,7 +995,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/usdt_2.o \ # $(call msg,BINARY,,$@) - $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ $(if $(PERMISSIVE),|| \ + ($(RM) $@; printf ' %-12s %s\n' 'SKIP-LINK' 
'$(notdir $@) (some benchmarks may have been skipped)' 1>&2)) # This works around GCC warning about snprintf truncating strings like: # -- cgit v1.2.3 From b85e63cb65f96df373b034cc347b0e18231cb0d5 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marlière" Date: Tue, 2 Jun 2026 10:02:59 -0300 Subject: selftests/bpf: Provide weak definitions for cross-test functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some test files reference functions defined in other translation units that may not be compiled when skeletons are missing. Replace forward declarations of uprobe_multi_func_{1,2,3}() with weak no-op stubs so the linker resolves them regardless of which objects are present. The stub bodies are `asm volatile ("")` rather than empty, matching the shape of the strong definitions in prog_tests/uprobe_multi_test.c. This keeps the weak and strong sides on the same footing for the optimiser (noinline + asm-barrier), which is the form upstream already relies on for these functions. Move stack_mprotect() from test_lsm.c into testing_helpers.c so it is always available. The previous weak-stub approach returned 0, which would cause callers expecting -1/EPERM to fail their assertions deterministically. Having the real implementation in a shared utility avoids this problem entirely. Include <alloca.h> for alloca() so the build does not rely on glibc's implicit declaration via <stdlib.h>. Signed-off-by: Ricardo B.
Marlière Link: https://lore.kernel.org/r/20260602-selftests-bpf_misconfig-v12-10-27f898b3ba26@suse.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/bpf_cookie.c | 17 +++++++++++------ tools/testing/selftests/bpf/prog_tests/iters.c | 2 -- tools/testing/selftests/bpf/prog_tests/test_lsm.c | 22 ---------------------- tools/testing/selftests/bpf/testing_helpers.c | 18 ++++++++++++++++++ tools/testing/selftests/bpf/testing_helpers.h | 1 + 5 files changed, 30 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 35adc3f6d443..fa484d00a7a5 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -252,10 +252,17 @@ cleanup: kprobe_multi__destroy(skel); } -/* defined in prog_tests/uprobe_multi_test.c */ -void uprobe_multi_func_1(void); -void uprobe_multi_func_2(void); -void uprobe_multi_func_3(void); +/* + * Weak uprobe target stubs. noinline is required because + * uprobe_multi_test_run() takes their addresses to configure the BPF + * program's attachment points; an inlined function has no stable + * address in the binary to probe. The strong definitions in + * uprobe_multi_test.c take precedence when that translation unit is + * linked. 
+ */ +noinline __weak void uprobe_multi_func_1(void) { asm volatile (""); } +noinline __weak void uprobe_multi_func_2(void) { asm volatile (""); } +noinline __weak void uprobe_multi_func_3(void) { asm volatile (""); } static void uprobe_multi_test_run(struct uprobe_multi *skel) { @@ -574,8 +581,6 @@ cleanup: close(fmod_ret_fd); } -int stack_mprotect(void); - static void lsm_subtest(struct test_bpf_cookie *skel) { __u64 cookie; diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index a539980a2fbe..c0b6082f345a 100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -202,8 +202,6 @@ cleanup: iters_task__destroy(skel); } -extern int stack_mprotect(void); - static void subtest_css_task_iters(void) { struct iters_css_task *skel = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c index bdc4fc06bc5a..d7495efd4a56 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c +++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c @@ -5,36 +5,14 @@ */ #include -#include #include #include -#include -#include #include "lsm.skel.h" #include "lsm_tailcall.skel.h" char *CMD_ARGS[] = {"true", NULL}; -#define GET_PAGE_ADDR(ADDR, PAGE_SIZE) \ - (char *)(((unsigned long) (ADDR + PAGE_SIZE)) & ~(PAGE_SIZE-1)) - -int stack_mprotect(void) -{ - void *buf; - long sz; - int ret; - - sz = sysconf(_SC_PAGESIZE); - if (sz < 0) - return sz; - - buf = alloca(sz * 3); - ret = mprotect(GET_PAGE_ADDR(buf, sz), sz, - PROT_READ | PROT_WRITE | PROT_EXEC); - return ret; -} - int exec_cmd(int *monitored_pid) { int child_pid, child_status; diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 6fbe1e995660..c970e7793dfc 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -5,6 +5,8 @@ #include 
#include #include +#include +#include #include #include #include "disasm.h" @@ -516,3 +518,19 @@ bool is_jit_enabled(void) return enabled; } + +int stack_mprotect(void) +{ + void *buf; + long sz; + int ret; + + sz = sysconf(_SC_PAGESIZE); + if (sz < 0) + return sz; + + buf = alloca(sz * 3); + ret = mprotect((void *)(((unsigned long)(buf + sz)) & ~(sz - 1)), sz, + PROT_READ | PROT_WRITE | PROT_EXEC); + return ret; +} diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index 2ca2356a0b58..2edc6fb7fc52 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -59,5 +59,6 @@ struct bpf_insn; int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt); int testing_prog_flags(void); bool is_jit_enabled(void); +int stack_mprotect(void); #endif /* __TESTING_HELPERS_H */ -- cgit v1.2.3 From 3ca6543464f8f396eee018399b5e266196b0a9a7 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marlière" Date: Tue, 2 Jun 2026 10:03:00 -0300 Subject: selftests/bpf: Tolerate missing files during install MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With partial builds, some TEST_GEN_FILES entries can be absent at install time. rsync treats missing source arguments as fatal and aborts kselftest installation. Override INSTALL_SINGLE_RULE in selftests/bpf to use --ignore-missing-args, while keeping the existing bpf-specific INSTALL_RULE extension logic. Also add --ignore-missing-args to the TEST_INST_SUBDIRS rsync loop so that subdirectories with no .bpf.o files (e.g. when a test runner flavor was skipped) do not abort installation. Note that the INSTALL_SINGLE_RULE override applies globally to all file categories including static source files (TEST_PROGS, TEST_FILES). These are version-controlled and should always be present, so the practical risk is negligible. Signed-off-by: Ricardo B. 
Marlière Link: https://lore.kernel.org/r/20260602-selftests-bpf_misconfig-v12-11-27f898b3ba26@suse.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4eebc15670e3..42d9cf848b25 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1029,12 +1029,23 @@ EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ # Delete partially updated (corrupted) files on error .DELETE_ON_ERROR: +# When permissive, tell rsync to ignore missing source arguments so that +# partial builds do not abort installation. +ifneq ($(PERMISSIVE),) +override define INSTALL_SINGLE_RULE + $(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH)) + $(if $(INSTALL_LIST),rsync -a --copy-unsafe-links --ignore-missing-args $(INSTALL_LIST) $(INSTALL_PATH)/) +endef +endif + DEFAULT_INSTALL_RULE := $(INSTALL_RULE) override define INSTALL_RULE $(DEFAULT_INSTALL_RULE) - @for DIR in $(TEST_INST_SUBDIRS); do \ - mkdir -p $(INSTALL_PATH)/$$DIR; \ - rsync -a $(OUTPUT)/$$DIR/*.bpf.o $(INSTALL_PATH)/$$DIR;\ + @for DIR in $(TEST_INST_SUBDIRS); do \ + mkdir -p $(INSTALL_PATH)/$$DIR; \ + rsync -a $(if $(PERMISSIVE),--ignore-missing-args) \ + $(OUTPUT)/$$DIR/*.bpf.o \ + $(INSTALL_PATH)/$$DIR; \ done endef -- cgit v1.2.3 From e2a49fdb1beed150125b4104c90eb2a96ec7f63a Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Fri, 5 Jun 2026 23:52:47 +0800 Subject: bpf: Check tail zero of bpf_map_info Since there are 4 bytes of padding at the end of struct bpf_map_info, they won't be checked by bpf_check_uarg_tail_zero(). pahole -C bpf_map_info ./vmlinux struct bpf_map_info { ... 
__u64 hash __attribute__((__aligned__(8))); /* 88 8 */ __u32 hash_size; /* 96 4 */ /* size: 104, cachelines: 2, members: 18 */ /* padding: 4 */ /* forced alignments: 1 */ /* last cacheline: 40 bytes */ } __attribute__((__aligned__(8))); If a future kernel extension adds a new 4-byte field, older userspace programs allocating this structure on the stack might inadvertently pass uninitialized stack garbage into the new field, permanently breaking backward compatibility. -- sashiko [1] Fix it by changing sizeof(info) to offsetofend(struct bpf_map_info, hash_size). And, add "__u32 :32" to the tail of struct bpf_map_info. [1] https://lore.kernel.org/bpf/20260513224823.6494FC19425@smtp.kernel.org/ Fixes: ea2e6467ac36 ("bpf: Return hashes of maps in BPF_OBJ_GET_INFO_BY_FD") Acked-by: Mykyta Yatsenko Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20260605155249.20772-2-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 5 +++-- tools/include/uapi/linux/bpf.h | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bed9b1b4d5ef..e1730f449d9e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6733,6 +6733,7 @@ struct bpf_map_info { __u64 map_extra; __aligned_u64 hash; __u32 hash_size; + __u32 :32; } __attribute__((aligned(8))); struct bpf_btf_info { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 31a3b70a0b5d..89f020a44fc9 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5406,10 +5406,11 @@ static int bpf_map_get_info_by_fd(struct file *file, { struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); struct bpf_map_info info; - u32 info_len = attr->info.info_len; + u32 info_len = attr->info.info_len, len; int err; - err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); + len = offsetofend(struct bpf_map_info, hash_size); + err = 
bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), len, info_len); if (err) return err; info_len = min_t(u32, sizeof(info), info_len); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7d0b282ba674..7caf667e86fe 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6733,6 +6733,7 @@ struct bpf_map_info { __u64 map_extra; __aligned_u64 hash; __u32 hash_size; + __u32 :32; } __attribute__((aligned(8))); struct bpf_btf_info { -- cgit v1.2.3 From 786be2b05980a5828e67fc564ad7517e2adbe9bd Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Fri, 5 Jun 2026 23:52:48 +0800 Subject: bpf: Check tail zero of bpf_prog_info Since there are 4 bytes of padding at the end of struct bpf_prog_info, they won't be checked by bpf_check_uarg_tail_zero(). pahole -C bpf_prog_info ./vmlinux struct bpf_prog_info { ... __u32 attach_btf_obj_id; /* 220 4 */ __u32 attach_btf_id; /* 224 4 */ /* size: 232, cachelines: 4, members: 38 */ /* sum members: 224 */ /* sum bitfield members: 1 bits, bit holes: 1, sum bit holes: 31 bits */ /* padding: 4 */ /* forced alignments: 9 */ /* last cacheline: 40 bytes */ } __attribute__((__aligned__(8))); If a future kernel extension adds a new 4-byte field, older userspace programs allocating this structure on the stack might inadvertently pass uninitialized stack garbage into the new field, permanently breaking backward compatibility. -- sashiko [1] Fix it by changing sizeof(info) to offsetofend(struct bpf_prog_info, attach_btf_id). And, add "__u32 :32" to the tail of struct bpf_prog_info. 
[1] https://lore.kernel.org/bpf/20260513224823.6494FC19425@smtp.kernel.org/ Fixes: aba64c7da983 ("bpf: Add verified_insns to bpf_prog_info and fdinfo") Acked-by: Mykyta Yatsenko Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20260605155249.20772-3-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 5 +++-- tools/include/uapi/linux/bpf.h | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e1730f449d9e..d5238df5e5eb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6712,6 +6712,7 @@ struct bpf_prog_info { __u32 verified_insns; __u32 attach_btf_obj_id; __u32 attach_btf_id; + __u32 :32; } __attribute__((aligned(8))); struct bpf_map_info { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 89f020a44fc9..c5d4ae957e87 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5121,10 +5121,11 @@ static int bpf_prog_get_info_by_fd(struct file *file, u32 info_len = attr->info.info_len; struct bpf_prog_kstats stats; char __user *uinsns; - u32 ulen; + u32 ulen, len; int err; - err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); + len = offsetofend(struct bpf_prog_info, attach_btf_id); + err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), len, info_len); if (err) return err; info_len = min_t(u32, sizeof(info), info_len); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7caf667e86fe..3829db087449 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6712,6 +6712,7 @@ struct bpf_prog_info { __u32 verified_insns; __u32 attach_btf_obj_id; __u32 attach_btf_id; + __u32 :32; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit v1.2.3 From d47e67a487bfb6952a7831a6b36b7a90534c6044 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Fri, 5 Jun 2026 23:52:49 +0800 Subject: selftests/bpf: Add 
tests to verify checking padding bytes for bpf_[map,prog]_info Add two tests to verify that the tail padding 4 bytes of struct bpf_map_info and bpf_prog_info are checked in syscall.c using bpf_check_uarg_tail_zero(). Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20260605155249.20772-4-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/bpf_attr_size.c | 55 ++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_attr_size.c b/tools/testing/selftests/bpf/prog_tests/bpf_attr_size.c index 32159dc64da8..87842c4347a6 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_attr_size.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_attr_size.c @@ -62,8 +62,63 @@ cleanup: cgroup_skb_direct_packet_access__destroy(skel); } +static void test_map_info_tail_zero(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, map_opts); + struct bpf_map_info_fake { + __u8 info[offsetofend(struct bpf_map_info, hash_size)]; + __u32 pad; + } info = { + .pad = 1, + }; + int map_fd, err; + __u32 info_len; + + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr", sizeof(int), 1, 1, &map_opts); + if (!ASSERT_GE(map_fd, 0, "bpf_map_create")) + return; + + info_len = sizeof(info); + err = bpf_obj_get_info_by_fd(map_fd, &info, &info_len); + ASSERT_EQ(err, -E2BIG, "bpf_obj_get_info_by_fd"); + + close(map_fd); +} + +static void test_prog_info_tail_zero(void) +{ + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + struct bpf_prog_info_fake { + __u8 info[offsetofend(struct bpf_prog_info, attach_btf_id)]; + __u32 pad; + } info = { + .pad = 1, + }; + int prog_fd, err; + __u32 info_len; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test_prog", "GPL", insns, + ARRAY_SIZE(insns), &prog_opts); + if (!ASSERT_GE(prog_fd, 0, "bpf_prog_load")) + return; + + info_len = sizeof(info); + err = 
bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_EQ(err, -E2BIG, "bpf_obj_get_info_by_fd"); + + close(prog_fd); +} + void test_bpf_attr_size(void) { if (test__start_subtest("query_size_boundaries")) test_query_size_boundaries(); + if (test__start_subtest("map_info_tail_zero")) + test_map_info_tail_zero(); + if (test__start_subtest("prog_info_tail_zero")) + test_prog_info_tail_zero(); } -- cgit v1.2.3 From 557d0cc3f2520feba45360beeafb93203b3230e0 Mon Sep 17 00:00:00 2001 From: Varun R Mallya Date: Wed, 3 Jun 2026 02:28:46 +0530 Subject: selftests/bpf: use host CPU features in JIT disassembler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the host CPU name and feature string to LLVMCreateDisasmCPUFeatures() instead of using LLVMCreateDisasm(), so the disassembler correctly decodes CPU-specific instructions and extensions such as RISC-V compressed and vector instructions. Signed-off-by: Varun R Mallya Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20260602205847.102825-2-varunrmallya@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/jit_disasm_helpers.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/jit_disasm_helpers.c b/tools/testing/selftests/bpf/jit_disasm_helpers.c index 364c557c5115..3558fe10e28c 100644 --- a/tools/testing/selftests/bpf/jit_disasm_helpers.c +++ b/tools/testing/selftests/bpf/jit_disasm_helpers.c @@ -96,10 +96,19 @@ static int disasm_one_func(FILE *text_out, uint8_t *image, __u32 len) __u32 *label_pc, pc; int i, cnt, err = 0; char buf[64]; + char *cpu, *features; triple = LLVMGetDefaultTargetTriple(); - ctx = LLVMCreateDisasm(triple, &labels, 0, NULL, lookup_symbol); - if (!ASSERT_OK_PTR(ctx, "LLVMCreateDisasm")) { + + cpu = LLVMGetHostCPUName(); + features = LLVMGetHostCPUFeatures(); + + ctx = LLVMCreateDisasmCPUFeatures(triple, cpu, features, &labels, 0, NULL, 
lookup_symbol); + + LLVMDisposeMessage(cpu); + LLVMDisposeMessage(features); + + if (!ASSERT_OK_PTR(ctx, "LLVMCreateDisasmCPUFeatures")) { err = -EINVAL; goto out; } -- cgit v1.2.3 From 6d13ddb1d46525931d2324d9358721eb3c495d72 Mon Sep 17 00:00:00 2001 From: Varun R Mallya Date: Wed, 3 Jun 2026 02:28:47 +0530 Subject: bpf, riscv: inline bpf_get_current_task() and bpf_get_current_task_btf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On RISC-V, the current task pointer is stored in the thread pointer register (tp). Emit a single `mv a5, tp` instead of a full helper call for BPF_FUNC_get_current_task and BPF_FUNC_get_current_task_btf. Register bpf_jit_inlines_helper_call() entries for both helpers so the verifier treats them as inlined, and add the expected `mv a5, tp` annotation to the riscv64 selftests. The following show changes before and after this patch. Before patch: auipc t1,0x817a # load upper PC-relative address jalr -2004(t1) # call bpf_get_current_task helper mv a5,a0 # move return value to BPF_REG_0 After patch: mv a5,tp # directly: a5 = current (tp = thread pointer) Benchmark (bpf_prog_test_run wrapping bpf_get_current_task in loop, batch=100, 10s, QEMU RISC-V): | runs/sec | helper-calls/sec | ns/call -------------+-----------+------------------+--------- Before patch | 173,490 | 17,349,090 | 57 After patch | 320,497 | 32,049,780 | 31 -------------+-----------+------------------+--------- Improvement | +84.7% | +84.7% | -45.6% Signed-off-by: Varun R Mallya Acked-by: Björn Töpel Link: https://lore.kernel.org/r/20260602205847.102825-3-varunrmallya@gmail.com Signed-off-by: Alexei Starovoitov --- arch/riscv/net/bpf_jit_comp64.c | 9 +++++++++ tools/testing/selftests/bpf/progs/verifier_jit_inline.c | 2 ++ 2 files changed, 11 insertions(+) (limited to 'tools') diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 2f1109dbf105..e2c70c70cca8 100644 --- a/arch/riscv/net/bpf_jit_comp64.c 
+++ b/arch/riscv/net/bpf_jit_comp64.c @@ -1808,6 +1808,13 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, break; } + /* Implement helper call to bpf_get_current_task/_btf() inline */ + if (insn->src_reg == 0 && (insn->imm == BPF_FUNC_get_current_task || + insn->imm == BPF_FUNC_get_current_task_btf)) { + emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_TP, ctx); + break; + } + mark_call(ctx); ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr, &fixed_addr); @@ -2138,6 +2145,8 @@ bool bpf_jit_inlines_helper_call(s32 imm) { switch (imm) { case BPF_FUNC_get_smp_processor_id: + case BPF_FUNC_get_current_task: + case BPF_FUNC_get_current_task_btf: return true; default: return false; diff --git a/tools/testing/selftests/bpf/progs/verifier_jit_inline.c b/tools/testing/selftests/bpf/progs/verifier_jit_inline.c index 885ff69a3a62..76d80605ec7f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_jit_inline.c +++ b/tools/testing/selftests/bpf/progs/verifier_jit_inline.c @@ -10,6 +10,8 @@ __arch_x86_64 __jited(" addq %gs:{{.*}}, %rax") __arch_arm64 __jited(" mrs x8, SP_EL0") +__arch_riscv64 +__jited(" mv a5, tp") int inline_bpf_get_current_task(void) { bpf_get_current_task(); -- cgit v1.2.3 From 5477d55f351fea3eeb2c5c77a9224eed0fd4d6a9 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Tue, 2 Jun 2026 23:09:31 +0800 Subject: selftests/bpf: Add tests to verify the fix of encapsulating VxLAN in lwt Add two tests to verify the transport header of skb has been set when encapsulating VxLAN using the bpf_lwt_push_encap() helper. 1. VxLAN over IPv4. 2. VxLAN over IPv6. 
Without the fix, the tests would fail: lwt_ip_encap_vxlan:FAIL:transport_hdr offset unexpected transport_hdr offset: actual 70 != expected 20 #208 lwt_ip_encap_vxlan_ipv4:FAIL lwt_ip_encap_vxlan:FAIL:transport_hdr offset unexpected transport_hdr offset: actual 110 != expected 40 #209 lwt_ip_encap_vxlan_ipv6:FAIL The unexpected offsets are: outer encap headers (IPv4: iphdr+udp+vxlan+eth = 50 bytes, IPv6: ipv6hdr+udp+vxlan+eth = 70 bytes) plus the inner IP header (20 or 40 bytes), because without the fix transport_header still points at the inner transport layer instead of the outer UDP header. Assisted-by: Claude:claude-sonnet-4-6 Cc: Leon Hwang Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20260602150931.49629-3-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/lwt_ip_encap.c | 145 +++++++++++++++++++ .../selftests/bpf/progs/test_lwt_ip_encap.c | 155 +++++++++++++++++++-- 2 files changed, 290 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c index b6391af5f6f9..6606f0ed9a9a 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c +++ b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c @@ -3,6 +3,7 @@ #include "network_helpers.h" #include "test_progs.h" +#include "test_lwt_ip_encap.skel.h" #define BPF_FILE "test_lwt_ip_encap.bpf.o" @@ -32,6 +33,9 @@ #define IP6_ADDR_8 "fb08::1" #define IP6_ADDR_GRE "fb10::1" +#define IP4_ADDR_VXLAN "172.16.17.100" +#define IP6_ADDR_VXLAN "fb11::1" + #define IP6_ADDR_SRC IP6_ADDR_1 #define IP6_ADDR_DST IP6_ADDR_4 @@ -538,3 +542,144 @@ void test_lwt_ip_encap_ipv4(void) if (test__start_subtest("ingress")) lwt_ip_encap(IPV4_ENCAP, INGRESS, ""); } + +/* + * VxLAN Setup/topology: + * + * NS1 (IP*_ADDR_1) NS2 NS3 (IP*_ADDR_4) + * [ping src] + * | top route + * veth1 (LWT encap) <<-- veth2 veth3 <<-- veth4 (ping dst) + * | ^ + * (bottom route) | (inner 
pkt) + * v bottom route | + * veth5 -->> veth6 veth7 -->> veth8 (vxlan decap) + * (IP*_ADDR_VXLAN) + * + * Add the VxLAN endpoint addresses to NS3's veth8, create standard + * VxLAN decap devices bound to those addresses, and install routes so + * NS1/NS2 can reach the endpoints via the bottom route. NS2 here is to + * make sure the LWT-encap VxLAN packets are routed to NS3 correctly. + */ +static int setup_vxlan_routes(const char *ns3, const char *ns1, const char *ns2) +{ + struct nstoken *nstoken; + + nstoken = open_netns(ns3); + if (!ASSERT_OK_PTR(nstoken, "open ns3 for vxlan")) + return -1; + + SYS(fail_close, "ip a add %s/32 dev veth8", IP4_ADDR_VXLAN); + SYS(fail_close, "ip -6 a add %s/128 dev veth8", IP6_ADDR_VXLAN); + /* + * Standard VxLAN devices to decap the encapsulated packets. The inner + * Ethernet frame uses a broadcast dst MAC so the IP stack accepts it + * without ARP or FDB configuration. + */ + SYS(fail_close, "ip link add vxlan4 type vxlan id 1 dstport 4789 local %s dev veth8 nolearning noudpcsum", + IP4_ADDR_VXLAN); + SYS(fail_close, "ip link set vxlan4 up"); + SYS(fail_close, "ip link add vxlan6 type vxlan id 1 dstport 4789 local %s dev veth8 nolearning udp6zerocsumrx", + IP6_ADDR_VXLAN); + SYS(fail_close, "ip link set vxlan6 up"); + close_netns(nstoken); + + SYS(fail, "ip -n %s route add %s/32 dev veth5 via %s", + ns1, IP4_ADDR_VXLAN, IP4_ADDR_6); + SYS(fail, "ip -n %s route add %s/32 dev veth7 via %s", + ns2, IP4_ADDR_VXLAN, IP4_ADDR_8); + SYS(fail, "ip -n %s -6 route add %s/128 dev veth5 via %s", + ns1, IP6_ADDR_VXLAN, IP6_ADDR_6); + SYS(fail, "ip -n %s -6 route add %s/128 dev veth7 via %s", + ns2, IP6_ADDR_VXLAN, IP6_ADDR_8); + return 0; + +fail_close: + close_netns(nstoken); +fail: + return -1; +} + +static void lwt_ip_encap_vxlan(bool ipv4_encap) +{ + char ns1[NETNS_NAME_SIZE] = NETNS_BASE "-1-"; + char ns2[NETNS_NAME_SIZE] = NETNS_BASE "-2-"; + char ns3[NETNS_NAME_SIZE] = NETNS_BASE "-3-"; + const char *sec = ipv4_encap ? 
"encap_vxlan" : "encap_vxlan6"; + int expected_offset = ipv4_encap ? (int)sizeof(struct iphdr) + : (int)sizeof(struct ipv6hdr); + struct test_lwt_ip_encap *skel = NULL; + int thdr_offset, err; + + if (!ASSERT_OK(create_ns(ns1, NETNS_NAME_SIZE), "create ns1")) + goto out; + if (!ASSERT_OK(create_ns(ns2, NETNS_NAME_SIZE), "create ns2")) + goto out; + if (!ASSERT_OK(create_ns(ns3, NETNS_NAME_SIZE), "create ns3")) + goto out; + + if (!ASSERT_OK(setup_network(ns1, ns2, ns3, ""), "setup network")) + goto out; + + if (!ASSERT_OK(setup_vxlan_routes(ns3, ns1, ns2), "setup vxlan routes")) + goto out; + + skel = test_lwt_ip_encap__open(); + if (!ASSERT_OK_PTR(skel, "test_lwt_ip_encap__open")) + goto out; + + bpf_program__set_autoload(skel->progs.bpf_lwt_encap_gre, false); + bpf_program__set_autoload(skel->progs.bpf_lwt_encap_gre6, false); + bpf_program__set_autoload(skel->progs.bpf_lwt_encap_vxlan, false); + bpf_program__set_autoload(skel->progs.bpf_lwt_encap_vxlan6, false); + bpf_program__set_autoload(skel->progs.fexit_lwt_push_ip_encap, true); + skel->rodata->tgt_ip_version = ipv4_encap ? 4 : 6; + + err = test_lwt_ip_encap__load(skel); + if (!ASSERT_OK(err, "test_lwt_ip_encap__load")) + goto out; + + err = test_lwt_ip_encap__attach(skel); + if (!ASSERT_OK(err, "test_lwt_ip_encap__attach")) + goto out; + + /* Remove the direct NS2->DST route so packets must go via LWT encap. 
*/ + SYS(out, "ip -n %s route del %s/32 dev veth3", ns2, IP4_ADDR_DST); + SYS(out, "ip -n %s -6 route del %s/128 dev veth3", ns2, IP6_ADDR_DST); + + if (ipv4_encap) + SYS(out, "ip -n %s route add %s encap bpf xmit obj %s sec %s dev veth1", + ns1, IP4_ADDR_DST, BPF_FILE, sec); + else + SYS(out, "ip -n %s -6 route add %s encap bpf xmit obj %s sec %s dev veth1", + ns1, IP6_ADDR_DST, BPF_FILE, sec); + + skel->bss->fexit_triggered = false; + + if (ipv4_encap) + SYS(out, "ip netns exec %s ping -c 1 -W1 %s", ns1, IP4_ADDR_DST); + else + SYS(out, "ip netns exec %s ping6 -c 1 -W1 %s", ns1, IP6_ADDR_DST); + + if (!ASSERT_TRUE(skel->bss->fexit_triggered, "fexit_triggered")) + goto out; + + thdr_offset = (int)skel->bss->transport_hdr - (int)skel->bss->network_hdr; + ASSERT_EQ(thdr_offset, expected_offset, "transport_hdr offset"); + +out: + test_lwt_ip_encap__destroy(skel); + SYS_NOFAIL("ip netns del %s", ns1); + SYS_NOFAIL("ip netns del %s", ns2); + SYS_NOFAIL("ip netns del %s", ns3); +} + +void test_lwt_ip_encap_vxlan_ipv4(void) +{ + lwt_ip_encap_vxlan(IPV4_ENCAP); +} + +void test_lwt_ip_encap_vxlan_ipv6(void) +{ + lwt_ip_encap_vxlan(IPV6_ENCAP); +} diff --git a/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c index d6cb986e7533..4a934fccf8f5 100644 --- a/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c +++ b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c @@ -1,11 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include "vmlinux.h" #include -#include -#include -#include #include #include +#include struct grehdr { __be16 flags; @@ -64,13 +62,13 @@ int bpf_lwt_encap_gre6(struct __sk_buff *skb) hdr.ip6hdr.nexthdr = 47; /* IPPROTO_GRE */ hdr.ip6hdr.hop_limit = 0x40; /* fb01::1 */ - hdr.ip6hdr.saddr.s6_addr[0] = 0xfb; - hdr.ip6hdr.saddr.s6_addr[1] = 1; - hdr.ip6hdr.saddr.s6_addr[15] = 1; + hdr.ip6hdr.saddr.in6_u.u6_addr8[0] = 0xfb; + hdr.ip6hdr.saddr.in6_u.u6_addr8[1] = 1; + 
hdr.ip6hdr.saddr.in6_u.u6_addr8[15] = 1; /* fb10::1 */ - hdr.ip6hdr.daddr.s6_addr[0] = 0xfb; - hdr.ip6hdr.daddr.s6_addr[1] = 0x10; - hdr.ip6hdr.daddr.s6_addr[15] = 1; + hdr.ip6hdr.daddr.in6_u.u6_addr8[0] = 0xfb; + hdr.ip6hdr.daddr.in6_u.u6_addr8[1] = 0x10; + hdr.ip6hdr.daddr.in6_u.u6_addr8[15] = 1; hdr.greh.protocol = skb->protocol; @@ -82,4 +80,141 @@ int bpf_lwt_encap_gre6(struct __sk_buff *skb) return BPF_LWT_REROUTE; } +#define VXLAN_PORT 4789 +#define VXLAN_FLAGS 0x08000000 +#define VXLAN_VNI 1 + +#define ETH_ALEN 6 /* Octets in one ethernet addr */ +#define ETH_P_IP 0x0800 /* Internet Protocol packet */ +#define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */ + +static const __u8 bcast[ETH_ALEN] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +}; + +static const __u8 srcmac[ETH_ALEN] = { + 0x02, 0x00, 0x00, 0x00, 0x00, 0x01, +}; + +SEC("encap_vxlan") +int bpf_lwt_encap_vxlan(struct __sk_buff *skb) +{ + struct encap_hdr { + struct iphdr iph; + struct udphdr udph; + struct vxlanhdr vxh; + struct ethhdr eth; + } __attribute__((__packed__)) hdr; + int err; + + memset(&hdr, 0, sizeof(hdr)); + + hdr.iph.ihl = 5; + hdr.iph.version = 4; + hdr.iph.ttl = 0x40; + hdr.iph.protocol = 17; /* IPPROTO_UDP */ + hdr.iph.tot_len = bpf_htons(skb->len + sizeof(hdr)); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + hdr.iph.saddr = 0x640510ac; /* 172.16.5.100 */ + hdr.iph.daddr = 0x641110ac; /* 172.16.17.100 */ +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + hdr.iph.saddr = 0xac100564; /* 172.16.5.100 */ + hdr.iph.daddr = 0xac101164; /* 172.16.17.100 */ +#else +#error "Fix your compiler's __BYTE_ORDER__?!" 
+#endif + + hdr.udph.source = bpf_htons(VXLAN_PORT); + hdr.udph.dest = bpf_htons(VXLAN_PORT); + hdr.udph.len = bpf_htons(skb->len + sizeof(hdr.udph) + sizeof(hdr.vxh) + + sizeof(hdr.eth)); + + hdr.vxh.vx_flags = bpf_htonl(VXLAN_FLAGS); + hdr.vxh.vx_vni = bpf_htonl(VXLAN_VNI << 8); + + __builtin_memcpy(hdr.eth.h_dest, bcast, ETH_ALEN); + __builtin_memcpy(hdr.eth.h_source, srcmac, ETH_ALEN); + hdr.eth.h_proto = bpf_htons(ETH_P_IP); + + err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, sizeof(hdr)); + if (err) + return BPF_DROP; + + return BPF_LWT_REROUTE; +} + +SEC("encap_vxlan6") +int bpf_lwt_encap_vxlan6(struct __sk_buff *skb) +{ + struct encap_hdr { + struct ipv6hdr ip6hdr; + struct udphdr udph; + struct vxlanhdr vxh; + struct ethhdr eth; + } __attribute__((__packed__)) hdr; + int err; + + memset(&hdr, 0, sizeof(hdr)); + + hdr.ip6hdr.version = 6; + hdr.ip6hdr.nexthdr = 17; /* IPPROTO_UDP */ + hdr.ip6hdr.hop_limit = 0x40; + hdr.ip6hdr.payload_len = bpf_htons(skb->len + sizeof(hdr.udph) + sizeof(hdr.vxh) + + sizeof(hdr.eth)); + /* fb05::1 */ + hdr.ip6hdr.saddr.in6_u.u6_addr8[0] = 0xfb; + hdr.ip6hdr.saddr.in6_u.u6_addr8[1] = 0x05; + hdr.ip6hdr.saddr.in6_u.u6_addr8[15] = 1; + /* fb11::1 */ + hdr.ip6hdr.daddr.in6_u.u6_addr8[0] = 0xfb; + hdr.ip6hdr.daddr.in6_u.u6_addr8[1] = 0x11; + hdr.ip6hdr.daddr.in6_u.u6_addr8[15] = 1; + + hdr.udph.source = bpf_htons(VXLAN_PORT); + hdr.udph.dest = bpf_htons(VXLAN_PORT); + hdr.udph.len = bpf_htons(skb->len + sizeof(hdr.udph) + sizeof(hdr.vxh) + + sizeof(hdr.eth)); + + hdr.vxh.vx_flags = bpf_htonl(VXLAN_FLAGS); + hdr.vxh.vx_vni = bpf_htonl(VXLAN_VNI << 8); + + __builtin_memcpy(hdr.eth.h_dest, bcast, ETH_ALEN); + __builtin_memcpy(hdr.eth.h_source, srcmac, ETH_ALEN); + hdr.eth.h_proto = bpf_htons(ETH_P_IPV6); + + err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, sizeof(hdr)); + if (err) + return BPF_DROP; + + return BPF_LWT_REROUTE; +} + +volatile const int tgt_ip_version; + +__u16 transport_hdr = 0; +__u16 network_hdr = 0; +bool 
fexit_triggered = false; + +SEC("?fexit/bpf_lwt_push_ip_encap") +int BPF_PROG(fexit_lwt_push_ip_encap, struct sk_buff *skb, void *hdr, u32 len, bool ingress, + int retval) +{ + struct iphdr *iph; + + if (retval || fexit_triggered) + return 0; + + iph = (typeof(iph)) (skb->head + skb->network_header); + if (iph->version != tgt_ip_version) + return 0; + + if ((iph->version == 4 && iph->protocol == 17 /* IPPROTO_UDP */) || + (iph->version == 6 && ((struct ipv6hdr *)iph)->nexthdr == 17 /* IPPROTO_UDP */)) { + fexit_triggered = true; + transport_hdr = skb->transport_header; + network_hdr = skb->network_header; + } + return 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 6fa2839893e3db43566e623f12805daeca64d9c4 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Fri, 5 Jun 2026 14:02:41 +0000 Subject: selftests/bpf: Restrict bpf_set_retval argument in sk_bypass_prot_mem Test sk_bypass_prot_mem passes an unchecked value as argument to helper bpf_set_retval(). The argument can be outside the valid range enforced by the strict retval validation added in the next patch. Restrict the argument to -EFAULT when it is outside the valid range, so the test will not be rejected by the verifier when retval validation is enforced. 
Signed-off-by: Xu Kuohai Link: https://lore.kernel.org/r/20260605140243.664590-2-xukuohai@huaweicloud.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c index 09a00d11ffcc..bae5283fca6b 100644 --- a/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c +++ b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c @@ -5,6 +5,7 @@ #include #include #include +#include "err.h" extern int tcp_memory_per_cpu_fw_alloc __ksym; extern int udp_memory_per_cpu_fw_alloc __ksym; @@ -97,6 +98,7 @@ int sock_create(struct bpf_sock *ctx) return 1; err: + set_if_not_errno_or_zero(err, -EFAULT); bpf_set_retval(err); return 0; } -- cgit v1.2.3 From 7913cdb54ee3271f608ad518bf8e75ad72cc3a3d Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Fri, 5 Jun 2026 14:02:43 +0000 Subject: selftests/bpf: Add tests for bpf_set_retval validation Add verifier tests to validate bpf_set_retval argument for cgroup program types. 
Reviewed-by: Emil Tsalapatis #v1 Signed-off-by: Xu Kuohai Link: https://lore.kernel.org/r/20260605140243.664590-4-xukuohai@huaweicloud.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_set_retval.c | 107 +++++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/verifier_set_retval.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 219ff2969868..89779d897aba 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -117,6 +117,7 @@ #include "verifier_xdp.skel.h" #include "verifier_xdp_direct_packet_access.skel.h" #include "verifier_bits_iter.skel.h" +#include "verifier_set_retval.skel.h" #include "verifier_lsm.skel.h" #include "verifier_jit_inline.skel.h" #include "irq.skel.h" @@ -266,6 +267,7 @@ void test_verifier_xadd(void) { RUN(verifier_xadd); } void test_verifier_xdp(void) { RUN(verifier_xdp); } void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); } void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); } +void test_verifier_set_retval(void) { RUN(verifier_set_retval); } void test_verifier_lsm(void) { RUN(verifier_lsm); } void test_irq(void) { RUN(irq); } void test_verifier_mtu(void) { RUN(verifier_mtu); } diff --git a/tools/testing/selftests/bpf/progs/verifier_set_retval.c b/tools/testing/selftests/bpf/progs/verifier_set_retval.c new file mode 100644 index 000000000000..1415cd15cede --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_set_retval.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include "bpf_misc.h" + +SEC("lsm_cgroup/socket_create") +__description("lsm_cgroup bpf_set_retval success") +__success +int BPF_PROG(lsm_cgroup_set_retval_zero_valid, int family, int type, 
int protocol, int kern) +{ + bpf_set_retval(0); + return 0; +} + +SEC("lsm_cgroup/socket_create") +__description("lsm_cgroup bpf_set_retval valid errno") +__success +int BPF_PROG(lsm_cgroup_set_retval_negative_valid, int family, int type, int protocol, int kern) +{ + bpf_set_retval(-12); + return 0; +} + +SEC("lsm_cgroup/socket_create") +__description("lsm_cgroup bpf_set_retval invalid negative value") +__failure __msg("should have been in [-4095, 0]") +int BPF_PROG(lsm_cgroup_set_retval_negative_invalid, int family, int type, int protocol, int kern) +{ + bpf_set_retval(-4096); + return 0; +} + +SEC("lsm_cgroup/socket_create") +__description("lsm_cgroup bpf_set_retval invalid positive value") +__failure __msg("should have been in [-4095, 0]") +int BPF_PROG(lsm_cgroup_set_retval_positive_invalid, int family, int type, int protocol, int kern) +{ + bpf_set_retval(1); + return 0; +} + +SEC("cgroup/dev") +__description("cgroup_device bpf_set_retval success") +__success +int cgroup_dev_set_retval_0(struct bpf_cgroup_dev_ctx *ctx) +{ + bpf_set_retval(0); + return 1; +} + +SEC("cgroup/dev") +__description("cgroup_device bpf_set_retval valid errno") +__success +int cgroup_dev_set_retval_neg_maxerrno(struct bpf_cgroup_dev_ctx *ctx) +{ + bpf_set_retval(-4095); + return 1; +} + +SEC("cgroup/dev") +__description("cgroup_device bpf_set_retval invalid positive value") +__failure __msg("should have been in [-4095, 0]") +int cgroup_dev_set_retval_1(struct bpf_cgroup_dev_ctx *ctx) +{ + bpf_set_retval(1); + return 1; +} + +SEC("cgroup/dev") +__description("cgroup_device bpf_set_retval invalid negative value") +__failure __msg("should have been in [-4095, 0]") +int cgroup_dev_set_retval_neg_4096(struct bpf_cgroup_dev_ctx *ctx) +{ + bpf_set_retval(-4096); + return 1; +} + +SEC("cgroup/dev") +__description("bpf_set_retval bounds check survives state pruning") +__failure __msg("should have been in [-4095, 0]") +__naked int cgroup_dev_set_retval_pruning_bypass(struct bpf_cgroup_dev_ctx 
*ctx) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 != 0 goto 1f;" + "r0 = r0;" + "r0 = r0;" + "r0 = r0;" + "r0 = r0;" + "goto 2f;" + "1:" + "call %[bpf_get_prandom_u32];" + "2:" + "r1 = r0;" + "call %[bpf_set_retval];" + "r0 = 1;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_set_retval) + : __clobber_common + ); +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From d5e5745f8a1dfd0d026fe36eb1265268bce4988c Mon Sep 17 00:00:00 2001 From: Sean Young Date: Fri, 5 Jun 2026 16:14:16 +0100 Subject: selftests/bpf: Fix test_lirc test Since commit 68a99f6a0ebf ("media: lirc: report ir receiver overflow"), the rc-loopback driver does not accept edges over 50ms, as these are never seen in real life ir protocols. Fix this. Signed-off-by: Sean Young Link: https://lore.kernel.org/r/20260605151417.777614-1-sean@mess.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c | 4 ++-- tools/testing/selftests/bpf/test_lirc_mode2_user.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c index 7a6620671a83..cbe4284c032f 100644 --- a/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c +++ b/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c @@ -13,9 +13,9 @@ int bpf_decoder(unsigned int *sample) if (LIRC_IS_PULSE(*sample)) { unsigned int duration = LIRC_VALUE(*sample); - if (duration & 0x10000) + if (duration & 0x1000) bpf_rc_keydown(sample, 0x40, duration & 0xffff, 0); - if (duration & 0x20000) + if (duration & 0x2000) bpf_rc_pointer_rel(sample, (duration >> 8) & 0xff, duration & 0xff); } diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c index 88e4aeab21b7..cd191da20d14 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c +++ 
b/tools/testing/selftests/bpf/test_lirc_mode2_user.c @@ -50,8 +50,8 @@ int main(int argc, char **argv) { struct bpf_object *obj; int ret, lircfd, progfd, inputfd; - int testir1 = 0x1dead; - int testir2 = 0x20101; + int testir1 = 0x1ead; + int testir2 = 0x2101; u32 prog_ids[10], prog_flags[10], prog_cnt; if (argc != 3) { @@ -125,7 +125,7 @@ int main(int argc, char **argv) } if (event.type == EV_MSC && event.code == MSC_SCAN && - event.value == 0xdead) { + event.value == 0x1ead) { break; } } -- cgit v1.2.3 From 6c3e8a4d476521bc33362e90b2569548f1adb7a4 Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Fri, 5 Jun 2026 18:20:18 -0400 Subject: selftests/bpf: libarena: Add rbtree data structure Add a native red-black tree data structure to libarena. The data structure supports multiple APIs (key-value based, node based) with which users can query and modify it. The tree uses the libarena memory allocator to manage its data. Signed-off-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260605222020.5231-2-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- .../bpf/libarena/include/libarena/rbtree.h | 83 ++ .../bpf/libarena/selftests/test_rbtree.bpf.c | 968 ++++++++++++++++++ .../selftests/bpf/libarena/src/rbtree.bpf.c | 1047 ++++++++++++++++++++ 3 files changed, 2098 insertions(+) create mode 100644 tools/testing/selftests/bpf/libarena/include/libarena/rbtree.h create mode 100644 tools/testing/selftests/bpf/libarena/selftests/test_rbtree.bpf.c create mode 100644 tools/testing/selftests/bpf/libarena/src/rbtree.bpf.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/rbtree.h b/tools/testing/selftests/bpf/libarena/include/libarena/rbtree.h new file mode 100644 index 000000000000..486428911d96 --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/include/libarena/rbtree.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause */ + +#pragma once + +#define RB_MAXLVL_PRINT (16) + +struct rbnode; + +struct 
rbnode { + struct rbnode __arena *parent; + union { + struct { + struct rbnode __arena *left; + struct rbnode __arena *right; + }; + + struct rbnode __arena *child[2]; + }; + uint64_t key; + /* Used as a linked list or to store KV pairs. */ + union { + struct rbnode __arena *next; + uint64_t value; + }; + bool is_red; +}; + +/* + * Does the rbtree allocate its own nodes, or do they get + * allocated by the caller? + */ +enum rbtree_alloc { + RB_ALLOC, + RB_NOALLOC, +}; + +/* + * Specify the behavior of rbtree insertions when the key is + * already present in the tree. + * + * RB_DEFAULT: Default behavior, reject the new insert. + * + * RB_UPDATE: Update the existing value in the rbtree. + * This updates the node itself, not just the value in + * the existing node. + * + * RB_DUPLICATE: Allow nodes with identical keys in the rbtree. + * Finding/popping/removing a key acts on any of the nodes + * with the appropriate key - there is no ordering by time + * of insertion. + */ +enum rbtree_insert_mode { + RB_DEFAULT, + RB_UPDATE, + RB_DUPLICATE, +}; + +struct rbtree { + struct rbnode __arena *root; + enum rbtree_alloc alloc; + enum rbtree_insert_mode insert; +}; + +#ifdef __BPF__ +struct rbtree __arena *rb_create(enum rbtree_alloc alloc, enum rbtree_insert_mode insert); + +int rb_destroy(struct rbtree __arena *rbtree); +int rb_insert(struct rbtree __arena *rbtree, u64 key, u64 value); +int rb_remove(struct rbtree __arena *rbtree, u64 key); +int rb_find(struct rbtree __arena *rbtree, u64 key, u64 *value); +int rb_print(struct rbtree __arena *rbtree); +int rb_least(struct rbtree __arena *rbtree, u64 *key, u64 *value); +int rb_pop(struct rbtree __arena *rbtree, u64 *key, u64 *value); + +int rb_insert_node(struct rbtree __arena *rbtree, struct rbnode __arena *node); +int rb_remove_node(struct rbtree __arena *rbtree, struct rbnode __arena *node); + +struct rbnode __arena *rb_node_alloc(u64 key, u64 value); +void rb_node_free(struct rbnode __arena *rbnode); + +int 
rb_integrity_check(struct rbtree __arena *rbtree); + +#endif /* __BPF__ */ diff --git a/tools/testing/selftests/bpf/libarena/selftests/test_rbtree.bpf.c b/tools/testing/selftests/bpf/libarena/selftests/test_rbtree.bpf.c new file mode 100644 index 000000000000..856c484a009a --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/selftests/test_rbtree.bpf.c @@ -0,0 +1,968 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause + +#include + +#include +#include + +typedef struct node_ctx __arena *node_ctx; + +struct node_ctx { + struct rbnode rbnode; + node_ctx next; +}; + +static const u64 keys[] = { 51, 43, 37, 3, 301, 46, 383, 990, 776, 729, 871, 96, 189, 213, + 376, 167, 131, 939, 626, 119, 374, 700, 772, 154, 883, 620, 641, 5, + 428, 516, 105, 622, 988, 811, 931, 973, 246, 690, 934, 744, 210, 311, + 32, 255, 960, 830, 523, 429, 541, 738, 705, 774, 715, 446, 98, 578, + 777, 191, 279, 91, 767 }; + +static const u64 morekeys[] = { 173, 636, 1201, 8642, 5957, 3617, 4586, 8053, 6551, 7592, 1748, 1589, 8644, 9918, 6977, + 4448, 5852, 4640, 9717, 2303, 7424, 7695, 2334, 8876, 8618, 5745, 7134, 2178, 5280, 2140, 1138, + 5083, 8922, 1516, 2437, 2488, 4307, 4329, 5088, 8456, 5938, 1441, 1684, 5750, 721, 1107, 2089, + 9737, 4687, 5016, 4849, 8193, 9603, 9147, 5992, 166, 6721, 812, 4144, 6237, 6509, 3466, 9255, + 7767, 3960, 6759, 2968, 6046, 9784, 8395, 2619, 1711, 528, 6424, 9084, 3179, 1342, 5676, 9445, + 5691, 6678, 8487, 1627, 998, 6178, 2229, 1987, 3319, 572, 169, 2161, 3018, 5439, 7287, 7265, 5995, + 5003, 5857, 2836, 5634, 4735, 9261, 8287, 5359, 533, 1406, 9573, 4026, 714, 3956, 1722, 6395, + 9648, 3887, 7185, 470, 4482, 4997, 841, 8913, 9946, 3999, 9357, 9847, 277, 8184, 8704, 6766, 3323, + 5468, 8638, 7905, 8858, 6142, 3685, 3452, 4689, 8878, 8836, 158, 831, 7914, 3031, 8374, 4921, + 4207, 3460, 5547, 3358, 1083, 4619, 7818, 2962, 4879, 4583, 2172, 8819, 9830, 1194, 2666, 9812, + 5704, 8432, 5916, 6007, 6609, 4791, 1985, 3226, 2478, 9605, 5236, 8079, 3042, 1965, 
3539, 9704, + 4267, 6416, 760, 9968, 2983, 1190, 1964, 3211, 2870, 3106, 2794, 1542, 6916, 5986, 9096, 441, + 5894, 8353, 7765, 3757, 5732, 88, 3091, 5637, 6042, 8447, 4073, 6923, 5491, 7010, 3663, 5029, + 6162, 822, 4874, 7491, 5100, 3461, 6983, 2170, 1458, 1856, 648, 6272, 4887, 976, 2369, 5909, 4274, + 3324, 6968, 2312, 2271, 8891, 6268, 6581, 1610, 8880, 6194, 6144, 9764, 6915, 829, 3774, 2265, + 1752, 1314, 6377, 8760, 8004, 501, 4912, 9278, 1425, 9578, 7337, 307, 1885, 3151, 9617, 1647, + 2458, 3702, 6091, 8902, 5663, 9378, 7640, 3336, 557, 1644, 6848, 1559, 8821, 266, 4330, 9790, + 5920, 4222, 1143, 6248, 5792, 4847, 9726, 6303, 821, 6839, 6062, 7133, 3649, 9888, 2528, 1966, + 5456, 4914, 3615, 1543, 3206, 3353, 6097, 2800, 1424, 9094, 7920, 7243, 1394, 5464, 1707, 576, + 6524, 4261, 4187, 7889, 5336, 3377, 2921, 7244, 2766, 6584, 5514, 1387, 2957, 2258, 1077, 9979, + 1128, 876, 4056, 4668, 4532, 1982, 7093, 4184, 5460, 7588, 4704, 6717, 61, 3959, 1826, 2294, 18, + 8170, 9394, 8796, 7288, 7285, 7143, 148, 6676, 6603, 1051, 8225, 4169, 3230, 7697, 6971, 3454, + 7501, 9514, 394, 2339, 4993, 5606, 6060, 1297, 8273, 3012, 157, 8181, 6765, 7207, 1005, 8833, 1914, + 7456, 1846, 8375, 2741, 2074, 1712, 5286 }; + +SEC("syscall") +__weak int test_rbtree_find_nonexistent(void) +{ + u64 key = 0xdeadbeef; + u64 value = 0; + int ret; + + struct rbtree __arena *rbtree; + + rbtree = rb_create(RB_ALLOC, RB_DEFAULT); + if (!rbtree) + return 1; + + /* Should return -EINVAL */ + ret = rb_find(rbtree, key, &value); + if (!ret) + return 2; + + return rb_destroy(rbtree); +} + +SEC("syscall") +__weak int test_rbtree_insert_existing(void) +{ + u64 key = 525252; + u64 value = 24; + int ret; + + struct rbtree __arena *rbtree; + + rbtree = rb_create(RB_ALLOC, RB_DEFAULT); + if (!rbtree) + return 1; + + ret = rb_insert(rbtree, key, value); + if (ret) + return 2; + + /* Should return -EALREADY. 
*/ + ret = rb_insert(rbtree, key, value); + if (ret != -EALREADY) { + return 3; + } + + return rb_destroy(rbtree); +} + +SEC("syscall") +__weak int test_rbtree_update_existing(void) +{ + u64 key = 33333; + u64 value; + int ret; + + struct rbtree __arena *rbtree; + + rbtree = rb_create(RB_ALLOC, RB_UPDATE); + if (!rbtree) + return 1; + + value = 52; + ret = rb_insert(rbtree, key, value); + if (ret) + return 2; + + ret = rb_find(rbtree, key, &value); + if (ret) + return 3; + + if (value != 52) + return 4; + + value = 65; + + /* Should succeed. */ + ret = rb_insert(rbtree, key, value); + if (ret) + return 5; + + /* Should be updated. */ + ret = rb_find(rbtree, key, &value); + if (ret) + return 6; + + if (value != 65) + return 7; + + return rb_destroy(rbtree); +} + +SEC("syscall") +__weak int test_rbtree_insert_one(void) +{ + u64 key = 202020; + u64 value = 0xbadcafe; + int ret; + + struct rbtree __arena *rbtree; + + rbtree = rb_create(RB_ALLOC, RB_UPDATE); + if (!rbtree) + return 1; + + ret = rb_insert(rbtree, key, value); + if (ret) + return 2; + + ret = rb_find(rbtree, key, &value); + if (ret) + return 3; + + if (value != 0xbadcafe) + return 4; + + return rb_destroy(rbtree); +} + +SEC("syscall") +__weak int test_rbtree_insert_ten(void) +{ + u64 key, value; + int ret, i; + + struct rbtree __arena *rbtree; + + rbtree = rb_create(RB_ALLOC, RB_UPDATE); + if (!rbtree) + return 1; + + for (i = 0; i < 10 && can_loop; i++) { + key = keys[i]; + ret = rb_insert(rbtree, key, 2 * key); + if (ret) + return 2 + 3 * i; + + /* Read it back. */ + ret = rb_find(rbtree, key, &value); + if (ret) + return 2 + 3 * i + 1; + + if (value != 2 * key) + return 2 + 3 * i + 2; + } + + /* Go find all inserted pairs. 
*/ + for (i = 0; i < 10 && can_loop; i++) { + key = keys[i]; + + ret = rb_find(rbtree, key, &value); + if (ret) + return 35 + 2 * i; + + if (value != 2 * key) + return 35 + 2 * i + 1; + } + + return rb_destroy(rbtree); +} + +SEC("syscall") +__weak int test_rbtree_duplicate(void) +{ + u64 key = 0x121212; + u64 value; + int ret, i; + + struct rbtree __arena *rbtree; + + rbtree = rb_create(RB_ALLOC, RB_DUPLICATE); + if (!rbtree) + return 1; + + for (i = 0; i < 10 && can_loop; i++) { + ret = rb_insert(rbtree, key, 2 * key); + if (ret) + return 2 + 3 * i; + + /* Read it back. */ + ret = rb_find(rbtree, key, &value); + if (ret) + return 2 + 3 * i + 1; + + if (value != 2 * key) + return 2 + 3 * i + 2; + } + + /* Go find all inserted copies and remove them. */ + for (i = 0; i < 10 && can_loop; i++) { + ret = rb_find(rbtree, key, &value); + if (ret) { + rb_print(rbtree); + return 35 + 3 * i; + } + + if (value != 2 * key) + return 35 + 3 * i + 1; + + ret = rb_remove(rbtree, key); + if (ret) + return 35 + 3 * i + 2; + } + + return rb_destroy(rbtree); +} + +static inline int +clean_up_noalloc_tree(struct rbtree __arena *rbtree) +{ + node_ctx nodec; + int ret; + + if (rbtree->alloc != RB_NOALLOC) + return -EINVAL; + + /* Can't destroy an RB_NOALLOC tree that still has nodes. 
*/ + if (rb_destroy(rbtree) != -EBUSY) + return -EINVAL; + + while (rbtree->root && can_loop) { + nodec = (node_ctx)arena_container_of(rbtree->root, struct node_ctx, rbnode); + ret = rb_remove_node(rbtree, &nodec->rbnode); + if (ret) + return ret; + + arena_free(nodec); + } + + return 0; +} + +int insert_many(enum rbtree_alloc alloc, enum rbtree_insert_mode insert) +{ + const size_t numkeys = sizeof(keys) / sizeof(keys[0]); + node_ctx nodec; + u64 key, value; + int ret; + int i; + + struct rbtree __arena *rbtree; + + rbtree = rb_create(alloc, insert); + if (!rbtree) + return 1; + + for (i = 0; i < numkeys && can_loop; i++) { + key = keys[i]; + if (rbtree->alloc != RB_ALLOC) { + nodec = arena_malloc(sizeof(*nodec)); + if (!nodec) { + arena_stderr("out of memory\n"); + return -ENOMEM; + } + nodec->rbnode.key = key; + nodec->rbnode.value = 2 * key; + ret = rb_insert_node(rbtree, &nodec->rbnode); + } else { + ret = rb_insert(rbtree, key, 2 * key); + } + if (ret) + return 2 + 3 * i; + + /* Read it back. */ + ret = rb_find(rbtree, key, &value); + if (ret) + return 2 + 3 * i + 1; + + if (value != 2 * key) + return 2 + 3 * i + 2; + } + + /* Go find all inserted pairs. */ + for (i = 0; i < numkeys && can_loop; i++) { + key = keys[i]; + + ret = rb_find(rbtree, key, &value); + if (ret) + return 302 + 2 * i; + + if (value != 2 * key) + return 302 + 2 * i + 1; + } + + /* RB_ALLOC trees are destroyed while still having elements. */ + if (rbtree->alloc == RB_ALLOC) + return rb_destroy(rbtree); + + /* Otherwise manually clean up the tree. 
*/ + if (clean_up_noalloc_tree(rbtree)) + return 5; + + return rb_destroy(rbtree); +} + +SEC("syscall") +__weak int test_rbtree_remove_one(void) +{ + u64 key = 20, value = 5, newvalue; + int ret; + + struct rbtree __arena *rbtree; + + rbtree = rb_create(RB_ALLOC, RB_DEFAULT); + if (!rbtree) + return 1; + + ret = rb_find(rbtree, key, &newvalue); + if (!ret) + return 2; + + ret = rb_insert(rbtree, key, value); + if (ret) + return 3; + + ret = rb_find(rbtree, key, &newvalue); + if (ret || value != newvalue) + return 4; + + ret = rb_remove(rbtree, key); + if (ret) + return 5; + + ret = rb_find(rbtree, key, &newvalue); + if (!ret) + return 6; + + return rb_destroy(rbtree); +} + +static __always_inline int remove_many_verify_all_present(struct rbtree __arena *rbtree) +{ + const size_t numkeys = sizeof(morekeys) / sizeof(morekeys[0]); + u64 value; + int ret; + int i; + + for (i = 0; i < numkeys && can_loop; i++) { + u64 key = morekeys[i]; + + ret = rb_find(rbtree, key, &value); + if (ret) + return -1; + + if (value != 2 * key) + return -1; + } + + return 0; +} + +static __always_inline int remove_many_verify_remaining(struct rbtree __arena *rbtree) +{ + const size_t numkeys = sizeof(morekeys) / sizeof(morekeys[0]); + u64 value; + int ret; + int i; + + for (i = 0; i < numkeys && can_loop; i += 2) { + u64 key = morekeys[i]; + + ret = rb_find(rbtree, key, &value); + if (!ret) + return -1; + + if (i + 1 >= numkeys) + break; + + key = morekeys[i + 1]; + ret = rb_find(rbtree, key, &value); + if (ret) + return -1; + + if (value != 2 * key) + return -1; + } + + for (i = 1; i < numkeys && can_loop; i += 2) { + u64 key = morekeys[i]; + + ret = rb_find(rbtree, key, &value); + if (ret) + return -1; + + if (value != 2 * key) + return -1; + } + + return 0; +} + +static __noinline int remove_many_alloc(struct rbtree __arena *rbtree) +{ + const size_t numkeys = sizeof(morekeys) / sizeof(morekeys[0]); + u64 value; + int ret; + int i; + + for (i = 0; i < numkeys && can_loop; i++) { + u64 
key = morekeys[i]; + + ret = rb_insert(rbtree, key, 2 * key); + if (ret) + return -1; + + if (rb_integrity_check(rbtree)) { + arena_stderr("iteration %d\n", i); + return -EINVAL; + } + + ret = rb_find(rbtree, key, &value); + if (ret) + return -1; + + if (value != 2 * key) + return -1; + } + + ret = remove_many_verify_all_present(rbtree); + if (ret) + return ret; + + for (i = 0; i < numkeys && can_loop; i += 2) { + u64 key = morekeys[i]; + + ret = rb_remove(rbtree, key); + if (ret) { + arena_stderr("Failed to remove %ld\n", key); + return -1; + } + + ret = rb_find(rbtree, key, &value); + if (!ret) + return -1; + } + + return remove_many_verify_remaining(rbtree); +} + +static __noinline int remove_many_noalloc(struct rbtree __arena *rbtree) +{ + const size_t numkeys = sizeof(morekeys) / sizeof(morekeys[0]); + node_ctx first = NULL, last = NULL; + u64 value; + int ret; + int i; + + for (i = 0; i < numkeys && can_loop; i++) { + u64 key = morekeys[i]; + node_ctx nodec = arena_malloc(sizeof(*nodec)); + + if (!nodec) { + arena_stderr("out of memory\n"); + return -ENOMEM; + } + nodec->rbnode.key = key; + nodec->rbnode.value = 2 * key; + nodec->next = NULL; + + if (!first) + first = nodec; + + if (last) + last->next = nodec; + last = nodec; + + ret = rb_insert_node(rbtree, &nodec->rbnode); + if (ret) + return -1; + + if (rb_integrity_check(rbtree)) { + arena_stderr("iteration %d\n", i); + return -EINVAL; + } + + ret = rb_find(rbtree, key, &value); + if (ret) + return -1; + + if (value != 2 * key) + return -1; + } + + ret = remove_many_verify_all_present(rbtree); + if (ret) + return ret; + + for (i = 0; i < numkeys && can_loop; i += 2) { + u64 key = morekeys[i]; + node_ctx nodec = first; + + if (!nodec || key != nodec->rbnode.key) + return -1; + + first = nodec->next ? 
nodec->next->next : NULL; + ret = rb_remove_node(rbtree, &nodec->rbnode); + if (ret) { + arena_stderr("Failed to remove %ld\n", key); + return -1; + } + + ret = rb_find(rbtree, key, &value); + if (!ret) + return -1; + } + + return remove_many_verify_remaining(rbtree); +} + +static inline int remove_many(enum rbtree_alloc alloc, + enum rbtree_insert_mode insert) +{ + int ret; + struct rbtree __arena *rbtree; + + rbtree = rb_create(alloc, insert); + if (!rbtree) + return -ENOMEM; + + ret = (alloc == RB_ALLOC) ? remove_many_alloc(rbtree) + : remove_many_noalloc(rbtree); + if (ret) + return ret; + + if (alloc == RB_ALLOC) + return rb_destroy(rbtree); + + ret = clean_up_noalloc_tree(rbtree); + if (ret) + return ret; + + return rb_destroy(rbtree); +} + +SEC("syscall") +__weak int test_rbtree_insert_many_update(void) +{ + return insert_many(RB_ALLOC, RB_UPDATE); +} + +SEC("syscall") +__weak int test_rbtree_insert_many_noalloc(void) +{ + return insert_many(RB_NOALLOC, RB_DUPLICATE); +} + +SEC("syscall") +__weak int test_rbtree_remove_many_update(void) +{ + return remove_many(RB_ALLOC, RB_UPDATE); +} + +SEC("syscall") +__weak int test_rbtree_remove_many_noalloc(void) +{ + return remove_many(RB_NOALLOC, RB_DUPLICATE); +} + +SEC("syscall") +__weak int test_rbtree_add_remove_circular(void) +{ + const size_t iters = 60; + const size_t prefill = 10; + const size_t numkeys = 50; + const size_t prefix = 400000; + u64 value, rmval; + int errval = 1; + u64 key; + int ret; + int i; + + struct rbtree __arena *rbtree; + + rbtree = rb_create(RB_ALLOC, RB_UPDATE); + if (!rbtree) + return 1; + + for (i = 0; i < prefill && can_loop; i++) { + ret = rb_insert(rbtree, prefix + (i % numkeys), i); + if (ret) + return errval; + + errval += 1; + } + + errval = 2 * 1000 * 1000; + + for (i = 0; i < prefill && can_loop; i++) { + /* Read it back. 
*/ + ret = rb_find(rbtree, prefix + (i % numkeys), &value); + if (ret) + return errval; + + if (value != i) + return errval; + } + + errval = 3 * 1000 * 1000; + + for (i = prefill; i < iters && can_loop; i++) { + key = prefix + (i % numkeys); + + ret = rb_find(rbtree, key, &value); + if (!ret) { + arena_stderr("Key %d already present\n", key); + return errval; + } + + errval += 1; + + ret = rb_insert(rbtree, key, i); + if (ret) { + arena_stderr("ITERATION %d\n", i); + rb_print(rbtree); + return errval; + } + + rmval = i - prefill; + + errval += 1; + + ret = rb_find(rbtree, prefix + (rmval % numkeys), &value); + if (ret) + return errval; + + errval += 1; + + if (value != rmval) + return errval; + + errval += 1; + + ret = rb_remove(rbtree, prefix + (rmval % numkeys)); + if (ret) { + arena_stderr("ITERATION %d\n", i); + return errval; + } + + errval += 1; + } + + for (i = 0; i < numkeys && can_loop; i++) { + rb_remove(rbtree, prefix + i); + } + + return rb_destroy(rbtree); +} + +SEC("syscall") +__weak int test_rbtree_add_remove_circular_reverse(void) +{ + const size_t iters = 110; + const size_t prefill = 10; + const size_t numkeys = 50; + const size_t prefix = 500000; + u64 value, rmval; + int errval = 1; + u64 key; + int ret; + int i; + + struct rbtree __arena *rbtree; + + rbtree = rb_create(RB_ALLOC, RB_UPDATE); + if (!rbtree) + return 1; + + for (i = 0; i < prefill && can_loop; i++) { + ret = rb_insert(rbtree, prefix - (i % numkeys), i); + if (ret) + return errval; + + errval += 1; + } + + errval = 2 * 1000 * 1000; + + for (i = 0; i < prefill && can_loop; i++) { + /* Read it back. 
*/ + ret = rb_find(rbtree, prefix - (i % numkeys), &value); + if (ret) + return errval; + + if (value != i) + return errval; + } + + errval = 3 * 1000 * 1000; + + for (i = prefill; i < iters && can_loop; i++) { + key = prefix - (i % numkeys); + + ret = rb_find(rbtree, key, &value); + if (!ret) { + arena_stderr("Key %d already present\n", key); + return errval; + } + + errval += 1; + + ret = rb_insert(rbtree, key, i); + if (ret) { + arena_stderr("error %d on insert\n", ret); + rb_print(rbtree); + return errval; + } + + rmval = i - prefill; + + errval += 1; + + ret = rb_find(rbtree, prefix - (rmval % numkeys), &value); + if (ret) + return errval; + + errval += 1; + + if (value != rmval) + return errval; + + errval += 1; + + ret = rb_remove(rbtree, prefix - (rmval % numkeys)); + if (ret) + return errval; + + errval += 1; + } + + + errval = 4 * 1000 * 1000; + for (i = 0; i < prefill && can_loop; i++) { + ret = rb_remove(rbtree, prefix - i); + if (ret) { + arena_stderr("Did not remove %d, error %d\n", prefix - i, ret); + return errval + i; + } + } + + return rb_destroy(rbtree); +} + +SEC("syscall") +__weak int test_rbtree_least_pop(void) +{ + const size_t keys = 10; + u64 key, value; + int errval = 1; + int ret, i; + + struct rbtree __arena *rbtree; + + rbtree = rb_create(RB_ALLOC, RB_DEFAULT); + if (!rbtree) + return errval; + + errval += 1; + + for (i = 0; i < keys / 2 && can_loop; i++) { + ret = rb_insert(rbtree, i, i); + if (ret) + return errval; + + errval += 1; + + ret = rb_insert(rbtree, keys - 1 - i, keys - 1 - i); + if (ret) + return errval; + + errval += 1; + + ret = rb_least(rbtree, &key, &value); + if (ret) + return errval; + + errval += 1; + + if (key != 0 || value != 0) + return errval; + + errval += 1; + } + + errval = 1000; + + for (i = 0; i < keys && can_loop; i++) { + ret = rb_least(rbtree, &key, &value); + if (ret) { + arena_stderr("rb_least failed with %d\n", ret); + return errval; + } + + errval += 1; + + if (key != i || value != i) { + 
arena_stderr("Got KV %ld/%ld expected %d\n", key, value, i); + return errval; + } + + errval += 1; + + ret = rb_pop(rbtree, &key, &value); + if (ret) { + arena_stderr("Error %d during pop on iter %d\n", ret, i); + return errval; + } + + errval += 1; + + if (key != i || value != i) + return errval; + } + + return rb_destroy(rbtree); +} + +/* Reject rb_pop() for RB_NOALLOC trees. */ +SEC("syscall") +__weak int test_rbtree_noalloc_pop(void) +{ + const u64 expect_value = 1; + const u64 expect_key = 0; + struct rbtree __arena *rbtree; + struct rbnode __arena *node; + u64 value = 0; + int ret; + + rbtree = rb_create(RB_NOALLOC, RB_DEFAULT); + if (!rbtree) + return 1; + + node = rb_node_alloc(expect_key, expect_value); + if (!node) { + rb_destroy(rbtree); + return 2; + } + + ret = rb_insert_node(rbtree, node); + if (ret) { + rb_node_free(node); + rb_destroy(rbtree); + return 3; + } + + ret = rb_pop(rbtree, NULL, &value); + if (ret != -EINVAL) + return 4; + + ret = rb_find(rbtree, expect_key, &value); + if (ret) + return 5; + + if (value != expect_value) + return 6; + + ret = rb_remove_node(rbtree, node); + if (ret) + return 7; + + rb_node_free(node); + + return rb_destroy(rbtree); +} + +SEC("syscall") +__weak int test_rbtree_alloc_check(void) +{ + struct rbtree __arena *alloc, *noalloc; + struct rbnode __arena *node; + int ret; + + alloc = rb_create(RB_ALLOC, RB_DEFAULT); + if (!alloc) + return 1; + + noalloc = rb_create(RB_NOALLOC, RB_DEFAULT); + if (!noalloc) + return 2; + + + node = rb_node_alloc(0, 0); + if (!node) + return 3; + + /* + * RB_ALLOC trees can use rb_insert, RB_NOALLOC trees can + * use rb_insert_node. RB_ALLOC and RB_NOALLOC trees cannot + * use each other's APIs. + * + * NOTE: This begs the question, why not different types? We + * want to partially share the API and that would require us + * to duplicate it. 
+ */ + if (rb_insert(alloc, 0, 0)) + return 4; + + if (!rb_insert_node(alloc, node)) + return 5; + + if (!rb_remove_node(alloc, node)) + return 6; + + if (rb_remove(alloc, 0)) + return 7; + + if (rb_insert_node(noalloc, node)) + return 8; + + if (!rb_insert(noalloc, 0, 0)) + return 9; + + if (!rb_remove(noalloc, 0)) + return 10; + + if (rb_remove_node(noalloc, node)) + return 11; + + rb_node_free(node); + + ret = rb_destroy(alloc); + if (ret) + return ret; + + return rb_destroy(noalloc); +} diff --git a/tools/testing/selftests/bpf/libarena/src/rbtree.bpf.c b/tools/testing/selftests/bpf/libarena/src/rbtree.bpf.c new file mode 100644 index 000000000000..7f0f6dc3e17d --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/src/rbtree.bpf.c @@ -0,0 +1,1047 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* + * Copyright (c) 2025-2026 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2025-2026 Emil Tsalapatis + */ + +#include + +#include +#include + +int rb_integrity_check(struct rbtree __arena *rbtree); +void rbnode_print(size_t depth, struct rbnode __arena *rbn); +static int rbnode_replace(struct rbtree __arena *rbtree, + struct rbnode __arena *existing, + struct rbnode __arena *replacement); + +struct rbtree __arena *rb_create(enum rbtree_alloc alloc, + enum rbtree_insert_mode insert) +{ + struct rbtree __arena *rbtree; + + rbtree = arena_malloc(sizeof(*rbtree)); + if (unlikely(!rbtree)) + return NULL; + + /* + * RB_UPDATE overwrites existing values in the nodes, but RB_NOALLOC + * trees manage the tree nodes directly (including holding pointers + * to them). Disallow mixing the two modes to avoid dealing with + * unintuitive semantics. 
+ */ + if (alloc == RB_NOALLOC && insert == RB_UPDATE) { + arena_stderr("WARNING: Cannot combine RB_NOALLOC and RB_UPDATE"); + arena_free(rbtree); + return NULL; + } + + rbtree->alloc = alloc; + rbtree->insert = insert; + rbtree->root = NULL; + + return rbtree; +} + +__weak +int rb_destroy(struct rbtree __arena *rbtree) +{ + int ret = 0; + + arena_subprog_init(); + + if (unlikely(!rbtree)) + return -EINVAL; + + if (rbtree->alloc == RB_NOALLOC) { + /* + * We cannot do anything about RB_NOALLOC nodes. The whole + * point of RB_NOALLOC is that the nodes are directly owned + * by the caller that allocates and inserts them. We could + * unilaterally grab all nodes and free them anyway, but that + * would almost certainly cause UAF as the callers keep accessing + * the now freed nodes. Throw an error instead. + */ + if (rbtree->root) { + arena_stderr("WARNING: Destroying RB_NOALLOC tree with > 0 nodes"); + return -EBUSY; + } + + goto out; + } + + while (rbtree->root && can_loop) { + ret = rb_remove(rbtree, rbtree->root->key); + if (ret) + break; + } + +out: + arena_free(rbtree); + return ret; +} + +static inline int rbnode_dir(struct rbnode __arena *node) +{ + /* Arbitrarily choose a direction for the root. */ + if (unlikely(!node->parent)) + return 0; + + return (node->parent->left == node) ? 0 : 1; +} + +/* + * The __noinline is to prevent inlining from bloating the add + * remove calls, in turn causing register splits and increasing + * stack usage above what is permitted. + */ +__noinline +int rbnode_rotate(struct rbtree __arena *rbtree, + struct rbnode __arena *node, int dir) +{ + struct rbnode __arena *tmp, *parent; + int parentdir; + + parent = node->parent; + if (parent) + parentdir = rbnode_dir(node); + + /* If we're doing a root change, are we the root? */ + if (unlikely(!parent && rbtree->root != node)) + return -EINVAL; + + /* + * Does the node we're turning into the root into exist? 
+ * Note that the new root is on the opposite side of the + * rotation's direction. + */ + tmp = node->child[1 - dir]; + if (unlikely(!tmp)) + return -EINVAL; + + /* Steal the closest child of the new root. */ + node->child[1 - dir] = tmp->child[dir]; + if (node->child[1 - dir]) + node->child[1 - dir]->parent = node; + + /* Put the node below the new root.*/ + tmp->child[dir] = node; + node->parent = tmp; + + tmp->parent = parent; + if (parent) + parent->child[parentdir] = tmp; + else + rbtree->root = tmp; + + return 0; +} + +static +struct rbnode __arena *rbnode_find(struct rbnode __arena *subtree, u64 key) +{ + struct rbnode __arena *node = subtree; + int dir; + + if (!subtree) + return NULL; + + while (can_loop) { + if (node->key == key) + break; + + dir = (key < node->key) ? 0 : 1; + + if (!node->child[dir]) + break; + + node = node->child[dir]; + } + + return node; +} + +static +struct rbnode __arena *rbnode_least_upper_bound(struct rbnode __arena *subtree, uint64_t key) +{ + struct rbnode __arena *node = subtree; + int dir; + + if (!subtree) + return NULL; + + while (can_loop) { + dir = (key <= node->key) ? 0 : 1; + + if (!node->child[dir]) + break; + + node = node->child[dir]; + } + + return node; +} + +__weak +int rb_find(struct rbtree __arena *rbtree, u64 key, u64 *value) +{ + struct rbnode __arena *node; + + if (unlikely(!rbtree)) + return -EINVAL; + + if (unlikely(!value)) + return -EINVAL; + + node = rbnode_find(rbtree->root, key); + if (!node || node->key != key) + return -ENOENT; + + *value = node->value; + + return 0; +} + +__weak +struct rbnode __arena *rb_node_alloc(u64 key, u64 value) +{ + struct rbnode __arena *rbnode = NULL; + + rbnode = (struct rbnode __arena *)arena_malloc(sizeof(*rbnode)); + if (!rbnode) + return NULL; + + /* + * WARNING: The order of assignments is weird on purpose. + * See comment in rb_insert_node() for more context. 
+ * TL;DR: Prevent consecutive 0 assignments from being + * promoted into an unverifiable memset by the compiler. + */ + + rbnode->key = key; + rbnode->parent = NULL; + rbnode->value = value; + rbnode->left = NULL; + rbnode->is_red = true; + rbnode->right = NULL; + + return rbnode; +} + +__weak +void rb_node_free(struct rbnode __arena *rbnode) +{ + arena_free(rbnode); +} + +static +int rb_node_insert(struct rbtree __arena *rbtree, + struct rbnode __arena *node) +{ + struct rbnode __arena *grandparent, *parent = rbtree->root; + u64 key = node->key; + struct rbnode __arena *uncle; + int dir; + int ret; + + if (unlikely(!rbtree)) + return -EINVAL; + + if (!parent) { + rbtree->root = node; + return 0; + } + + if (rbtree->insert != RB_DUPLICATE) + parent = rbnode_find(parent, key); + else + parent = rbnode_least_upper_bound(parent, key); + + if (key == parent->key && rbtree->insert != RB_DUPLICATE) { + if (rbtree->insert == RB_UPDATE) { + /* + * Replace the old node with the new one. + * Free up the old node. + */ + ret = rbnode_replace(rbtree, parent, node); + if (ret) + return ret; + + if (rbtree->alloc == RB_ALLOC) + rb_node_free(parent); + + return 0; + } + + /* Otherwise it's RB_DEFAULT. */ + return -EALREADY; + } + + node->parent = parent; + /* Also works if key == parent->key. */ + if (key <= parent->key) + parent->left = node; + else + parent->right = node; + + while (can_loop) { + parent = node->parent; + if (!parent) + return 0; + + if (!parent->is_red) + return 0; + + grandparent = parent->parent; + if (!grandparent) { + parent->is_red = false; + return 0; + } + + dir = rbnode_dir(parent); + uncle = grandparent->child[1 - dir]; + + if (!uncle || !uncle->is_red) { + if (node == parent->child[1 - dir]) { + rbnode_rotate(rbtree, parent, dir); + node = parent; + parent = grandparent->child[dir]; + } + + rbnode_rotate(rbtree, grandparent, 1 - dir); + parent->is_red = false; + grandparent->is_red = true; + + return 0; + } + + /* Uncle is red. 
*/ + + parent->is_red = false; + uncle->is_red = false; + grandparent->is_red = true; + + node = grandparent; + } + + return 0; +} + +int rb_insert_node(struct rbtree __arena *rbtree, + struct rbnode __arena *node) +{ + if (unlikely(!rbtree)) + return -EINVAL; + + if (unlikely(rbtree->alloc == RB_ALLOC)) + return -EINVAL; + + node->left = NULL; + + /* + * Workaround to break an optimization that causes + * verification failures on some compilers. Assignments + * of the kind + * + * *(r0 + 0) = 0; + * *(r0 + 8) = 0; + * *(r0 + 16) = 0; + * + * get promoted into a memset, and that in turn is not + * handled properly for arena memory by LLVM 21 and GCC 15. + * Add a barrier for now to prevent the assignments from being fused. + */ + barrier(); + + node->parent = NULL; + node->right = NULL; + + node->is_red = true; + + return rb_node_insert(rbtree, node); +} + +__weak +int rb_insert(struct rbtree __arena *rbtree, u64 key, u64 value) +{ + struct rbnode __arena *node; + int ret; + + if (unlikely(!rbtree)) + return -EINVAL; + + if (unlikely(rbtree->alloc != RB_ALLOC)) + return -EINVAL; + + node = rb_node_alloc(key, value); + if (!node) + return -ENOMEM; + + ret = rb_node_insert(rbtree, node); + if (ret) { + rb_node_free(node); + return ret; + } + + return 0; +} + +static inline struct rbnode __arena *rbnode_least(struct rbnode __arena *subtree) +{ + while (subtree->left && can_loop) + subtree = subtree->left; + + return subtree; +} + +__weak int rb_least(struct rbtree __arena *rbtree, u64 *key, u64 *value) +{ + struct rbnode __arena *least; + + if (unlikely(!rbtree)) + return -EINVAL; + + if (!rbtree->root) + return -ENOENT; + + least = rbnode_least(rbtree->root); + if (key) + *key = least->key; + if (value) + *value = least->value; + + return 0; +} + + +/* + * If we are referencing ourselves, a and b have a parent-child relation, + * and we should be pointing at the other node instead. 
+ */ +static inline void rbnode_fixup_pointers(struct rbnode __arena *a, + struct rbnode __arena *b) +{ +#define fixup(n1, n2, member) do { if (n1->member == n1) n1->member = n2; } while (0) + fixup(a, b, left); + fixup(a, b, right); + fixup(a, b, parent); +#undef fixup +} + +static inline void rbnode_swap_values(struct rbnode __arena *a, + struct rbnode __arena *b) +{ +#define swap(n1, n2, tmp) do { (tmp) = (n1); (n1) = (n2); (n2) = (tmp); } while (0) + struct rbnode __arena *tmpnode; + u64 tmp; + + /* Swap the pointers. */ + swap(a->is_red, b->is_red, tmp); + + swap(a->left, b->left, tmpnode); + swap(a->right, b->right, tmpnode); + swap(a->parent, b->parent, tmpnode); +#undef swap + + /* Account for the nodes being parent and child. */ + rbnode_fixup_pointers(b, a); + rbnode_fixup_pointers(a, b); +} + +static inline void rbnode_adjust_neighbors(struct rbtree __arena *rbtree, + struct rbnode __arena *node, int dir) +{ + if (node->left) + node->left->parent = node; + if (node->right) + node->right->parent = node; + + if (node->parent) { + node->parent->child[dir] = node; + return; + } + + rbtree->root = node; +} + +/* + * Directly replace an existing node with a replacement. The replacement node + * should not already be in the tree. + */ +static int rbnode_replace(struct rbtree __arena *rbtree, + struct rbnode __arena *existing, + struct rbnode __arena *replacement) +{ + int dir = 0; + + if (unlikely(replacement->parent || replacement->left || replacement->right)) + return -EINVAL; + + if (existing->parent) + dir = rbnode_dir(existing); + + replacement->is_red = existing->is_red; + replacement->left = existing->left; + replacement->right = existing->right; + replacement->parent = existing->parent; + + /* Fix up the new node's neighbors. */ + rbnode_adjust_neighbors(rbtree, replacement, dir); + + return 0; +} + +/* + * Switch two nodes in the tree in place. This is useful during node deletion. 
+ * This is more involved than switching the values of the two nodes because we + * must update all tree pointers. + */ +static void rbnode_switch(struct rbtree __arena *rbtree, + struct rbnode __arena *a, + struct rbnode __arena *b) +{ + int adir = 0, bdir = 0; + + /* + * Store the direction in the parent because we will not + * be able to recompute it once we start swapping values. + */ + if (a->parent) + adir = rbnode_dir(a); + + if (b->parent) + bdir = rbnode_dir(b); + + rbnode_swap_values(a, b); + + /* + * Fix up the pointers from the children/parent to the + * new nodes. + */ + rbnode_adjust_neighbors(rbtree, a, bdir); + rbnode_adjust_neighbors(rbtree, b, adir); +} + +static inline int rbnode_remove_node_single_child(struct rbtree __arena *rbtree, + struct rbnode __arena *node, + bool free) +{ + struct rbnode __arena *child; + int dir; + + if (unlikely(node->is_red)) { + arena_stderr("Node unexpectedly red\n"); + return -EINVAL; + } + + child = node->left ? node->left : node->right; + if (unlikely(!child->is_red)) { + arena_stderr("Only child is black\n"); + return -EINVAL; + } + + /* + * Since it's the immediate child, we can just + * remove the parent. + */ + child->parent = node->parent; + + if (node->parent) { + dir = rbnode_dir(node); + node->parent->child[dir] = child; + } else { + rbtree->root = child; + } + + /* Color the child black. */ + child->is_red = false; + + /* Only free if called from rb_remove. 
*/ + if (free) + rb_node_free(node); + + return 0; +} + +static inline bool rbnode_has_red_children(struct rbnode __arena *node) +{ + if (node->left && node->left->is_red) + return true; + + return node->right && node->right->is_red; +} + +static +int rb_node_remove(struct rbtree __arena *rbtree, + struct rbnode __arena *node) +{ + struct rbnode __arena *parent, *sibling, *close_nephew, *distant_nephew; + bool free = (rbtree->alloc == RB_ALLOC); + struct rbnode __arena *replace, *initial; + bool is_red; + int dir; + + /* Both children present, replace with next largest key. */ + if (node->left && node->right) { + /* + * Swap the node itself instead of just the + * key/value pair to account for nodes embedded + * in other structs. + */ + + replace = rbnode_least(node->right); + rbnode_switch(rbtree, replace, node); + + /* + * FALLTHROUGH: We moved the node we are removing to + * the leftmost position of the subtree. We can now + * remove it as if it was always where we moved it to. + */ + } + + initial = node; + + /* Only one child present, replace with child and paint it black. */ + if (!node->left != !node->right) + return rbnode_remove_node_single_child(rbtree, node, free); + + /* (!node->left && !node->right) */ + + parent = node->parent; + if (!parent) { + /* Check that we're _actually_ the root. */ + if (rbtree->root == node) + rbtree->root = NULL; + else + arena_stderr("WARNING: Attempting to remove detached node from rbtree\n"); + + if (free) + rb_node_free(node); + return 0; + } + + dir = rbnode_dir(node); + parent->child[dir] = NULL; + is_red = node->is_red; + + if (free) + rb_node_free(node); + + /* If we removed a red node, we did not unbalance the tree.*/ + if (is_red) + return 0; + + sibling = parent->child[1 - dir]; + if (unlikely(!sibling)) { + arena_stderr("rbtree: removed black node has no sibling\n"); + return -EINVAL; + } + + /* + * We removed a black node, causing a change in path + * weight. Start rebalancing. 
The invariant is that + * all paths going through the node are shortened + * by one, and the current node is black. + */ + while (can_loop) { + + /* Balancing reached the root, there can be no imbalance. */ + if (!parent) + return 0; + + /* + * We already determined the dir, either above or + * at the end of the loop. + */ + + /* + * If we have no sibling, the tree was + * already unbalanced. + */ + sibling = parent->child[1 - dir]; + if (unlikely(!sibling)) { + arena_stderr("rbtree: removed black node has no sibling\n"); + return -EINVAL; + } + + /* Sibling is red, turn it into the grandparent. */ + if (sibling->is_red) { + /* + * Sibling is red. Transform the tree to turn + * the sibling into the parent's position, and + * repaint them. This does not balance the tree + * but makes it so we know the sibling is black + * and so can use the transformations to balance. + */ + rbnode_rotate(rbtree, parent, dir); + parent->is_red = true; + sibling->is_red = false; + + /* Our new sibling is now the close nephew. */ + sibling = parent->child[1 - dir]; + /* If sibling has any red children, break out. */ + if (rbnode_has_red_children(sibling)) + break; + + /* We can repaint the sibling and parent, we're done. */ + sibling->is_red = true; + parent->is_red = false; + + return 0; + } + + /* Sibling guaranteed to be black. If it has red children, break out. */ + if (rbnode_has_red_children(sibling)) + break; + + /* + * Both sibling and children are black. If parent is red, swap + * colors with the sibling. Otherwise fall through to the case below. + */ + if (parent->is_red) { + parent->is_red = false; + sibling->is_red = true; + return 0; + } + + /* + * Parent, sibling, and all its children are black. Repaint the sibling. + * This shortens the paths through it, so pop up a level in the + * tree and repeat the balancing.
+ */ + sibling->is_red = true; + node = parent; + parent = node->parent; + dir = rbnode_dir(node); + } + + if (node != initial) { + dir = rbnode_dir(node); + parent = node->parent; + sibling = parent->child[1-dir]; + } + /* + * Almost there. We know between the parent, sibling, + * and nephews only one or two of the nephews are red. If + * it is the close one, rotate it to the sibling position, + * paint it black, and paint the previous sibling red. + */ + + close_nephew = sibling->child[dir]; + distant_nephew = sibling->child[1 - dir]; + + /* + * If the distant nephew is not red, rotate + * and repaint. We need the distant nephew + * to be red. We know the close nephew is red + * because at least one of them is, so the + * distant one is black if it exists. + */ + if (!distant_nephew || !distant_nephew->is_red) { + rbnode_rotate(rbtree, sibling, 1 - dir); + sibling->is_red = true; + close_nephew->is_red = false; + distant_nephew = sibling; + sibling = close_nephew; + } + + /* + * We now know it's the distant nephew that's red. + * Rotate the sibling into our parent's position + * and paint both black.
+ */ + + rbnode_rotate(rbtree, parent, dir); + sibling->is_red = parent->is_red; + parent->is_red = false; + distant_nephew->is_red = false; + + return 0; +} + +__weak +int rb_remove_node(struct rbtree __arena *rbtree, + struct rbnode __arena *node) +{ + if (unlikely(!rbtree)) + return -EINVAL; + + if (unlikely(rbtree->alloc == RB_ALLOC)) + return -EINVAL; + + return rb_node_remove(rbtree, node); +} + +__weak +int rb_remove(struct rbtree __arena *rbtree, u64 key) +{ + struct rbnode __arena *node; + + if (unlikely(!rbtree)) + return -EINVAL; + + if (unlikely(rbtree->alloc != RB_ALLOC)) + return -EINVAL; + + if (!rbtree->root) + return -ENOENT; + + node = rbnode_find(rbtree->root, key); + if (!node || node->key != key) + return -ENOENT; + + return rb_node_remove(rbtree, node); +} + +__weak +int rb_pop(struct rbtree __arena *rbtree, u64 *key, u64 *value) +{ + struct rbnode __arena *node; + + if (unlikely(!rbtree)) + return -EINVAL; + + if (!rbtree->root) + return -ENOENT; + + if (rbtree->alloc != RB_ALLOC) + return -EINVAL; + + node = rbnode_least(rbtree->root); + if (unlikely(!node)) + return -ENOENT; + + if (key) + *key = node->key; + if (value) + *value = node->value; + + return rb_node_remove(rbtree, node); +} + +inline void rbnode_print(size_t depth, struct rbnode __arena *rbn) +{ + arena_stderr("[DEPTH %d] %p (%s)\n PARENT %p", depth, rbn, rbn->is_red ? 
"red" : "black", rbn->parent); + arena_stderr("\tKV (%ld, %ld)\n LEFT %p RIGHT %p]\n", rbn->key, rbn->value, rbn->left, rbn->right); +} + +enum rb_print_state { + RB_NONE_VISITED, + RB_LEFT_VISITED, + RB_RIGHT_VISITED, +}; + +__weak +enum rb_print_state rb_print_next_state(struct rbnode __arena *rbnode, + enum rb_print_state state, u64 *next) +{ + if (unlikely(!next)) + return RB_NONE_VISITED; + + switch (state) { + case RB_NONE_VISITED: + if (rbnode->left) { + *next = (u64)rbnode->left; + state = RB_LEFT_VISITED; + break; + } + + /* FALLTHROUGH */ + + case RB_LEFT_VISITED: + if (rbnode->right) { + *next = (u64)rbnode->right; + state = RB_RIGHT_VISITED; + break; + } + + /* FALLTHROUGH */ + + default: + *next = 0; + state = RB_RIGHT_VISITED; + } + + return state; +} + +__weak +int rb_print_pop_up(struct rbnode __arena **rbnodep, u8 *depthp, enum rb_print_state (*stack)[RB_MAXLVL_PRINT], enum rb_print_state *state) +{ + struct rbnode __arena *rbnode; + volatile u8 depth; + int j; + + if (unlikely(!rbnodep || !depthp || !stack || !state)) + return -EINVAL; + + rbnode = *rbnodep; + depth = *depthp; + + for (j = 0; j < RB_MAXLVL_PRINT && can_loop; j++) { + if (*state != RB_RIGHT_VISITED) + break; + + depth -= 1; + if (depth < 0 || depth >= RB_MAXLVL_PRINT) + break; + + *state = (*stack)[depth % RB_MAXLVL_PRINT]; + rbnode = rbnode->parent; + } + + *rbnodep = rbnode; + *depthp = depth; + + return 0; +} + +__weak +int rb_print(struct rbtree __arena *rbtree) +{ + enum rb_print_state stack[RB_MAXLVL_PRINT]; + struct rbnode __arena *rbnode = rbtree->root; + enum rb_print_state state; + struct rbnode __arena *next; + u64 next_addr; + u8 depth; + int ret; + + if (unlikely(!rbtree)) + return -EINVAL; + + depth = 0; + state = RB_NONE_VISITED; + + arena_stderr("=== RB TREE START ===\n"); + + if (!rbtree->root) + goto out; + + /* Even with can_loop, the verifier doesn't like infinite loops. 
*/ + while (can_loop) { + if (state == RB_NONE_VISITED) + rbnode_print(depth, rbnode); + + /* Find which child to traverse next. */ + state = rb_print_next_state(rbnode, state, &next_addr); + next = (struct rbnode __arena *)next_addr; + + /* Child found. Store the node state and go on. */ + if (next) { + if (depth < 0 || depth >= RB_MAXLVL_PRINT) + return 0; + + stack[depth++] = state; + + rbnode = next; + state = RB_NONE_VISITED; + + continue; + } + + /* Otherwise, go as far up as possible. */ + ret = rb_print_pop_up(&rbnode, &depth, &stack, &state); + if (ret) + return -EINVAL; + + if (depth < 0 || depth >= RB_MAXLVL_PRINT) { + arena_stderr("=== RB TREE END (depth %d\n)===", depth); + return 0; + } + + } + +out: + arena_stderr("=== RB TREE END ===\n"); + + return 0; +} + +__weak +int rb_integrity_check(struct rbtree __arena *rbtree) +{ + enum rb_print_state stack[RB_MAXLVL_PRINT]; + struct rbnode __arena *rbnode = rbtree->root; + enum rb_print_state state; + struct rbnode __arena *next; + u64 next_addr; + u8 depth; + int ret; + + if (unlikely(!rbtree)) + return -EINVAL; + + if (!rbtree->root) + return 0; + + depth = 0; + state = RB_NONE_VISITED; + + /* Even with can_loop, the verifier doesn't like infinite loops. */ + while (can_loop) { + if (rbnode->parent && rbnode->parent->left != rbnode + && rbnode->parent->right != rbnode) { + arena_stderr("WARNING: Inconsistent tree. 
Parent %p has no child %p\n", rbnode->parent, rbnode); + return -EINVAL; + } + + if (rbnode->parent == rbnode) { + arena_stderr("WARNING: Inconsistent tree, node %p is its own parent\n", rbnode); + return -EINVAL; + } + + if (rbnode->left == rbnode) { + arena_stderr("WARNING: Inconsistent tree, node %p is its own left child\n", rbnode); + return -EINVAL; + } + + if (rbnode->right == rbnode) { + arena_stderr("WARNING: Inconsistent tree, node %p is its own right child\n", rbnode); + return -EINVAL; + } + + if (rbnode->is_red) { + if (rbnode->left && rbnode->left->is_red) { + arena_stderr("WARNING: Inconsistent tree. Parent has %p has red child %p\n", rbnode, rbnode->left); + return -EINVAL; + } + if (rbnode->right && rbnode->right->is_red) { + arena_stderr("WARNING: Inconsistent tree. Parent has %p has red child %p\n", rbnode, rbnode->right); + return -EINVAL; + } + } else if (rbnode->parent && rbnode->parent->child[1 - rbnode_dir(rbnode)] == NULL) { + arena_stderr("WARNING: Inconsistent tree. Black node %p has no sibling\n", rbnode); + return -EINVAL; + } + + /* Find which child to traverse next. */ + state = rb_print_next_state(rbnode, state, &next_addr); + next = (struct rbnode __arena *)next_addr; + + /* Child found. Store the node state and go on. */ + if (next) { + if (depth < 0 || depth >= RB_MAXLVL_PRINT) + return 0; + + stack[depth++] = state; + + rbnode = next; + state = RB_NONE_VISITED; + + continue; + } + + /* Otherwise, go as far up as possible. */ + ret = rb_print_pop_up(&rbnode, &depth, &stack, &state); + if (ret) + return -EINVAL; + + if (depth < 0 || depth >= RB_MAXLVL_PRINT) { + return 0; + } + + } + + return 0; +} -- cgit v1.2.3 From 57c6ace8395d53b9bae6fb21e0bd3f536342c16e Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Fri, 5 Jun 2026 18:20:19 -0400 Subject: selftests/bpf: libarena: Add spmc queue data structure Expand libarena with a single producer multiple consumer deque data structure. 
This is a single producer, multiple consumer lockless structure that permits efficient work stealing. The structure is a Lev-Chase queue, so it is lock-free and wait-free. The data structure exposes three main calls. two of them are available to the thread owning the queue and one available to all threads in the program: spmc_owner_push(): Push an item to the top of the queue. spmc_owner_pop(): Pop an item from the top of the queue. spmc_steal(): Steal a thread from the bottom of the queue from any thread. Note that the queue is not really FIFO for all consumers, since non-owners of the queue can only work steal from the bottom. Signed-off-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260605222020.5231-3-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/libarena/include/libarena/spmc.h | 27 +++ .../bpf/libarena/selftests/test_spmc.bpf.c | 194 +++++++++++++++++ .../testing/selftests/bpf/libarena/src/spmc.bpf.c | 234 +++++++++++++++++++++ 3 files changed, 455 insertions(+) create mode 100644 tools/testing/selftests/bpf/libarena/include/libarena/spmc.h create mode 100644 tools/testing/selftests/bpf/libarena/selftests/test_spmc.bpf.c create mode 100644 tools/testing/selftests/bpf/libarena/src/spmc.bpf.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/spmc.h b/tools/testing/selftests/bpf/libarena/include/libarena/spmc.h new file mode 100644 index 000000000000..75611276ce13 --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/include/libarena/spmc.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause */ + +#pragma once + +struct spmc_arr; + +#define SPMC_ARR_BASESZ 128 +#define SPMC_ARR_ORDERS 10 + +struct spmc_arr { + u64 __arena *data; + u64 order; +}; + +struct spmc { + volatile struct spmc_arr __arena *cur; + volatile u64 top; + volatile u64 bottom; + struct spmc_arr arr[SPMC_ARR_ORDERS]; +}; + +int spmc_owned_add(struct spmc __arena *spmc, u64 val); +int 
spmc_owned_remove(struct spmc __arena *spmc, u64 *val); +int spmc_steal(struct spmc __arena *spmc, u64 *val); + +struct spmc __arena *spmc_create(void); +int spmc_destroy(struct spmc __arena *spmc); diff --git a/tools/testing/selftests/bpf/libarena/selftests/test_spmc.bpf.c b/tools/testing/selftests/bpf/libarena/selftests/test_spmc.bpf.c new file mode 100644 index 000000000000..4d7a520115d1 --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/selftests/test_spmc.bpf.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause + +#include + +#include +#include + +/* + * NOTE: These selftests only test for the single-threaded use case, which for + * Lev-Chase queues is obviously the simplest one. Still, it is important to + * exercise the API to ensure it passes verification and basic checks. + */ + +SEC("syscall") +int test_spmc_remove_empty(void) +{ + u64 val; + int ret; + + struct spmc __arena *spmc = spmc_create(); + + if (!spmc) + return 1; + + ret = spmc_owned_remove(spmc, &val); + if (ret != -ENOENT) + return 1; + + spmc_destroy(spmc); + + return 0; +} + +SEC("syscall") +int test_spmc_steal_empty(void) +{ + u64 val; + int ret; + + struct spmc __arena *spmc = spmc_create(); + + if (!spmc) + return 1; + + ret = spmc_steal(spmc, &val); + if (ret != -ENOENT) + return 1; + + spmc_destroy(spmc); + + return 0; +} + +SEC("syscall") +int test_spmc_steal_one(void) +{ + u64 val, newval; + int ret, i; + + struct spmc __arena *spmc = spmc_create(); + + if (!spmc) + return 1; + + for (i = 0; i < 10 && can_loop; i++) { + val = i; + + ret = spmc_owned_add(spmc, val); + if (ret) + return 1; + + ret = spmc_steal(spmc, &newval); + if (ret) + return 2; + + if (val != newval) + return 3; + } + + spmc_destroy(spmc); + + return 0; +} + +SEC("syscall") +int test_spmc_remove_one(void) +{ + u64 val, newval; + int ret, i; + + struct spmc __arena *spmc = spmc_create(); + + if (!spmc) + return 1; + + for (i = 0; i < 10 && can_loop; i++) { + val = i; + + ret = 
spmc_owned_add(spmc, val); + if (ret) + return 1; + + ret = spmc_owned_remove(spmc, &newval); + if (ret) + return 2; + + if (val != newval) + return 3; + } + + spmc_destroy(spmc); + + return 0; +} + +SEC("syscall") +int test_spmc_remove_many(void) +{ + u64 val, newval; + int ret, i; + u64 expected; + + struct spmc __arena *spmc = spmc_create(); + + if (!spmc) + return 1; + + for (i = 0; i < 500 && can_loop; i++) { + val = i; + + ret = spmc_owned_add(spmc, val); + if (ret) { + arena_stderr("%s:%d error %d\n", __func__, __LINE__, ret); + return 1; + } + } + + for (i = 0; i < 500 && can_loop; i++) { + ret = spmc_owned_remove(spmc, &newval); + if (ret) { + arena_stderr("%s:%d error %d\n", __func__, __LINE__, ret); + return 1; + } + + expected = 500 - 1 - i; + if (newval != expected) { + arena_stderr("%s:%d expected %llu found %llu\n", __func__, __LINE__, expected, newval); + return 1; + } + } + + spmc_destroy(spmc); + + return 0; +} + +SEC("syscall") +int test_spmc_steal_many(void) +{ + u64 val, newval; + int ret, i; + + struct spmc __arena *spmc = spmc_create(); + + if (!spmc) + return 1; + + for (i = 0; i < 500 && can_loop; i++) { + val = i; + + ret = spmc_owned_add(spmc, val); + if (ret) { + arena_stderr("%s:%d error %d\n", __func__, __LINE__, ret); + return 1; + } + } + + for (i = 0; i < 500 && can_loop; i++) { + ret = spmc_steal(spmc, &newval); + if (ret) { + arena_stderr("%s:%d error %d\n", __func__, __LINE__, ret); + return 1; + } + + if (newval != i) { + arena_stderr("%s:%d expected %d found %llu\n", __func__, __LINE__, i, newval); + return 1; + } + } + + spmc_destroy(spmc); + + return 0; +} diff --git a/tools/testing/selftests/bpf/libarena/src/spmc.bpf.c b/tools/testing/selftests/bpf/libarena/src/spmc.bpf.c new file mode 100644 index 000000000000..42732b7d29a6 --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/src/spmc.bpf.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* + * Copyright (c) 2025-2026 Meta Platforms, Inc. 
and affiliates. + * Copyright (c) 2025-2026 Emil Tsalapatis + */ + +#include + +#include + +#include +#include + +static inline +u64 spmc_arr_size(volatile struct spmc_arr __arena *spmc_arr) +{ + return SPMC_ARR_BASESZ << spmc_arr->order; +} + +static inline +u64 spmc_arr_get(volatile struct spmc_arr __arena *spmc_arr, u64 ind) +{ + u64 ret = READ_ONCE(spmc_arr->data[ind % spmc_arr_size(spmc_arr)]); + + return ret; +} + +static inline +void spmc_arr_put(volatile struct spmc_arr __arena *spmc_arr, u64 ind, u64 value) +{ + WRITE_ONCE(spmc_arr->data[ind % spmc_arr_size(spmc_arr)], value); +} + +static inline +void spmc_arr_copy(volatile struct spmc_arr __arena *dst, + volatile struct spmc_arr __arena *src, u64 b, u64 t) +{ + u64 i; + + for (i = t; i < b && can_loop; i++) + spmc_arr_put(dst, i, spmc_arr_get(src, i)); +} + +static inline +int spmc_order_init(struct spmc __arena *spmc, int order) +{ + volatile struct spmc_arr __arena *arr = &spmc->arr[order]; + + if (unlikely(!spmc)) + return -EINVAL; + + if (order >= SPMC_ARR_ORDERS) + return -E2BIG; + + /* Already allocated? */ + if (arr->data) + return 0; + + arr->data = arena_malloc((SPMC_ARR_BASESZ << order) * sizeof(*arr->data)); + if (!arr->data) + return -ENOMEM; + + return 0; +} + +__weak +int spmc_owned_add(struct spmc __arena *spmc, u64 val) +{ + volatile struct spmc_arr __arena *newarr; + volatile struct spmc_arr __arena *arr; + ssize_t sz; + u64 b, t; + int ret; + + if (unlikely(!spmc)) + return -EINVAL; + + /* + * Bottom must always be read first, also + * see spmc_steal(). 
+ */ + b = smp_load_acquire(&spmc->bottom); + t = READ_ONCE(spmc->top); + arr = READ_ONCE(spmc->cur); + + sz = b - t; + if (sz >= spmc_arr_size(arr) - 1) { + ret = spmc_order_init(spmc, arr->order + 1); + if (ret) + return ret; + + newarr = &spmc->arr[arr->order + 1]; + + spmc_arr_copy(newarr, arr, b, t); + smp_store_release(&spmc->cur, newarr); + arr = newarr; + } + + spmc_arr_put(arr, b, val); + smp_store_release(&spmc->bottom, b + 1); + + return 0; +} + + +__weak +int spmc_owned_remove(struct spmc __arena *spmc, u64 *val) +{ + volatile struct spmc_arr __arena *arr; + int ret = 0; + ssize_t sz; + u64 value; + u64 b, t; + + if (unlikely(!spmc || !val)) + return -EINVAL; + + b = READ_ONCE(spmc->bottom) - 1; + WRITE_ONCE(spmc->bottom, b); + smp_mb(); + + t = READ_ONCE(spmc->top); + arr = READ_ONCE(spmc->cur); + + sz = b - t; + if (sz < 0) { + WRITE_ONCE(spmc->bottom, t); + return -ENOENT; + } + + value = spmc_arr_get(arr, b); + if (sz > 0) { + *val = value; + return 0; + } + + if (cmpxchg(&spmc->top, t, t + 1) != t) + ret = -EAGAIN; + + WRITE_ONCE(spmc->bottom, t + 1); + + if (ret) + return ret; + + *val = value; + + return 0; +} + +__weak +int spmc_steal(struct spmc __arena *spmc, u64 *val) +{ + volatile struct spmc_arr __arena *arr; + ssize_t sz; + u64 value; + u64 b, t; + + if (unlikely(!spmc || !val)) + return -EINVAL; + + /* + * It is important that t is read before b for + * stealers to avoid racing with the owner. + * Races between stealers are dealt with using + * CAS to increment the top value below. 
+ */ + t = smp_load_acquire(&spmc->top); + b = smp_load_acquire(&spmc->bottom); + + sz = b - t; + if (sz <= 0) + return -ENOENT; + + arr = smp_load_acquire(&spmc->cur); + value = spmc_arr_get(arr, t); + + if (cmpxchg(&spmc->top, t, t + 1) != t) + return -EAGAIN; + + *val = value; + + return 0; +} + + +__weak +struct spmc __arena *spmc_create(void) +{ + /* + * Marked as volatile because otherwise the array + * reference in the internal loop gets demoted to + * scalar and the program fails verification. + */ + struct spmc __arena *volatile spmc; + int ret, i; + + spmc = arena_malloc(sizeof(*spmc)); + if (!spmc) + return NULL; + + spmc->bottom = 0; + spmc->top = 0; + + for (i = 0; i < SPMC_ARR_ORDERS && can_loop; i++) { + spmc->arr[i].data = NULL; + spmc->arr[i].order = i; + } + + ret = spmc_order_init((struct spmc __arena *)spmc, 0); + if (ret) { + arena_free(spmc); + return NULL; + } + + spmc->cur = &spmc->arr[0]; + + return (struct spmc __arena *)spmc; +} + +__weak +int spmc_destroy(struct spmc __arena *spmc) +{ + int i; + + if (unlikely(!spmc)) + return -EINVAL; + + for (i = 0; i < SPMC_ARR_ORDERS && can_loop; i++) + arena_free(spmc->arr[i].data); + + arena_free(spmc); + + return 0; +} -- cgit v1.2.3 From 42998f819256ef272b6a445310e2b64a3729a139 Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Fri, 5 Jun 2026 18:20:20 -0400 Subject: selftests/bpf: libarena: parallel test harness and spmc parallel selftest Add a parallel test for the SPMC Lev-Chase workstealing queue. The queue is built to be wait-free even when there are multiple consumers, and the parallel selftest provides a signal on whether the queue behaves correctly when stress tested. To support the test, this patch includes a test harness for parallel selftests. The spmc selftest acts as an example of the naming and other conventions expected by the harness. 
Signed-off-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260605222020.5231-4-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- .../bpf/libarena/include/libarena/userspace.h | 6 + .../libarena/selftests/test_parallel_spmc.bpf.c | 673 +++++++++++++++++++++ tools/testing/selftests/bpf/prog_tests/libarena.c | 187 ++++++ 3 files changed, 866 insertions(+) create mode 100644 tools/testing/selftests/bpf/libarena/selftests/test_parallel_spmc.bpf.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/userspace.h b/tools/testing/selftests/bpf/libarena/include/libarena/userspace.h index 88b68ac73cca..fc27a4bcf5d7 100644 --- a/tools/testing/selftests/bpf/libarena/include/libarena/userspace.h +++ b/tools/testing/selftests/bpf/libarena/include/libarena/userspace.h @@ -32,6 +32,12 @@ static inline bool libarena_is_asan_test_prog(const char *name) return strstr(name, "asan_test") == name; } +static inline bool libarena_is_parallel_test_prog(const char *name) +{ + return strstr(name, "parallel_test") == name; +} + + static inline int libarena_run_prog_args(int prog_fd, void *args, size_t argsize) { LIBBPF_OPTS(bpf_test_run_opts, opts); diff --git a/tools/testing/selftests/bpf/libarena/selftests/test_parallel_spmc.bpf.c b/tools/testing/selftests/bpf/libarena/selftests/test_parallel_spmc.bpf.c new file mode 100644 index 000000000000..981c845e2d15 --- /dev/null +++ b/tools/testing/selftests/bpf/libarena/selftests/test_parallel_spmc.bpf.c @@ -0,0 +1,673 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause + +#include + +#include + +#include +#include + +#define TEST_SPMC_THREADS 4 +#define TEST_SPMC_STEALERS (TEST_SPMC_THREADS - 1) + +/* + * The test requires the stealers/owners to sometimes quiesce + * before continuing the benchmark. Normally we'd use something + * like a condition variable, but since the benchmark is short-lived + * and operations are wait-free we just spin around the quiescence + * point instead. 
If we time out, we just fail the benchmark. + */ +#define TEST_SPMC_SYNC_SPINS (1U << 18) + +/* + * We track all the values we retrieve from the queue + * to get some guarantee we're not corrupting data, + * e.g., accidentally reusing a past value from a slot. + */ +#define TEST_SPMC_MAX_VALUES (1024) +static u64 __arena seen[TEST_SPMC_MAX_VALUES]; + +/* The single spmc queue for the benchmark. */ +static struct spmc __arena *spmc; + +/* Owner and stealer epochs (see STEALER_EPOCH() for how they relate). */ +static volatile u64 owner_epoch; +static volatile u64 stealer_epoch; + +/* Map owner epochs to stealer epochs (simply scale by # of stealers). */ +#define STEALER_EPOCH(owner_epoch) ((owner_epoch) * TEST_SPMC_STEALERS) + +/* Global abort switch. If any thread fails, all others exit ASAP. */ +static volatile bool test_abort; + +/* + * Counters useful for ensuring conservation of pushes/pops of unique values + * (we're not stealing/popping more/fewer items than were pushed). + */ +static volatile u64 expected_total; +static volatile u64 total_seen; + +/* Measure how many pops and steals we've made (irrespective of retrieved value). */ +static volatile u64 pops; +static volatile u64 steals; + +/* Used for the resize selftest, see below. */ +static volatile u64 stealers_started; + +/* Used for the mixed selftest, see below. */ +static volatile u64 round_steals; + +/* + * We have multiple stealers and a single owner. We sometimes want the owner + * to successfully outproduce the stealers, so we add a busy loop in them. + */ +#define TEST_SPMC_WASTE_ROUNDS (1024) + +/* + * The spmc data structure depends on the runtime fully + * supporting acquire/release semantics, which is not + * the case for all architectures.
+ */ +#if defined(ENABLE_ATOMICS_TESTS) && \ + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) +static bool spmc_tests_enabled(void) +{ + return true; +} +#else +static bool spmc_tests_enabled(void) +{ + return false; +} +#endif + +/* + * Scaffolding for each parallel test. Each test has setup/teardown, + * a single owner thread that owns the queue, and TEST_SPMC_STEALER + * threads that try to steal. + */ +#define DEFINE_PARALLEL_SPMC_TEST(prefix, expected_total) \ + SEC("syscall") int parallel_test_spmc_##prefix##__enabled(void) \ + { \ + return spmc_tests_enabled() ? 0 : -EOPNOTSUPP; \ + } \ + SEC("syscall") int parallel_test_spmc_##prefix##__init(void) \ + { \ + return spmc_common_init(expected_total); \ + } \ + SEC("syscall") int parallel_test_spmc_##prefix##__fini(void) \ + { \ + return spmc_common_fini(); \ + } \ + SEC("syscall") int parallel_test_spmc_##prefix##__0(void) \ + { \ + return spmc_##prefix##_owner(); \ + } \ + SEC("syscall") int parallel_test_spmc_##prefix##__1(void) \ + { \ + return spmc_##prefix##_stealer(); \ + } \ + SEC("syscall") int parallel_test_spmc_##prefix##__2(void) \ + { \ + return spmc_##prefix##_stealer(); \ + } \ + SEC("syscall") int parallel_test_spmc_##prefix##__3(void) \ + { \ + return spmc_##prefix##_stealer(); \ + } + +static int spmc_common_init(u64 total) +{ + u64 i; + + if (total > TEST_SPMC_MAX_VALUES) + return -E2BIG; + + owner_epoch = 0; + stealer_epoch = 0; + test_abort = false; + expected_total = total; + total_seen = 0; + pops = 0; + steals = 0; + stealers_started = 0; + round_steals = 0; + + for (i = zero; i < TEST_SPMC_MAX_VALUES && can_loop; i++) + seen[i] = 0; + + spmc = spmc_create(); + if (!spmc) + return -ENOMEM; + + return 0; +} + +static int spmc_common_fini(void) +{ + int ret; + + ret = spmc_destroy(spmc); + spmc = NULL; + + return ret; +} + +__weak +int spmc_quiesce_on_owner(u64 epoch) +{ + u64 i; + + bpf_for(i, 0, 
TEST_SPMC_SYNC_SPINS) { + if (test_abort) + return -EINTR; + if (smp_load_acquire(&owner_epoch) >= epoch) + return 0; + } + + test_abort = true; + + return -ETIMEDOUT; +} + +__weak +int spmc_quiesce_on_stealer(u64 epoch) +{ + u64 target, cur; + unsigned int i; + int err = -ETIMEDOUT; + + target = STEALER_EPOCH(epoch); + bpf_for(i, 0, TEST_SPMC_SYNC_SPINS) { + + if (test_abort) { + err = -EINTR; + break; + } + + cur = smp_load_acquire(&stealer_epoch); + if (cur > target) { + err = -EINVAL; + test_abort = true; + break; + } + + if (cur == target) + return 0; + } + + test_abort = true; + + return err; +} + +static int spmc_update_stats(u64 val, bool owner) +{ + u64 total; + + total = expected_total; + if (val >= total || val >= TEST_SPMC_MAX_VALUES) { + test_abort = true; + return -EINVAL; + } + + if (__sync_fetch_and_add(&seen[val], 1) != 0) { + test_abort = true; + return -EINVAL; + } + + __sync_fetch_and_add(&total_seen, 1); + if (owner) + __sync_fetch_and_add(&pops, 1); + else + __sync_fetch_and_add(&steals, 1); + + return 0; +} + +static int spmc_validate_owner_empty(void) +{ + u64 val; + int ret; + + ret = spmc_owned_remove(spmc, &val); + if (ret != -ENOENT) { + test_abort = true; + /* Change a 0 return value into -EINVAL. */ + return ret ?: -EINVAL; + } + + return 0; +} + +__weak +int spmc_validate_all_seen(void) +{ + u64 i, total; + + total = expected_total; + if (total_seen != total) + goto err; + + if (pops + steals != total) + goto err; + + for (i = zero; i < total && can_loop; i++) { + if (seen[i % TEST_SPMC_MAX_VALUES] != 1) + goto err; + } + + return 0; + +err: + test_abort = true; + + return -EINVAL; +} + +/* + * Single value benchmark. The owner adds an item then races with + * the stealers for it. This way we directly race between owner and + * stealers on the same slot.
+ */ + + +#define TEST_SPMC_SINGLEVAL_ITERS (64) + +__weak +int spmc_singleval_tryconsume(u64 expected, bool steal) +{ + u64 val; + int ret; + + while (can_loop) { + if (steal) + ret = spmc_steal(spmc, &val); + else + ret = spmc_owned_remove(spmc, &val); + + /* Success. Update and validate. */ + if (!ret) { + if (val != expected) + return -EINVAL; + + ret = spmc_update_stats(val, !steal); + if (ret) + return ret; + + return 0; + } + + /* + * If we got -ENOENT, the queue is empty + * and we're good to go. + */ + if (ret != -EAGAIN) + return (ret == -ENOENT) ? 0 : ret; + } + + /* Impossible. */ + return -EINVAL; +} + +static int spmc_singleval_owner(void) +{ + int ret; + u64 i; + + for (i = zero; i < TEST_SPMC_SINGLEVAL_ITERS && can_loop; i++) { + ret = spmc_quiesce_on_stealer(i); + if (ret) + goto err; + + ret = spmc_owned_add(spmc, i); + if (ret) + goto err; + + __sync_fetch_and_add(&owner_epoch, 1); + + ret = spmc_singleval_tryconsume(i, false); + if (ret) + goto err; + + ret = spmc_quiesce_on_stealer(i + 1); + if (ret) + goto err; + } + + ret = spmc_validate_owner_empty(); + if (ret) + return ret; + + return spmc_validate_all_seen(); + +err: + test_abort = true; + return -EINVAL; +} + +static int spmc_singleval_stealer(void) +{ + int ret; + u64 i; + + for (i = zero; i < TEST_SPMC_SINGLEVAL_ITERS && can_loop; i++) { + ret = spmc_quiesce_on_owner(i + 1); + if (ret) + goto err; + + ret = spmc_singleval_tryconsume(i, true); + if (ret) + goto err; + + __sync_fetch_and_add(&stealer_epoch, 1); + } + + return 0; + +err: + test_abort = true; + return -EINVAL; +} + +DEFINE_PARALLEL_SPMC_TEST(singleval, TEST_SPMC_SINGLEVAL_ITERS) + +/* + * The resize test. Force a resize from the owner even while the stealers + * are trying to consume. Then make sure the queue is still consistent + * after the resize. + * + * The owner _doesn't_ consume from the queue. The test makes sure that + * switching the array from underneath the stealers works. 
+ */ + +/* Force 2 resizes (since the rate of resize is logarithmic). */ +#define TEST_SPMC_RESIZE_ORDER (2) +#define TEST_SPMC_RESIZE_PREFILL ((SPMC_ARR_BASESZ << TEST_SPMC_RESIZE_ORDER) - 1) + +/* Items the owner adds while racing with the stealers. */ +#define TEST_SPMC_RESIZE_TAIL (SPMC_ARR_BASESZ << TEST_SPMC_RESIZE_ORDER) +#define TEST_SPMC_RESIZE_TOTAL (TEST_SPMC_RESIZE_PREFILL + TEST_SPMC_RESIZE_TAIL) + +__weak +int spmc_wait_for_stealers_to_start(u64 target) +{ + u64 i; + + bpf_for(i, 0, TEST_SPMC_SYNC_SPINS) { + if (test_abort) + return -EINTR; + if (READ_ONCE(stealers_started) >= target) + return 0; + } + + test_abort = true; + + return -ETIMEDOUT; +} + +__weak +void spmc_waste_time(void) +{ + int i; + int j; + + for (i = zero; i < TEST_SPMC_WASTE_ROUNDS && can_loop; i++) { + /* Random computation. */ + WRITE_ONCE(j, i * 17 + 23); + } +} + +static int spmc_resize_owner(void) +{ + bool resized = false; + u64 i; + int ret; + + /* Get a head start vs the consumers. */ + for (i = zero; i < TEST_SPMC_RESIZE_PREFILL && can_loop; i++) { + ret = spmc_owned_add(spmc, i); + if (ret) { + test_abort = true; + return ret; + } + } + + __sync_fetch_and_add(&owner_epoch, 1); + + /* Wait for stealers to start then start racing. */ + ret = spmc_wait_for_stealers_to_start(TEST_SPMC_STEALERS); + if (ret) + return ret; + + for (i = TEST_SPMC_RESIZE_PREFILL; i < TEST_SPMC_RESIZE_TOTAL && can_loop; i++) { + ret = spmc_owned_add(spmc, i); + if (ret) { + test_abort = true; + return ret; + } + + if (spmc->cur->order > TEST_SPMC_RESIZE_ORDER) + resized = true; + } + + /* Did we get to resize while racing? */ + if (!resized) { + test_abort = true; + return -153; + } + + /* + * Wait for the stealers to drain and make sure + * we didn't lose any items along the way.
+ */ + __sync_fetch_and_add(&owner_epoch, 1); + + ret = spmc_quiesce_on_stealer(1); + if (ret) + return ret; + + ret = spmc_validate_owner_empty(); + if (ret) + return ret; + + return spmc_validate_all_seen(); +} + +static int spmc_resize_stealer(void) +{ + bool owner_done = false; + u64 val; + int ret; + + arena_subprog_init(); + + ret = spmc_quiesce_on_owner(1); + if (ret) + return ret; + + __sync_fetch_and_add(&stealers_started, 1); + + while (can_loop) { + spmc_waste_time(); + if (test_abort) + return -EINTR; + + ret = spmc_steal(spmc, &val); + if (!ret) { + ret = spmc_update_stats(val, false); + if (ret) + return ret; + continue; + } + + if (ret == -EAGAIN) + continue; + + if (ret == -ENOENT) { + if (owner_done) + break; + owner_done = owner_epoch >= 2; + continue; + } + + test_abort = true; + return ret; + } + + __sync_fetch_and_add(&stealer_epoch, 1); + + return 0; +} + +DEFINE_PARALLEL_SPMC_TEST(resize, TEST_SPMC_RESIZE_TOTAL) + +/* + * The burst benchmark. The owner generates data all at once, + * then waits for the stealers to steal half then starts removing + * items until the queue empties. The owner also makes sure the + * item order is not jumbled. 
+ */ + +#define TEST_SPMC_BURST_ROUNDS (4) +#define TEST_SPMC_BURST_BURST (64) +#define TEST_SPMC_BURST_TOTAL (TEST_SPMC_BURST_ROUNDS * TEST_SPMC_BURST_BURST) +#define TEST_SPMC_BURST_STEAL_TARGET (TEST_SPMC_BURST_BURST / 2) + +static int spmc_wait_for_round_steals(u64 target) +{ + u64 i; + + arena_subprog_init(); + + bpf_for(i, 0, TEST_SPMC_SYNC_SPINS) { + if (test_abort) + return -EINTR; + if (round_steals >= target) + return 0; + } + + test_abort = true; + + return -ETIMEDOUT; +} + +__weak int +spmc_burst_owner_round(u64 round) +{ + u64 i, base, stolen, expected, val; + int ret; + + base = round * TEST_SPMC_BURST_BURST; + round_steals = 0; + + for (i = zero; i < TEST_SPMC_BURST_BURST && can_loop; i++) { + ret = spmc_owned_add(spmc, base + i); + if (ret) + return ret; + } + + __sync_fetch_and_add(&owner_epoch, 1); + + ret = spmc_wait_for_round_steals(TEST_SPMC_BURST_STEAL_TARGET); + if (ret == -EINTR || ret == -ETIMEDOUT) + return ret; + + __sync_fetch_and_add(&owner_epoch, 1); + + ret = spmc_quiesce_on_stealer(round + 1); + if (ret) + return ret; + + stolen = round_steals; + if (stolen > TEST_SPMC_BURST_BURST) + return -EINVAL; + + for (i = zero; i < TEST_SPMC_BURST_BURST - stolen && can_loop; i++) { + ret = spmc_owned_remove(spmc, &val); + if (ret) + return ret; + + expected = base + TEST_SPMC_BURST_BURST - 1 - i; + if (val != expected) + return -EINVAL; + + ret = spmc_update_stats(val, true); + if (ret) { + test_abort = true; + return -EINVAL; + } + } + + ret = spmc_validate_owner_empty(); + if (ret) + return ret; + + return 0; +} + +static int spmc_burst_owner(void) +{ + u64 round; + int ret; + + arena_subprog_init(); + + for (round = zero; round < TEST_SPMC_BURST_ROUNDS && can_loop; round++) { + ret = spmc_burst_owner_round(round); + if (ret) + goto err; + } + + return spmc_validate_all_seen(); + +err: + test_abort = true; + return -EINVAL; +} + +static int spmc_burst_stealer(void) +{ + u64 round, val, active_epoch; + int ret; + + arena_subprog_init(); + + 
for (round = zero; round < TEST_SPMC_BURST_ROUNDS && can_loop; round++) { + active_epoch = round * 2 + 1; + + /* + * Wait till the owner prefills the queue then + * start stealing. + */ + ret = spmc_quiesce_on_owner(active_epoch); + if (ret) + return ret; + + while (owner_epoch == active_epoch && can_loop) { + if (test_abort) + return -EINTR; + + ret = spmc_steal(spmc, &val); + if (!ret) { + ret = spmc_update_stats(val, false); + if (ret) + return ret; + __sync_fetch_and_add(&round_steals, 1); + continue; + } + if (ret == -EAGAIN || ret == -ENOENT) + continue; + + test_abort = true; + return ret; + } + + __sync_fetch_and_add(&stealer_epoch, 1); + } + + return 0; +} + +DEFINE_PARALLEL_SPMC_TEST(burst, TEST_SPMC_BURST_TOTAL) diff --git a/tools/testing/selftests/bpf/prog_tests/libarena.c b/tools/testing/selftests/bpf/prog_tests/libarena.c index 81bdb084c271..61ea68dce410 100644 --- a/tools/testing/selftests/bpf/prog_tests/libarena.c +++ b/tools/testing/selftests/bpf/prog_tests/libarena.c @@ -27,6 +27,177 @@ static void run_libarena_test(struct libarena *skel, struct bpf_program *prog, } +static void *run_libarena_parallel_prog(void *arg) +{ + struct bpf_program *prog = arg; + + return (void *)(long)libarena_run_prog(bpf_program__fd(prog)); +} + +/* Max suffix is ceil((lg 2^32) / (lg 10)) + sizeof("__") = 10 + 2 = 12. 
*/ +#define MAX_PARTEST_SUFFIX (12) +#define MAX_PARTEST_NAME (1024) +#define MAX_PARTEST_PREFIX (MAX_PARTEST_NAME - MAX_PARTEST_SUFFIX) + +static int run_libarena_parallel_fini(struct libarena *skel, const char *name, + size_t prefixlen) +{ + char tdname[MAX_PARTEST_NAME]; + struct bpf_program *fini_prog; + int ret; + + ret = snprintf(tdname, sizeof(tdname), "%.*s__fini", (int)prefixlen, name); + if (!ASSERT_LT(ret, sizeof(tdname), "partest fini name")) + return -ENAMETOOLONG; + + fini_prog = bpf_object__find_program_by_name(skel->obj, tdname); + if (!ASSERT_TRUE(fini_prog, "partest fini prog")) + return -ENOENT; + + ret = libarena_run_prog(bpf_program__fd(fini_prog)); + ASSERT_OK(ret, tdname); + + return ret; +} + +static int run_libarena_parallel_test_workers(struct libarena *skel, + const char *name, size_t prefixlen) +{ + pthread_t *threads = NULL, *tmp_threads; + char tdname[MAX_PARTEST_NAME]; + struct bpf_program *tdprog; + uint32_t nthreads; + void *thread_ret; + int ret, err = 0; + int i; + + for (nthreads = 0; nthreads < UINT_MAX; nthreads++) { + ret = snprintf(tdname, sizeof(tdname), "%.*s__%u", (int)prefixlen, + name, nthreads); + if (!ASSERT_LT(ret, sizeof(tdname), "test worker name")) { + err = -ENAMETOOLONG; + break; + } + + /* + * We enumerate the worker threads for a given test with __0, __1, + * and so on. The suffixes always start from 0 and are contiguous, + * so if we don't find a program with the requested name we have + * discovered all available worker programs. + */ + tdprog = bpf_object__find_program_by_name(skel->obj, tdname); + if (!tdprog) + break; + + /* Bump the alloc array to accommodate the new thread. 
*/ + tmp_threads = realloc(threads, (nthreads + 1) * sizeof(*threads)); + if (!ASSERT_TRUE(tmp_threads, "realloc")) { + err = -ENOMEM; + break; + } + threads = tmp_threads; + + ret = pthread_create(&threads[nthreads], NULL, + run_libarena_parallel_prog, + tdprog); + if (!ASSERT_OK(ret, "pthread_create")) { + err = ret; + break; + } + } + + + for (i = 0; i < nthreads; i++) { + ret = pthread_join(threads[i], &thread_ret); + if (!ASSERT_OK(ret, "pthread_join")) { + err = err ?: ret; + continue; + } + + err = err ?: (long)thread_ret; + } + + free(threads); + + return err; +} + +static bool libarena_parallel_test_enabled(struct libarena *skel, + const char *prefix, + size_t prefixlen) +{ + struct bpf_program *prog; + char progname[MAX_PARTEST_NAME]; + int ret; + + ret = snprintf(progname, sizeof(progname), "%.*s__enabled", (int)prefixlen, + prefix); + if (!ASSERT_LT(ret, sizeof(progname), "partest enabled name")) + return false; + + prog = bpf_object__find_program_by_name(skel->obj, progname); + if (!prog) + return true; + + ret = libarena_run_prog(bpf_program__fd(prog)); + if (ret == -EOPNOTSUPP) + return false; + if (!ASSERT_OK(ret, progname)) + return false; + return true; +} + +static void run_libarena_parallel_test(struct libarena *skel, struct bpf_program *prog, + const char *name) +{ + char testname[MAX_PARTEST_NAME]; + size_t prefixlen; + const char *pos; + int ret; + + /* + * We annotate the initialization prog with __init. If the current prog does + * not match, it is one of the parallel threads instead and is ignored. + * + * We assume the test writer knows what they are doing and do not add __init + * randomly in the middle of a test name. + */ + pos = strstr(name, "__init"); + if (!pos) + return; + + prefixlen = pos - name; + if (!ASSERT_LT(prefixlen, MAX_PARTEST_PREFIX, "partest prefix too long")) + return; + + /* The name of the test without the __init suffix. Looks nicer in the test log. 
*/ + ret = snprintf(testname, sizeof(testname), "%.*s", (int)prefixlen, name); + if (!ASSERT_LT(ret, sizeof(testname), "partest test name")) + return; + + if (!test__start_subtest(testname)) + return; + + if (!libarena_parallel_test_enabled(skel, testname, prefixlen)) { + test__skip(); + return; + } + + ret = libarena_run_prog(bpf_program__fd(skel->progs.arena_buddy_reset)); + if (!ASSERT_OK(ret, "arena_buddy_reset")) + return; + + ret = libarena_run_prog(bpf_program__fd(prog)); + if (!ASSERT_OK(ret, testname)) + return; + + ret = run_libarena_parallel_test_workers(skel, name, prefixlen); + + ASSERT_OK(ret, testname); + + run_libarena_parallel_fini(skel, name, prefixlen); +} + void test_libarena(void) { struct arena_alloc_reserve_args args; @@ -52,6 +223,22 @@ void test_libarena(void) bpf_object__for_each_program(prog, skel->obj) { const char *name = bpf_program__name(prog); + /* + * Handle parallel test progs separately. For those + * progs it's not a matter of test/skip, because each + * parallel test prog includes an initialization prog + * and a set of progs to be run in parallel. For the + * latter we do not record them as skipped or run, + * because we run them all at once when we come across + * the initialization prog. For more details on how we + * discover the progs see the comment on + * run_libarena_parallel_test. 
+ */ + if (libarena_is_parallel_test_prog(name)) { + run_libarena_parallel_test(skel, prog, name); + continue; + } + if (!libarena_is_test_prog(name)) continue; -- cgit v1.2.3 From 8ddce416797b7454ba1df855821b02c6e43b5a0e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 5 Jun 2026 23:35:18 +0200 Subject: selftests/bpf: Inspect the signature verdict exposed to BPF LSM Add a minimal BPF LSM program on lsm/bpf_prog_load that, for loads on the monitored thread, reads back prog->aux->sig.{verdict,keyring_type, keyring_serial}, and a signed_loader subtest that drives the same gen_loader loader through the hook twice: i) /unsigned/ where the LSM must observe UNSIGNED, no keyring and serial 0; ii) /signed/ where the very same insns signed against the session keyring must be observed as VERIFIED with a user keyring, and the recorded keyring_serial must be equal to the resolved session keyring serial. Loading (not running) the loader is sufficient since the verdict is attached at load time. 
# LDLIBS=-static PKG_CONFIG='pkg-config --static' ./vmtest.sh -- ./test_progs -t signed_loader [ 1.970530] clocksource: Switched to clocksource tsc #405/1 signed_loader/metadata_check_shape:OK #405/2 signed_loader/metadata_match:OK #405/3 signed_loader/metadata_sha_mismatch:OK #405/4 signed_loader/metadata_not_exclusive:OK #405/5 signed_loader/metadata_hash_not_computed:OK #405/6 signed_loader/signature_enforced:OK #405/7 signed_loader/signature_too_large:OK #405/8 signed_loader/signature_bad_keyring:OK #405/9 signed_loader/metadata_ctx_max_entries_ignored:OK #405/10 signed_loader/metadata_ctx_initial_value_ignored:OK #405/11 signed_loader/signature_authenticates_insns:OK #405/12 signed_loader/hash_requires_frozen:OK #405/13 signed_loader/no_update_after_freeze:OK #405/14 signed_loader/freeze_writable_mmap:OK #405/15 signed_loader/no_writable_mmap_frozen:OK #405/16 signed_loader/map_hash_matches_libbpf:OK #405/17 signed_loader/map_hash_multi_element:OK #405/18 signed_loader/map_hash_bad_size:OK #405/19 signed_loader/map_hash_unsupported_type:OK #405/20 signed_loader/lsm_signature_verdict:OK #405 signed_loader:OK Summary: 1/20 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20260605213518.544262-2-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/signed_loader.c | 122 +++++++++++++++++++++ .../selftests/bpf/progs/test_signed_loader_lsm.c | 30 +++++ 2 files changed, 152 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/test_signed_loader_lsm.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/signed_loader.c b/tools/testing/selftests/bpf/prog_tests/signed_loader.c index dcfdd2d96b05..5fc417e31fc6 100644 --- a/tools/testing/selftests/bpf/prog_tests/signed_loader.c +++ b/tools/testing/selftests/bpf/prog_tests/signed_loader.c @@ -17,9 +17,23 @@ #include "test_signed_loader.skel.h" #include "test_signed_loader_map.skel.h" #include 
"test_signed_loader_data.skel.h" +#include "test_signed_loader_lsm.skel.h" #define SIG_MATCH_INSNS 33 /* excl (5) + 4 * sha-dword (7) */ +enum { + BPF_SIG_UNSIGNED = 0, + BPF_SIG_VERIFIED, +}; + +enum { + BPF_SIG_KEYRING_NONE = 0, + BPF_SIG_KEYRING_BUILTIN, + BPF_SIG_KEYRING_SECONDARY, + BPF_SIG_KEYRING_PLATFORM, + BPF_SIG_KEYRING_USER, +}; + static int load_loader(const void *insns, __u32 insns_sz, int map_fd, const void *sig, __u32 sig_sz, __s32 keyring_id) { @@ -970,6 +984,112 @@ static void map_hash_unsupported_type(void) close(fd); } +static int setup_meta_map(const struct gen_loader_fixture *f) +{ + LIBBPF_OPTS(bpf_map_create_opts, mopts, + .excl_prog_hash = f->excl, + .excl_prog_hash_size = sizeof(f->excl)); + __u32 key = 0; + int fd; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, + f->data_sz, 1, &mopts); + if (fd < 0) + return -errno; + if (bpf_map_update_elem(fd, &key, f->blob, 0) || bpf_map_freeze(fd)) { + close(fd); + return -errno; + } + return fd; +} + +static void lsm_signature_verdict(void) +{ + char dir_tmpl[] = "/tmp/signed_loader_lsmXXXXXX", *dir = NULL; + struct test_signed_loader_lsm *lsm = NULL; + int map_fd = -1, prog_fd = -1; + bool have_fixture = false; + struct gen_loader_fixture f; + __u32 sig_sz = 8192; + __s32 ses_serial; + __u8 sig[8192]; + + lsm = test_signed_loader_lsm__open_and_load(); + if (!ASSERT_OK_PTR(lsm, "lsm_skel_load")) + return; + lsm->bss->monitored_tid = sys_gettid(); + if (!ASSERT_OK(test_signed_loader_lsm__attach(lsm), "lsm_attach")) + goto out; + + have_fixture = true; + if (gen_loader_fixture_init(&f) != 0) + goto out; + + map_fd = setup_meta_map(&f); + if (!ASSERT_OK_FD(map_fd, "meta_map_unsigned")) + goto out; + lsm->bss->seen = 0; + prog_fd = load_loader(f.gopts.insns, f.gopts.insns_sz, map_fd, NULL, 0, 0); + close(map_fd); + map_fd = -1; + if (!ASSERT_OK_FD(prog_fd, "unsigned loader load")) + goto out; + close(prog_fd); + prog_fd = -1; + if (!ASSERT_NEQ(lsm->bss->seen, 0, "bpf LSM in the active 
LSM set")) + goto out; + ASSERT_EQ(lsm->bss->seen, 1, "unsigned: one observed load"); + ASSERT_EQ(lsm->bss->sig_verdict, BPF_SIG_UNSIGNED, "unsigned verdict"); + ASSERT_EQ(lsm->bss->sig_keyring_type, BPF_SIG_KEYRING_NONE, "unsigned keyring type"); + ASSERT_EQ(lsm->bss->sig_keyring_serial, 0, "unsigned: no keyring serial"); + + syscall(__NR_request_key, "keyring", "_uid.0", NULL, + KEY_SPEC_SESSION_KEYRING); + dir = mkdtemp(dir_tmpl); + if (!ASSERT_OK_PTR(dir, "mkdtemp")) + goto out; + if (!ASSERT_OK(run_setup("setup", dir), "verify_sig_setup")) { + rmdir(dir); + dir = NULL; + goto out; + } + if (!ASSERT_OK(sign_buf(dir, f.gopts.insns, f.gopts.insns_sz, sig, + &sig_sz), "sign-file")) + goto out; + + map_fd = setup_meta_map(&f); + if (!ASSERT_OK_FD(map_fd, "meta_map_signed")) + goto out; + lsm->bss->seen = 0; + prog_fd = load_loader(f.gopts.insns, f.gopts.insns_sz, map_fd, sig, + sig_sz, KEY_SPEC_SESSION_KEYRING); + close(map_fd); + map_fd = -1; + if (!ASSERT_OK_FD(prog_fd, "signed loader load")) + goto out; + close(prog_fd); + prog_fd = -1; + + ses_serial = syscall(__NR_keyctl, KEYCTL_GET_KEYRING_ID, + KEY_SPEC_SESSION_KEYRING, 0); + ASSERT_EQ(lsm->bss->seen, 1, "signed: one observed load"); + ASSERT_EQ(lsm->bss->sig_verdict, BPF_SIG_VERIFIED, "signed verdict"); + ASSERT_EQ(lsm->bss->sig_keyring_type, BPF_SIG_KEYRING_USER, "signed keyring type"); + ASSERT_GT(ses_serial, 0, "session keyring serial resolved"); + ASSERT_EQ(lsm->bss->sig_keyring_serial, ses_serial, + "signed: validated against session keyring"); +out: + if (map_fd >= 0) + close(map_fd); + if (prog_fd >= 0) + close(prog_fd); + if (have_fixture) + gen_loader_fixture_fini(&f); + if (dir) + run_setup("cleanup", dir); + test_signed_loader_lsm__destroy(lsm); +} + void test_signed_loader(void) { if (test__start_subtest("metadata_check_shape")) @@ -1010,4 +1130,6 @@ void test_signed_loader(void) map_hash_bad_size(); if (test__start_subtest("map_hash_unsupported_type")) map_hash_unsupported_type(); + if 
(test__start_subtest("lsm_signature_verdict")) + lsm_signature_verdict(); } diff --git a/tools/testing/selftests/bpf/progs/test_signed_loader_lsm.c b/tools/testing/selftests/bpf/progs/test_signed_loader_lsm.c new file mode 100644 index 000000000000..575a9b7910c8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_signed_loader_lsm.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +__u32 monitored_tid; + +int sig_keyring_serial; +int sig_keyring_type; +int sig_verdict; +int seen; + +SEC("lsm/bpf_prog_load") +int BPF_PROG(inspect_prog_load, struct bpf_prog *prog, union bpf_attr *attr, + struct bpf_token *token, bool kernel) +{ + __u32 tid = bpf_get_current_pid_tgid() & 0xffffffff; + + if (!monitored_tid || tid != monitored_tid) + return 0; + + seen++; + sig_keyring_serial = prog->aux->sig.keyring_serial; + sig_keyring_type = prog->aux->sig.keyring_type; + sig_verdict = prog->aux->sig.verdict; + return 0; +} -- cgit v1.2.3 From 3ce6b42458f0e2176350fccf86b954d322591ff7 Mon Sep 17 00:00:00 2001 From: Nuoqi Gui Date: Sat, 6 Jun 2026 18:50:38 +0800 Subject: selftests/bpf: add tests for PTR_TO_FLOW_KEYS offset bounds Add verifier tests covering pointer arithmetic on a PTR_TO_FLOW_KEYS register. This covers the bpf-next regression where an out-of-bounds constant offset introduced as flow_keys += K and then dereferenced at insn->off 0 was accepted, while the equivalent flow_keys + K direct offset was rejected. The tests check that in-bounds constant arithmetic on the keys pointer is still accepted, out-of-bounds constant arithmetic is rejected for both read and write, and a truly varying offset from bpf_get_prandom_u32() remains rejected by the existing PTR_TO_FLOW_KEYS pointer arithmetic rules. 
Signed-off-by: Nuoqi Gui Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20260606-c3-01-v3-v3-2-97c51f592f15@mails.tsinghua.edu.cn Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_flow_keys.c | 97 ++++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/verifier_flow_keys.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 89779d897aba..8a3d69e2453c 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -38,6 +38,7 @@ #include "verifier_div0.skel.h" #include "verifier_div_mod_bounds.skel.h" #include "verifier_div_overflow.skel.h" +#include "verifier_flow_keys.skel.h" #include "verifier_global_subprogs.skel.h" #include "verifier_global_ptr_args.skel.h" #include "verifier_gotol.skel.h" @@ -190,6 +191,7 @@ void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_st void test_verifier_div0(void) { RUN(verifier_div0); } void test_verifier_div_mod_bounds(void) { RUN(verifier_div_mod_bounds); } void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); } +void test_verifier_flow_keys(void) { RUN(verifier_flow_keys); } void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); } void test_verifier_global_ptr_args(void) { RUN(verifier_global_ptr_args); } void test_verifier_gotol(void) { RUN(verifier_gotol); } diff --git a/tools/testing/selftests/bpf/progs/verifier_flow_keys.c b/tools/testing/selftests/bpf/progs/verifier_flow_keys.c new file mode 100644 index 000000000000..d780a36a6e9a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_flow_keys.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Bounds checks for PTR_TO_FLOW_KEYS pointer arithmetic. 
*/ + +#include "vmlinux.h" +#include +#include "bpf_misc.h" + +/* sizeof(struct bpf_flow_keys) is well under 4096, so +0x1000 is OOB. */ + +SEC("flow_dissector") +__description("flow_keys: in-bounds constant pointer arithmetic accepted") +__success +__naked void flow_keys_const_inbounds(void) +{ + asm volatile (" \ + r1 = *(u64 *)(r1 + %[flow_keys]); \ + r1 += 8; \ + r0 = *(u64 *)(r1 + 0); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(flow_keys, offsetof(struct __sk_buff, flow_keys)) + : __clobber_all); +} + +SEC("flow_dissector") +__description("flow_keys: OOB via constant pointer arithmetic rejected") +__failure __msg("invalid access to flow keys off=4096 size=8") +__naked void flow_keys_const_oob_read(void) +{ + asm volatile (" \ + r1 = *(u64 *)(r1 + %[flow_keys]); \ + r1 += 4096; \ + r0 = *(u64 *)(r1 + 0); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(flow_keys, offsetof(struct __sk_buff, flow_keys)) + : __clobber_all); +} + +SEC("flow_dissector") +__description("flow_keys: OOB write via constant pointer arithmetic rejected") +__failure __msg("invalid access to flow keys off=4096 size=8") +__naked void flow_keys_const_oob_write(void) +{ + asm volatile (" \ + r1 = *(u64 *)(r1 + %[flow_keys]); \ + r1 += 4096; \ + r2 = 0; \ + *(u64 *)(r1 + 0) = r2; \ + r0 = 0; \ + exit; \ +" : + : __imm_const(flow_keys, offsetof(struct __sk_buff, flow_keys)) + : __clobber_all); +} + +/* Equivalent OOB expressed directly in insn->off; this form was always + * rejected and is kept to show both forms now share one diagnostic. 
+ */ +SEC("flow_dissector") +__description("flow_keys: OOB via insn->off rejected") +__failure __msg("invalid access to flow keys off=4096 size=8") +__naked void flow_keys_insn_off_oob(void) +{ + asm volatile (" \ + r1 = *(u64 *)(r1 + %[flow_keys]); \ + r0 = *(u64 *)(r1 + 4096); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(flow_keys, offsetof(struct __sk_buff, flow_keys)) + : __clobber_all); +} + +SEC("flow_dissector") +__description("flow_keys: variable pointer arithmetic rejected") +__failure __msg("R1 pointer arithmetic on flow_keys prohibited") +__naked void flow_keys_var_read(void) +{ + asm volatile (" \ + r6 = r1; \ + call %[bpf_get_prandom_u32]; \ + r0 &= 0xFFFF; \ + r1 = *(u64 *)(r6 + %[flow_keys]); \ + r1 += r0; \ + r0 = *(u64 *)(r1 + 0); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(flow_keys, offsetof(struct __sk_buff, flow_keys)), + __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From d14e6b4346bf397eca7cb5f4b7b0b8054be632d8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:35 +0200 Subject: bpf: Add multi tracing attach types Adding new program attach types multi tracing attachment: BPF_TRACE_FENTRY_MULTI BPF_TRACE_FEXIT_MULTI and their base support in verifier code. Programs with such attach type will use specific link attachment interface coming in following changes. This was suggested by Andrii some (long) time ago and turned out to be easier than having special program flag for that. Bpf programs with such types have 'bpf_multi_func' function set as their attach_btf_id and keep module reference when it's specified by attach_prog_fd. They are also accepted as sleepable programs during verification, and the real validation for specific BTF_IDs/functions will happen during the multi link attachment in following changes. 
Suggested-by: Andrii Nakryiko Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-11-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 5 +++++ include/linux/btf_ids.h | 1 + include/uapi/linux/bpf.h | 2 ++ kernel/bpf/fixups.c | 1 + kernel/bpf/syscall.c | 28 ++++++++++++++++++++++++---- kernel/bpf/trampoline.c | 5 ++++- kernel/bpf/verifier.c | 40 +++++++++++++++++++++++++++++++++++++++- net/bpf/test_run.c | 2 ++ tools/include/uapi/linux/bpf.h | 2 ++ tools/lib/bpf/libbpf.c | 2 ++ 10 files changed, 82 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 428789a9e736..b52dc64ec92d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2113,6 +2113,11 @@ static inline void bpf_prog_put_recursion_context(struct bpf_prog *prog) #endif } +static inline bool is_tracing_multi(enum bpf_attach_type type) +{ + return type == BPF_TRACE_FENTRY_MULTI || type == BPF_TRACE_FEXIT_MULTI; +} + #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) /* This macro helps developer to register a struct_ops type and generate * type information correctly. 
Developers should use this macro to register diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index af011db39ab3..8b5a9ee92513 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -284,5 +284,6 @@ extern u32 bpf_cgroup_btf_id[]; extern u32 bpf_local_storage_map_btf_id[]; extern u32 btf_bpf_map_id[]; extern u32 bpf_kmem_cache_btf_id[]; +extern u32 bpf_multi_func_btf_id[]; #endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d5238df5e5eb..28d127e5040a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1156,6 +1156,8 @@ enum bpf_attach_type { BPF_TRACE_KPROBE_SESSION, BPF_TRACE_UPROBE_SESSION, BPF_TRACE_FSESSION, + BPF_TRACE_FENTRY_MULTI, + BPF_TRACE_FEXIT_MULTI, __MAX_BPF_ATTACH_TYPE }; diff --git a/kernel/bpf/fixups.c b/kernel/bpf/fixups.c index 5aa3f7d99ac9..0cf9735929f5 100644 --- a/kernel/bpf/fixups.c +++ b/kernel/bpf/fixups.c @@ -2186,6 +2186,7 @@ patch_map_ops_generic: insn->imm == BPF_FUNC_get_func_ret) { if (eatype == BPF_TRACE_FEXIT || eatype == BPF_TRACE_FSESSION || + eatype == BPF_TRACE_FEXIT_MULTI || eatype == BPF_MODIFY_RETURN) { /* Load nr_args from ctx - 8 */ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0cfc8bcb3dc9..efdd6639a598 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -2719,7 +2720,8 @@ static int bpf_prog_load_check_attach(enum bpf_prog_type prog_type, enum bpf_attach_type expected_attach_type, struct btf *attach_btf, u32 btf_id, - struct bpf_prog *dst_prog) + struct bpf_prog *dst_prog, + bool multi_func) { if (btf_id) { if (btf_id > BTF_MAX_TYPE) @@ -2739,6 +2741,14 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, } } + if (multi_func) { + if (prog_type != BPF_PROG_TYPE_TRACING) + return -EINVAL; + if (!attach_btf || btf_id) + return -EINVAL; + return 0; + } + if (attach_btf && (!btf_id || 
dst_prog)) return -EINVAL; @@ -2946,6 +2956,11 @@ static int bpf_prog_mark_insn_arrays_ready(struct bpf_prog *prog) return 0; } +extern int bpf_multi_func(void); +int __init __used bpf_multi_func(void) { return 0; } + +BTF_ID_LIST_GLOBAL_SINGLE(bpf_multi_func_btf_id, func, bpf_multi_func) + /* last field in 'union bpf_attr' used by this command */ #define BPF_PROG_LOAD_LAST_FIELD keyring_id @@ -2958,6 +2973,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, struct bpf_log_at bool bpf_cap; int err; char license[128]; + bool multi_func; if (CHECK_ATTR(BPF_PROG_LOAD)) return -EINVAL; @@ -3024,6 +3040,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, struct bpf_log_at if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON)) goto put_token; + multi_func = is_tracing_multi(attr->expected_attach_type); + /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog * or btf, we need to check which one it is */ @@ -3045,7 +3063,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, struct bpf_log_at goto put_token; } } - } else if (attr->attach_btf_id) { + } else if (attr->attach_btf_id || multi_func) { /* fall back to vmlinux BTF, if BTF type ID is specified */ attach_btf = bpf_get_btf_vmlinux(); if (IS_ERR(attach_btf)) { @@ -3061,7 +3079,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, struct bpf_log_at if (bpf_prog_load_check_attach(type, attr->expected_attach_type, attach_btf, attr->attach_btf_id, - dst_prog)) { + dst_prog, multi_func)) { if (dst_prog) bpf_prog_put(dst_prog); if (attach_btf) @@ -3084,7 +3102,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, struct bpf_log_at prog->expected_attach_type = attr->expected_attach_type; prog->sleepable = !!(attr->prog_flags & BPF_F_SLEEPABLE); prog->aux->attach_btf = attach_btf; - prog->aux->attach_btf_id = attr->attach_btf_id; + prog->aux->attach_btf_id = multi_func ? 
bpf_multi_func_btf_id[0] : attr->attach_btf_id; prog->aux->dst_prog = dst_prog; prog->aux->dev_bound = !!attr->prog_ifindex; prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; @@ -4480,6 +4498,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: case BPF_TRACE_FSESSION: + case BPF_TRACE_FENTRY_MULTI: + case BPF_TRACE_FEXIT_MULTI: case BPF_MODIFY_RETURN: return BPF_PROG_TYPE_TRACING; case BPF_LSM_MAC: diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 5776d2b8e36e..ae7e4fdfe2a3 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -182,7 +182,8 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog) switch (ptype) { case BPF_PROG_TYPE_TRACING: if (eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT || - eatype == BPF_MODIFY_RETURN || eatype == BPF_TRACE_FSESSION) + eatype == BPF_MODIFY_RETURN || eatype == BPF_TRACE_FSESSION || + eatype == BPF_TRACE_FENTRY_MULTI || eatype == BPF_TRACE_FEXIT_MULTI) return true; return false; case BPF_PROG_TYPE_LSM: @@ -781,10 +782,12 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog) { switch (prog->expected_attach_type) { case BPF_TRACE_FENTRY: + case BPF_TRACE_FENTRY_MULTI: return BPF_TRAMP_FENTRY; case BPF_MODIFY_RETURN: return BPF_TRAMP_MODIFY_RETURN; case BPF_TRACE_FEXIT: + case BPF_TRACE_FEXIT_MULTI: return BPF_TRAMP_FEXIT; case BPF_TRACE_FSESSION: return BPF_TRAMP_FSESSION; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 926ff63a0b61..0e593f3335e9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -16382,6 +16382,8 @@ static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_ case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: case BPF_TRACE_FSESSION: + case BPF_TRACE_FENTRY_MULTI: + case BPF_TRACE_FEXIT_MULTI: *range = retval_range(0, 0); break; case BPF_TRACE_RAW_TP: @@ -18772,6 +18774,11 @@ static int 
check_attach_modify_return(unsigned long addr, const char *func_name) #endif /* CONFIG_FUNCTION_ERROR_INJECTION */ +static bool is_tracing_multi_id(const struct bpf_prog *prog, u32 btf_id) +{ + return is_tracing_multi(prog->expected_attach_type) && bpf_multi_func_btf_id[0] == btf_id; +} + int bpf_check_attach_target(struct bpf_verifier_log *log, const struct bpf_prog *prog, const struct bpf_prog *tgt_prog, @@ -18894,6 +18901,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, prog_extension && (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY || tgt_prog->expected_attach_type == BPF_TRACE_FEXIT || + tgt_prog->expected_attach_type == BPF_TRACE_FENTRY_MULTI || + tgt_prog->expected_attach_type == BPF_TRACE_FEXIT_MULTI || tgt_prog->expected_attach_type == BPF_TRACE_FSESSION)) { /* Program extensions can extend all program types * except fentry/fexit. The reason is the following. @@ -19000,6 +19009,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: case BPF_TRACE_FSESSION: + case BPF_TRACE_FENTRY_MULTI: + case BPF_TRACE_FEXIT_MULTI: if (prog->expected_attach_type == BPF_TRACE_FSESSION && !bpf_jit_supports_fsession()) { bpf_log(log, "JIT does not support fsession\n"); @@ -19029,7 +19040,18 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, if (ret < 0) return ret; - if (tgt_prog) { + /* + * *.multi programs don't need an address during program + * verification, we just take the module ref if needed. 
+ */ + if (is_tracing_multi_id(prog, btf_id)) { + if (btf_is_module(btf)) { + mod = btf_try_get_module(btf); + if (!mod) + return -ENOENT; + } + addr = 0; + } else if (tgt_prog) { if (subprog == 0) addr = (long) tgt_prog->bpf_func; else @@ -19057,6 +19079,12 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, ret = -EINVAL; switch (prog->type) { case BPF_PROG_TYPE_TRACING: + /* *.multi sleepable programs will pass initial sleepable check, + * the actual attached btf ids are checked later during the link + * attachment. + */ + if (is_tracing_multi_id(prog, btf_id)) + ret = 0; if (!check_attach_sleepable(btf_id, addr, tname)) ret = 0; /* fentry/fexit/fmod_ret progs can also be sleepable if they are @@ -19167,6 +19195,8 @@ static bool can_be_sleepable(struct bpf_prog *prog) case BPF_TRACE_ITER: case BPF_TRACE_FSESSION: case BPF_TRACE_RAW_TP: + case BPF_TRACE_FENTRY_MULTI: + case BPF_TRACE_FEXIT_MULTI: return true; default: return false; @@ -19260,6 +19290,14 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) return -EINVAL; } + /* + * We don't get trampoline for tracing_multi programs at this point, + * it's done when tracing_multi link is created. 
+ */ + if (prog->type == BPF_PROG_TYPE_TRACING && + is_tracing_multi(prog->expected_attach_type)) + return 0; + key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id); tr = bpf_trampoline_get(key, &tgt_info); if (!tr) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index c9aea7052ba7..67769c700cae 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -703,6 +703,8 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: case BPF_TRACE_FSESSION: + case BPF_TRACE_FENTRY_MULTI: + case BPF_TRACE_FEXIT_MULTI: if (bpf_fentry_test1(1) != 2 || bpf_fentry_test2(2, 3) != 5 || bpf_fentry_test3(4, 5, 6) != 15 || diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3829db087449..1b9aacf468e5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1156,6 +1156,8 @@ enum bpf_attach_type { BPF_TRACE_KPROBE_SESSION, BPF_TRACE_UPROBE_SESSION, BPF_TRACE_FSESSION, + BPF_TRACE_FENTRY_MULTI, + BPF_TRACE_FEXIT_MULTI, __MAX_BPF_ATTACH_TYPE }; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1354bcbc8b30..1b09381d16ff 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -136,6 +136,8 @@ static const char * const attach_type_name[] = { [BPF_NETKIT_PEER] = "netkit_peer", [BPF_TRACE_KPROBE_SESSION] = "trace_kprobe_session", [BPF_TRACE_UPROBE_SESSION] = "trace_uprobe_session", + [BPF_TRACE_FENTRY_MULTI] = "trace_fentry_multi", + [BPF_TRACE_FEXIT_MULTI] = "trace_fexit_multi", }; static const char * const link_type_name[] = { -- cgit v1.2.3 From c1d32dea5d4694c1a6c14d1d1c3192d0e18ffc7b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:38 +0200 Subject: bpf: Add support for tracing multi link Add a new link that allows attaching a program to multiple function BTF IDs. The link is represented by struct bpf_tracing_multi_link.
To configure the link, new fields are added to bpf_attr::link_create to pass array of BTF IDs; struct { __aligned_u64 ids; __u32 cnt; } tracing_multi; Each BTF ID represents function (BTF_KIND_FUNC) that the link will attach bpf program to. We use previously added bpf_trampoline_multi_attach/detach functions to attach/detach the link. The linkinfo/fdinfo callbacks will be implemented in following changes. Note this is supported only for archs (x86_64) with ftrace direct and have single ops support. CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS && CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS Note using sort_r (instead of plain sort) in check_dup_ids, because we will use the swap callback in following changes. Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-14-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_types.h | 1 + include/linux/trace_events.h | 6 ++ include/uapi/linux/bpf.h | 5 ++ kernel/bpf/syscall.c | 2 + kernel/trace/bpf_trace.c | 130 +++++++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 6 ++ tools/lib/bpf/libbpf.c | 1 + 7 files changed, 151 insertions(+) (limited to 'tools') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 56e4c3f983d3..e5906829aa6f 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -156,3 +156,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf) BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi) BPF_LINK_TYPE(BPF_LINK_TYPE_STRUCT_OPS, struct_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_UPROBE_MULTI, uprobe_multi) +BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING_MULTI, tracing_multi) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index d49338c44014..308c76b57d13 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -787,6 +787,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, unsigned long *missed); int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct 
bpf_prog *prog); int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); +int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr); #else static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { @@ -844,6 +845,11 @@ bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { return -EOPNOTSUPP; } +static inline int +bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr) +{ + return -EOPNOTSUPP; +} #endif enum { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 28d127e5040a..9f603731d267 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1182,6 +1182,7 @@ enum bpf_link_type { BPF_LINK_TYPE_UPROBE_MULTI = 12, BPF_LINK_TYPE_NETKIT = 13, BPF_LINK_TYPE_SOCKMAP = 14, + BPF_LINK_TYPE_TRACING_MULTI = 15, __MAX_BPF_LINK_TYPE, }; @@ -1877,6 +1878,10 @@ union bpf_attr { }; __u64 expected_revision; } cgroup; + struct { + __aligned_u64 ids; + __u32 cnt; + } tracing_multi; }; } link_create; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index efdd6639a598..d551b9da0cfb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5885,6 +5885,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) ret = bpf_iter_link_attach(attr, uattr, prog); else if (prog->expected_attach_type == BPF_LSM_CGROUP) ret = cgroup_bpf_link_attach(attr, prog); + else if (is_tracing_multi(prog->expected_attach_type)) + ret = bpf_tracing_multi_attach(prog, attr); else ret = bpf_tracing_prog_attach(prog, attr->link_create.target_fd, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d853f97bd154..9e3cb547651e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -42,6 +42,7 @@ #define MAX_UPROBE_MULTI_CNT (1U << 20) #define MAX_KPROBE_MULTI_CNT (1U << 20) +#define MAX_TRACING_MULTI_CNT (1U << 20) #ifdef CONFIG_MODULES struct bpf_trace_module { @@ -3641,3 +3642,132 @@ __bpf_kfunc 
int bpf_copy_from_user_task_str_dynptr(const struct bpf_dynptr *dptr } __bpf_kfunc_end_defs(); + +#if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) && \ + defined(CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS) + +static void bpf_tracing_multi_link_release(struct bpf_link *link) +{ + struct bpf_tracing_multi_link *tr_link = + container_of(link, struct bpf_tracing_multi_link, link); + + WARN_ON_ONCE(bpf_trampoline_multi_detach(link->prog, tr_link)); +} + +static void bpf_tracing_multi_link_dealloc(struct bpf_link *link) +{ + struct bpf_tracing_multi_link *tr_link = + container_of(link, struct bpf_tracing_multi_link, link); + + kvfree(tr_link); +} + +static const struct bpf_link_ops bpf_tracing_multi_link_lops = { + .release = bpf_tracing_multi_link_release, + .dealloc_deferred = bpf_tracing_multi_link_dealloc, +}; + +static int ids_cmp_r(const void *pa, const void *pb, const void *priv __maybe_unused) +{ + u32 a = *(u32 *) pa; + u32 b = *(u32 *) pb; + + return (a > b) - (a < b); +} + +static void ids_swap_r(void *a, void *b, int size __maybe_unused, + const void *priv __maybe_unused) +{ + u32 *id_a = a, *id_b = b; + + swap(*id_a, *id_b); +} + +static int check_dup_ids(u32 *ids, u32 cnt) +{ + int err = 0; + + /* + * Sort ids array (together with cookies array if defined) + * and check it for duplicates. The ids and cookies arrays + * are left sorted. 
+ */ + sort_r_nonatomic(ids, cnt, sizeof(ids[0]), ids_cmp_r, ids_swap_r, NULL); + + for (int i = 1; i < cnt; i++) { + if (ids[i] == ids[i - 1]) { + err = -EINVAL; + break; + } + } + return err; +} + +int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr) +{ + struct bpf_tracing_multi_link *link = NULL; + struct bpf_link_primer link_primer; + u32 cnt, *ids = NULL; + u32 __user *uids; + int err; + + uids = u64_to_user_ptr(attr->link_create.tracing_multi.ids); + cnt = attr->link_create.tracing_multi.cnt; + + if (!cnt || !uids) + return -EINVAL; + if (cnt > MAX_TRACING_MULTI_CNT) + return -E2BIG; + if (attr->link_create.flags || attr->link_create.target_fd) + return -EINVAL; + + ids = kvmalloc_objs(*ids, cnt); + if (!ids) + return -ENOMEM; + + if (copy_from_user(ids, uids, cnt * sizeof(*ids))) { + err = -EFAULT; + goto error; + } + + err = check_dup_ids(ids, cnt); + if (err) + goto error; + + link = kvzalloc_flex(*link, nodes, cnt); + if (!link) { + err = -ENOMEM; + goto error; + } + + bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING_MULTI, + &bpf_tracing_multi_link_lops, prog, prog->expected_attach_type); + + err = bpf_link_prime(&link->link, &link_primer); + if (err) + goto error; + + link->nodes_cnt = cnt; + + err = bpf_trampoline_multi_attach(prog, ids, link); + kvfree(ids); + if (err) { + bpf_link_cleanup(&link_primer); + return err; + } + return bpf_link_settle(&link_primer); + +error: + kvfree(ids); + kvfree(link); + return err; +} + +#else + +int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr) +{ + return -EOPNOTSUPP; +} + +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS && CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1b9aacf468e5..9f603731d267 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1182,6 +1182,7 @@ enum bpf_link_type { BPF_LINK_TYPE_UPROBE_MULTI = 12, BPF_LINK_TYPE_NETKIT = 13, 
BPF_LINK_TYPE_SOCKMAP = 14, + BPF_LINK_TYPE_TRACING_MULTI = 15, __MAX_BPF_LINK_TYPE, }; @@ -1877,6 +1878,10 @@ union bpf_attr { }; __u64 expected_revision; } cgroup; + struct { + __aligned_u64 ids; + __u32 cnt; + } tracing_multi; }; } link_create; @@ -7254,6 +7259,7 @@ enum { TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */ SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */ SK_BPF_BYPASS_PROT_MEM = 1010, /* Get or Set sk->sk_bypass_prot_mem */ + }; enum { diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1b09381d16ff..59405d318624 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -156,6 +156,7 @@ static const char * const link_type_name[] = { [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi", [BPF_LINK_TYPE_NETKIT] = "netkit", [BPF_LINK_TYPE_SOCKMAP] = "sockmap", + [BPF_LINK_TYPE_TRACING_MULTI] = "tracing_multi", }; static const char * const map_type_name[] = { -- cgit v1.2.3 From 46b42af27d40021a97c147d23de8cb29eb5020df Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:39 +0200 Subject: bpf: Add support for tracing_multi link cookies Add support to specify cookies for the tracing_multi link. Cookies are provided in an array where each value is paired with the provided BTF ID value at the same array index. Such a cookie can be retrieved by the bpf program with the bpf_get_attach_cookie helper call. We need to sort the cookies array together with the ids array in check_dup_ids, to keep the id->cookie relation.
Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-15-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/trampoline.c | 1 + kernel/trace/bpf_trace.c | 37 +++++++++++++++++++++++++++++++++---- tools/include/uapi/linux/bpf.h | 1 + 5 files changed, 37 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bcf70f810d2c..e9d2b42a3981 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1971,6 +1971,7 @@ struct bpf_tracing_multi_data { struct bpf_tracing_multi_link { struct bpf_link link; struct bpf_tracing_multi_data data; + u64 *cookies; int nodes_cnt; struct bpf_tracing_multi_node nodes[] __counted_by(nodes_cnt); }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9f603731d267..569c15e1cae3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1880,6 +1880,7 @@ union bpf_attr { } cgroup; struct { __aligned_u64 ids; + __aligned_u64 cookies; __u32 cnt; } tracing_multi; }; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 957e5d7f9554..a3537fda50cf 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -1613,6 +1613,7 @@ int bpf_trampoline_multi_attach(struct bpf_prog *prog, u32 *ids, mnode->trampoline = tr; mnode->node.link = &link->link; + mnode->node.cookie = link->cookies ? 
link->cookies[i] : 0; cond_resched(); } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 9e3cb547651e..e33492739ed1 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3659,6 +3659,7 @@ static void bpf_tracing_multi_link_dealloc(struct bpf_link *link) struct bpf_tracing_multi_link *tr_link = container_of(link, struct bpf_tracing_multi_link, link); + kvfree(tr_link->cookies); kvfree(tr_link); } @@ -3678,13 +3679,24 @@ static int ids_cmp_r(const void *pa, const void *pb, const void *priv __maybe_un static void ids_swap_r(void *a, void *b, int size __maybe_unused, const void *priv __maybe_unused) { - u32 *id_a = a, *id_b = b; + u64 *cookie_a, *cookie_b, *cookies; + u32 *id_a = a, *id_b = b, *ids; + void **data = (void **) priv; + ids = data[0]; + cookies = data[1]; + + if (cookies) { + cookie_a = cookies + (id_a - ids); + cookie_b = cookies + (id_b - ids); + swap(*cookie_a, *cookie_b); + } swap(*id_a, *id_b); } -static int check_dup_ids(u32 *ids, u32 cnt) +static int check_dup_ids(u32 *ids, u64 *cookies, u32 cnt) { + void *data[2] = { ids, cookies }; int err = 0; /* @@ -3692,7 +3704,7 @@ static int check_dup_ids(u32 *ids, u32 cnt) * and check it for duplicates. The ids and cookies arrays * are left sorted. 
*/ - sort_r_nonatomic(ids, cnt, sizeof(ids[0]), ids_cmp_r, ids_swap_r, NULL); + sort_r_nonatomic(ids, cnt, sizeof(ids[0]), ids_cmp_r, ids_swap_r, data); for (int i = 1; i < cnt; i++) { if (ids[i] == ids[i - 1]) { @@ -3708,6 +3720,8 @@ int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr) struct bpf_tracing_multi_link *link = NULL; struct bpf_link_primer link_primer; u32 cnt, *ids = NULL; + u64 __user *ucookies; + u64 *cookies = NULL; u32 __user *uids; int err; @@ -3730,7 +3744,20 @@ int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr) goto error; } - err = check_dup_ids(ids, cnt); + ucookies = u64_to_user_ptr(attr->link_create.tracing_multi.cookies); + if (ucookies) { + cookies = kvmalloc_objs(*cookies, cnt); + if (!cookies) { + err = -ENOMEM; + goto error; + } + if (copy_from_user(cookies, ucookies, cnt * sizeof(*cookies))) { + err = -EFAULT; + goto error; + } + } + + err = check_dup_ids(ids, cookies, cnt); if (err) goto error; @@ -3748,6 +3775,7 @@ int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr) goto error; link->nodes_cnt = cnt; + link->cookies = cookies; err = bpf_trampoline_multi_attach(prog, ids, link); kvfree(ids); @@ -3758,6 +3786,7 @@ int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr) return bpf_link_settle(&link_primer); error: + kvfree(cookies); kvfree(ids); kvfree(link); return err; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9f603731d267..569c15e1cae3 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1880,6 +1880,7 @@ union bpf_attr { } cgroup; struct { __aligned_u64 ids; + __aligned_u64 cookies; __u32 cnt; } tracing_multi; }; -- cgit v1.2.3 From ba042ed6446fc524c1d804227765b45616f9cba3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:40 +0200 Subject: bpf: Add support for tracing_multi link session Adding support to use session attachment with 
tracing_multi link. Adding new BPF_TRACE_FSESSION_MULTI program attach type, that follows the BPF_TRACE_FSESSION behaviour but on the tracing_multi link. Such program is called on entry and exit of the attached function and allows to pass cookie value from entry to exit execution. Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-16-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 +++++- include/uapi/linux/bpf.h | 1 + kernel/bpf/fixups.c | 1 + kernel/bpf/syscall.c | 1 + kernel/bpf/trampoline.c | 44 ++++++++++++++++++++++++++++++++++-------- kernel/bpf/verifier.c | 20 ++++++++++++++----- kernel/trace/bpf_trace.c | 15 +++++++++++++- net/bpf/test_run.c | 1 + tools/include/uapi/linux/bpf.h | 1 + tools/lib/bpf/libbpf.c | 1 + 10 files changed, 76 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e9d2b42a3981..62bba7a4876f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1972,6 +1972,7 @@ struct bpf_tracing_multi_link { struct bpf_link link; struct bpf_tracing_multi_data data; u64 *cookies; + struct bpf_tramp_node *fexits; int nodes_cnt; struct bpf_tracing_multi_node nodes[] __counted_by(nodes_cnt); }; @@ -2159,7 +2160,8 @@ static inline void bpf_prog_put_recursion_context(struct bpf_prog *prog) static inline bool is_tracing_multi(enum bpf_attach_type type) { - return type == BPF_TRACE_FENTRY_MULTI || type == BPF_TRACE_FEXIT_MULTI; + return type == BPF_TRACE_FENTRY_MULTI || type == BPF_TRACE_FEXIT_MULTI || + type == BPF_TRACE_FSESSION_MULTI; } #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) @@ -2286,6 +2288,8 @@ static inline int bpf_fsession_cnt(struct bpf_tramp_nodes *nodes) for (int i = 0; i < nodes[BPF_TRAMP_FENTRY].nr_nodes; i++) { if (fentries.nodes[i]->link->prog->expected_attach_type == BPF_TRACE_FSESSION) cnt++; + if (fentries.nodes[i]->link->prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) + cnt++; } return 
cnt; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 569c15e1cae3..11dd610fa5fa 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1158,6 +1158,7 @@ enum bpf_attach_type { BPF_TRACE_FSESSION, BPF_TRACE_FENTRY_MULTI, BPF_TRACE_FEXIT_MULTI, + BPF_TRACE_FSESSION_MULTI, __MAX_BPF_ATTACH_TYPE }; diff --git a/kernel/bpf/fixups.c b/kernel/bpf/fixups.c index 0cf9735929f5..3cf2cc6e3ab6 100644 --- a/kernel/bpf/fixups.c +++ b/kernel/bpf/fixups.c @@ -2187,6 +2187,7 @@ patch_map_ops_generic: if (eatype == BPF_TRACE_FEXIT || eatype == BPF_TRACE_FSESSION || eatype == BPF_TRACE_FEXIT_MULTI || + eatype == BPF_TRACE_FSESSION_MULTI || eatype == BPF_MODIFY_RETURN) { /* Load nr_args from ctx - 8 */ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d551b9da0cfb..d4188a992bd8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4498,6 +4498,7 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: case BPF_TRACE_FSESSION: + case BPF_TRACE_FSESSION_MULTI: case BPF_TRACE_FENTRY_MULTI: case BPF_TRACE_FEXIT_MULTI: case BPF_MODIFY_RETURN: diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index a3537fda50cf..1a721fc4bef5 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -183,7 +183,8 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog) case BPF_PROG_TYPE_TRACING: if (eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT || eatype == BPF_MODIFY_RETURN || eatype == BPF_TRACE_FSESSION || - eatype == BPF_TRACE_FENTRY_MULTI || eatype == BPF_TRACE_FEXIT_MULTI) + eatype == BPF_TRACE_FENTRY_MULTI || eatype == BPF_TRACE_FEXIT_MULTI || + eatype == BPF_TRACE_FSESSION_MULTI) return true; return false; case BPF_PROG_TYPE_LSM: @@ -790,6 +791,7 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog) case BPF_TRACE_FEXIT_MULTI: return BPF_TRAMP_FEXIT; case 
BPF_TRACE_FSESSION: + case BPF_TRACE_FSESSION_MULTI: return BPF_TRAMP_FSESSION; case BPF_LSM_MAC: if (!prog->aux->attach_func_proto->type) @@ -822,13 +824,30 @@ static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog) return 0; } +static struct bpf_tramp_node *fsession_exit(struct bpf_tramp_node *node) +{ + if (node->link->type == BPF_LINK_TYPE_TRACING) { + struct bpf_tracing_link *link; + + link = container_of(node->link, struct bpf_tracing_link, link.link); + return &link->fexit; + } else if (node->link->type == BPF_LINK_TYPE_TRACING_MULTI) { + struct bpf_tracing_multi_link *link; + struct bpf_tracing_multi_node *mnode; + + link = container_of(node->link, struct bpf_tracing_multi_link, link); + mnode = container_of(node, struct bpf_tracing_multi_node, node); + return &link->fexits[mnode - link->nodes]; + } + return NULL; +} + static int bpf_trampoline_add_prog(struct bpf_trampoline *tr, struct bpf_tramp_node *node, int cnt) { - struct bpf_tracing_link *tr_link = NULL; enum bpf_tramp_prog_type kind; - struct bpf_tramp_node *node_existing; + struct bpf_tramp_node *node_existing, *fexit; struct hlist_head *prog_list; kind = bpf_attach_type_to_tramp(node->link->prog); @@ -853,8 +872,10 @@ static int bpf_trampoline_add_prog(struct bpf_trampoline *tr, hlist_add_head(&node->tramp_hlist, prog_list); if (kind == BPF_TRAMP_FSESSION) { tr->progs_cnt[BPF_TRAMP_FENTRY]++; - tr_link = container_of(node, struct bpf_tracing_link, link.node); - hlist_add_head(&tr_link->fexit.tramp_hlist, &tr->progs_hlist[BPF_TRAMP_FEXIT]); + fexit = fsession_exit(node); + if (WARN_ON_ONCE(!fexit)) + return -EINVAL; + hlist_add_head(&fexit->tramp_hlist, &tr->progs_hlist[BPF_TRAMP_FEXIT]); tr->progs_cnt[BPF_TRAMP_FEXIT]++; } else { tr->progs_cnt[kind]++; @@ -865,13 +886,15 @@ static int bpf_trampoline_add_prog(struct bpf_trampoline *tr, static void bpf_trampoline_remove_prog(struct bpf_trampoline *tr, struct bpf_tramp_node *node) { - struct bpf_tracing_link *tr_link; enum 
bpf_tramp_prog_type kind; + struct bpf_tramp_node *fexit; kind = bpf_attach_type_to_tramp(node->link->prog); if (kind == BPF_TRAMP_FSESSION) { - tr_link = container_of(node, struct bpf_tracing_link, link.node); - hlist_del_init(&tr_link->fexit.tramp_hlist); + fexit = fsession_exit(node); + if (WARN_ON_ONCE(!fexit)) + return; + hlist_del_init(&fexit->tramp_hlist); tr->progs_cnt[BPF_TRAMP_FEXIT]--; kind = BPF_TRAMP_FENTRY; } @@ -1615,6 +1638,11 @@ int bpf_trampoline_multi_attach(struct bpf_prog *prog, u32 *ids, mnode->node.link = &link->link; mnode->node.cookie = link->cookies ? link->cookies[i] : 0; + if (prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) { + link->fexits[i].link = &link->link; + link->fexits[i].cookie = link->cookies ? link->cookies[i] : 0; + } + cond_resched(); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5c594047ff0a..0c1cf506c219 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -16384,6 +16384,7 @@ static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_ case BPF_TRACE_FSESSION: case BPF_TRACE_FENTRY_MULTI: case BPF_TRACE_FEXIT_MULTI: + case BPF_TRACE_FSESSION_MULTI: *range = retval_range(0, 0); break; case BPF_TRACE_RAW_TP: @@ -18952,7 +18953,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, tgt_prog->expected_attach_type == BPF_TRACE_FEXIT || tgt_prog->expected_attach_type == BPF_TRACE_FENTRY_MULTI || tgt_prog->expected_attach_type == BPF_TRACE_FEXIT_MULTI || - tgt_prog->expected_attach_type == BPF_TRACE_FSESSION)) { + tgt_prog->expected_attach_type == BPF_TRACE_FSESSION || + tgt_prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) { /* Program extensions can extend all program types * except fentry/fexit. The reason is the following. 
* The fentry/fexit programs are used for performance @@ -19058,9 +19060,11 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: case BPF_TRACE_FSESSION: + case BPF_TRACE_FSESSION_MULTI: case BPF_TRACE_FENTRY_MULTI: case BPF_TRACE_FEXIT_MULTI: - if (prog->expected_attach_type == BPF_TRACE_FSESSION && + if ((prog->expected_attach_type == BPF_TRACE_FSESSION || + prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) && !bpf_jit_supports_fsession()) { bpf_log(log, "JIT does not support fsession\n"); return -EOPNOTSUPP; @@ -19215,6 +19219,7 @@ static bool can_be_sleepable(struct bpf_prog *prog) case BPF_TRACE_RAW_TP: case BPF_TRACE_FENTRY_MULTI: case BPF_TRACE_FEXIT_MULTI: + case BPF_TRACE_FSESSION_MULTI: return true; default: return false; @@ -19301,6 +19306,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) return -EINVAL; } else if ((prog->expected_attach_type == BPF_TRACE_FEXIT || prog->expected_attach_type == BPF_TRACE_FSESSION || + prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI || prog->expected_attach_type == BPF_MODIFY_RETURN) && btf_id_set_contains(&noreturn_deny, btf_id)) { verbose(env, "Attaching fexit/fsession/fmod_ret to __noreturn function '%s' is rejected.\n", @@ -19340,7 +19346,8 @@ int bpf_check_attach_btf_id_multi(struct btf *btf, struct bpf_prog *prog, u32 bt return -EINVAL; /* Check noreturn attachment. */ - if (prog->expected_attach_type == BPF_TRACE_FEXIT_MULTI && + if ((prog->expected_attach_type == BPF_TRACE_FEXIT_MULTI || + prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) && btf_id_set_contains(&noreturn_deny, btf_id)) return -EINVAL; /* Check denied attachment. 
*/ @@ -19623,7 +19630,9 @@ int bpf_fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); *cnt = 1; } else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] && - env->prog->expected_attach_type == BPF_TRACE_FSESSION) { + (env->prog->expected_attach_type == BPF_TRACE_FSESSION || + env->prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) { + /* * inline the bpf_session_is_return() for fsession: * bool bpf_session_is_return(void *ctx) @@ -19636,7 +19645,8 @@ int bpf_fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1); *cnt = 3; } else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] && - env->prog->expected_attach_type == BPF_TRACE_FSESSION) { + (env->prog->expected_attach_type == BPF_TRACE_FSESSION || + env->prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) { /* * inline bpf_session_cookie() for fsession: * __u64 *bpf_session_cookie(void *ctx) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e33492739ed1..a0d688fffc5a 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1334,7 +1334,8 @@ static inline bool is_uprobe_session(const struct bpf_prog *prog) static inline bool is_trace_fsession(const struct bpf_prog *prog) { return prog->type == BPF_PROG_TYPE_TRACING && - prog->expected_attach_type == BPF_TRACE_FSESSION; + (prog->expected_attach_type == BPF_TRACE_FSESSION || + prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI); } static const struct bpf_func_proto * @@ -3659,6 +3660,7 @@ static void bpf_tracing_multi_link_dealloc(struct bpf_link *link) struct bpf_tracing_multi_link *tr_link = container_of(link, struct bpf_tracing_multi_link, link); + kvfree(tr_link->fexits); kvfree(tr_link->cookies); kvfree(tr_link); } @@ -3718,6 +3720,7 @@ static int check_dup_ids(u32 *ids, u64 *cookies, u32 cnt) int bpf_tracing_multi_attach(struct bpf_prog *prog, 
const union bpf_attr *attr) { struct bpf_tracing_multi_link *link = NULL; + struct bpf_tramp_node *fexits = NULL; struct bpf_link_primer link_primer; u32 cnt, *ids = NULL; u64 __user *ucookies; @@ -3761,6 +3764,14 @@ int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr) if (err) goto error; + if (prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) { + fexits = kvmalloc_objs(*fexits, cnt); + if (!fexits) { + err = -ENOMEM; + goto error; + } + } + link = kvzalloc_flex(*link, nodes, cnt); if (!link) { err = -ENOMEM; @@ -3776,6 +3787,7 @@ int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr) link->nodes_cnt = cnt; link->cookies = cookies; + link->fexits = fexits; err = bpf_trampoline_multi_attach(prog, ids, link); kvfree(ids); @@ -3786,6 +3798,7 @@ int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr) return bpf_link_settle(&link_primer); error: + kvfree(fexits); kvfree(cookies); kvfree(ids); kvfree(link); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 67769c700cae..a831682ee982 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -705,6 +705,7 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, case BPF_TRACE_FSESSION: case BPF_TRACE_FENTRY_MULTI: case BPF_TRACE_FEXIT_MULTI: + case BPF_TRACE_FSESSION_MULTI: if (bpf_fentry_test1(1) != 2 || bpf_fentry_test2(2, 3) != 5 || bpf_fentry_test3(4, 5, 6) != 15 || diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 569c15e1cae3..11dd610fa5fa 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1158,6 +1158,7 @@ enum bpf_attach_type { BPF_TRACE_FSESSION, BPF_TRACE_FENTRY_MULTI, BPF_TRACE_FEXIT_MULTI, + BPF_TRACE_FSESSION_MULTI, __MAX_BPF_ATTACH_TYPE }; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 59405d318624..62f088359c5e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -138,6 +138,7 @@ static const char * const 
attach_type_name[] = { [BPF_TRACE_UPROBE_SESSION] = "trace_uprobe_session", [BPF_TRACE_FENTRY_MULTI] = "trace_fentry_multi", [BPF_TRACE_FEXIT_MULTI] = "trace_fexit_multi", + [BPF_TRACE_FSESSION_MULTI] = "trace_fsession_multi", }; static const char * const link_type_name[] = { -- cgit v1.2.3 From fe9c8cb2b52b455149d363bbca0fc3648ba0cea6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:42 +0200 Subject: libbpf: Add bpf_object_cleanup_btf function Adding bpf_object_cleanup_btf function to cleanup btf objects. It will be used in following changes. Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-18-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 62f088359c5e..5bdaa5eb1f50 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8941,13 +8941,10 @@ static void bpf_object_unpin(struct bpf_object *obj) bpf_map__unpin(&obj->maps[i], NULL); } -static void bpf_object_post_load_cleanup(struct bpf_object *obj) +static void bpf_object_cleanup_btf(struct bpf_object *obj) { int i; - /* clean up fd_array */ - zfree(&obj->fd_array); - /* clean up module BTFs */ for (i = 0; i < obj->btf_module_cnt; i++) { close(obj->btf_modules[i].fd); @@ -8955,6 +8952,8 @@ static void bpf_object_post_load_cleanup(struct bpf_object *obj) free(obj->btf_modules[i].name); } obj->btf_module_cnt = 0; + obj->btf_module_cap = 0; + obj->btf_modules_loaded = false; zfree(&obj->btf_modules); /* clean up vmlinux BTF */ @@ -8962,6 +8961,15 @@ static void bpf_object_post_load_cleanup(struct bpf_object *obj) obj->btf_vmlinux = NULL; } +static void bpf_object_post_load_cleanup(struct bpf_object *obj) +{ + /* clean up fd_array */ + zfree(&obj->fd_array); + + /* clean up BTF */ + bpf_object_cleanup_btf(obj); +} + static int bpf_object_prepare(struct bpf_object *obj, 
const char *target_btf_path) { int err; -- cgit v1.2.3 From 630e85a9f0056a7534601ed1ec2532d6ac85b7d7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:43 +0200 Subject: libbpf: Add bpf_link_create support for tracing_multi link Adding bpf_link_create support for tracing_multi link with new tracing_multi record in struct bpf_link_create_opts. Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-19-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 9 +++++++++ tools/lib/bpf/bpf.h | 5 +++++ 2 files changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index bc513aa8f404..f37e3416f61a 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -845,6 +845,15 @@ int bpf_link_create(int prog_fd, int target_fd, if (!OPTS_ZEROED(opts, uprobe_multi)) return libbpf_err(-EINVAL); break; + case BPF_TRACE_FENTRY_MULTI: + case BPF_TRACE_FEXIT_MULTI: + case BPF_TRACE_FSESSION_MULTI: + attr.link_create.tracing_multi.ids = ptr_to_u64(OPTS_GET(opts, tracing_multi.ids, 0)); + attr.link_create.tracing_multi.cookies = ptr_to_u64(OPTS_GET(opts, tracing_multi.cookies, 0)); + attr.link_create.tracing_multi.cnt = OPTS_GET(opts, tracing_multi.cnt, 0); + if (!OPTS_ZEROED(opts, tracing_multi)) + return libbpf_err(-EINVAL); + break; case BPF_TRACE_RAW_TP: case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 2312900a3263..012354131cf6 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -469,6 +469,11 @@ struct bpf_link_create_opts { __u32 relative_id; __u64 expected_revision; } cgroup; + struct { + const __u32 *ids; + const __u64 *cookies; + __u32 cnt; + } tracing_multi; }; size_t :0; }; -- cgit v1.2.3 From 616a93b473a6ab33494db27057f8a413f375ac4f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:44 +0200 Subject: libbpf: Add btf_type_is_traceable_func function Adding btf_type_is_traceable_func 
function to perform same checks as the kernel's btf_distill_func_proto function to prevent attachment on some of the functions. Exporting the function via libbpf_internal.h because it will be used by benchmark test in following changes. Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-20-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 79 +++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf_internal.h | 1 + 2 files changed, 80 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5bdaa5eb1f50..42f0efd70327 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -12450,6 +12450,85 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru return ret; } +#define MAX_BPF_FUNC_ARGS 12 + +static bool btf_type_is_modifier(const struct btf_type *t) +{ + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPE_TAG: + return true; + default: + return false; + } +} + +#define MAX_RESOLVE_DEPTH 32 + +static int btf_get_type_size(const struct btf *btf, __u32 type_id, + const struct btf_type **ret_type) +{ + const struct btf_type *t; + int i; + + *ret_type = btf__type_by_id(btf, 0); + if (!type_id) + return 0; + t = btf__type_by_id(btf, type_id); + for (i = 0; i < MAX_RESOLVE_DEPTH && t && btf_type_is_modifier(t); i++) + t = btf__type_by_id(btf, t->type); + if (!t || i == MAX_RESOLVE_DEPTH) + return -EINVAL; + *ret_type = t; + if (btf_is_ptr(t)) + return btf__pointer_size(btf); + if (btf_is_int(t) || btf_is_any_enum(t) || btf_is_struct(t) || btf_is_union(t)) + return t->size; + return -EINVAL; +} + +bool btf_type_is_traceable_func(const struct btf *btf, const struct btf_type *t) +{ + const struct btf_param *args; + const struct btf_type *proto; + __u32 i, nargs; + int ret; + + if (!btf_is_func(t)) + return false; + proto = 
btf__type_by_id(btf, t->type); + if (!proto || !btf_is_func_proto(proto)) + return false; + + args = (const struct btf_param *)(proto + 1); + nargs = btf_vlen(proto); + if (nargs > MAX_BPF_FUNC_ARGS) + return false; + + /* No support for struct return type. */ + ret = btf_get_type_size(btf, proto->type, &t); + if (ret < 0 || btf_is_struct(t) || btf_is_union(t)) + return false; + + for (i = 0; i < nargs; i++) { + /* No support for variable args. */ + if (i == nargs - 1 && args[i].type == 0) + return false; + ret = btf_get_type_size(btf, args[i].type, &t); + /* No support of struct argument size greater than 16 bytes. */ + if (ret < 0 || ret > 16) + return false; + /* No support for void argument. */ + if (ret == 0) + return false; + } + + return true; +} + static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, const char *binary_path, size_t offset) { diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 7d93c6c01d60..04cd303fb5a8 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -250,6 +250,7 @@ const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, _ const struct btf_header *btf_header(const struct btf *btf); void btf_set_base_btf(struct btf *btf, const struct btf *base_btf); int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **id_map); +bool btf_type_is_traceable_func(const struct btf *btf, const struct btf_type *t); static inline enum btf_func_linkage btf_func_linkage(const struct btf_type *t) { -- cgit v1.2.3 From f2aa370dfe571abf51631c1ac27bb58d5d0e3466 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:45 +0200 Subject: libbpf: Add support to create tracing multi link Adding bpf_program__attach_tracing_multi function for attaching tracing program to multiple functions. 
struct bpf_link * bpf_program__attach_tracing_multi(const struct bpf_program *prog, const char *pattern, const struct bpf_tracing_multi_opts *opts); User can specify functions to attach with 'pattern' argument that allows wildcards ('*' and '?' supported) or provide BTF ids of functions in array directly via opts argument. These options are mutually exclusive. When using BTF ids, user can also provide cookie value for each provided id/function, that can be retrieved later in bpf program with bpf_get_attach_cookie helper. Each cookie value is paired with provided BTF id with the same array index. Adding support to auto attach programs with following sections: fsession.multi/<pattern> fsession.multi.s/<pattern> fentry.multi/<pattern> fexit.multi/<pattern> fentry.multi.s/<pattern> fexit.multi.s/<pattern> The provided <pattern> is used as the 'pattern' argument of the bpf_program__attach_tracing_multi function. The <pattern> allows specifying an optional kernel module name with the following syntax: <module>:<pattern> In order to attach tracing_multi link to module functions: - program must be loaded with 'module' btf fd (in attr::attach_btf_obj_fd) - bpf_program__attach_tracing_multi must either have pattern with module spec or BTF ids from the module Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-21-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 276 +++++++++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.h | 15 +++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 292 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 42f0efd70327..1368752aa13c 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7772,6 +7772,69 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, int *btf_obj_fd, int *btf_type_id); +static inline bool is_tracing_multi(enum bpf_attach_type type) +{ + return type == BPF_TRACE_FENTRY_MULTI || type == 
BPF_TRACE_FEXIT_MULTI || + type == BPF_TRACE_FSESSION_MULTI; +} + +static const struct module_btf *find_attach_module(struct bpf_object *obj, const char *attach) +{ + const char *sep, *mod_name = NULL; + int i, mod_len, err; + + /* + * We expect attach string in the form of either + * - function_pattern or + * - :function_pattern + */ + sep = strchr(attach, ':'); + if (sep) { + mod_name = attach; + mod_len = sep - mod_name; + } + if (!mod_name) + return NULL; + + err = load_module_btfs(obj); + if (err) + return NULL; + + for (i = 0; i < obj->btf_module_cnt; i++) { + const struct module_btf *mod = &obj->btf_modules[i]; + + if (strncmp(mod->name, mod_name, mod_len) == 0 && mod->name[mod_len] == '\0') + return mod; + } + return NULL; +} + +static int tracing_multi_mod_fd(struct bpf_program *prog, int *btf_obj_fd) +{ + const char *attach_name, *sep; + const struct module_btf *mod; + + *btf_obj_fd = 0; + attach_name = strchr(prog->sec_name, '/'); + + /* Program with no details in spec, using kernel btf. */ + if (!attach_name) + return 0; + + /* Program with no module section, using kernel btf. */ + sep = strchr(++attach_name, ':'); + if (!sep) + return 0; + + /* Program with module specified, get its btf fd. 
*/ + mod = find_attach_module(prog->obj, attach_name); + if (!mod) + return -EINVAL; + + *btf_obj_fd = mod->fd; + return 0; +} + /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */ static int libbpf_prepare_prog_load(struct bpf_program *prog, struct bpf_prog_load_opts *opts, long cookie) @@ -7835,6 +7898,18 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog, opts->attach_btf_obj_fd = btf_obj_fd; opts->attach_btf_id = btf_type_id; } + + if (is_tracing_multi(prog->expected_attach_type)) { + int err, btf_obj_fd = 0; + + err = tracing_multi_mod_fd(prog, &btf_obj_fd); + if (err < 0) + return err; + + prog->attach_btf_obj_fd = btf_obj_fd; + opts->attach_btf_obj_fd = btf_obj_fd; + } + return 0; } @@ -9996,6 +10071,7 @@ static int attach_kprobe_session(const struct bpf_program *prog, long cookie, st static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_tracing_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); static const struct bpf_sec_def section_defs[] = { SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE), @@ -10049,6 +10125,12 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), SEC_DEF("fsession+", TRACING, BPF_TRACE_FSESSION, SEC_ATTACH_BTF, attach_trace), SEC_DEF("fsession.s+", TRACING, BPF_TRACE_FSESSION, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fsession.multi+", TRACING, BPF_TRACE_FSESSION_MULTI, 0, attach_tracing_multi), + SEC_DEF("fsession.multi.s+", TRACING, BPF_TRACE_FSESSION_MULTI, SEC_SLEEPABLE, attach_tracing_multi), + SEC_DEF("fentry.multi+", TRACING, BPF_TRACE_FENTRY_MULTI, 0, attach_tracing_multi), + SEC_DEF("fexit.multi+", 
TRACING, BPF_TRACE_FEXIT_MULTI, 0, attach_tracing_multi), + SEC_DEF("fentry.multi.s+", TRACING, BPF_TRACE_FENTRY_MULTI, SEC_SLEEPABLE, attach_tracing_multi), + SEC_DEF("fexit.multi.s+", TRACING, BPF_TRACE_FEXIT_MULTI, SEC_SLEEPABLE, attach_tracing_multi), SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), @@ -12529,6 +12611,200 @@ bool btf_type_is_traceable_func(const struct btf *btf, const struct btf_type *t) return true; } +static int +collect_btf_func_ids_by_glob(const struct btf *btf, const char *pattern, __u32 **ids) +{ + __u32 type_id, nr_types = btf__type_cnt(btf); + size_t cap = 0, cnt = 0; + + if (!pattern) + return -EINVAL; + + for (type_id = 1; type_id < nr_types; type_id++) { + const struct btf_type *t = btf__type_by_id(btf, type_id); + const char *name; + int err; + + if (btf_kind(t) != BTF_KIND_FUNC) + continue; + name = btf__name_by_offset(btf, t->name_off); + if (!name) + continue; + + if (!glob_match(name, pattern)) + continue; + if (!btf_type_is_traceable_func(btf, t)) + continue; + + err = libbpf_ensure_mem((void **) ids, &cap, sizeof(**ids), cnt + 1); + if (err) { + free(*ids); + return -ENOMEM; + } + (*ids)[cnt++] = type_id; + } + + return cnt; +} + +static int collect_func_ids_by_glob(const struct bpf_program *prog, const char *pattern, __u32 **ids) +{ + struct bpf_object *obj = prog->obj; + const struct module_btf *mod; + struct btf *btf = NULL; + const char *sep; + int err; + + err = bpf_object__load_vmlinux_btf(obj, true); + if (err) + return err; + + /* In case we have module specified, we will find its btf and use that. */ + sep = strchr(pattern, ':'); + if (sep) { + mod = find_attach_module(obj, pattern); + if (!mod) { + err = -EINVAL; + goto cleanup; + } + btf = mod->btf; + pattern = sep + 1; + } else { + /* Program is loaded for kernel module. 
*/ + if (prog->attach_btf_obj_fd) { + err = -EINVAL; + goto cleanup; + } + btf = obj->btf_vmlinux; + } + + err = collect_btf_func_ids_by_glob(btf, pattern, ids); + +cleanup: + bpf_object_cleanup_btf(obj); + return err; +} + +struct bpf_link * +bpf_program__attach_tracing_multi(const struct bpf_program *prog, const char *pattern, + const struct bpf_tracing_multi_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, lopts); + int prog_fd, link_fd, err, cnt; + __u32 *free_ids = NULL; + struct bpf_link *link; + const __u64 *cookies; + const __u32 *ids; + + if (!OPTS_VALID(opts, bpf_tracing_multi_opts)) + return libbpf_err_ptr(-EINVAL); + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + cnt = OPTS_GET(opts, cnt, 0); + ids = OPTS_GET(opts, ids, NULL); + cookies = OPTS_GET(opts, cookies, NULL); + + if (!!ids != !!cnt) + return libbpf_err_ptr(-EINVAL); + if (pattern && (ids || cookies)) + return libbpf_err_ptr(-EINVAL); + if (!pattern && !ids) + return libbpf_err_ptr(-EINVAL); + + if (pattern) { + cnt = collect_func_ids_by_glob(prog, pattern, &free_ids); + if (cnt < 0) + return libbpf_err_ptr(cnt); + if (cnt == 0) + return libbpf_err_ptr(-EINVAL); + ids = (const __u32 *) free_ids; + } + + lopts.tracing_multi.ids = ids; + lopts.tracing_multi.cookies = cookies; + lopts.tracing_multi.cnt = cnt; + + link = calloc(1, sizeof(*link)); + if (!link) { + err = -ENOMEM; + goto error; + } + link->detach = &bpf_link__detach_fd; + + link_fd = bpf_link_create(prog_fd, 0, prog->expected_attach_type, &lopts); + if (link_fd < 0) { + err = -errno; + pr_warn("prog '%s': failed to attach: %s\n", prog->name, errstr(err)); + goto error; + } + link->fd = link_fd; + free(free_ids); + return link; + +error: + free(link); + free(free_ids); + return libbpf_err_ptr(err); +} + +static int attach_tracing_multi(const struct bpf_program *prog, long cookie, struct 
bpf_link **link) +{ + static const char *const prefixes[] = { + "fentry.multi", + "fexit.multi", + "fsession.multi", + "fentry.multi.s", + "fexit.multi.s", + "fsession.multi.s", + }; + const char *spec = NULL; + char *pattern; + size_t i; + int n; + + *link = NULL; + + for (i = 0; i < ARRAY_SIZE(prefixes); i++) { + size_t pfx_len; + + if (!str_has_pfx(prog->sec_name, prefixes[i])) + continue; + + pfx_len = strlen(prefixes[i]); + /* no auto-attach case of, e.g., SEC("fentry.multi") */ + if (prog->sec_name[pfx_len] == '\0') + return 0; + + if (prog->sec_name[pfx_len] != '/') + continue; + + spec = prog->sec_name + pfx_len + 1; + break; + } + + if (!spec) { + pr_warn("prog '%s': invalid section name '%s'\n", + prog->name, prog->sec_name); + return -EINVAL; + } + + n = sscanf(spec, "%m[a-zA-Z0-9_.*?:]", &pattern); + if (n < 1) { + pr_warn("tracing multi pattern is invalid: %s\n", spec); + return -EINVAL; + } + + *link = bpf_program__attach_tracing_multi(prog, pattern, NULL); + free(pattern); + return libbpf_get_error(*link); +} + static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, const char *binary_path, size_t offset) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index bba4e8464396..b965ad571540 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -726,6 +726,21 @@ bpf_program__attach_ksyscall(const struct bpf_program *prog, const char *syscall_name, const struct bpf_ksyscall_opts *opts); +struct bpf_tracing_multi_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + const __u32 *ids; + const __u64 *cookies; + size_t cnt; + size_t :0; +}; + +#define bpf_tracing_multi_opts__last_field cnt + +LIBBPF_API struct bpf_link * +bpf_program__attach_tracing_multi(const struct bpf_program *prog, const char *pattern, + const struct bpf_tracing_multi_opts *opts); + struct bpf_uprobe_opts { /* size of this struct, for forward/backward compatibility */ size_t sz; diff --git 
a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index dfed8d60af05..b731df19ae69 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -458,6 +458,7 @@ LIBBPF_1.7.0 { LIBBPF_1.8.0 { global: + bpf_program__attach_tracing_multi; bpf_program__clone; btf__new_empty_opts; } LIBBPF_1.7.0; -- cgit v1.2.3 From 2922dd58413cd9a7d9cbe029e7d60f3bc432c553 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:46 +0200 Subject: selftests/bpf: Add tracing multi skel/pattern/ids attach tests Adding tests for tracing_multi link attachment via all possible libbpf apis - skeleton, function pattern and btf ids. Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-22-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 3 +- .../selftests/bpf/prog_tests/tracing_multi.c | 258 +++++++++++++++++++++ .../selftests/bpf/progs/tracing_multi_attach.c | 39 ++++ .../selftests/bpf/progs/tracing_multi_check.c | 151 ++++++++++++ 4 files changed, 450 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/tracing_multi.c create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_attach.c create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_check.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 42d9cf848b25..fd885beee0fd 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -512,7 +512,7 @@ SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ linked_vars.skel.h linked_maps.skel.h \ test_subskeleton.skel.h test_subskeleton_lib.skel.h \ - test_usdt.skel.h + test_usdt.skel.h tracing_multi.skel.h LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c \ core_kern.c core_kern_overflow.c test_ringbuf.c \ @@ -538,6 +538,7 @@ test_usdt.skel.h-deps := 
test_usdt.bpf.o test_usdt_multispec.bpf.o xsk_xdp_progs.skel.h-deps := xsk_xdp_progs.bpf.o xdp_hw_metadata.skel.h-deps := xdp_hw_metadata.bpf.o xdp_features.skel.h-deps := xdp_features.bpf.o +tracing_multi.skel.h-deps := tracing_multi_attach.bpf.o tracing_multi_check.bpf.o LINKED_BPF_OBJS := $(foreach skel,$(LINKED_SKELS),$($(skel)-deps)) LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(LINKED_BPF_OBJS)) diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c new file mode 100644 index 000000000000..f333b2514b34 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include "bpf/libbpf_internal.h" +#include "tracing_multi.skel.h" +#include "trace_helpers.h" + +static const char * const bpf_fentry_test[] = { + "bpf_fentry_test1", + "bpf_fentry_test2", + "bpf_fentry_test3", + "bpf_fentry_test4", + "bpf_fentry_test5", + "bpf_fentry_test6", + "bpf_fentry_test7", + "bpf_fentry_test8", + "bpf_fentry_test9", + "bpf_fentry_test10", +}; + +#define FUNCS_CNT (ARRAY_SIZE(bpf_fentry_test)) + +static int compare(const void *ppa, const void *ppb) +{ + const char *pa = *(const char **) ppa; + const char *pb = *(const char **) ppb; + + return strcmp(pa, pb); +} + +static void tdestroy_free_nop(void *ptr) +{ +} + +static __u32 *get_ids(const char * const funcs[], int funcs_cnt, const char *mod) +{ + struct btf *btf, *vmlinux_btf = NULL; + __u32 nr, type_id, cnt = 0; + void *root = NULL; + __u32 *ids = NULL; + int i, err = 0; + + btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(btf, "btf__load_vmlinux_btf")) + return NULL; + + if (mod) { + vmlinux_btf = btf; + btf = btf__load_module_btf(mod, vmlinux_btf); + if (!ASSERT_OK_PTR(btf, "btf__load_module_btf")) { + btf__free(vmlinux_btf); + return NULL; + } + } + + ids = calloc(funcs_cnt, sizeof(ids[0])); + if (!ids) + goto out; + + /* + * We sort function names by 
name and search them + * below for each function. + */ + for (i = 0; i < funcs_cnt; i++) { + if (!tsearch(&funcs[i], &root, compare)) { + ASSERT_FAIL("tsearch failed"); + err = -1; + goto error; + } + } + + nr = btf__type_cnt(btf); + for (type_id = 1; type_id < nr && cnt < funcs_cnt; type_id++) { + const struct btf_type *type; + const char *str, ***val; + unsigned int idx; + + type = btf__type_by_id(btf, type_id); + if (!type) { + err = -1; + break; + } + + if (BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) + continue; + + str = btf__name_by_offset(btf, type->name_off); + if (!str) { + err = -1; + break; + } + + val = tfind(&str, &root, compare); + if (!val) + continue; + + /* + * We keep pointer for each function name so we can get the original + * array index and have the resulting ids array matching the original + * function array. + * + * Doing it this way allow us to easily test the cookies support, + * because each cookie is attached to particular function/id. + */ + idx = *val - funcs; + ids[idx] = type_id; + cnt++; + } + +error: + if (err) { + free(ids); + ids = NULL; + } + +out: + tdestroy(root, tdestroy_free_nop); + btf__free(vmlinux_btf); + btf__free(btf); + return ids; +} + +static void tracing_multi_test_run(struct tracing_multi *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + prog_fd = bpf_program__fd(skel->progs.test_fentry); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + + /* extra +1 count for sleepable programs */ + ASSERT_EQ(skel->bss->test_result_fentry, FUNCS_CNT + 1, "test_result_fentry"); + ASSERT_EQ(skel->bss->test_result_fexit, FUNCS_CNT + 1, "test_result_fexit"); +} + +static void test_skel_api(void) +{ + struct tracing_multi *skel; + int err; + + skel = tracing_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_multi__open_and_load")) + return; + + skel->bss->pid = getpid(); + + err = tracing_multi__attach(skel); + if (!ASSERT_OK(err, "tracing_multi__attach")) + goto 
cleanup; + + tracing_multi_test_run(skel); + +cleanup: + tracing_multi__destroy(skel); +} + +static void test_link_api_pattern(void) +{ + struct tracing_multi *skel; + + skel = tracing_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_multi__open_and_load")) + return; + + skel->bss->pid = getpid(); + + skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry, + "bpf_fentry_test*", NULL); + if (!ASSERT_OK_PTR(skel->links.test_fentry, "bpf_program__attach_tracing_multi")) + goto cleanup; + + skel->links.test_fexit = bpf_program__attach_tracing_multi(skel->progs.test_fexit, + "bpf_fentry_test*", NULL); + if (!ASSERT_OK_PTR(skel->links.test_fexit, "bpf_program__attach_tracing_multi")) + goto cleanup; + + skel->links.test_fentry_s = bpf_program__attach_tracing_multi(skel->progs.test_fentry_s, + "bpf_fentry_test1", NULL); + if (!ASSERT_OK_PTR(skel->links.test_fentry_s, "bpf_program__attach_tracing_multi")) + goto cleanup; + + skel->links.test_fexit_s = bpf_program__attach_tracing_multi(skel->progs.test_fexit_s, + "bpf_fentry_test1", NULL); + if (!ASSERT_OK_PTR(skel->links.test_fexit_s, "bpf_program__attach_tracing_multi")) + goto cleanup; + + tracing_multi_test_run(skel); + +cleanup: + tracing_multi__destroy(skel); +} + +static void test_link_api_ids(void) +{ + LIBBPF_OPTS(bpf_tracing_multi_opts, opts); + struct tracing_multi *skel; + size_t cnt = FUNCS_CNT; + __u32 *ids; + + skel = tracing_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_multi__open_and_load")) + return; + + skel->bss->pid = getpid(); + + ids = get_ids(bpf_fentry_test, cnt, NULL); + if (!ASSERT_OK_PTR(ids, "get_ids")) + goto cleanup; + + opts.ids = ids; + opts.cnt = cnt; + + skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry, + NULL, &opts); + if (!ASSERT_OK_PTR(skel->links.test_fentry, "bpf_program__attach_tracing_multi")) + goto cleanup; + + skel->links.test_fexit = 
bpf_program__attach_tracing_multi(skel->progs.test_fexit, + NULL, &opts); + if (!ASSERT_OK_PTR(skel->links.test_fexit, "bpf_program__attach_tracing_multi")) + goto cleanup; + + /* Only bpf_fentry_test1 is allowed for sleepable programs. */ + opts.cnt = 1; + skel->links.test_fentry_s = bpf_program__attach_tracing_multi(skel->progs.test_fentry_s, + NULL, &opts); + if (!ASSERT_OK_PTR(skel->links.test_fentry_s, "bpf_program__attach_tracing_multi")) + goto cleanup; + + skel->links.test_fexit_s = bpf_program__attach_tracing_multi(skel->progs.test_fexit_s, + NULL, &opts); + if (!ASSERT_OK_PTR(skel->links.test_fexit_s, "bpf_program__attach_tracing_multi")) + goto cleanup; + + tracing_multi_test_run(skel); + +cleanup: + tracing_multi__destroy(skel); + free(ids); +} + +void test_tracing_multi_test(void) +{ +#ifndef __x86_64__ + test__skip(); + return; +#endif + + if (test__start_subtest("skel_api")) + test_skel_api(); + if (test__start_subtest("link_api_pattern")) + test_link_api_pattern(); + if (test__start_subtest("link_api_ids")) + test_link_api_ids(); +} diff --git a/tools/testing/selftests/bpf/progs/tracing_multi_attach.c b/tools/testing/selftests/bpf/progs/tracing_multi_attach.c new file mode 100644 index 000000000000..332d0a423a43 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tracing_multi_attach.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +__hidden extern int tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return); + +__u64 test_result_fentry = 0; +__u64 test_result_fexit = 0; + +SEC("fentry.multi/bpf_fentry_test*") +int BPF_PROG(test_fentry) +{ + tracing_multi_arg_check(ctx, &test_result_fentry, false); + return 0; +} + +SEC("fexit.multi/bpf_fentry_test*") +int BPF_PROG(test_fexit) +{ + tracing_multi_arg_check(ctx, &test_result_fexit, true); + return 0; +} + +SEC("fentry.multi.s/bpf_fentry_test1") +int BPF_PROG(test_fentry_s) +{ + 
tracing_multi_arg_check(ctx, &test_result_fentry, false); + return 0; +} + +SEC("fexit.multi.s/bpf_fentry_test1") +int BPF_PROG(test_fexit_s) +{ + tracing_multi_arg_check(ctx, &test_result_fexit, true); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/tracing_multi_check.c b/tools/testing/selftests/bpf/progs/tracing_multi_check.c new file mode 100644 index 000000000000..333a3a7bae8a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tracing_multi_check.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +int pid = 0; + +/* bpf_fentry_test1 is exported as kfunc via vmlinux.h */ +extern const void bpf_fentry_test2 __ksym; +extern const void bpf_fentry_test3 __ksym; +extern const void bpf_fentry_test4 __ksym; +extern const void bpf_fentry_test5 __ksym; +extern const void bpf_fentry_test6 __ksym; +extern const void bpf_fentry_test7 __ksym; +extern const void bpf_fentry_test8 __ksym; +extern const void bpf_fentry_test9 __ksym; +extern const void bpf_fentry_test10 __ksym; + +int tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return) +{ + void *ip = (void *) bpf_get_func_ip(ctx); + __u64 value = 0, ret = 0; + long err = 0; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 1; + + if (is_return) + err |= bpf_get_func_ret(ctx, &ret); + + if (ip == &bpf_fentry_test1) { + int a; + + err |= bpf_get_func_arg(ctx, 0, &value); + a = (int) value; + + err |= is_return ? ret != 2 : 0; + + *test_result += err == 0 && a == 1; + } else if (ip == &bpf_fentry_test2) { + __u64 b; + int a; + + err |= bpf_get_func_arg(ctx, 0, &value); + a = (int) value; + err |= bpf_get_func_arg(ctx, 1, &value); + b = value; + + err |= is_return ? 
ret != 5 : 0; + + *test_result += err == 0 && a == 2 && b == 3; + } else if (ip == &bpf_fentry_test3) { + __u64 c; + char a; + int b; + + err |= bpf_get_func_arg(ctx, 0, &value); + a = (char) value; + err |= bpf_get_func_arg(ctx, 1, &value); + b = (int) value; + err |= bpf_get_func_arg(ctx, 2, &value); + c = value; + + err |= is_return ? ret != 15 : 0; + + *test_result += err == 0 && a == 4 && b == 5 && c == 6; + } else if (ip == &bpf_fentry_test4) { + void *a; + char b; + int c; + __u64 d; + + err |= bpf_get_func_arg(ctx, 0, &value); + a = (void *) value; + err |= bpf_get_func_arg(ctx, 1, &value); + b = (char) value; + err |= bpf_get_func_arg(ctx, 2, &value); + c = (int) value; + err |= bpf_get_func_arg(ctx, 3, &value); + d = value; + + err |= is_return ? ret != 34 : 0; + + *test_result += err == 0 && a == (void *) 7 && b == 8 && c == 9 && d == 10; + } else if (ip == &bpf_fentry_test5) { + __u64 a; + void *b; + short c; + int d; + __u64 e; + + err |= bpf_get_func_arg(ctx, 0, &value); + a = value; + err |= bpf_get_func_arg(ctx, 1, &value); + b = (void *) value; + err |= bpf_get_func_arg(ctx, 2, &value); + c = (short) value; + err |= bpf_get_func_arg(ctx, 3, &value); + d = (int) value; + err |= bpf_get_func_arg(ctx, 4, &value); + e = value; + + err |= is_return ? ret != 65 : 0; + + *test_result += err == 0 && a == 11 && b == (void *) 12 && c == 13 && d == 14 && e == 15; + } else if (ip == &bpf_fentry_test6) { + __u64 a; + void *b; + short c; + int d; + void *e; + __u64 f; + + err |= bpf_get_func_arg(ctx, 0, &value); + a = value; + err |= bpf_get_func_arg(ctx, 1, &value); + b = (void *) value; + err |= bpf_get_func_arg(ctx, 2, &value); + c = (short) value; + err |= bpf_get_func_arg(ctx, 3, &value); + d = (int) value; + err |= bpf_get_func_arg(ctx, 4, &value); + e = (void *) value; + err |= bpf_get_func_arg(ctx, 5, &value); + f = value; + + err |= is_return ? 
ret != 111 : 0; + + *test_result += err == 0 && a == 16 && b == (void *) 17 && c == 18 && d == 19 && e == (void *) 20 && f == 21; + } else if (ip == &bpf_fentry_test7) { + err |= is_return ? ret != 0 : 0; + + *test_result += err == 0 ? 1 : 0; + } else if (ip == &bpf_fentry_test8) { + err |= is_return ? ret != 0 : 0; + + *test_result += err == 0 ? 1 : 0; + } else if (ip == &bpf_fentry_test9) { + err |= is_return ? ret != 0 : 0; + + *test_result += err == 0 ? 1 : 0; + } else if (ip == &bpf_fentry_test10) { + err |= is_return ? ret != 0 : 0; + + *test_result += err == 0 ? 1 : 0; + } + + return 0; +} -- cgit v1.2.3 From 2863f074f146adf7f63bd567de05ae03fad64a01 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:47 +0200 Subject: selftests/bpf: Add tracing multi skel/pattern/ids module attach tests Adding tests for tracing_multi link attachment via all possible libbpf apis - skeleton, function pattern and btf ids on top of bpf_testmod kernel module. Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-23-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 4 +- .../selftests/bpf/prog_tests/tracing_multi.c | 105 +++++++++++++++++++++ .../bpf/progs/tracing_multi_attach_module.c | 25 +++++ .../selftests/bpf/progs/tracing_multi_check.c | 50 ++++++++++ 4 files changed, 183 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_attach_module.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index fd885beee0fd..ed220558d41b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -512,7 +512,8 @@ SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ linked_vars.skel.h linked_maps.skel.h \ test_subskeleton.skel.h test_subskeleton_lib.skel.h \ - test_usdt.skel.h tracing_multi.skel.h 
+ test_usdt.skel.h tracing_multi.skel.h \ + tracing_multi_module.skel.h LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c \ core_kern.c core_kern_overflow.c test_ringbuf.c \ @@ -539,6 +540,7 @@ xsk_xdp_progs.skel.h-deps := xsk_xdp_progs.bpf.o xdp_hw_metadata.skel.h-deps := xdp_hw_metadata.bpf.o xdp_features.skel.h-deps := xdp_features.bpf.o tracing_multi.skel.h-deps := tracing_multi_attach.bpf.o tracing_multi_check.bpf.o +tracing_multi_module.skel.h-deps := tracing_multi_attach_module.bpf.o tracing_multi_check.bpf.o LINKED_BPF_OBJS := $(foreach skel,$(LINKED_SKELS),$($(skel)-deps)) LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(LINKED_BPF_OBJS)) diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c index f333b2514b34..77134f1e2dc3 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c @@ -5,6 +5,7 @@ #include #include "bpf/libbpf_internal.h" #include "tracing_multi.skel.h" +#include "tracing_multi_module.skel.h" #include "trace_helpers.h" static const char * const bpf_fentry_test[] = { @@ -20,6 +21,14 @@ static const char * const bpf_fentry_test[] = { "bpf_fentry_test10", }; +static const char * const bpf_testmod_fentry_test[] = { + "bpf_testmod_fentry_test1", + "bpf_testmod_fentry_test2", + "bpf_testmod_fentry_test3", + "bpf_testmod_fentry_test7", + "bpf_testmod_fentry_test11", +}; + #define FUNCS_CNT (ARRAY_SIZE(bpf_fentry_test)) static int compare(const void *ppa, const void *ppb) @@ -242,6 +251,96 @@ cleanup: free(ids); } +static void test_module_skel_api(void) +{ + struct tracing_multi_module *skel = NULL; + int err; + + skel = tracing_multi_module__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_multi__open_and_load")) + return; + + skel->bss->pid = getpid(); + + err = tracing_multi_module__attach(skel); + if (!ASSERT_OK(err, "tracing_multi__attach")) + goto cleanup; + + 
ASSERT_OK(trigger_module_test_read(1), "trigger_read"); + ASSERT_EQ(skel->bss->test_result_fentry, 5, "test_result_fentry"); + ASSERT_EQ(skel->bss->test_result_fexit, 5, "test_result_fexit"); + +cleanup: + tracing_multi_module__destroy(skel); +} + +static void test_module_link_api_pattern(void) +{ + struct tracing_multi_module *skel = NULL; + + skel = tracing_multi_module__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_multi_module__open_and_load")) + return; + + skel->bss->pid = getpid(); + + skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry, + "bpf_testmod:bpf_testmod_fentry_test*", NULL); + if (!ASSERT_OK_PTR(skel->links.test_fentry, "bpf_program__attach_tracing_multi")) + goto cleanup; + + skel->links.test_fexit = bpf_program__attach_tracing_multi(skel->progs.test_fexit, + "bpf_testmod:bpf_testmod_fentry_test*", NULL); + if (!ASSERT_OK_PTR(skel->links.test_fexit, "bpf_program__attach_tracing_multi")) + goto cleanup; + + ASSERT_OK(trigger_module_test_read(1), "trigger_read"); + ASSERT_EQ(skel->bss->test_result_fentry, 5, "test_result_fentry"); + ASSERT_EQ(skel->bss->test_result_fexit, 5, "test_result_fexit"); + +cleanup: + tracing_multi_module__destroy(skel); +} + +static void test_module_link_api_ids(void) +{ + size_t cnt = ARRAY_SIZE(bpf_testmod_fentry_test); + LIBBPF_OPTS(bpf_tracing_multi_opts, opts); + struct tracing_multi_module *skel = NULL; + __u32 *ids; + + skel = tracing_multi_module__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_multi_module__open_and_load")) + return; + + skel->bss->pid = getpid(); + + ids = get_ids(bpf_testmod_fentry_test, cnt, "bpf_testmod"); + if (!ASSERT_OK_PTR(ids, "get_ids")) + goto cleanup; + + opts.ids = ids; + opts.cnt = cnt; + + skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry, + NULL, &opts); + if (!ASSERT_OK_PTR(skel->links.test_fentry, "bpf_program__attach_tracing_multi")) + goto cleanup; + + skel->links.test_fexit = 
bpf_program__attach_tracing_multi(skel->progs.test_fexit, + NULL, &opts); + if (!ASSERT_OK_PTR(skel->links.test_fexit, "bpf_program__attach_tracing_multi")) + goto cleanup; + + ASSERT_OK(trigger_module_test_read(1), "trigger_read"); + ASSERT_EQ(skel->bss->test_result_fentry, 5, "test_result_fentry"); + ASSERT_EQ(skel->bss->test_result_fexit, 5, "test_result_fexit"); + +cleanup: + tracing_multi_module__destroy(skel); + free(ids); +} + void test_tracing_multi_test(void) { #ifndef __x86_64__ @@ -255,4 +354,10 @@ void test_tracing_multi_test(void) test_link_api_pattern(); if (test__start_subtest("link_api_ids")) test_link_api_ids(); + if (test__start_subtest("module_skel_api")) + test_module_skel_api(); + if (test__start_subtest("module_link_api_pattern")) + test_module_link_api_pattern(); + if (test__start_subtest("module_link_api_ids")) + test_module_link_api_ids(); } diff --git a/tools/testing/selftests/bpf/progs/tracing_multi_attach_module.c b/tools/testing/selftests/bpf/progs/tracing_multi_attach_module.c new file mode 100644 index 000000000000..b3374f2db450 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tracing_multi_attach_module.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +__hidden extern int tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return); + +__u64 test_result_fentry = 0; +__u64 test_result_fexit = 0; + +SEC("fentry.multi/bpf_testmod:bpf_testmod_fentry_test*") +int BPF_PROG(test_fentry) +{ + tracing_multi_arg_check(ctx, &test_result_fentry, false); + return 0; +} + +SEC("fexit.multi/bpf_testmod:bpf_testmod_fentry_test*") +int BPF_PROG(test_fexit) +{ + tracing_multi_arg_check(ctx, &test_result_fexit, true); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/tracing_multi_check.c b/tools/testing/selftests/bpf/progs/tracing_multi_check.c index 333a3a7bae8a..7ede84c50cb6 100644 --- 
a/tools/testing/selftests/bpf/progs/tracing_multi_check.c +++ b/tools/testing/selftests/bpf/progs/tracing_multi_check.c @@ -18,6 +18,12 @@ extern const void bpf_fentry_test8 __ksym; extern const void bpf_fentry_test9 __ksym; extern const void bpf_fentry_test10 __ksym; +extern const void bpf_testmod_fentry_test1 __ksym; +extern const void bpf_testmod_fentry_test2 __ksym; +extern const void bpf_testmod_fentry_test3 __ksym; +extern const void bpf_testmod_fentry_test7 __ksym; +extern const void bpf_testmod_fentry_test11 __ksym; + int tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return) { void *ip = (void *) bpf_get_func_ip(ctx); @@ -145,6 +151,50 @@ int tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return) err |= is_return ? ret != 0 : 0; *test_result += err == 0 ? 1 : 0; + } else if (ip == &bpf_testmod_fentry_test1) { + int a; + + err |= bpf_get_func_arg(ctx, 0, &value); + a = (int) value; + + err |= is_return ? ret != 2 : 0; + + *test_result += err == 0 && a == 1; + } else if (ip == &bpf_testmod_fentry_test2) { + int a; + __u64 b; + + err |= bpf_get_func_arg(ctx, 0, &value); + a = (int) value; + err |= bpf_get_func_arg(ctx, 1, &value); + b = (__u64) value; + + err |= is_return ? ret != 5 : 0; + + *test_result += err == 0 && a == 2 && b == 3; + } else if (ip == &bpf_testmod_fentry_test3) { + char a; + int b; + __u64 c; + + err |= bpf_get_func_arg(ctx, 0, &value); + a = (char) value; + err |= bpf_get_func_arg(ctx, 1, &value); + b = (int) value; + err |= bpf_get_func_arg(ctx, 2, &value); + c = (__u64) value; + + err |= is_return ? ret != 15 : 0; + + *test_result += err == 0 && a == 4 && b == 5 && c == 6; + } else if (ip == &bpf_testmod_fentry_test7) { + err |= is_return ? ret != 133 : 0; + + *test_result += err == 0; + } else if (ip == &bpf_testmod_fentry_test11) { + err |= is_return ? 
ret != 231 : 0; + + *test_result += err == 0; } return 0; -- cgit v1.2.3 From 4309f580a0a6608bd0c0fe090ef5283173ff4f1a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:48 +0200 Subject: selftests/bpf: Add tracing multi intersect tests Adding tracing multi tests for intersecting attached functions. Using the bits of a mask (values from 1 to 15) to specify which of the (up to 4) programs are attached, and randomly choosing the bpf_fentry_test* functions they are attached to. Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-24-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 4 +- .../selftests/bpf/prog_tests/tracing_multi.c | 99 ++++++++++++++++++++++ .../bpf/progs/tracing_multi_intersect_attach.c | 41 +++++++++ 3 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_intersect_attach.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index ed220558d41b..2b5688c97006 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -513,7 +513,8 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ linked_vars.skel.h linked_maps.skel.h \ test_subskeleton.skel.h test_subskeleton_lib.skel.h \ test_usdt.skel.h tracing_multi.skel.h \ - tracing_multi_module.skel.h + tracing_multi_module.skel.h \ + tracing_multi_intersect.skel.h LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c \ core_kern.c core_kern_overflow.c test_ringbuf.c \ @@ -541,6 +542,7 @@ xdp_hw_metadata.skel.h-deps := xdp_hw_metadata.bpf.o xdp_features.skel.h-deps := xdp_features.bpf.o tracing_multi.skel.h-deps := tracing_multi_attach.bpf.o tracing_multi_check.bpf.o tracing_multi_module.skel.h-deps := tracing_multi_attach_module.bpf.o tracing_multi_check.bpf.o +tracing_multi_intersect.skel.h-deps := tracing_multi_intersect_attach.bpf.o tracing_multi_check.bpf.o 
LINKED_BPF_OBJS := $(foreach skel,$(LINKED_SKELS),$($(skel)-deps)) LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(LINKED_BPF_OBJS)) diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c index 77134f1e2dc3..4dd610e74f9a 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c @@ -6,6 +6,7 @@ #include "bpf/libbpf_internal.h" #include "tracing_multi.skel.h" #include "tracing_multi_module.skel.h" +#include "tracing_multi_intersect.skel.h" #include "trace_helpers.h" static const char * const bpf_fentry_test[] = { @@ -31,6 +32,20 @@ static const char * const bpf_testmod_fentry_test[] = { #define FUNCS_CNT (ARRAY_SIZE(bpf_fentry_test)) +static int get_random_funcs(const char **funcs) +{ + int i, cnt = 0; + + for (i = 0; i < FUNCS_CNT; i++) { + if (rand() % 2) + funcs[cnt++] = bpf_fentry_test[i]; + } + /* we always need at least one.. */ + if (!cnt) + funcs[cnt++] = bpf_fentry_test[rand() % FUNCS_CNT]; + return cnt; +} + static int compare(const void *ppa, const void *ppb) { const char *pa = *(const char **) ppa; @@ -341,6 +356,88 @@ cleanup: free(ids); } +static bool is_set(__u32 mask, __u32 bit) +{ + return (1 << bit) & mask; +} + +static void __test_intersect(__u32 mask, const struct bpf_program *progs[4], __u64 *test_results[4]) +{ + LIBBPF_OPTS(bpf_tracing_multi_opts, opts); + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct bpf_link *links[4] = { NULL }; + const char *funcs[FUNCS_CNT]; + __u64 expected[4]; + __u32 *ids, i; + int err, cnt; + + /* + * We have 4 programs in progs and the mask bits pick which + * of them gets attached to randomly chosen functions. 
+ */ + for (i = 0; i < 4; i++) { + if (!is_set(mask, i)) + continue; + + cnt = get_random_funcs(funcs); + ids = get_ids(funcs, cnt, NULL); + if (!ASSERT_OK_PTR(ids, "get_ids")) + goto cleanup; + + opts.ids = ids; + opts.cnt = cnt; + links[i] = bpf_program__attach_tracing_multi(progs[i], NULL, &opts); + free(ids); + + if (!ASSERT_OK_PTR(links[i], "bpf_program__attach_tracing_multi")) + goto cleanup; + + expected[i] = *test_results[i] + cnt; + } + + err = bpf_prog_test_run_opts(bpf_program__fd(progs[0]), &topts); + ASSERT_OK(err, "test_run"); + + for (i = 0; i < 4; i++) { + if (!is_set(mask, i)) + continue; + ASSERT_EQ(*test_results[i], expected[i], "test_results"); + } + +cleanup: + for (i = 0; i < 4; i++) + bpf_link__destroy(links[i]); +} + +static void test_intersect(void) +{ + struct tracing_multi_intersect *skel; + const struct bpf_program *progs[4]; + __u64 *test_results[4]; + __u32 i; + + skel = tracing_multi_intersect__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_multi_intersect__open_and_load")) + return; + + skel->bss->pid = getpid(); + + progs[0] = skel->progs.fentry_1; + progs[1] = skel->progs.fexit_1; + progs[2] = skel->progs.fentry_2; + progs[3] = skel->progs.fexit_2; + + test_results[0] = &skel->bss->test_result_fentry_1; + test_results[1] = &skel->bss->test_result_fexit_1; + test_results[2] = &skel->bss->test_result_fentry_2; + test_results[3] = &skel->bss->test_result_fexit_2; + + for (i = 1; i < 16; i++) + __test_intersect(i, progs, test_results); + + tracing_multi_intersect__destroy(skel); +} + void test_tracing_multi_test(void) { #ifndef __x86_64__ @@ -360,4 +457,6 @@ void test_tracing_multi_test(void) test_module_link_api_pattern(); if (test__start_subtest("module_link_api_ids")) test_module_link_api_ids(); + if (test__start_subtest("intersect")) + test_intersect(); } diff --git a/tools/testing/selftests/bpf/progs/tracing_multi_intersect_attach.c b/tools/testing/selftests/bpf/progs/tracing_multi_intersect_attach.c new file mode 100644 
index 000000000000..cd5be0bb6ffd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tracing_multi_intersect_attach.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +__hidden extern int tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return); + +__u64 test_result_fentry_1 = 0; +__u64 test_result_fentry_2 = 0; +__u64 test_result_fexit_1 = 0; +__u64 test_result_fexit_2 = 0; + +SEC("fentry.multi") +int BPF_PROG(fentry_1) +{ + tracing_multi_arg_check(ctx, &test_result_fentry_1, false); + return 0; +} + +SEC("fentry.multi") +int BPF_PROG(fentry_2) +{ + tracing_multi_arg_check(ctx, &test_result_fentry_2, false); + return 0; +} + +SEC("fexit.multi") +int BPF_PROG(fexit_1) +{ + tracing_multi_arg_check(ctx, &test_result_fexit_1, true); + return 0; +} + +SEC("fexit.multi") +int BPF_PROG(fexit_2) +{ + tracing_multi_arg_check(ctx, &test_result_fexit_2, true); + return 0; +} -- cgit v1.2.3 From 1b938f42f5fa1789d0dcc2b9aa6262edba3a7f51 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:49 +0200 Subject: selftests/bpf: Add tracing multi cookies test Adding tests for using cookies on tracing multi link. 
Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-25-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/tracing_multi.c | 23 ++++++++++++++++++++-- .../selftests/bpf/progs/tracing_multi_check.c | 15 +++++++++++++- 2 files changed, 35 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c index 4dd610e74f9a..0f066063cb82 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c @@ -9,6 +9,19 @@ #include "tracing_multi_intersect.skel.h" #include "trace_helpers.h" +static __u64 bpf_fentry_test_cookies[] = { + 8, /* bpf_fentry_test1 */ + 9, /* bpf_fentry_test2 */ + 7, /* bpf_fentry_test3 */ + 5, /* bpf_fentry_test4 */ + 4, /* bpf_fentry_test5 */ + 2, /* bpf_fentry_test6 */ + 3, /* bpf_fentry_test7 */ + 1, /* bpf_fentry_test8 */ + 10, /* bpf_fentry_test9 */ + 6, /* bpf_fentry_test10 */ +}; + static const char * const bpf_fentry_test[] = { "bpf_fentry_test1", "bpf_fentry_test2", @@ -217,7 +230,7 @@ cleanup: tracing_multi__destroy(skel); } -static void test_link_api_ids(void) +static void test_link_api_ids(bool test_cookies) { LIBBPF_OPTS(bpf_tracing_multi_opts, opts); struct tracing_multi *skel; @@ -229,6 +242,7 @@ static void test_link_api_ids(void) return; skel->bss->pid = getpid(); + skel->bss->test_cookies = test_cookies; ids = get_ids(bpf_fentry_test, cnt, NULL); if (!ASSERT_OK_PTR(ids, "get_ids")) @@ -237,6 +251,9 @@ static void test_link_api_ids(void) opts.ids = ids; opts.cnt = cnt; + if (test_cookies) + opts.cookies = bpf_fentry_test_cookies; + skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry, NULL, &opts); if (!ASSERT_OK_PTR(skel->links.test_fentry, "bpf_program__attach_tracing_multi")) @@ -450,7 +467,7 @@ void test_tracing_multi_test(void) if 
(test__start_subtest("link_api_pattern")) test_link_api_pattern(); if (test__start_subtest("link_api_ids")) - test_link_api_ids(); + test_link_api_ids(false); if (test__start_subtest("module_skel_api")) test_module_skel_api(); if (test__start_subtest("module_link_api_pattern")) @@ -459,4 +476,6 @@ void test_tracing_multi_test(void) test_module_link_api_ids(); if (test__start_subtest("intersect")) test_intersect(); + if (test__start_subtest("cookies")) + test_link_api_ids(true); } diff --git a/tools/testing/selftests/bpf/progs/tracing_multi_check.c b/tools/testing/selftests/bpf/progs/tracing_multi_check.c index 7ede84c50cb6..b2959ba71179 100644 --- a/tools/testing/selftests/bpf/progs/tracing_multi_check.c +++ b/tools/testing/selftests/bpf/progs/tracing_multi_check.c @@ -6,6 +6,7 @@ char _license[] SEC("license") = "GPL"; int pid = 0; +bool test_cookies = false; /* bpf_fentry_test1 is exported as kfunc via vmlinux.h */ extern const void bpf_fentry_test2 __ksym; @@ -27,7 +28,7 @@ extern const void bpf_testmod_fentry_test11 __ksym; int tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return) { void *ip = (void *) bpf_get_func_ip(ctx); - __u64 value = 0, ret = 0; + __u64 value = 0, ret = 0, cookie = 0; long err = 0; if (bpf_get_current_pid_tgid() >> 32 != pid) @@ -35,6 +36,8 @@ int tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return) if (is_return) err |= bpf_get_func_ret(ctx, &ret); + if (test_cookies) + cookie = bpf_get_attach_cookie(ctx); if (ip == &bpf_fentry_test1) { int a; @@ -43,6 +46,7 @@ int tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return) a = (int) value; err |= is_return ? ret != 2 : 0; + err |= test_cookies ? cookie != 8 : 0; *test_result += err == 0 && a == 1; } else if (ip == &bpf_fentry_test2) { @@ -55,6 +59,7 @@ int tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return) b = value; err |= is_return ? ret != 5 : 0; + err |= test_cookies ? 
cookie != 9 : 0; *test_result += err == 0 && a == 2 && b == 3; } else if (ip == &bpf_fentry_test3) { @@ -70,6 +75,7 @@ int tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return) c = value; err |= is_return ? ret != 15 : 0; + err |= test_cookies ? cookie != 7 : 0; *test_result += err == 0 && a == 4 && b == 5 && c == 6; } else if (ip == &bpf_fentry_test4) { @@ -88,6 +94,7 @@ int tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return) d = value; err |= is_return ? ret != 34 : 0; + err |= test_cookies ? cookie != 5 : 0; *test_result += err == 0 && a == (void *) 7 && b == 8 && c == 9 && d == 10; } else if (ip == &bpf_fentry_test5) { @@ -109,6 +116,7 @@ int tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return) e = value; err |= is_return ? ret != 65 : 0; + err |= test_cookies ? cookie != 4 : 0; *test_result += err == 0 && a == 11 && b == (void *) 12 && c == 13 && d == 14 && e == 15; } else if (ip == &bpf_fentry_test6) { @@ -133,22 +141,27 @@ int tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return) f = value; err |= is_return ? ret != 111 : 0; + err |= test_cookies ? cookie != 2 : 0; *test_result += err == 0 && a == 16 && b == (void *) 17 && c == 18 && d == 19 && e == (void *) 20 && f == 21; } else if (ip == &bpf_fentry_test7) { err |= is_return ? ret != 0 : 0; + err |= test_cookies ? cookie != 3 : 0; *test_result += err == 0 ? 1 : 0; } else if (ip == &bpf_fentry_test8) { err |= is_return ? ret != 0 : 0; + err |= test_cookies ? cookie != 1 : 0; *test_result += err == 0 ? 1 : 0; } else if (ip == &bpf_fentry_test9) { err |= is_return ? ret != 0 : 0; + err |= test_cookies ? cookie != 10 : 0; *test_result += err == 0 ? 1 : 0; } else if (ip == &bpf_fentry_test10) { err |= is_return ? ret != 0 : 0; + err |= test_cookies ? cookie != 6 : 0; *test_result += err == 0 ? 
1 : 0; } else if (ip == &bpf_testmod_fentry_test1) { -- cgit v1.2.3 From 69f25d4b0c17cc947ce26391cac0015182b07dc0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:50 +0200 Subject: selftests/bpf: Add tracing multi session test Adding tests for tracing multi link session. Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-26-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 4 +- .../selftests/bpf/prog_tests/tracing_multi.c | 45 +++++++++++++++ .../bpf/progs/tracing_multi_session_attach.c | 65 ++++++++++++++++++++++ 3 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_session_attach.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 2b5688c97006..d53b7e496ac9 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -514,7 +514,8 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ test_subskeleton.skel.h test_subskeleton_lib.skel.h \ test_usdt.skel.h tracing_multi.skel.h \ tracing_multi_module.skel.h \ - tracing_multi_intersect.skel.h + tracing_multi_intersect.skel.h \ + tracing_multi_session.skel.h LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c \ core_kern.c core_kern_overflow.c test_ringbuf.c \ @@ -543,6 +544,7 @@ xdp_features.skel.h-deps := xdp_features.bpf.o tracing_multi.skel.h-deps := tracing_multi_attach.bpf.o tracing_multi_check.bpf.o tracing_multi_module.skel.h-deps := tracing_multi_attach_module.bpf.o tracing_multi_check.bpf.o tracing_multi_intersect.skel.h-deps := tracing_multi_intersect_attach.bpf.o tracing_multi_check.bpf.o +tracing_multi_session.skel.h-deps := tracing_multi_session_attach.bpf.o tracing_multi_check.bpf.o LINKED_BPF_OBJS := $(foreach skel,$(LINKED_SKELS),$($(skel)-deps)) LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(LINKED_BPF_OBJS)) diff --git 
a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c index 0f066063cb82..05683b8d0680 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c @@ -7,6 +7,7 @@ #include "tracing_multi.skel.h" #include "tracing_multi_module.skel.h" #include "tracing_multi_intersect.skel.h" +#include "tracing_multi_session.skel.h" #include "trace_helpers.h" static __u64 bpf_fentry_test_cookies[] = { @@ -455,6 +456,48 @@ static void test_intersect(void) tracing_multi_intersect__destroy(skel); } +static void test_session(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct tracing_multi_session *skel; + int err, prog_fd; + + skel = tracing_multi_session__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_multi_session__open_and_load")) + return; + + skel->bss->pid = getpid(); + + err = tracing_multi_session__attach(skel); + if (!ASSERT_OK(err, "tracing_multi_session__attach")) + goto cleanup; + + /* execute kernel session */ + prog_fd = bpf_program__fd(skel->progs.test_session_1); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + + /* 10 for test_session_1, 1 for test_fsession_s */ + ASSERT_EQ(skel->bss->test_result_fentry, 11, "test_result_fentry"); + /* extra count (+1 for each fexit execution) for test_result_fexit cookie check/inc */ + ASSERT_EQ(skel->bss->test_result_fexit, 22, "test_result_fexit"); + + skel->bss->test_result_fentry = 0; + skel->bss->test_result_fexit = 0; + + /* execute bpf_testmod.ko session */ + ASSERT_OK(trigger_module_test_read(1), "trigger_read"); + + /* 5 for test_session_2 */ + ASSERT_EQ(skel->bss->test_result_fentry, 5, "test_result_fentry"); + /* extra count (+1 for each fexit execution) for test_result_fexit cookie */ + ASSERT_EQ(skel->bss->test_result_fexit, 10, "test_result_fexit"); + + +cleanup: + tracing_multi_session__destroy(skel); +} + void test_tracing_multi_test(void) { #ifndef 
__x86_64__ @@ -478,4 +521,6 @@ void test_tracing_multi_test(void) test_intersect(); if (test__start_subtest("cookies")) test_link_api_ids(true); + if (test__start_subtest("session")) + test_session(); } diff --git a/tools/testing/selftests/bpf/progs/tracing_multi_session_attach.c b/tools/testing/selftests/bpf/progs/tracing_multi_session_attach.c new file mode 100644 index 000000000000..7c9a46016ccd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tracing_multi_session_attach.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +__hidden extern int tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return); + +__u64 test_result_fentry = 0; +__u64 test_result_fexit = 0; + +SEC("fsession.multi/bpf_fentry_test*") +int BPF_PROG(test_session_1) +{ + volatile __u64 *cookie = bpf_session_cookie(ctx); + + if (bpf_session_is_return(ctx)) { + if (tracing_multi_arg_check(ctx, &test_result_fexit, true)) + return 0; + /* extra count for test_result_fexit cookie */ + test_result_fexit += *cookie == 0xbeafbeafbeafbeaf; + } else { + if (tracing_multi_arg_check(ctx, &test_result_fentry, false)) + return 0; + *cookie = 0xbeafbeafbeafbeaf; + } + return 0; +} + +SEC("fsession.multi.s/bpf_fentry_test1") +int BPF_PROG(test_fsession_s) +{ + volatile __u64 *cookie = bpf_session_cookie(ctx); + + if (bpf_session_is_return(ctx)) { + if (tracing_multi_arg_check(ctx, &test_result_fexit, true)) + return 0; + /* extra count for test_result_fexit cookie */ + test_result_fexit += *cookie == 0xbeafbeafbeafbeaf; + } else { + if (tracing_multi_arg_check(ctx, &test_result_fentry, false)) + return 0; + *cookie = 0xbeafbeafbeafbeaf; + } + return 0; +} + +SEC("fsession.multi/bpf_testmod:bpf_testmod_fentry_test*") +int BPF_PROG(test_session_2) +{ + volatile __u64 *cookie = bpf_session_cookie(ctx); + + if (bpf_session_is_return(ctx)) { + if (tracing_multi_arg_check(ctx, &test_result_fexit, true)) + return 0; + /* 
extra count for test_result_fexit cookie */ + test_result_fexit += *cookie == 0xbeafbeafbeafbeaf; + } else { + if (tracing_multi_arg_check(ctx, &test_result_fentry, false)) + return 0; + *cookie = 0xbeafbeafbeafbeaf; + } + return 0; +} -- cgit v1.2.3 From 1fd8328549979d96540252fa826481df93885a5a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:51 +0200 Subject: selftests/bpf: Add tracing multi attach fails test Adding tests for attach fails on tracing multi link. Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-27-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/tracing_multi.c | 96 ++++++++++++++++++++++ .../selftests/bpf/progs/tracing_multi_fail.c | 18 ++++ 2 files changed, 114 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_fail.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c index 05683b8d0680..7e1bb071ce2a 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c @@ -8,6 +8,7 @@ #include "tracing_multi_module.skel.h" #include "tracing_multi_intersect.skel.h" #include "tracing_multi_session.skel.h" +#include "tracing_multi_fail.skel.h" #include "trace_helpers.h" static __u64 bpf_fentry_test_cookies[] = { @@ -498,6 +499,99 @@ cleanup: tracing_multi_session__destroy(skel); } +static void test_attach_api_fails(void) +{ + LIBBPF_OPTS(bpf_tracing_multi_opts, opts); + static const char * const func[] = { + "bpf_fentry_test2", + }; + struct tracing_multi_fail *skel = NULL; + __u32 ids[2] = {}, *ids2 = NULL; + __u64 cookies[2]; + + skel = tracing_multi_fail__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_multi_fail__open_and_load")) + return; + + /* fail#1 (libbpf) pattern and opts NULL */ + skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry, + 
NULL, NULL); + if (!ASSERT_EQ(libbpf_get_error(skel->links.test_fentry), -EINVAL, "fail_1")) + goto cleanup; + + /* fail#2 (libbpf) pattern and ids */ + LIBBPF_OPTS_RESET(opts, + .ids = ids, + .cnt = 2, + ); + + skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry, + "bpf_fentry_test*", &opts); + if (!ASSERT_EQ(libbpf_get_error(skel->links.test_fentry), -EINVAL, "fail_2")) + goto cleanup; + + /* fail#3 (libbpf) pattern and cookies */ + LIBBPF_OPTS_RESET(opts, + .ids = NULL, + .cnt = 2, + .cookies = cookies, + ); + + skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry, + "bpf_fentry_test*", &opts); + if (!ASSERT_EQ(libbpf_get_error(skel->links.test_fentry), -EINVAL, "fail_3")) + goto cleanup; + + /* fail#4 (libbpf) bogus pattern */ + skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry, + "bpf_not_really_a_function*", NULL); + if (!ASSERT_EQ(libbpf_get_error(skel->links.test_fentry), -EINVAL, "fail_4")) + goto cleanup; + + /* fail#5 (kernel) abnormal cnt */ + LIBBPF_OPTS_RESET(opts, + .ids = ids, + .cnt = INT_MAX, + ); + + skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry, + NULL, &opts); + if (!ASSERT_EQ(libbpf_get_error(skel->links.test_fentry), -E2BIG, "fail_5")) + goto cleanup; + + /* fail#6 (kernel) attach sleepable program to not-allowed function */ + ids2 = get_ids(func, 1, NULL); + if (!ASSERT_OK_PTR(ids2, "get_ids")) + goto cleanup; + + LIBBPF_OPTS_RESET(opts, + .ids = ids2, + .cnt = 1, + ); + + skel->links.test_fentry_s = bpf_program__attach_tracing_multi(skel->progs.test_fentry_s, + NULL, &opts); + if (!ASSERT_EQ(libbpf_get_error(skel->links.test_fentry_s), -EINVAL, "fail_6")) + goto cleanup; + + /* fail#7 (kernel) attach with duplicate id */ + ids[0] = ids2[0]; + ids[1] = ids2[0]; + + LIBBPF_OPTS_RESET(opts, + .ids = ids, + .cnt = 2, + ); + + skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry, + 
NULL, &opts); + ASSERT_EQ(libbpf_get_error(skel->links.test_fentry), -EINVAL, "fail_7"); + +cleanup: + tracing_multi_fail__destroy(skel); + free(ids2); +} + void test_tracing_multi_test(void) { #ifndef __x86_64__ @@ -523,4 +617,6 @@ void test_tracing_multi_test(void) test_link_api_ids(true); if (test__start_subtest("session")) test_session(); + if (test__start_subtest("attach_api_fails")) + test_attach_api_fails(); } diff --git a/tools/testing/selftests/bpf/progs/tracing_multi_fail.c b/tools/testing/selftests/bpf/progs/tracing_multi_fail.c new file mode 100644 index 000000000000..7f0375f4213d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tracing_multi_fail.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +SEC("fentry.multi") +int BPF_PROG(test_fentry) +{ + return 0; +} + +SEC("fentry.multi.s") +int BPF_PROG(test_fentry_s) +{ + return 0; +} -- cgit v1.2.3 From 443c91d08c4bf48caeab6243edaca4e987573d8a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:52 +0200 Subject: selftests/bpf: Add tracing multi verifier fails test Adding tests for verifier fails on tracing multi programs. 
Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-28-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/tracing_multi.c | 2 ++ .../selftests/bpf/progs/tracing_multi_verifier.c | 31 ++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_verifier.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c index 7e1bb071ce2a..9e026f2b254d 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c @@ -9,6 +9,7 @@ #include "tracing_multi_intersect.skel.h" #include "tracing_multi_session.skel.h" #include "tracing_multi_fail.skel.h" +#include "tracing_multi_verifier.skel.h" #include "trace_helpers.h" static __u64 bpf_fentry_test_cookies[] = { @@ -619,4 +620,5 @@ void test_tracing_multi_test(void) test_session(); if (test__start_subtest("attach_api_fails")) test_attach_api_fails(); + RUN_TESTS(tracing_multi_verifier); } diff --git a/tools/testing/selftests/bpf/progs/tracing_multi_verifier.c b/tools/testing/selftests/bpf/progs/tracing_multi_verifier.c new file mode 100644 index 000000000000..7b6ed41bf452 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tracing_multi_verifier.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +SEC("fentry.multi/bpf_fentry_test1") +__failure +__msg("func 'bpf_multi_func' doesn't have 1-th argument") +int BPF_PROG(fentry_direct_access, int a) +{ + return a; +} + +SEC("fexit.multi/bpf_fentry_test3") +__failure +__msg("invalid bpf_context access off=24 size=8") +int BPF_PROG(fexit_direct_access, char a, int b, __u64 c, int ret) +{ + return ret; +} + +SEC("fsession.multi/bpf_fentry_test4") +__failure +__msg("invalid bpf_context access off=16 
size=8") +int BPF_PROG(fsession_direct_access, void *a, char b, int c, __u64 d, int ret) +{ + return c; +} -- cgit v1.2.3 From 4db8f60b6baf64f4f405bc8eb92a36315b353481 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:53 +0200 Subject: selftests/bpf: Add tracing multi attach benchmark test Adding benchmark test that attaches to (almost) all allowed tracing functions and display attach/detach times. # ./test_progs -t tracing_multi_bench_attach -v bpf_testmod.ko is already unloaded. Loading bpf_testmod.ko... Successfully loaded bpf_testmod.ko. serial_test_tracing_multi_bench_attach:PASS:btf__load_vmlinux_btf 0 nsec serial_test_tracing_multi_bench_attach:PASS:tracing_multi_bench__open_and_load 0 nsec serial_test_tracing_multi_bench_attach:PASS:get_syms 0 nsec serial_test_tracing_multi_bench_attach:PASS:bpf_program__attach_tracing_multi 0 nsec serial_test_tracing_multi_bench_attach: found 51186 functions serial_test_tracing_multi_bench_attach: attached in 1.295s serial_test_tracing_multi_bench_attach: detached in 0.243s #507 tracing_multi_bench_attach:OK Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED Successfully unloaded bpf_testmod.ko. Exporting skip_entry as is_unsafe_function and using it in the test. Also updating trace_blacklist with ___migrate_enable to be in sync with kernel functions deny list. 
Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-29-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/tracing_multi.c | 124 +++++++++++++++++++++ .../selftests/bpf/progs/tracing_multi_bench.c | 12 ++ tools/testing/selftests/bpf/trace_helpers.c | 7 +- tools/testing/selftests/bpf/trace_helpers.h | 1 + 4 files changed, 141 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_bench.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c index 9e026f2b254d..cb39bf610823 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c @@ -10,6 +10,7 @@ #include "tracing_multi_session.skel.h" #include "tracing_multi_fail.skel.h" #include "tracing_multi_verifier.skel.h" +#include "tracing_multi_bench.skel.h" #include "trace_helpers.h" static __u64 bpf_fentry_test_cookies[] = { @@ -593,6 +594,129 @@ cleanup: free(ids2); } +void serial_test_tracing_multi_bench_attach(void) +{ + LIBBPF_OPTS(bpf_tracing_multi_opts, opts); + struct tracing_multi_bench *skel = NULL; + long attach_start_ns, attach_end_ns; + long detach_start_ns, detach_end_ns; + double attach_delta, detach_delta; + struct bpf_link *link = NULL; + size_t i, cap = 0, cnt = 0; + struct ksyms *ksyms = NULL; + void *root = NULL; + void *dups = NULL; + __u32 *ids = NULL; + __u32 nr, type_id; + struct btf *btf; + int err; + +#ifndef __x86_64__ + test__skip(); + return; +#endif + + btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(btf, "btf__load_vmlinux_btf")) + return; + + skel = tracing_multi_bench__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_multi_bench__open_and_load")) + goto cleanup; + + if (!ASSERT_OK(bpf_get_ksyms(&ksyms, true), "get_syms")) + goto cleanup; + + /* Get all ftrace 'safe' symbols.. 
*/ + for (i = 0; i < ksyms->filtered_cnt; i++) { + if (!tsearch(&ksyms->filtered_syms[i], &root, compare)) { + ASSERT_FAIL("tsearch failed"); + goto cleanup; + } + } + + /* + * Collect names that are not unique in kallsyms. The kernel resolves a + * tracing-multi BTF id to an address with kallsyms_lookup_name(), which + * returns the first symbol of that name. For a duplicate name that may + * be a different (non-ftrace-able) instance than the ftrace-able one in + * available_filter_functions, so attaching to it by BTF id fails with + * -ENOENT (e.g. t_start/t_next/t_stop). ksyms->syms is sorted by name, + * so equal names are adjacent. + */ + for (i = 1; i < ksyms->sym_cnt; i++) { + if (strcmp(ksyms->syms[i].name, ksyms->syms[i - 1].name)) + continue; + if (!tsearch(&ksyms->syms[i].name, &dups, compare)) { + ASSERT_FAIL("tsearch failed"); + goto cleanup; + } + } + + /* ..and filter them through BTF and btf_type_is_traceable_func. */ + nr = btf__type_cnt(btf); + for (type_id = 1; type_id < nr; type_id++) { + const struct btf_type *type; + const char *str; + + type = btf__type_by_id(btf, type_id); + if (!type) + break; + + if (BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) + continue; + + str = btf__name_by_offset(btf, type->name_off); + if (!str) + break; + + if (!tfind(&str, &root, compare)) + continue; + + /* Skip names that are not unique in kallsyms, see above. 
*/ + if (tfind(&str, &dups, compare)) + continue; + + if (!btf_type_is_traceable_func(btf, type)) + continue; + + err = libbpf_ensure_mem((void **) &ids, &cap, sizeof(*ids), cnt + 1); + if (err) + goto cleanup; + + ids[cnt++] = type_id; + } + + opts.ids = ids; + opts.cnt = cnt; + + attach_start_ns = get_time_ns(); + link = bpf_program__attach_tracing_multi(skel->progs.bench, NULL, &opts); + attach_end_ns = get_time_ns(); + + if (!ASSERT_OK_PTR(link, "bpf_program__attach_tracing_multi")) + goto cleanup; + + detach_start_ns = get_time_ns(); + bpf_link__destroy(link); + detach_end_ns = get_time_ns(); + + attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0; + detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0; + + printf("%s: found %lu functions\n", __func__, cnt); + printf("%s: attached in %7.3lfs\n", __func__, attach_delta); + printf("%s: detached in %7.3lfs\n", __func__, detach_delta); + +cleanup: + tracing_multi_bench__destroy(skel); + tdestroy(root, tdestroy_free_nop); + tdestroy(dups, tdestroy_free_nop); + free_kallsyms_local(ksyms); + free(ids); + btf__free(btf); +} + void test_tracing_multi_test(void) { #ifndef __x86_64__ diff --git a/tools/testing/selftests/bpf/progs/tracing_multi_bench.c b/tools/testing/selftests/bpf/progs/tracing_multi_bench.c new file mode 100644 index 000000000000..beae946cb8c4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tracing_multi_bench.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +SEC("fentry.multi") +int BPF_PROG(bench) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 0e63daf83ed5..679008b310d9 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -546,9 +546,10 @@ static const char * const trace_blacklist[] = { "__rcu_read_lock", "__rcu_read_unlock", "bpf_get_numa_node_id", + 
"___migrate_enable", }; -static bool skip_entry(char *name) +bool is_unsafe_function(const char *name) { int i; @@ -651,7 +652,7 @@ int bpf_get_ksyms(struct ksyms **ksymsp, bool kernel) free(name); if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1) continue; - if (skip_entry(name)) + if (is_unsafe_function(name)) continue; ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare); @@ -728,7 +729,7 @@ int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel) free(name); if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2) continue; - if (skip_entry(name)) + if (is_unsafe_function(name)) continue; if (cnt == max_cnt) { diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index d5bf1433675d..01c8ecc45627 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -63,4 +63,5 @@ int read_build_id(const char *path, char *build_id, size_t size); int bpf_get_ksyms(struct ksyms **ksymsp, bool kernel); int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel); +bool is_unsafe_function(const char *name); #endif -- cgit v1.2.3 From b349efe49a123f032e54d7e894d708ea5daa10d2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jun 2026 14:39:54 +0200 Subject: selftests/bpf: Add tracing multi attach rollback tests Adding tests for the rollback code when the tracing_multi link won't get attached, covering 2 reasons: - wrong btf id passed by user, where all previously allocated trampolines will be released - trampoline for requested function is fully attached (has already maximum programs attached) and the link fails, the rollback code needs to release all previously link-ed trampolines and release them We need the bpf_fentry_test* unattached for the tests to pass, so the rollback tests are serial. 
Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260606123955.345967-30-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/tracing_multi.c | 212 +++++++++++++++++++++ .../selftests/bpf/progs/tracing_multi_rollback.c | 43 +++++ 2 files changed, 255 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_rollback.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c index cb39bf610823..f02ffc7f41d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c @@ -11,6 +11,7 @@ #include "tracing_multi_fail.skel.h" #include "tracing_multi_verifier.skel.h" #include "tracing_multi_bench.skel.h" +#include "tracing_multi_rollback.skel.h" #include "trace_helpers.h" static __u64 bpf_fentry_test_cookies[] = { @@ -717,6 +718,217 @@ cleanup: btf__free(btf); } +static void tracing_multi_rollback_run(struct tracing_multi_rollback *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + prog_fd = bpf_program__fd(skel->progs.test_fentry); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + + /* make sure the rollback code did not leave any program attached */ + ASSERT_EQ(skel->bss->test_result_fentry, 0, "test_result_fentry"); + ASSERT_EQ(skel->bss->test_result_fexit, 0, "test_result_fexit"); +} + +static void test_rollback_put(void) +{ + LIBBPF_OPTS(bpf_tracing_multi_opts, opts); + struct tracing_multi_rollback *skel = NULL; + size_t cnt = FUNCS_CNT; + __u32 *ids = NULL; + int err; + + skel = tracing_multi_rollback__open(); + if (!ASSERT_OK_PTR(skel, "tracing_multi_rollback__open")) + return; + + bpf_program__set_autoload(skel->progs.test_fentry, true); + bpf_program__set_autoload(skel->progs.test_fexit, true); + + err = tracing_multi_rollback__load(skel); + if (!ASSERT_OK(err, "tracing_multi_rollback__load")) 
+ goto cleanup; + + ids = get_ids(bpf_fentry_test, cnt, NULL); + if (!ASSERT_OK_PTR(ids, "get_ids")) + goto cleanup; + + /* + * Mangle last id to trigger rollback, which needs to do put + * on get-ed trampolines. + */ + ids[9] = 0; + + opts.ids = ids; + opts.cnt = cnt; + + skel->bss->pid = getpid(); + + skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry, + NULL, &opts); + if (!ASSERT_ERR_PTR(skel->links.test_fentry, "bpf_program__attach_tracing_multi")) + goto cleanup; + + skel->links.test_fexit = bpf_program__attach_tracing_multi(skel->progs.test_fexit, + NULL, &opts); + if (!ASSERT_ERR_PTR(skel->links.test_fexit, "bpf_program__attach_tracing_multi")) + goto cleanup; + + /* We don't really attach any program, but let's make sure. */ + tracing_multi_rollback_run(skel); + +cleanup: + tracing_multi_rollback__destroy(skel); + free(ids); +} + +static void fillers_cleanup(struct tracing_multi_rollback **skels, int cnt) +{ + int i; + + for (i = 0; i < cnt; i++) + tracing_multi_rollback__destroy(skels[i]); + + free(skels); +} + +static struct tracing_multi_rollback *extra_load_and_link(void) +{ + struct tracing_multi_rollback *skel; + int err; + + skel = tracing_multi_rollback__open(); + if (!ASSERT_OK_PTR(skel, "tracing_multi_rollback__open")) + goto cleanup; + + bpf_program__set_autoload(skel->progs.extra, true); + + err = tracing_multi_rollback__load(skel); + if (!ASSERT_OK(err, "tracing_multi_rollback__load")) + goto cleanup; + + skel->links.extra = bpf_program__attach_trace(skel->progs.extra); + if (!ASSERT_OK_PTR(skel->links.extra, "bpf_program__attach_trace")) + goto cleanup; + + return skel; + +cleanup: + tracing_multi_rollback__destroy(skel); + return NULL; +} + +static struct tracing_multi_rollback **fillers_load_and_link(int max) +{ + struct tracing_multi_rollback **skels, *skel; + int i, err; + + skels = calloc(max + 1, sizeof(*skels)); + if (!ASSERT_OK_PTR(skels, "calloc")) + return NULL; + + for (i = 0; i < max; i++) { + skel 
= skels[i] = tracing_multi_rollback__open(); + if (!ASSERT_OK_PTR(skels[i], "tracing_multi_rollback__open")) + goto cleanup; + + bpf_program__set_autoload(skel->progs.filler, true); + + err = tracing_multi_rollback__load(skel); + if (!ASSERT_OK(err, "tracing_multi_rollback__load")) + goto cleanup; + + skel->links.filler = bpf_program__attach_trace(skel->progs.filler); + if (!ASSERT_OK_PTR(skels[i]->links.filler, "bpf_program__attach_trace")) + goto cleanup; + } + + return skels; + +cleanup: + fillers_cleanup(skels, i + 1); + return NULL; +} + +static void test_rollback_unlink(void) +{ + struct tracing_multi_rollback *skel = NULL, *extra; + LIBBPF_OPTS(bpf_tracing_multi_opts, opts); + struct tracing_multi_rollback **fillers; + size_t cnt = FUNCS_CNT; + __u32 *ids = NULL; + int err, max; + + max = get_bpf_max_tramp_links(); + if (!ASSERT_GE(max, 1, "bpf_max_tramp_links")) + return; + + /* Attach maximum allowed programs to bpf_fentry_test10 */ + fillers = fillers_load_and_link(max); + if (!ASSERT_OK_PTR(fillers, "fillers_load_and_link")) + return; + + extra = extra_load_and_link(); + if (!ASSERT_OK_PTR(extra, "extra_load_and_link")) + goto cleanup; + + skel = tracing_multi_rollback__open(); + if (!ASSERT_OK_PTR(skel, "tracing_multi_rollback__open")) + goto cleanup; + + bpf_program__set_autoload(skel->progs.test_fentry, true); + bpf_program__set_autoload(skel->progs.test_fexit, true); + + /* + * Attach tracing_multi link on bpf_fentry_test1-10, which will + * fail on bpf_fentry_test10 function, because it already has + * maximum allowed programs attached. + * + * The rollback needs to unlink already link-ed trampolines and + * put all of them. 
+ */ + err = tracing_multi_rollback__load(skel); + if (!ASSERT_OK(err, "tracing_multi_rollback__load")) + goto cleanup; + + ids = get_ids(bpf_fentry_test, cnt, NULL); + if (!ASSERT_OK_PTR(ids, "get_ids")) + goto cleanup; + + opts.ids = ids; + opts.cnt = cnt; + + skel->bss->pid = getpid(); + + skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry, + NULL, &opts); + if (!ASSERT_ERR_PTR(skel->links.test_fentry, "bpf_program__attach_tracing_multi")) + goto cleanup; + + skel->links.test_fexit = bpf_program__attach_tracing_multi(skel->progs.test_fexit, + NULL, &opts); + if (!ASSERT_ERR_PTR(skel->links.test_fexit, "bpf_program__attach_tracing_multi")) + goto cleanup; + + tracing_multi_rollback_run(skel); + +cleanup: + fillers_cleanup(fillers, max); + tracing_multi_rollback__destroy(extra); + tracing_multi_rollback__destroy(skel); + free(ids); +} + +void serial_test_tracing_multi_attach_rollback(void) +{ + if (test__start_subtest("put")) + test_rollback_put(); + if (test__start_subtest("unlink")) + test_rollback_unlink(); +} + void test_tracing_multi_test(void) { #ifndef __x86_64__ diff --git a/tools/testing/selftests/bpf/progs/tracing_multi_rollback.c b/tools/testing/selftests/bpf/progs/tracing_multi_rollback.c new file mode 100644 index 000000000000..a49d1d841f3a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tracing_multi_rollback.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +int pid = 0; + +__u64 test_result_fentry = 0; +__u64 test_result_fexit = 0; + +SEC("?fentry.multi") +int BPF_PROG(test_fentry) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + test_result_fentry++; + return 0; +} + +SEC("?fexit.multi") +int BPF_PROG(test_fexit) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + test_result_fexit++; + return 0; +} + +SEC("?fentry/bpf_fentry_test1") +int BPF_PROG(extra) +{ + return 0; +} + 
+SEC("?fentry/bpf_fentry_test10") +int BPF_PROG(filler) +{ + return 0; +} -- cgit v1.2.3 From 6e1e4a9d60edb0e12d373fb6f2b55d90d20a363b Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Sun, 7 Jun 2026 13:30:43 -0700 Subject: selftests/bpf: Stress LRU rqspinlock recovery paths Introduces stress test for bpf_lru_list that exercises lock-failures and orphan-recovery, added by the LRU rqspinlock conversion. Runs three subtests: common LRU, per-CPU LRU lists (BPF_F_NO_COMMON_LRU), and per-CPU LRU map. Each pins one userspace hammer per CPU and attaches the perf_event NMI BPF prog (update+delete mix) on every online CPU. Pre-fix, lockdep fires the "INITIAL USE -> IN-NMI" splat during stress. After stress test, drain_then_verify_capacity() drains every key and refills the lru map. A stranded node on any CPU's pool would have forced eviction of a just-inserted key on that CPU, surfacing here as a missing lookup. Marked serial_ because per-CPU pinning and high-rate HW perf events would perturb parallel tests. Signed-off-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20260607-lru_map_spin-v3-3-bcd9332e911b@meta.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/lru_lock_nmi.c | 243 +++++++++++++++++++++ tools/testing/selftests/bpf/progs/lru_lock_nmi.c | 33 +++ 2 files changed, 276 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/lru_lock_nmi.c create mode 100644 tools/testing/selftests/bpf/progs/lru_lock_nmi.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/lru_lock_nmi.c b/tools/testing/selftests/bpf/prog_tests/lru_lock_nmi.c new file mode 100644 index 000000000000..60666a9ba41f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lru_lock_nmi.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Stress every LRU lock-failure and orphan-recovery. 
+ * perf_event NMI BPF on every online CPU does + * update+delete on a small LRU map; userspace threads on every CPU do + * the same from syscall context. + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include "testing_helpers.h" +#include "lru_lock_nmi.skel.h" + +#define MAP_ENTRIES 64 +#define KEY_RANGE (MAP_ENTRIES * 2) +#define STRESS_NS (500 * 1000 * 1000ULL) + +struct hammer_arg { + int map_fd; + int cpu; + __u64 deadline_ns; +}; + +struct refill_arg { + int map_fd; + int cpu; + int per_cpu_quota; + int update_errors; +}; + +/* + * Pin the calling thread to @cpu. Uses dynamically-allocated CPU sets so + * we stay correct on hosts with @cpu >= CPU_SETSIZE (default 1024). + */ +static int pin_to_cpu(int cpu) +{ + cpu_set_t *cs; + size_t cs_size; + int err; + + cs = CPU_ALLOC(cpu + 1); + if (!cs) + return -ENOMEM; + cs_size = CPU_ALLOC_SIZE(cpu + 1); + + CPU_ZERO_S(cs_size, cs); + CPU_SET_S(cpu, cs_size, cs); + err = pthread_setaffinity_np(pthread_self(), cs_size, cs); + CPU_FREE(cs); + return err; +} + +static void *hammer_thread(void *p) +{ + struct hammer_arg *a = p; + int nr_possible_cpus = libbpf_num_possible_cpus(); + __u64 val[nr_possible_cpus]; + unsigned int seed; + __u32 key; + + memset(val, 0, sizeof(val)); + pin_to_cpu(a->cpu); + + seed = (unsigned int)a->cpu ^ (unsigned int)(uintptr_t)pthread_self(); + + while (get_time_ns() < a->deadline_ns) { + bool do_update = rand_r(&seed) & 1; + + key = rand_r(&seed) % KEY_RANGE; + if (do_update) + bpf_map_update_elem(a->map_fd, &key, val, BPF_ANY); + else + bpf_map_delete_elem(a->map_fd, &key); + } + return NULL; +} + +static void *refill_thread(void *p) +{ + struct refill_arg *a = p; + int nr_possible_cpus = libbpf_num_possible_cpus(); + __u64 val[nr_possible_cpus]; + __u32 start, end, key; + + memset(val, 0, sizeof(val)); + pin_to_cpu(a->cpu); + + start = (__u32)a->cpu * (__u32)a->per_cpu_quota; + end = start + (__u32)a->per_cpu_quota; + for (key = start; key < end; key++) + if 
(bpf_map_update_elem(a->map_fd, &key, val, BPF_ANY)) + a->update_errors++; + return NULL; +} + +/* + * Drain the map, then refill it with each CPU inserting only its own + * quota of keys. + * After refill, lookup every key we inserted - a stranded node on any + * CPU's pool would have forced eviction. + */ +static int drain_then_verify_capacity(int map_fd, int nr_cpus) +{ + int per_cpu_quota = MAP_ENTRIES / nr_cpus; + int total = per_cpu_quota * nr_cpus; + int nr_possible_cpus = libbpf_num_possible_cpus(); + pthread_t threads[nr_cpus]; + struct refill_arg args[nr_cpus]; + __u64 val[nr_possible_cpus]; + int i, hits = 0, nthreads = 0; + __u32 key; + + memset(val, 0, sizeof(val)); + + for (key = 0; key < KEY_RANGE; key++) + bpf_map_delete_elem(map_fd, &key); + + for (i = 0; i < nr_cpus; i++) { + args[i] = (struct refill_arg){ + .map_fd = map_fd, + .cpu = i, + .per_cpu_quota = per_cpu_quota, + }; + if (pthread_create(&threads[nthreads], NULL, refill_thread, &args[i]) == 0) + nthreads++; + } + for (i = 0; i < nthreads; i++) + pthread_join(threads[i], NULL); + + for (i = 0; i < nr_cpus; i++) + if (args[i].update_errors) + return -ENOMEM; + + for (key = 0; key < (__u32)total; key++) + if (bpf_map_lookup_elem(map_fd, &key, val) == 0) + hits++; + + return hits == total ? 
0 : -EIO; +} + +static void run_variant(enum bpf_map_type type, __u32 map_flags, const char *name) +{ + struct perf_event_attr attr = { + .size = sizeof(attr), + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .freq = 1, + }; + int nr_cpus, max_cpus = 64; + struct bpf_link *links[max_cpus]; + pthread_t threads[max_cpus]; + struct hammer_arg args[max_cpus]; + struct lru_lock_nmi *skel = NULL; + int map_fd, i, err, nr_threads = 0, pmu_fd = -1; + __u64 deadline; + + nr_cpus = libbpf_num_possible_cpus(); + if (!ASSERT_GT(nr_cpus, 0, "num_cpus")) + return; + + if (nr_cpus > max_cpus) + nr_cpus = max_cpus; + + if (!test__start_subtest(name)) + return; + + memset(links, 0, sizeof(links)); + skel = lru_lock_nmi__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + err = bpf_map__set_type(skel->maps.lru_map, type); + if (!ASSERT_OK(err, "set_type")) + goto cleanup; + err = bpf_map__set_map_flags(skel->maps.lru_map, map_flags); + if (!ASSERT_OK(err, "set_flags")) + goto cleanup; + err = bpf_map__set_max_entries(skel->maps.lru_map, MAP_ENTRIES); + if (!ASSERT_OK(err, "set_max_entries")) + goto cleanup; + + err = lru_lock_nmi__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + skel->bss->hits = 0; + map_fd = bpf_map__fd(skel->maps.lru_map); + attr.sample_freq = read_perf_max_sample_freq(); + + for (i = 0; i < nr_cpus; i++) { + pmu_fd = syscall(__NR_perf_event_open, &attr, -1, i, -1, 0); + if (pmu_fd < 0) { + if (i == 0 && + (errno == ENOENT || errno == EOPNOTSUPP)) { + test__skip(); + goto cleanup; + } + continue; + } + /* libbpf takes ownership of pfd on success */ + links[i] = bpf_program__attach_perf_event(skel->progs.oncpu, pmu_fd); + if (!links[i]) + close(pmu_fd); + } + + deadline = get_time_ns() + STRESS_NS; + for (i = 0; i < nr_cpus; i++) { + args[i].map_fd = map_fd; + args[i].cpu = i; + args[i].deadline_ns = deadline; + if (pthread_create(&threads[nr_threads], NULL, hammer_thread, &args[i]) == 0) + nr_threads++; + } 
+ for (i = 0; i < nr_threads; i++) + pthread_join(threads[i], NULL); + + for (i = 0; i < nr_cpus; i++) { + if (links[i]) { + bpf_link__destroy(links[i]); + links[i] = NULL; + } + } + + ASSERT_GT(skel->bss->hits, 0, "nmi_bpf_ran"); + ASSERT_OK(drain_then_verify_capacity(map_fd, nr_cpus), "drain_then_verify_capacity"); + +cleanup: + for (i = 0; i < nr_cpus; i++) { + if (links[i]) + bpf_link__destroy(links[i]); + } + lru_lock_nmi__destroy(skel); +} + +void serial_test_lru_lock_nmi(void) +{ + run_variant(BPF_MAP_TYPE_LRU_HASH, 0, "common_lru"); + run_variant(BPF_MAP_TYPE_LRU_HASH, BPF_F_NO_COMMON_LRU, "no_common_lru"); + run_variant(BPF_MAP_TYPE_LRU_PERCPU_HASH, 0, "percpu_lru"); +} diff --git a/tools/testing/selftests/bpf/progs/lru_lock_nmi.c b/tools/testing/selftests/bpf/progs/lru_lock_nmi.c new file mode 100644 index 000000000000..c0692cd54237 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/lru_lock_nmi.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 64); + __type(key, __u32); + __type(value, __u64); +} lru_map SEC(".maps"); + +int hits; + +SEC("perf_event") +int oncpu(void *ctx) +{ + /* + * Key range deliberately wider than max_entries to force LRU + * eviction on every other update. + */ + __u32 key = bpf_get_prandom_u32() % 128; + bool do_update = bpf_get_prandom_u32() & 1; + __u64 val = 1; + + if (do_update) + bpf_map_update_elem(&lru_map, &key, &val, BPF_ANY); + else + bpf_map_delete_elem(&lru_map, &key); + __sync_fetch_and_add(&hits, 1); + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From a3847994b4d20c0701ccc54fe110920ea78e73dc Mon Sep 17 00:00:00 2001 From: Nuoqi Gui Date: Sun, 7 Jun 2026 21:24:14 +0800 Subject: selftests/bpf: Cover dynamic inner array lookup nullability Add a verifier regression test that looks up a constant key through a dynamic inner array template and dereferences the result without a NULL check. 
The verifier must reject the program because BPF_F_INNER_MAP allows the concrete runtime array to have fewer entries than the template. Signed-off-by: Nuoqi Gui Acked-by: Eduard Zingerman Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20260607-f01-v2-v2-2-da48453146e8@mails.tsinghua.edu.cn Signed-off-by: Kumar Kartikeya Dwivedi --- .../selftests/bpf/progs/verifier_map_in_map.c | 40 ++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c index 16b761e510f0..b606b5dca734 100644 --- a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c @@ -18,6 +18,20 @@ struct { }); } map_in_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_INNER_MAP); + __uint(max_entries, 8); + __type(key, int); + __type(value, long); + }); +} map_in_map_dyn SEC(".maps"); + SEC("socket") __description("map in map access") __success __success_unpriv __retval(0) @@ -45,6 +59,32 @@ l0_%=: r0 = 0; \ : __clobber_all); } +SEC("socket") +__description("map in map dynamic inner array lookup is nullable") +__failure __msg("invalid mem access 'map_value_or_null'") +__naked void map_in_map_dynamic_inner_array_lookup_is_nullable(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u32*)(r10 - 4) = r1; \ + r2 = r10; \ + r2 += -4; \ + r1 = %[map_in_map_dyn] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + *(u32*)(r10 - 8) = 4; \ + r2 = r10; \ + r2 += -8; \ + r1 = r0; \ + call %[bpf_map_lookup_elem]; \ + r0 = *(u64 *)(r0 + 0); \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_in_map_dyn) + : __clobber_all); +} + SEC("xdp") __description("map in map state pruning") __success 
__msg("processed 15 insns") -- cgit v1.2.3 From dd0f9684d2f7d3f99aee63f5fa80562f2207b964 Mon Sep 17 00:00:00 2001 From: Paul Moses Date: Tue, 9 Jun 2026 05:08:54 -0500 Subject: selftests/bpf: Add BTF repeated field count overflow test Add a raw BTF test that exercises repeated special-field expansion with a large array count. The compact element layout keeps the array byte size representable while the repeated field count overflows the old u32 capacity calculation in btf_repeat_fields(). Signed-off-by: Paul Moses Link: https://lore.kernel.org/bpf/SzebdWqm2zREZBf8Tc5Kc-JDWbh9nBztnk4PUu5kRSD1OOdr_ESVTt__2Hd3-lClr47jIjJCXfOH0RHsMpjjpEUh_R2v30nh3T1IXNT6Pbo=@1g4.org Signed-off-by: Kumar Kartikeya Dwivedi --- tools/testing/selftests/bpf/prog_tests/btf.c | 37 ++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index a9de328a8697..96f719a0cec9 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -4258,6 +4258,43 @@ static struct btf_raw_test raw_tests[] = { .max_entries = 1, }, +{ + .descr = "struct test repeated fields count overflow", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_STRUCT_ENC(NAME_TBD, 0, 0), /* [2] */ + BTF_TYPE_TAG_ENC(NAME_TBD, 2), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPE_ARRAY_ENC(4, 1, 1), /* [5] */ + BTF_STRUCT_ENC(NAME_TBD, 10, 8), /* [6] */ + BTF_MEMBER_ENC(NAME_TBD, 5, 0), + BTF_MEMBER_ENC(NAME_TBD, 5, 0), + BTF_MEMBER_ENC(NAME_TBD, 5, 0), + BTF_MEMBER_ENC(NAME_TBD, 5, 0), + BTF_MEMBER_ENC(NAME_TBD, 5, 0), + BTF_MEMBER_ENC(NAME_TBD, 5, 0), + BTF_MEMBER_ENC(NAME_TBD, 5, 0), + BTF_MEMBER_ENC(NAME_TBD, 5, 0), + BTF_MEMBER_ENC(NAME_TBD, 5, 0), + BTF_MEMBER_ENC(NAME_TBD, 5, 0), + BTF_TYPE_ARRAY_ENC(6, 1, 0x1999999aU), /* [7] */ + BTF_STRUCT_ENC(NAME_TBD, 2, 8 + 8 * 0x1999999aU), /* [8] */ + 
BTF_MEMBER_ENC(NAME_TBD, 4, 0), + BTF_MEMBER_ENC(NAME_TBD, 7, 64), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0prog_test_ref_kfunc\0kptr_untrusted\0elem" + "\0p0\0p1\0p2\0p3\0p4\0p5\0p6\0p7\0p8\0p9" + "\0outer\0trigger\0elems"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "repeat_fields", + .key_size = sizeof(int), + .value_size = 8 + 8 * 0x1999999aU, + .key_type_id = 1, + .value_type_id = 8, + .max_entries = 1, + .btf_load_err = true, +}, }; /* struct btf_raw_test raw_tests[] */ static const char *get_next_str(const char *start, const char *end) -- cgit v1.2.3 From af8c3f170f7314d316023efc0ae670384e220b09 Mon Sep 17 00:00:00 2001 From: Nuoqi Gui Date: Tue, 9 Jun 2026 22:43:51 +0800 Subject: selftests/bpf: Cover writable BTF field global subprog args Add a verifier test for passing a BTF-backed task_struct field pointer to a global subprogram argument typed as writable memory. The direct field store is already rejected. The global subprogram path should be rejected too. The callee must not lose the BTF pointer's read-only provenance. It must not validate the argument as ordinary writable memory. 
Signed-off-by: Nuoqi Gui Link: https://lore.kernel.org/bpf/20260609-f01-04-btf-writable-arg-v1-2-f449cd970669@mails.tsinghua.edu.cn Signed-off-by: Kumar Kartikeya Dwivedi --- .../selftests/bpf/progs/verifier_global_ptr_args.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c index ea273e152209..0bdeb7bc4687 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c @@ -287,6 +287,25 @@ int trusted_to_untrusted_mem(void *ctx) return subprog_void_untrusted(bpf_get_current_task_btf()); } +__weak int subprog_write_mem_arg(int *p) +{ + if (!p) + return 0; + + *p = 42; + return 0; +} + +SEC("?tp_btf/task_newtask") +__failure +__msg("only read is supported") +int trusted_btf_field_to_writable_mem(void *ctx) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + return subprog_write_mem_arg(&task->prio); +} + SEC("tp_btf/sys_enter") __success int anything_to_untrusted_mem(void *ctx) -- cgit v1.2.3 From 68f4e480b089abae26fbab0c38c3df3cbac3d79d Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Tue, 9 Jun 2026 02:36:30 -0400 Subject: selftests/bpf: Avoid spurious spmc parallel selftest errors in libarena The libarena parallel spmc selftest is nondeterministic by design. As a result it depends up to a point on the relative timing between the producer and consumer threads. This introduces the possibility for two kinds of spurious failures that this patch addresses. 1) Spurious timeouts. The test proceeds in phases, and threads use a common counter as a barrier to avoid proceeding to the next phase until all threads are ready to do so. If a thread takes too long to reach the barrier, the already waiting threads may time out. Increase the current timeout. 
The timeout's value is a balance between the maximum amount of time spent on the test and the possibility of spurious failures. Right now the timeout is too short. Err on the side of caution and significantly increase it to avoid spurious failures. 2) Spurious resize failures. Some selftests require the spmc queue to resize itself. This in turn requires the producer side to be materially faster than the consumer side so that the queue gets full enough for a resize. However, in the benchmark the spmc queue's producer is outnumbered 3:1. To offset it we add busy waits in the consumers. However, we still see occasional failures due to the queue never resizing. Minimize the possibility for this in two ways: First, remove one of the consumers. The 2 consumers still exercise the "race between consumers" scenario. Second, increase the busy wait duration to decrease the rate at which the consumers act on the queue. While at it, also replace a stray invalid error value "153" with EINVAL. Fixes: 42998f819256 ("selftests/bpf: libarena: parallel test harness and spmc parallel selftest") Reported-by: Jakub Kicinski Signed-off-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260609063630.10245-1-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- .../bpf/libarena/selftests/test_parallel_spmc.bpf.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/libarena/selftests/test_parallel_spmc.bpf.c b/tools/testing/selftests/bpf/libarena/selftests/test_parallel_spmc.bpf.c index 981c845e2d15..f08f2a92e194 100644 --- a/tools/testing/selftests/bpf/libarena/selftests/test_parallel_spmc.bpf.c +++ b/tools/testing/selftests/bpf/libarena/selftests/test_parallel_spmc.bpf.c @@ -7,7 +7,7 @@ #include #include -#define TEST_SPMC_THREADS 4 +#define TEST_SPMC_THREADS 3 #define TEST_SPMC_STEALERS (TEST_SPMC_THREADS - 1) /* @@ -17,7 +17,7 @@ * and operations are wait-free we just spin around the quiescence * 
point instead. If we time out, we just fail the benchmark. */ -#define TEST_SPMC_SYNC_SPINS (1U << 18) +#define TEST_SPMC_SYNC_SPINS BPF_MAX_LOOPS /* * We track all the values we retrieve from the queue @@ -61,7 +61,7 @@ static volatile u64 round_steals; * We have multiple stealers and a single owner. We sometimes want the owner * to successfully outproduce the stealers, we add a busy loop in them. */ -#define TEST_SPMC_WASTE_ROUNDS (1024) +#define TEST_SPMC_WASTE_ROUNDS (1UL << 12) /* * The spmc data structure depends on the runtime fully @@ -112,10 +112,6 @@ static bool spmc_tests_enabled(void) { \ return spmc_##prefix##_stealer(); \ } \ - SEC("syscall") int parallel_test_spmc_##prefix##__3(void) \ - { \ - return spmc_##prefix##_stealer(); \ - } static int spmc_common_init(u64 total) { @@ -452,10 +448,10 @@ static int spmc_resize_owner(void) resized = true; } - /* Did we get to resize while racing/ */ + /* Did we get to resize while racing? */ if (!resized) { test_abort = true; - return -153; + return -EINVAL; } /* -- cgit v1.2.3 From be1d838b88e445fa6edfb9f98af1603cbf2ee94d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 9 Jun 2026 16:34:07 -0700 Subject: selftests/bpf: Keep int return type for tailcall subprogs LLVM23 ([1]) supports 'true' function signature in BTF. The return type of the caller of a tailcall must be an 'int'. Otherwise, verification will fail (see check_btf_func() in check_btf.c). So with llvm23, it is possible that the compiler may change the caller's return type from 'int' to 'void'. To prevent this, barrier_var() and __sink() are used to avoid returning a constant prone to be optimized. 
[1] https://github.com/llvm/llvm-project/pull/198426 Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20260609233407.2711577-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/tailcall_bpf2bpf2.c | 5 ++++- .../bpf/progs/tailcall_bpf2bpf_hierarchy1.c | 13 ++++++++---- .../bpf/progs/tailcall_bpf2bpf_hierarchy2.c | 24 +++++++++++++++------- .../bpf/progs/tailcall_bpf2bpf_hierarchy3.c | 13 +++++++++--- .../bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c | 13 +++++++++--- tools/testing/selftests/bpf/progs/verifier_sock.c | 9 ++++++-- 6 files changed, 57 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c index ce97d141daee..c4fadee5aadc 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c @@ -13,11 +13,14 @@ struct { static __noinline int subprog_tail(struct __sk_buff *skb) { + int ret = 1; + if (load_byte(skb, 0)) bpf_tail_call_static(skb, &jmp_table, 1); else bpf_tail_call_static(skb, &jmp_table, 0); - return 1; + barrier_var(ret); + return ret; } int count = 0; diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c index d556b19413d7..1fd07824d88a 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c @@ -16,20 +16,25 @@ int count = 0; static __noinline int subprog_tail(struct __sk_buff *skb) { + int ret = 0; + bpf_tail_call_static(skb, &jmp_table, 0); - return 0; + barrier_var(ret); + return ret; } SEC("tc") int entry(struct __sk_buff *skb) { - int ret = 1; + int ret = 1, ret1, ret2; clobber_regs_stack(); count++; - subprog_tail(skb); - subprog_tail(skb); + ret1 = subprog_tail(skb); + ret2 = subprog_tail(skb); + __sink(ret1); + __sink(ret2); return 
ret; } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c index ae94c9c70ab7..6fde0ab92148 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c @@ -25,8 +25,11 @@ int count1 = 0; static __noinline int subprog_tail0(struct __sk_buff *skb) { + int ret = 0; + bpf_tail_call_static(skb, &jmp_table, 0); - return 0; + barrier_var(ret); + return ret; } __auxiliary @@ -41,16 +44,22 @@ int classifier_0(struct __sk_buff *skb) static __noinline int subprog_tail1(struct __sk_buff *skb) { + int ret = 0; + bpf_tail_call_static(skb, &jmp_table, 1); - return 0; + barrier_var(ret); + return ret; } __auxiliary SEC("tc") int classifier_1(struct __sk_buff *skb) { + int ret; + count1++; - subprog_tail1(skb); + ret = subprog_tail1(skb); + __sink(ret); return 0; } @@ -59,13 +68,14 @@ __retval(33) SEC("tc") int tailcall_bpf2bpf_hierarchy_2(struct __sk_buff *skb) { - int ret = 0; + int ret = 0, ret1, ret2; clobber_regs_stack(); - subprog_tail0(skb); - subprog_tail1(skb); - + ret1 = subprog_tail0(skb); + ret2 = subprog_tail1(skb); + __sink(ret1); + __sink(ret2); __sink(ret); return (count1 << 16) | count0; } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c index 56b6b0099840..0ef9cfb2da8d 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c @@ -33,17 +33,24 @@ int count = 0; static __noinline int subprog_tail(struct __sk_buff *skb, void *jmp_table) { + int ret = 0; + bpf_tail_call_static(skb, jmp_table, 0); - return 0; + barrier_var(ret); + return ret; } __auxiliary SEC("tc") int classifier_0(struct __sk_buff *skb) { + int ret1, ret2; + count++; - subprog_tail(skb, &jmp_table0); - subprog_tail(skb, &jmp_table1); + ret1 = 
subprog_tail(skb, &jmp_table0); + ret2 = subprog_tail(skb, &jmp_table1); + __sink(ret1); + __sink(ret2); return count; } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c index 5261395713cd..6db9afee2095 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c @@ -18,18 +18,25 @@ int count = 0; static __noinline int subprog_tail(void *ctx) { + int ret = 0; + bpf_tail_call_static(ctx, &jmp_table, 0); - return 0; + barrier_var(ret); + return ret; } SEC("fentry/dummy") int BPF_PROG(fentry, struct sk_buff *skb) { + int ret1, ret2; + clobber_regs_stack(); count++; - subprog_tail(ctx); - subprog_tail(ctx); + ret1 = subprog_tail(ctx); + ret2 = subprog_tail(ctx); + __sink(ret1); + __sink(ret2); return 0; } diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c index 9f680cf44512..4f2f3209eec8 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sock.c +++ b/tools/testing/selftests/bpf/progs/verifier_sock.c @@ -1120,8 +1120,11 @@ int tail_call(struct __sk_buff *sk) static __noinline int static_tail_call(struct __sk_buff *sk) { + int ret = 0; + bpf_tail_call_static(sk, &jmp_table, 0); - return 0; + barrier_var(ret); + return ret; } /* Tail calls in sub-programs invalidate packet pointers. 
*/ @@ -1144,10 +1147,12 @@ __failure __msg("invalid mem access") int invalidate_pkt_pointers_by_static_tail_call(struct __sk_buff *sk) { int *p = (void *)(long)sk->data; + int ret; if ((void *)(p + 1) > (void *)(long)sk->data_end) return TCX_DROP; - static_tail_call(sk); + ret = static_tail_call(sk); + __sink(ret); *p = 42; /* this is unsafe */ return TCX_PASS; } -- cgit v1.2.3 From e775c522a455b97db7e0a466c400f74672990bad Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 9 Jun 2026 16:34:12 -0700 Subject: selftests/bpf: Adjust fexit_bpf2bpf ctx layout for llvm23 true signature test_pkt_access_subprog2() is defined in C as int test_pkt_access_subprog2(int val, volatile struct __sk_buff *skb) but llvm optimizes away the unused 'int val' argument. Before llvm23 the BTF signature did not match the optimized assembly, so the verifier set attach_func_proto to NULL and fell back to MAX_BPF_FUNC_REG_ARGS (5) u64 arguments (see btf_ctx_access()). The fexit ctx struct therefore placed the return value after args[5]. With llvm23 the 'true' signature int test_pkt_access_subprog2(volatile struct __sk_buff *skb) is recorded in BTF, so nr_args becomes 1 and the return value moves to the slot right after args[1]. Select the matching args_subprog2 layout based on __clang_major__ so the test works with both old and new llvm. 
Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20260609233412.2712178-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c index 983b7c233382..f4bbf87b82dd 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c @@ -53,14 +53,23 @@ int BPF_PROG(test_subprog1, struct sk_buff *skb, int ret) * r0 = *(u32 *)(r1 + 0) * w0 <<= 1 * exit - * In such case the verifier falls back to conservative and + * Before llvm23, in such case the verifier falls back to conservative and * tracing program can access arguments and return value as u64 - * instead of accurate types. + * instead of accurate types. With llvm23, the true signature + * int test_pkt_access_subprog2(volatile struct __sk_buff *skb) + * is available in btf. */ +#if __clang_major__ >= 23 +struct args_subprog2 { + __u64 args[1]; + __u64 ret; +}; +#else struct args_subprog2 { __u64 args[5]; __u64 ret; }; +#endif __u64 test_result_subprog2 = 0; SEC("fexit/test_pkt_access_subprog2") int test_subprog2(struct args_subprog2 *ctx) -- cgit v1.2.3 From 94c8d1c21be40a845357854f98ec07e21bb14bc9 Mon Sep 17 00:00:00 2001 From: Justin Suess Date: Tue, 9 Jun 2026 22:25:43 +0200 Subject: bpf: Reject bpf_obj_drop() from tracing progs bpf_obj_drop() runs bpf_obj_free_fields() synchronously for program-allocated objects. When such an object contains NMI unsafe fields, tracing programs that can run from arbitrary instrumented context can reach that destruction from unsafe contexts, including NMI. NMI is likely one instance of this problem, and other instances would include possible unsafe reentrancy. 
Deferring bpf_obj_drop() is not appealing either: it would add delayed-free machinery to a release operation that otherwise has straightforward synchronous ownership semantics. Reject bpf_obj_drop() and bpf_percpu_obj_drop() from tracing programs that may run from unsafe contexts unless every field in the object's BTF record is explicitly NMI safe. Do not reject sleepable BPF_PROG_TYPE_TRACING programs, since they are not the arbitrary/NMI contexts that motivate the restriction. Note that while bpf_rb_root and bpf_list_head would be NMI safe on their own to free, the objects recursively held by them may not be; be conservative and just mark them as not NMI safe for now. Use a whitelist for the NMI-safe field set instead of listing only known NMI unsafe fields. Locks, async fields, unreferenced kptrs, and refcounts are known to be NMI safe because their destruction is either a no-op, simple state reset, or async cancellation. Referenced kptrs, percpu referenced kptrs, uptrs, graph roots, graph nodes, and any future field type are rejected until audited for arbitrary tracing and NMI contexts. This is less susceptible to future changes in fields that were previously safe by exclusion, and to new fields being added without updating this check. Convert the existing recursive local-object drop success case to a syscall program in the same commit, since this verifier change makes the old tracing program form invalid. The test still exercises bpf_obj_drop() releasing a referenced task kptr from a safe program type. 
Fixes: ac9f06050a35 ("bpf: Introduce bpf_obj_drop") Signed-off-by: Justin Suess Co-developed-by: Kumar Kartikeya Dwivedi Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20260609202548.3571690-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 29 +++++++++++++++ kernel/bpf/verifier.c | 17 +++++++++ .../testing/selftests/bpf/prog_tests/task_kfunc.c | 42 +++++++++++++++++++++- .../selftests/bpf/progs/task_kfunc_success.c | 13 ++++--- 4 files changed, 93 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 62bba7a4876f..0654d2ffadc1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -492,6 +492,35 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f return rec->field_mask & type; } +static inline bool btf_field_is_nmi_safe(enum btf_field_type type) +{ + switch (type) { + case BPF_SPIN_LOCK: + case BPF_RES_SPIN_LOCK: + case BPF_TIMER: + case BPF_WORKQUEUE: + case BPF_TASK_WORK: + case BPF_KPTR_UNREF: + case BPF_REFCOUNT: + return true; + default: + return false; + } +} + +static inline bool btf_record_has_nmi_unsafe_fields(const struct btf_record *rec) +{ + int i; + + if (IS_ERR_OR_NULL(rec)) + return false; + for (i = 0; i < rec->cnt; i++) { + if (!btf_field_is_nmi_safe(rec->fields[i].type)) + return true; + } + return false; +} + static inline void bpf_obj_init(const struct btf_record *rec, void *obj) { int i; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 954b85609f32..eb46a81a8c51 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -205,6 +205,7 @@ static int release_reference_nomark(struct bpf_verifier_state *state, int id); static int release_reference(struct bpf_verifier_env *env, int id); static void invalidate_non_owning_refs(struct bpf_verifier_env *env); static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env); +static bool is_tracing_prog_type(enum bpf_prog_type 
type); static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg); static bool is_trusted_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg); @@ -12881,6 +12882,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable; + enum bpf_prog_type prog_type = resolve_prog_type(env->prog); struct bpf_reg_state *regs = cur_regs(env); const char *func_name, *ptr_type_name; const struct btf_type *t, *ptr_type; @@ -12957,6 +12959,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (err < 0) return err; + if ((is_bpf_obj_drop_kfunc(meta.func_id) || + is_bpf_percpu_obj_drop_kfunc(meta.func_id)) && (is_tracing_prog_type(prog_type) || + /* is_tracing_prog_type() for now doesn't cover non-iterator tracing progs. */ + (prog_type == BPF_PROG_TYPE_TRACING && env->prog->expected_attach_type != BPF_TRACE_ITER + && !env->prog->sleepable))) { + struct btf_struct_meta *struct_meta; + + struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id); + if (struct_meta && btf_record_has_nmi_unsafe_fields(struct_meta->record)) { + verbose(env, "%s cannot be used in tracing programs on types with NMI unsafe fields\n", + func_name); + return -EINVAL; + } + } + if (is_bpf_rbtree_add_kfunc(meta.func_id)) { err = push_callback_call(env, insn, insn_idx, meta.subprogno, set_rbtree_add_callback_state); diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c index 83b90335967a..e6e95c1416e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c @@ -68,6 +68,36 @@ cleanup: task_kfunc_success__destroy(skel); } +static void run_syscall_success_test(const char *prog_name) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct task_kfunc_success *skel; + struct bpf_program *prog; + 
int err; + + skel = open_load_task_kfunc_skel(); + if (!ASSERT_OK_PTR(skel, "open_load_skel")) + return; + + if (!ASSERT_OK(skel->bss->err, "pre_run_err")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + goto cleanup; + if (!ASSERT_EQ(opts.retval, 0, "retval")) + goto cleanup; + + ASSERT_OK(skel->bss->err, "post_run_err"); + +cleanup: + task_kfunc_success__destroy(skel); +} + static int run_vpid_test(void *prog_name) { struct task_kfunc_success *skel; @@ -140,7 +170,6 @@ static const char * const success_tests[] = { "test_task_acquire_release_argument", "test_task_acquire_release_current", "test_task_acquire_leave_in_map", - "test_task_xchg_release", "test_task_map_acquire_release", "test_task_current_acquire_release", "test_task_from_pid_arg", @@ -151,6 +180,10 @@ static const char * const success_tests[] = { "test_task_kfunc_flavor_relo_not_found", }; +static const char * const syscall_success_tests[] = { + "test_task_xchg_release", +}; + static const char * const vpid_success_tests[] = { "test_task_from_vpid_current", "test_task_from_vpid_invalid", @@ -167,6 +200,13 @@ void test_task_kfunc(void) run_success_test(success_tests[i]); } + for (i = 0; i < ARRAY_SIZE(syscall_success_tests); i++) { + if (!test__start_subtest(syscall_success_tests[i])) + continue; + + run_syscall_success_test(syscall_success_tests[i]); + } + for (i = 0; i < ARRAY_SIZE(vpid_success_tests); i++) { if (!test__start_subtest(vpid_success_tests[i])) continue; diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c index 5fb4fc19d26a..d63a79ee33dc 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -140,17 +140,17 @@ int 
BPF_PROG(test_task_acquire_leave_in_map, struct task_struct *task, u64 clone return 0; } -SEC("tp_btf/task_newtask") -int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags) +SEC("syscall") +int test_task_xchg_release(const void *ctx) { - struct task_struct *kptr, *acquired; + struct task_struct *task, *kptr, *acquired; struct __tasks_kfunc_map_value *v, *local; int refcnt, refcnt_after_drop; long status; - if (!is_test_kfunc_task()) - return 0; + (void)ctx; + task = bpf_get_current_task_btf(); status = tasks_kfunc_map_insert(task); if (status) { err = 1; @@ -191,7 +191,7 @@ int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags) return 0; } - /* Stash a copy into local kptr and check if it is released recursively */ + /* Stash a copy into local kptr and check if it is released recursively. */ acquired = bpf_task_acquire(kptr); if (!acquired) { err = 7; @@ -220,7 +220,6 @@ int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags) } bpf_task_release(kptr); - return 0; } -- cgit v1.2.3 From a3a81d247651218e47153f2d2afd7aee236726fd Mon Sep 17 00:00:00 2001 From: Justin Suess Date: Tue, 9 Jun 2026 22:25:44 +0200 Subject: bpf: Cancel special fields on map value recycle Map update and delete paths currently call bpf_obj_free_fields() when a value is being replaced or recycled. That makes field destruction depend on the context of the update/delete operation. For tracing programs this can include NMI context, where referenced kptr destructors, uptr unpinning, and graph root destruction are not generally safe. Introduce bpf_obj_cancel_fields() for the reusable-value path. It only performs NMI-safe cleanup for timer, workqueue, and task_work fields. Fields that need full destruction are left attached to the recycled value and are destroyed by the final cleanup path instead. Switch array and hashtab update/delete/recycle paths to this cancel helper. 
Keep bpf_obj_free_fields() for final map destruction and for bpf_mem_alloc destructors. Preallocated hashtabs do not have allocator destructors, so teardown continues to walk the normal and extra elements and fully destroy their fields. This deliberately relaxes the eager-free semantics of map update/delete for special fields. Programs that relied on a recycled map slot becoming empty immediately after update/delete were relying on behavior that cannot be implemented safely from every BPF execution context without offloading arbitrary destructors. There is a chance this change breaks programs making assumptions regarding the eager freeing of fields. If so, we can relax semantics to cancellation only when irqs_disabled() is true in the future. However, theoretically, map values that get reused eagerly already have weaker guarantees as parallel users can recreate freed fields before the new element becomes visible again. Fixes: 14a324f6a67e ("bpf: Wire up freeing of referenced kptr") Signed-off-by: Justin Suess Co-developed-by: Kumar Kartikeya Dwivedi Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20260609202548.3571690-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + kernel/bpf/arraymap.c | 8 +-- kernel/bpf/hashtab.c | 32 +++++----- kernel/bpf/syscall.c | 5 ++ .../testing/selftests/bpf/prog_tests/htab_update.c | 4 +- .../testing/selftests/bpf/prog_tests/linked_list.c | 33 +++++----- tools/testing/selftests/bpf/prog_tests/map_kptr.c | 10 +-- .../selftests/bpf/prog_tests/refcounted_kptr.c | 8 ++- tools/testing/selftests/bpf/progs/htab_update.c | 4 +- tools/testing/selftests/bpf/progs/linked_list.c | 71 ++++++++++++++++++++++ .../testing/selftests/bpf/progs/refcounted_kptr.c | 20 +++++- 11 files changed, 146 insertions(+), 50 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0654d2ffadc1..56f5da2b437f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ 
-2717,6 +2717,7 @@ bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *r void bpf_obj_free_timer(const struct btf_record *rec, void *obj); void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj); void bpf_obj_free_task_work(const struct btf_record *rec, void *obj); +void bpf_obj_cancel_fields(struct bpf_map *map, void *obj); void bpf_obj_free_fields(const struct btf_record *rec, void *obj); void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index e6271a2bf6d6..248b4818178c 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -384,7 +384,7 @@ static long array_map_update_elem(struct bpf_map *map, void *key, void *value, if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { val = this_cpu_ptr(array->pptrs[index & array->index_mask]); copy_map_value(map, val, value); - bpf_obj_free_fields(array->map.record, val); + bpf_obj_cancel_fields(map, val); } else { val = array->value + (u64)array->elem_size * (index & array->index_mask); @@ -392,7 +392,7 @@ static long array_map_update_elem(struct bpf_map *map, void *key, void *value, copy_map_value_locked(map, val, value, false); else copy_map_value(map, val, value); - bpf_obj_free_fields(array->map.record, val); + bpf_obj_cancel_fields(map, val); } return 0; } @@ -432,14 +432,14 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, cpu = map_flags >> 32; ptr = per_cpu_ptr(pptr, cpu); copy_map_value(map, ptr, value); - bpf_obj_free_fields(array->map.record, ptr); + bpf_obj_cancel_fields(map, ptr); goto unlock; } for_each_possible_cpu(cpu) { ptr = per_cpu_ptr(pptr, cpu); val = (map_flags & BPF_F_ALL_CPUS) ? 
value : value + size * cpu; copy_map_value(map, ptr, val); - bpf_obj_free_fields(array->map.record, ptr); + bpf_obj_cancel_fields(map, ptr); } unlock: rcu_read_unlock(); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index b4366cad3cfa..9f394e1aa2e8 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -243,6 +243,10 @@ static void htab_free_prealloced_fields(struct bpf_htab *htab) if (IS_ERR_OR_NULL(htab->map.record)) return; + /* + * Preallocated maps do not have a bpf_mem_alloc destructor, so fully + * destroy every element, including the extra elements. + */ if (htab_has_extra_elems(htab)) num_entries += num_possible_cpus(); for (i = 0; i < num_entries; i++) { @@ -833,8 +837,8 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map, return insn - insn_buf; } -static void check_and_free_fields(struct bpf_htab *htab, - struct htab_elem *elem) +static void check_and_cancel_fields(struct bpf_htab *htab, + struct htab_elem *elem) { if (IS_ERR_OR_NULL(htab->map.record)) return; @@ -844,11 +848,11 @@ static void check_and_free_fields(struct bpf_htab *htab, int cpu; for_each_possible_cpu(cpu) - bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu)); + bpf_obj_cancel_fields(&htab->map, per_cpu_ptr(pptr, cpu)); } else { void *map_value = htab_elem_value(elem, htab->map.key_size); - bpf_obj_free_fields(htab->map.record, map_value); + bpf_obj_cancel_fields(&htab->map, map_value); } } @@ -883,7 +887,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) htab_unlock_bucket(b, flags); if (l == tgt_l) - check_and_free_fields(htab, l); + check_and_cancel_fields(htab, l); return l == tgt_l; } @@ -948,7 +952,7 @@ find_first_elem: static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) { - check_and_free_fields(htab, l); + check_and_cancel_fields(htab, l); if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr); @@ -1001,7 +1005,7 @@ static void free_htab_elem(struct 
bpf_htab *htab, struct htab_elem *l) if (htab_is_prealloc(htab)) { bpf_map_dec_elem_count(&htab->map); - check_and_free_fields(htab, l); + check_and_cancel_fields(htab, l); pcpu_freelist_push(&htab->freelist, &l->fnode); } else { dec_elem_count(htab); @@ -1018,7 +1022,7 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, /* copy true value_size bytes */ ptr = this_cpu_ptr(pptr); copy_map_value(&htab->map, ptr, value); - bpf_obj_free_fields(htab->map.record, ptr); + bpf_obj_cancel_fields(&htab->map, ptr); } else { u32 size = round_up(htab->map.value_size, 8); void *val; @@ -1028,7 +1032,7 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, cpu = map_flags >> 32; ptr = per_cpu_ptr(pptr, cpu); copy_map_value(&htab->map, ptr, value); - bpf_obj_free_fields(htab->map.record, ptr); + bpf_obj_cancel_fields(&htab->map, ptr); return; } @@ -1036,7 +1040,7 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, ptr = per_cpu_ptr(pptr, cpu); val = (map_flags & BPF_F_ALL_CPUS) ? value : value + size * cpu; copy_map_value(&htab->map, ptr, val); - bpf_obj_free_fields(htab->map.record, ptr); + bpf_obj_cancel_fields(&htab->map, ptr); } } } @@ -1252,11 +1256,11 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value, if (l_old) { hlist_nulls_del_rcu(&l_old->hash_node); - /* l_old has already been stashed in htab->extra_elems, free - * its special fields before it is available for reuse. + /* l_old has already been stashed in htab->extra_elems, cancel + * its reusable special fields before it is available for reuse. 
*/ if (htab_is_prealloc(htab)) - check_and_free_fields(htab, l_old); + check_and_cancel_fields(htab, l_old); } htab_unlock_bucket(b, flags); if (l_old && !htab_is_prealloc(htab)) @@ -1269,7 +1273,7 @@ err: static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem) { - check_and_free_fields(htab, elem); + check_and_cancel_fields(htab, elem); bpf_map_dec_elem_count(&htab->map); bpf_lru_push_free(&htab->lru, &elem->lru_node); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d4188a992bd8..7ed949f70f82 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -808,6 +808,11 @@ void bpf_obj_free_task_work(const struct btf_record *rec, void *obj) bpf_task_work_cancel_and_free(obj + rec->task_work_off); } +void bpf_obj_cancel_fields(struct bpf_map *map, void *obj) +{ + bpf_map_free_internal_structs(map, obj); +} + void bpf_obj_free_fields(const struct btf_record *rec, void *obj) { const struct btf_field *fields; diff --git a/tools/testing/selftests/bpf/prog_tests/htab_update.c b/tools/testing/selftests/bpf/prog_tests/htab_update.c index ea1a6766fbe9..0a28d4346924 100644 --- a/tools/testing/selftests/bpf/prog_tests/htab_update.c +++ b/tools/testing/selftests/bpf/prog_tests/htab_update.c @@ -23,7 +23,7 @@ static void test_reenter_update(void) if (!ASSERT_OK_PTR(skel, "htab_update__open")) return; - bpf_program__set_autoload(skel->progs.bpf_obj_free_fields, true); + bpf_program__set_autoload(skel->progs.bpf_obj_cancel_fields, true); err = htab_update__load(skel); if (!ASSERT_TRUE(!err, "htab_update__load") || err) goto out; @@ -50,7 +50,7 @@ static void test_reenter_update(void) /* * Second update: replace existing element with same key and trigger * the reentrancy of bpf_map_update_elem(). - * check_and_free_fields() calls bpf_obj_free_fields() on the old + * check_and_cancel_fields() calls bpf_obj_cancel_fields() on the old * value, which is where fentry program runs and performs a nested * bpf_map_update_elem(), triggering -EDEADLK. 
*/ diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index dbff099860ba..8defea0253ed 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -131,13 +131,14 @@ end: linked_list_fail__destroy(skel); } -static void clear_fields(struct bpf_map *map) +static void clear_fields(struct bpf_program *prog) { - char buf[24]; - int key = 0; + LIBBPF_OPTS(bpf_test_run_opts, opts); + int ret; - memset(buf, 0xff, sizeof(buf)); - ASSERT_OK(bpf_map__update_elem(map, &key, sizeof(key), buf, sizeof(buf), 0), "check_and_free_fields"); + ret = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + ASSERT_OK(ret, "clear_fields"); + ASSERT_OK(opts.retval, "clear_fields retval"); } enum { @@ -170,31 +171,31 @@ static void test_linked_list_success(int mode, bool leave_in_map) ASSERT_OK(ret, "map_list_push_pop"); ASSERT_OK(opts.retval, "map_list_push_pop retval"); if (!leave_in_map) - clear_fields(skel->maps.array_map); + clear_fields(skel->progs.clear_map_list); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop), &opts); ASSERT_OK(ret, "inner_map_list_push_pop"); ASSERT_OK(opts.retval, "inner_map_list_push_pop retval"); if (!leave_in_map) - clear_fields(skel->maps.inner_map); + clear_fields(skel->progs.clear_inner_map_list); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop), &opts); ASSERT_OK(ret, "global_list_push_pop"); ASSERT_OK(opts.retval, "global_list_push_pop retval"); if (!leave_in_map) - clear_fields(skel->maps.bss_A); + clear_fields(skel->progs.clear_global_list); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_nested), &opts); ASSERT_OK(ret, "global_list_push_pop_nested"); ASSERT_OK(opts.retval, "global_list_push_pop_nested retval"); if (!leave_in_map) - clear_fields(skel->maps.bss_A); + clear_fields(skel->progs.clear_global_nested_list); ret = 
bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_array_push_pop), &opts); ASSERT_OK(ret, "global_list_array_push_pop"); ASSERT_OK(opts.retval, "global_list_array_push_pop retval"); if (!leave_in_map) - clear_fields(skel->maps.bss_A); + clear_fields(skel->progs.clear_global_array_list); if (mode == PUSH_POP) goto end; @@ -204,19 +205,19 @@ ppm: ASSERT_OK(ret, "map_list_push_pop_multiple"); ASSERT_OK(opts.retval, "map_list_push_pop_multiple retval"); if (!leave_in_map) - clear_fields(skel->maps.array_map); + clear_fields(skel->progs.clear_map_list); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop_multiple), &opts); ASSERT_OK(ret, "inner_map_list_push_pop_multiple"); ASSERT_OK(opts.retval, "inner_map_list_push_pop_multiple retval"); if (!leave_in_map) - clear_fields(skel->maps.inner_map); + clear_fields(skel->progs.clear_inner_map_list); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_multiple), &opts); ASSERT_OK(ret, "global_list_push_pop_multiple"); ASSERT_OK(opts.retval, "global_list_push_pop_multiple retval"); if (!leave_in_map) - clear_fields(skel->maps.bss_A); + clear_fields(skel->progs.clear_global_list); if (mode == PUSH_POP_MULT) goto end; @@ -226,19 +227,19 @@ lil: ASSERT_OK(ret, "map_list_in_list"); ASSERT_OK(opts.retval, "map_list_in_list retval"); if (!leave_in_map) - clear_fields(skel->maps.array_map); + clear_fields(skel->progs.clear_map_list); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_in_list), &opts); ASSERT_OK(ret, "inner_map_list_in_list"); ASSERT_OK(opts.retval, "inner_map_list_in_list retval"); if (!leave_in_map) - clear_fields(skel->maps.inner_map); + clear_fields(skel->progs.clear_inner_map_list); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_in_list), &opts); ASSERT_OK(ret, "global_list_in_list"); ASSERT_OK(opts.retval, "global_list_in_list retval"); if (!leave_in_map) - clear_fields(skel->maps.bss_A); + 
clear_fields(skel->progs.clear_global_list); end: linked_list__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c index 03b46f17cf53..ec6f2f2e8308 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c @@ -51,7 +51,6 @@ static void test_map_kptr_success(bool test_run) ret = bpf_map__update_elem(skel->maps.array_map, &key, sizeof(key), buf, sizeof(buf), 0); ASSERT_OK(ret, "array_map update"); - skel->data->ref--; ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); @@ -59,49 +58,42 @@ static void test_map_kptr_success(bool test_run) ret = bpf_map__update_elem(skel->maps.pcpu_array_map, &key, sizeof(key), pbuf, cpu * sizeof(buf), 0); ASSERT_OK(ret, "pcpu_array_map update"); - skel->data->ref--; ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); ret = bpf_map__delete_elem(skel->maps.hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "hash_map delete"); - skel->data->ref--; ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); ret = bpf_map__delete_elem(skel->maps.pcpu_hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "pcpu_hash_map delete"); - skel->data->ref--; ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); ret = bpf_map__delete_elem(skel->maps.hash_malloc_map, &key, sizeof(key), 0); ASSERT_OK(ret, "hash_malloc_map delete"); - skel->data->ref--; ret = 
bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); ret = bpf_map__delete_elem(skel->maps.pcpu_hash_malloc_map, &key, sizeof(key), 0); ASSERT_OK(ret, "pcpu_hash_malloc_map delete"); - skel->data->ref--; ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "lru_hash_map delete"); - skel->data->ref--; ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); ret = bpf_map__delete_elem(skel->maps.lru_pcpu_hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "lru_pcpu_hash_map delete"); - skel->data->ref--; ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); @@ -175,7 +167,7 @@ void serial_test_map_kptr(void) ASSERT_OK(kern_sync_rcu(), "sync rcu"); wait_for_map_release(); - /* Observe refcount dropping to 1 on synchronous delete elem */ + /* Observe refcount dropping to 1 on map release. 
*/ test_map_kptr_success(true); } diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c index d2c0542716a8..1737eba34323 100644 --- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c @@ -57,6 +57,7 @@ void test_percpu_hash_refcounted_kptr_refcount_leak(void) .data_size_in = sizeof(pkt_v4), .repeat = 1, ); + LIBBPF_OPTS(bpf_test_run_opts, syscall_opts); cpu_nr = libbpf_num_possible_cpus(); if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus")) @@ -87,8 +88,11 @@ void test_percpu_hash_refcounted_kptr_refcount_leak(void) if (!ASSERT_EQ(opts.retval, 2, "opts.retval")) goto out; - err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0); - if (!ASSERT_OK(err, "bpf_map__update_elem")) + fd = bpf_program__fd(skel->progs.clear_percpu_hash_kptr); + err = bpf_prog_test_run_opts(fd, &syscall_opts); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + goto out; + if (!ASSERT_EQ(syscall_opts.retval, 1, "syscall_opts.retval")) goto out; fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount); diff --git a/tools/testing/selftests/bpf/progs/htab_update.c b/tools/testing/selftests/bpf/progs/htab_update.c index 195d3b2fba00..62c1b1325ec2 100644 --- a/tools/testing/selftests/bpf/progs/htab_update.c +++ b/tools/testing/selftests/bpf/progs/htab_update.c @@ -22,8 +22,8 @@ struct { int pid = 0; int update_err = 0; -SEC("?fentry/bpf_obj_free_fields") -int bpf_obj_free_fields(void *ctx) +SEC("?fentry/bpf_obj_cancel_fields") +int bpf_obj_cancel_fields(void *ctx) { __u32 key = 0; struct val value = { .payload = 1 }; diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c index 421f40835acd..fa97faa5358b 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.c +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -290,6 +290,77 @@ int test_list_in_list(struct 
bpf_spin_lock *lock, struct bpf_list_head *head) return list_in_list(lock, head, true); } +#define MAX_LIST_CLEAR_NODES 256 + +static __always_inline +int clear_list(struct bpf_spin_lock *lock, struct bpf_list_head *head) +{ + struct bpf_list_node *n; + int i; + + for (i = 0; i < MAX_LIST_CLEAR_NODES; i++) { + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (!n) + return 0; + bpf_obj_drop(container_of(n, struct foo, node2)); + } + return 1; +} + +SEC("syscall") +int clear_map_list(void *ctx) +{ + struct map_value *v; + + v = bpf_map_lookup_elem(&array_map, &(int){0}); + if (!v) + return 1; + return clear_list(&v->lock, &v->head); +} + +SEC("syscall") +int clear_inner_map_list(void *ctx) +{ + struct map_value *v; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); + if (!map) + return 1; + v = bpf_map_lookup_elem(map, &(int){0}); + if (!v) + return 1; + return clear_list(&v->lock, &v->head); +} + +SEC("syscall") +int clear_global_list(void *ctx) +{ + return clear_list(&glock, &ghead); +} + +SEC("syscall") +int clear_global_nested_list(void *ctx) +{ + return clear_list(&ghead_nested.inner.lock, &ghead_nested.inner.head); +} + +SEC("syscall") +int clear_global_array_list(void *ctx) +{ + int ret; + + ret = clear_list(&glock_c, &ghead_array[0]); + if (ret) + return ret; + ret = clear_list(&glock_c, &ghead_array[1]); + if (ret) + return ret; + return clear_list(&glock_c, &ghead_array_one[0]); +} + SEC("tc") int map_list_push_pop(void *ctx) { diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c index 13de169ad68f..61906f48025c 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c @@ -1036,13 +1036,31 @@ int percpu_hash_refcount_leak(void *ctx) struct map_value *v; int key = 0; - v = bpf_map_lookup_elem(&percpu_hash, &key); + v = bpf_map_lookup_percpu_elem(&percpu_hash, &key, 0); if (!v) 
return 0; return __insert_in_list(&head, &lock, &v->node); } +SEC("syscall") +int clear_percpu_hash_kptr(void *ctx) +{ + struct node_data *n; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_percpu_elem(&percpu_hash, &key, 0); + if (!v) + return 0; + + n = bpf_kptr_xchg(&v->node, NULL); + if (!n) + return 0; + bpf_obj_drop(n); + return probe_read_refcount(); +} + SEC("tc") int check_percpu_hash_refcount(void *ctx) { -- cgit v1.2.3 From 4b84518137ce841eca2acae83096adb829dad05c Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 9 Jun 2026 22:25:45 +0200 Subject: selftests/bpf: Exercise unsafe obj drops from tracing progs Add task_kfunc failure cases for bpf_obj_drop() on local objects with referenced kptr fields from tracing and NMI tracing programs. These programs must be rejected because dropping the object would run full special-field destruction synchronously in an unsafe context. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20260609202548.3571690-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/task_kfunc_failure.c | 40 ++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index 8e947d445f8e..8942b5478129 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -5,6 +5,7 @@ #include #include +#include "../bpf_experimental.h" #include "bpf_misc.h" #include "task_kfunc_common.h" @@ -233,6 +234,45 @@ int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_ return 0; } +SEC("tp_btf/task_newtask") +__failure __msg("bpf_obj_drop cannot be used in tracing programs on types with NMI unsafe fields") +int BPF_PROG(task_kfunc_obj_drop_with_kptr, struct task_struct *task, u64 clone_flags) +{ + struct __tasks_kfunc_map_value *local; + + local = 
bpf_obj_new(typeof(*local)); + if (!local) + return 0; + + bpf_obj_drop(local); + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("bpf_obj_drop cannot be used in tracing programs on types with NMI unsafe fields") +int BPF_PROG(task_kfunc_obj_drop_nmi_with_kptr, struct task_struct *task, + u64 clone_flags) +{ + struct __tasks_kfunc_map_value *local; + struct task_struct *acquired, *old; + + (void)clone_flags; + + local = bpf_obj_new(typeof(*local)); + if (!local) + return 0; + + acquired = bpf_task_acquire(task); + if (acquired) { + old = bpf_kptr_xchg(&local->task, acquired); + if (old) + bpf_task_release(old); + } + + bpf_obj_drop(local); + return 0; +} + SEC("tp_btf/task_newtask") __failure __msg("Possibly NULL pointer passed to trusted R1") int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags) -- cgit v1.2.3 From 2e7c6cb4d8437a2fe7cd95aac7ca53d7eb05e9f4 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 9 Jun 2026 22:25:46 +0200 Subject: selftests/bpf: Exercise kptr map update lifetime Add focused map_kptr coverage for BPF-side map updates that touch values containing referenced kptrs. The new syscall programs stash the testmod refcounted object in an array map, a preallocated hash map, and a no-prealloc hash map, then update the same map from BPF. The refcount must remain elevated after the update, while the userspace runner destroys the skeleton and reuses the existing refcount wait to confirm map teardown releases the kptr. 
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20260609202548.3571690-5-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/map_kptr.c | 56 ++++++++++++++ tools/testing/selftests/bpf/progs/map_kptr.c | 89 ++++++++++++++++++++++- 2 files changed, 142 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c index ec6f2f2e8308..17e707dddda8 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c @@ -143,12 +143,68 @@ static void wait_for_map_release(void) map_kptr__destroy(skel); } +enum map_update_kptr_case { + MAP_UPDATE_KPTR_ARRAY, + MAP_UPDATE_KPTR_HASH, + MAP_UPDATE_KPTR_HASH_MALLOC, +}; + +static struct bpf_program *map_update_kptr_prog(struct map_kptr *skel, + enum map_update_kptr_case test) +{ + switch (test) { + case MAP_UPDATE_KPTR_ARRAY: + return skel->progs.test_array_map_update_kptr; + case MAP_UPDATE_KPTR_HASH: + return skel->progs.test_hash_map_update_kptr; + case MAP_UPDATE_KPTR_HASH_MALLOC: + return skel->progs.test_hash_malloc_map_update_kptr; + } + + return NULL; +} + +static void test_map_update_kptr(enum map_update_kptr_case test) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct map_kptr *skel; + struct bpf_program *prog; + int ret; + + skel = map_kptr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load")) + return; + + prog = map_update_kptr_prog(skel, test); + if (!ASSERT_OK_PTR(prog, "map_update_kptr_prog")) + goto out; + + ret = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + if (!ASSERT_OK(ret, "map_update_kptr")) + goto out; + if (!ASSERT_OK(opts.retval, "map_update_kptr retval")) + goto out; + + ASSERT_EQ(skel->bss->num_of_refs, 3, "refs_after_update"); + +out: + map_kptr__destroy(skel); + wait_for_map_release(); +} + void serial_test_map_kptr(void) { struct rcu_tasks_trace_gp 
*skel; RUN_TESTS(map_kptr_fail); + if (test__start_subtest("update_array_map_kptr")) + test_map_update_kptr(MAP_UPDATE_KPTR_ARRAY); + if (test__start_subtest("update_hash_map_kptr")) + test_map_update_kptr(MAP_UPDATE_KPTR_HASH); + if (test__start_subtest("update_hash_malloc_map_kptr")) + test_map_update_kptr(MAP_UPDATE_KPTR_HASH_MALLOC); + skel = rcu_tasks_trace_gp__open_and_load(); if (!ASSERT_OK_PTR(skel, "rcu_tasks_trace_gp__open_and_load")) return; diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index e708ffbe1f61..3fbefc568e0a 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -489,8 +489,7 @@ int test_map_kptr_ref3(struct __sk_buff *ctx) int num_of_refs; -SEC("syscall") -int count_ref(void *ctx) +static __always_inline int read_ref_count(void) { struct prog_test_ref_kfunc *p; unsigned long arg = 0; @@ -500,11 +499,95 @@ int count_ref(void *ctx) return 1; num_of_refs = p->cnt.refs.counter; - bpf_kfunc_call_test_release(p); return 0; } +SEC("syscall") +int count_ref(void *ctx) +{ + return read_ref_count(); +} + +static __always_inline int stash_ref_ptr(struct map_value *v) +{ + struct prog_test_ref_kfunc *p, *old; + unsigned long arg = 0; + + p = bpf_kfunc_call_test_acquire(&arg); + if (!p) + return 1; + + old = bpf_kptr_xchg(&v->ref_ptr, p); + if (old) { + bpf_kfunc_call_test_release(old); + old = bpf_kptr_xchg(&v->ref_ptr, NULL); + if (old) + bpf_kfunc_call_test_release(old); + return 2; + } + return 0; +} + +static __always_inline int check_refs(int expected) +{ + int ret; + + ret = read_ref_count(); + if (ret) + return ret; + return num_of_refs == expected ? 
0 : 3; +} + +SEC("syscall") +int test_array_map_update_kptr(void *ctx) +{ + struct map_value init = {}, *v; + int key = 0, ret; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 1; + ret = stash_ref_ptr(v); + if (ret) + return ret; + ret = check_refs(3); + if (ret) + return ret; + ret = bpf_map_update_elem(&array_map, &key, &init, BPF_EXIST); + if (ret) + return 4; + return check_refs(3); +} + +#define DEFINE_HASH_UPDATE_KPTR_TEST(name, map) \ +SEC("syscall") \ +int name(void *ctx) \ +{ \ + struct map_value init = {}, *v; \ + int key = 0, ret; \ + \ + ret = bpf_map_update_elem(&map, &key, &init, BPF_NOEXIST); \ + if (ret) \ + return 1; \ + v = bpf_map_lookup_elem(&map, &key); \ + if (!v) \ + return 2; \ + ret = stash_ref_ptr(v); \ + if (ret) \ + return ret; \ + ret = check_refs(3); \ + if (ret) \ + return ret; \ + ret = bpf_map_update_elem(&map, &key, &init, BPF_EXIST); \ + if (ret) \ + return 4; \ + return check_refs(3); \ +} + +DEFINE_HASH_UPDATE_KPTR_TEST(test_hash_map_update_kptr, hash_map) +DEFINE_HASH_UPDATE_KPTR_TEST(test_hash_malloc_map_update_kptr, hash_malloc_map) + SEC("syscall") int test_ls_map_kptr_ref1(void *ctx) { -- cgit v1.2.3 From 2e8ad1ff712d2a397e407c9fde60901f68d077dc Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 9 Jun 2026 22:18:31 -0700 Subject: selftests/bpf: Fix bpf_iter/task_vma test For selftest bpf_iter/task_vma, I got a failure like below on my qemu run: test_task_vma_common:FAIL:compare_output unexpected compare_output: actual '561593546000-561593585000r--p0000000000:241256579534/root/devshare/bpf-next/tools/testing/selftests/bpf/test_progs' != expected '561593546000-561593585000r--p0000000000:245551546830/root/devshare/bpf-next/tools/testing/selftests/bpf/test_progs' Further debugging found out file->f_inode->i_ino value may exceed 32bit, e.g., i_ino = 0x14c2eae35, but the format string is '%u'. This caused inode mismatch between bpf iter and proc result. 
Fix the issue by using format string '%llu' to accommodate 64bit i_ino. Fixes: e8168840e16c ("selftests/bpf: Add test for bpf_iter_task_vma") Signed-off-by: Yonghong Song Acked-by: Leon Hwang Link: https://lore.kernel.org/r/20260610051831.1346659-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c index d64ba7ddaed5..d7fb561ed4fb 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c @@ -52,7 +52,7 @@ SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx) bpf_d_path(&file->f_path, d_path_buf, D_PATH_BUF_SIZE); BPF_SEQ_PRINTF(seq, "%08llx ", vma->vm_pgoff << 12); - BPF_SEQ_PRINTF(seq, "%02x:%02x %u", MAJOR(dev), MINOR(dev), + BPF_SEQ_PRINTF(seq, "%02x:%02x %llu", MAJOR(dev), MINOR(dev), file->f_inode->i_ino); BPF_SEQ_PRINTF(seq, "\t%s\n", d_path_buf); } else { -- cgit v1.2.3 From 30dee2c176e7954f63d1fa3e52d172f30beb9bfb Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Wed, 10 Jun 2026 12:55:39 +0200 Subject: selftests/bpf: Cover tail-call cgroup storage prog-array checks Add tail-call selftests for prog-array ownership when cgroup storage is in use. Verify that loading succeeds when callers and callees reuse the owner's cgroup storage map, and that loading fails for a different storage map and for the A(storage) -> B(no storage) -> C(storage) bridge case addressed in the previous commit. Also verify that a storage-less leaf program which cannot perform tail calls itself is still allowed to join a storage-owned prog array, while a storage-less tail-caller is rejected also at map update time. # LDLIBS=-static PKG_CONFIG='pkg-config --static' ./vmtest.sh -- ./test_progs -t tailcalls [...] 
#475/25 tailcalls/tailcall_freplace:OK #475/26 tailcalls/tailcall_bpf2bpf_freplace:OK #475/27 tailcalls/tailcall_failure:OK #475/28 tailcalls/reject_tail_call_spin_lock:OK #475/29 tailcalls/reject_tail_call_rcu_lock:OK #475/30 tailcalls/reject_tail_call_preempt_lock:OK #475/31 tailcalls/reject_tail_call_ref:OK #475/32 tailcalls/tailcall_sleepable:OK #475/33 tailcalls/tailcall_cgrp_storage:OK #475/34 tailcalls/tailcall_cgrp_storage_diff_storage:OK #475/35 tailcalls/tailcall_cgrp_storage_no_storage:OK #475/36 tailcalls/tailcall_cgrp_storage_no_storage_leaf:OK #475/37 tailcalls/tailcall_cgrp_storage_no_storage_bridge:OK #475 tailcalls:OK Summary: 1/37 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Lin Ma Signed-off-by: Rongzhen Cui Signed-off-by: Jingguo Tan Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20260610105539.705887-2-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/tailcalls.c | 186 +++++++++++++++++++++ .../selftests/bpf/progs/tailcall_cgrp_storage.c | 44 +++++ .../bpf/progs/tailcall_cgrp_storage_no_storage.c | 26 +++ .../bpf/progs/tailcall_cgrp_storage_owner.c | 32 ++++ 4 files changed, 288 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/tailcall_cgrp_storage.c create mode 100644 tools/testing/selftests/bpf/progs/tailcall_cgrp_storage_no_storage.c create mode 100644 tools/testing/selftests/bpf/progs/tailcall_cgrp_storage_owner.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 7d534fde0af9..a5a226d0104c 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -8,6 +8,9 @@ #include "tailcall_freplace.skel.h" #include "tc_bpf2bpf.skel.h" #include "tailcall_fail.skel.h" +#include "tailcall_cgrp_storage_owner.skel.h" +#include "tailcall_cgrp_storage_no_storage.skel.h" +#include 
"tailcall_cgrp_storage.skel.h" #include "tailcall_sleepable.skel.h" /* test_tailcall_1 checks basic functionality by patching multiple locations @@ -1654,6 +1657,179 @@ static void test_tailcall_failure() RUN_TESTS(tailcall_fail); } +static void test_tailcall_cgrp_storage(void) +{ + struct tailcall_cgrp_storage_owner *owner_skel = NULL; + struct tailcall_cgrp_storage *skel = NULL; + int err, key = 0, prog_array_fd, prog_fd, storage_map_fd; + + owner_skel = tailcall_cgrp_storage_owner__open_and_load(); + if (!ASSERT_OK_PTR(owner_skel, "owner_open_and_load")) + return; + + prog_array_fd = bpf_map__fd(owner_skel->maps.prog_array); + storage_map_fd = bpf_map__fd(owner_skel->maps.storage_map); + + skel = tailcall_cgrp_storage__open(); + if (!ASSERT_OK_PTR(skel, "tailcall_cgrp_storage__open")) + goto out; + + err = bpf_map__reuse_fd(skel->maps.prog_array, prog_array_fd); + if (!ASSERT_OK(err, "reuse_prog_array")) + goto out; + + err = bpf_map__reuse_fd(skel->maps.storage_map, storage_map_fd); + if (!ASSERT_OK(err, "reuse_storage_map")) + goto out; + + err = bpf_object__load(skel->obj); + if (!ASSERT_OK(err, "tailcall_cgrp_storage__load")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.callee_prog); + err = bpf_map_update_elem(prog_array_fd, &key, &prog_fd, BPF_ANY); + ASSERT_OK(err, "update_prog_array"); +out: + tailcall_cgrp_storage__destroy(skel); + tailcall_cgrp_storage_owner__destroy(owner_skel); +} + +static void test_tailcall_cgrp_storage_diff_storage(void) +{ + struct tailcall_cgrp_storage_owner *owner_skel = NULL; + struct tailcall_cgrp_storage *skel = NULL; + int err, prog_array_fd; + + owner_skel = tailcall_cgrp_storage_owner__open_and_load(); + if (!ASSERT_OK_PTR(owner_skel, "owner_open_and_load")) + return; + + prog_array_fd = bpf_map__fd(owner_skel->maps.prog_array); + + skel = tailcall_cgrp_storage__open(); + if (!ASSERT_OK_PTR(skel, "tailcall_cgrp_storage__open")) + goto out; + + err = bpf_map__reuse_fd(skel->maps.prog_array, prog_array_fd); + if 
(!ASSERT_OK(err, "reuse_prog_array")) + goto out; + + err = bpf_object__load(skel->obj); + ASSERT_ERR(err, "tailcall_cgrp_storage__load"); +out: + tailcall_cgrp_storage__destroy(skel); + tailcall_cgrp_storage_owner__destroy(owner_skel); +} + +static void test_tailcall_cgrp_storage_no_storage(void) +{ + struct tailcall_cgrp_storage_owner *owner_skel = NULL; + struct tailcall_cgrp_storage_no_storage *skel = NULL; + int err, prog_array_fd; + + owner_skel = tailcall_cgrp_storage_owner__open_and_load(); + if (!ASSERT_OK_PTR(owner_skel, "owner_open_and_load")) + return; + + prog_array_fd = bpf_map__fd(owner_skel->maps.prog_array); + + skel = tailcall_cgrp_storage_no_storage__open(); + if (!ASSERT_OK_PTR(skel, "tailcall_cgrp_storage_no_storage__open")) + goto out; + + err = bpf_map__reuse_fd(skel->maps.prog_array, prog_array_fd); + if (!ASSERT_OK(err, "reuse_prog_array")) + goto out; + + err = bpf_object__load(skel->obj); + ASSERT_ERR(err, "tailcall_cgrp_storage_no_storage__load"); +out: + tailcall_cgrp_storage_no_storage__destroy(skel); + tailcall_cgrp_storage_owner__destroy(owner_skel); +} + +static void test_tailcall_cgrp_storage_no_storage_leaf(void) +{ + struct tailcall_cgrp_storage_owner *owner_skel = NULL; + struct tailcall_cgrp_storage_no_storage *skel = NULL; + int err, key = 0, prog_array_fd, prog_fd; + + owner_skel = tailcall_cgrp_storage_owner__open_and_load(); + if (!ASSERT_OK_PTR(owner_skel, "owner_open_and_load")) + return; + + prog_array_fd = bpf_map__fd(owner_skel->maps.prog_array); + + skel = tailcall_cgrp_storage_no_storage__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tailcall_cgrp_storage_no_storage__open_and_load")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.leaf_prog); + err = bpf_map_update_elem(prog_array_fd, &key, &prog_fd, BPF_ANY); + if (!ASSERT_OK(err, "update_prog_array_leaf")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.caller_prog); + err = bpf_map_update_elem(prog_array_fd, &key, &prog_fd, BPF_ANY); + ASSERT_ERR(err, 
"update_prog_array_bridge"); +out: + tailcall_cgrp_storage_no_storage__destroy(skel); + tailcall_cgrp_storage_owner__destroy(owner_skel); +} + +static void test_tailcall_cgrp_storage_no_storage_bridge(void) +{ + struct tailcall_cgrp_storage_owner *owner_skel = NULL; + struct tailcall_cgrp_storage_no_storage *bridge_skel = NULL; + struct tailcall_cgrp_storage *callee_skel = NULL; + int err, key = 0, prog_array_fd, prog_fd, storage_map_fd; + + owner_skel = tailcall_cgrp_storage_owner__open_and_load(); + if (!ASSERT_OK_PTR(owner_skel, "owner_open_and_load")) + return; + + prog_array_fd = bpf_map__fd(owner_skel->maps.prog_array); + storage_map_fd = bpf_map__fd(owner_skel->maps.storage_map); + + callee_skel = tailcall_cgrp_storage__open(); + if (!ASSERT_OK_PTR(callee_skel, "tailcall_cgrp_storage__open")) + goto out; + + bpf_program__set_autoload(callee_skel->progs.caller_prog, false); + + err = bpf_map__reuse_fd(callee_skel->maps.prog_array, prog_array_fd); + if (!ASSERT_OK(err, "reuse_prog_array")) + goto out; + + err = bpf_map__reuse_fd(callee_skel->maps.storage_map, storage_map_fd); + if (!ASSERT_OK(err, "reuse_storage_map")) + goto out; + + err = bpf_object__load(callee_skel->obj); + if (!ASSERT_OK(err, "tailcall_cgrp_storage__load")) + goto out; + + prog_fd = bpf_program__fd(callee_skel->progs.callee_prog); + err = bpf_map_update_elem(prog_array_fd, &key, &prog_fd, BPF_ANY); + if (!ASSERT_OK(err, "update_prog_array")) + goto out; + + bridge_skel = tailcall_cgrp_storage_no_storage__open(); + if (!ASSERT_OK_PTR(bridge_skel, "tailcall_cgrp_storage_no_storage__open")) + goto out; + + err = bpf_map__reuse_fd(bridge_skel->maps.prog_array, prog_array_fd); + if (!ASSERT_OK(err, "reuse_prog_array")) + goto out; + + err = bpf_object__load(bridge_skel->obj); + ASSERT_ERR(err, "tailcall_cgrp_storage_no_storage_bridge__load"); +out: + tailcall_cgrp_storage_no_storage__destroy(bridge_skel); + tailcall_cgrp_storage__destroy(callee_skel); + 
tailcall_cgrp_storage_owner__destroy(owner_skel); +} + noinline void uprobe_sleepable_trigger(void) { asm volatile (""); @@ -1781,4 +1957,14 @@ void test_tailcalls(void) test_tailcall_failure(); if (test__start_subtest("tailcall_sleepable")) test_tailcall_sleepable(); + if (test__start_subtest("tailcall_cgrp_storage")) + test_tailcall_cgrp_storage(); + if (test__start_subtest("tailcall_cgrp_storage_diff_storage")) + test_tailcall_cgrp_storage_diff_storage(); + if (test__start_subtest("tailcall_cgrp_storage_no_storage")) + test_tailcall_cgrp_storage_no_storage(); + if (test__start_subtest("tailcall_cgrp_storage_no_storage_leaf")) + test_tailcall_cgrp_storage_no_storage_leaf(); + if (test__start_subtest("tailcall_cgrp_storage_no_storage_bridge")) + test_tailcall_cgrp_storage_no_storage_bridge(); } diff --git a/tools/testing/selftests/bpf/progs/tailcall_cgrp_storage.c b/tools/testing/selftests/bpf/progs/tailcall_cgrp_storage.c new file mode 100644 index 000000000000..4dd3a0033d75 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_cgrp_storage.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, __u64); +} storage_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} prog_array SEC(".maps"); + +SEC("cgroup_skb/egress") +int caller_prog(struct __sk_buff *skb) +{ + __u64 *storage; + + storage = bpf_get_local_storage(&storage_map, 0); + if (storage) + *storage = 1; + + bpf_tail_call(skb, &prog_array, 0); + return 1; +} + +SEC("cgroup_skb/egress") +int callee_prog(struct __sk_buff *skb) +{ + __u64 *storage; + + storage = bpf_get_local_storage(&storage_map, 0); + if (storage) + *storage = 1; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git 
a/tools/testing/selftests/bpf/progs/tailcall_cgrp_storage_no_storage.c b/tools/testing/selftests/bpf/progs/tailcall_cgrp_storage_no_storage.c new file mode 100644 index 000000000000..5c69b0af6ff9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_cgrp_storage_no_storage.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} prog_array SEC(".maps"); + +SEC("cgroup_skb/egress") +int caller_prog(struct __sk_buff *skb) +{ + bpf_tail_call(skb, &prog_array, 0); + return 1; +} + +SEC("cgroup_skb/egress") +int leaf_prog(struct __sk_buff *skb) +{ + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/tailcall_cgrp_storage_owner.c b/tools/testing/selftests/bpf/progs/tailcall_cgrp_storage_owner.c new file mode 100644 index 000000000000..d7e8ec9855c5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_cgrp_storage_owner.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, __u64); +} storage_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} prog_array SEC(".maps"); + +SEC("cgroup_skb/egress") +int prog_array_owner(struct __sk_buff *skb) +{ + __u64 *storage; + + storage = bpf_get_local_storage(&storage_map, 0); + if (storage) + *storage = 1; + + bpf_tail_call(skb, &prog_array, 0); + return 1; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From f0eff94d07cda9bd71754d95af4301cd437020b8 Mon Sep 17 00:00:00 2001 From: Sun Jian Date: Fri, 12 Jun 2026 19:40:32 +0800 Subject: selftests/bpf: Cover generic devmap egress last-dst rewrite Strengthen 
xdp_veth_egress to check that each destination observes the MAC selected for its own egress ifindex, instead of only checking that the observed MAC differs from a single magic value. Add a generic XDP last-destination test where an earlier destination does not have a devmap egress program while the final destination does. This covers the case where the final destination runs on the original skb and could otherwise rewrite packet data still shared with an earlier cloned skb. Use deterministic DEVMAP_HASH keys for the egress map so the intended last destination is stable. Initialize the result map with a sentinel value and check that store_mac_1 overwrites it before checking that the earlier destination did not observe the MAC written by the final destination. Suggested-by: Jiayuan Chen Signed-off-by: Sun Jian Link: https://lore.kernel.org/r/20260612114032.244616-3-sun.jian.kdev@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/test_xdp_veth.c | 166 ++++++++++++++++++++- 1 file changed, 163 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c index 3e98a1665936..1675b32753a8 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c +++ b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c @@ -456,7 +456,11 @@ static void xdp_veth_egress(u32 flags) .remote_flags = flags, } }; - const char magic_mac[6] = { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}; + const unsigned char egress_macs[VETH_PAIRS_COUNT][ETH_ALEN] = { + { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x01 }, + { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x02 }, + { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x03 }, + }; struct xdp_redirect_multi_kern *xdp_redirect_multi_kern; struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB]; struct xdp_redirect_map *xdp_redirect_map; @@ -512,7 +516,13 @@ static void xdp_veth_egress(u32 flags) &net_config, prog_cfg, i)) goto destroy_xdp_redirect_map; - err = 
bpf_map_update_elem(mac_map, &ifindex, magic_mac, 0); + { + __be64 mac = 0; + + memcpy(&mac, egress_macs[i], ETH_ALEN); + err = bpf_map_update_elem(mac_map, &ifindex, &mac, 0); + } + if (!ASSERT_OK(err, "bpf_map_update_elem")) goto destroy_xdp_redirect_map; @@ -531,15 +541,162 @@ static void xdp_veth_egress(u32 flags) for (i = 0; i < 2; i++) { u32 key = i; + __be64 expected = 0; u64 res; err = bpf_map_lookup_elem(res_map, &key, &res); if (!ASSERT_OK(err, "get MAC res")) goto destroy_xdp_redirect_map; - ASSERT_STRNEQ((const char *)&res, magic_mac, ETH_ALEN, "compare mac"); + /* store_mac_1/2 run on the second/third remote veths. */ + memcpy(&expected, egress_macs[i + 1], ETH_ALEN); + ASSERT_EQ(res, expected, "compare mac"); + } + +destroy_xdp_redirect_map: + close_netns(nstoken); + xdp_redirect_map__destroy(xdp_redirect_map); +destroy_xdp_redirect_multi_kern: + xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern); +destroy_xdp_dummy: + xdp_dummy__destroy(xdp_dummy); + + cleanup_network(&net_config); +} + +static void xdp_veth_egress_last_dst(u32 flags) +{ + struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = { + { + .local_name = "xdp_redirect_map_all_prog", + .remote_name = "xdp_dummy_prog", + .local_flags = flags, + .remote_flags = flags, + }, + { + .local_name = "xdp_redirect_map_all_prog", + .remote_name = "store_mac_1", + .local_flags = flags, + .remote_flags = flags, + }, + { + .local_name = "xdp_redirect_map_all_prog", + .remote_name = "xdp_dummy_prog", + .local_flags = flags, + .remote_flags = flags, + } + }; + const unsigned char egress_macs[VETH_PAIRS_COUNT][ETH_ALEN] = { + { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x01 }, + { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x02 }, + { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x03 }, + }; + struct xdp_redirect_multi_kern *xdp_redirect_multi_kern; + struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB]; + struct xdp_redirect_map *xdp_redirect_map; + struct net_configuration net_config = {}; + int mac_map, egress_map, res_map; + struct nstoken 
*nstoken = NULL; + struct xdp_dummy *xdp_dummy; + __be64 sentinel_mac = 0; + __be64 last_mac = 0; + __be64 res; + u32 key; + int err; + int i; + + xdp_dummy = xdp_dummy__open_and_load(); + if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load")) + return; + + xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load(); + if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load")) + goto destroy_xdp_dummy; + + xdp_redirect_map = xdp_redirect_map__open_and_load(); + if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load")) + goto destroy_xdp_redirect_multi_kern; + + if (!ASSERT_OK(create_network(&net_config), "create network")) + goto destroy_xdp_redirect_map; + + mac_map = bpf_map__fd(xdp_redirect_multi_kern->maps.mac_map); + if (!ASSERT_OK_FD(mac_map, "open mac_map")) + goto destroy_xdp_redirect_map; + + egress_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_egress); + if (!ASSERT_OK_FD(egress_map, "open map_egress")) + goto destroy_xdp_redirect_map; + + bpf_objs[0] = xdp_dummy->obj; + bpf_objs[1] = xdp_redirect_multi_kern->obj; + bpf_objs[2] = xdp_redirect_map->obj; + + nstoken = open_netns(net_config.ns0_name); + if (!ASSERT_OK_PTR(nstoken, "open NS0")) + goto destroy_xdp_redirect_map; + + for (i = 0; i < VETH_PAIRS_COUNT; i++) { + struct bpf_devmap_val devmap_val = {}; + int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth); + u32 key = i; + + SYS(destroy_xdp_redirect_map, + "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s", + net_config.veth_cfg[i].namespace, IP_NEIGH, + net_config.veth_cfg[i].remote_veth); + + if (attach_programs_to_veth_pair(bpf_objs, VETH_EGRESS_SKEL_NB, + &net_config, prog_cfg, i)) + goto destroy_xdp_redirect_map; + + { + __be64 mac = 0; + + memcpy(&mac, egress_macs[i], ETH_ALEN); + err = bpf_map_update_elem(mac_map, &ifindex, &mac, 0); + } + + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto destroy_xdp_redirect_map; + + devmap_val.ifindex = ifindex; + 
devmap_val.bpf_prog.fd = -1; + + if (i == VETH_PAIRS_COUNT - 1) + devmap_val.bpf_prog.fd = + bpf_program__fd(xdp_redirect_multi_kern->progs.xdp_devmap_prog); + + err = bpf_map_update_elem(egress_map, &key, &devmap_val, 0); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto destroy_xdp_redirect_map; } + res_map = bpf_map__fd(xdp_redirect_map->maps.rx_mac); + if (!ASSERT_OK_FD(res_map, "open rx_map")) + goto destroy_xdp_redirect_map; + + memcpy(&sentinel_mac, egress_macs[VETH_PAIRS_COUNT - 1], ETH_ALEN); + memcpy(&last_mac, egress_macs[VETH_PAIRS_COUNT - 1], ETH_ALEN); + + key = 0; + err = bpf_map_update_elem(res_map, &key, &sentinel_mac, 0); + if (!ASSERT_OK(err, "init rx mac")) + goto destroy_xdp_redirect_map; + + SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ", + net_config.veth_cfg[0].namespace, IP_NEIGH); + + err = bpf_map_lookup_elem(res_map, &key, &res); + if (!ASSERT_OK(err, "get MAC res")) + goto destroy_xdp_redirect_map; + + if (!ASSERT_NEQ(res, sentinel_mac, "rx_mac overwritten by store_mac_1")) + goto destroy_xdp_redirect_map; + + if (!ASSERT_NEQ(res, last_mac, "earlier dst not rewritten by last dst")) + goto destroy_xdp_redirect_map; + destroy_xdp_redirect_map: close_netns(nstoken); xdp_redirect_map__destroy(xdp_redirect_map); @@ -596,4 +753,7 @@ void test_xdp_veth_egress(void) if (test__start_subtest("SKB_MODE/egress")) xdp_veth_egress(XDP_FLAGS_SKB_MODE); + + if (test__start_subtest("SKB_MODE/egress_last_dst")) + xdp_veth_egress_last_dst(XDP_FLAGS_SKB_MODE); } -- cgit v1.2.3 From cec8423776176eb73429443ecb859789af9602e5 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Wed, 10 Jun 2026 20:17:24 +0000 Subject: selftests/bpf: Add retval test for bool and errno LSM cgroup hooks Add test to check the return value when a BPF program exits with 0 for a boolean and an errno LSM hook. For each hook, two BPF programs are attached. 
The first program returns 0 without calling bpf_set_retval() to exercise the return value translation logic, while the second program reads the retval via bpf_get_retval(). Signed-off-by: Xu Kuohai Link: https://lore.kernel.org/r/20260610201724.733943-3-xukuohai@huaweicloud.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/lsm_cgroup.c | 79 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/lsm_cgroup.c | 30 ++++++++ 2 files changed, 109 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c index 6df25de8f080..41e867467f6c 100644 --- a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c +++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -309,11 +310,89 @@ static void test_lsm_cgroup_nonvoid(void) lsm_cgroup_nonvoid__destroy(skel); } +static void test_lsm_cgroup_retval(void) +{ + struct lsm_cgroup *skel = NULL; + int skipcap_prog_fd1, skipcap_prog_fd2, socket_prog_fd1, socket_prog_fd2; + int cgroup_fd = -1; + int err, fd; + char tmpfile[] = "/tmp/test_lsm_cgroup_retval.XXXXXX"; + + fd = mkstemp(tmpfile); + if (!ASSERT_OK_FD(fd, "mkstemp")) + return; + close(fd); + + cgroup_fd = test__join_cgroup("/default_retval"); + if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup")) + goto cleanup_tmpfile; + + skel = lsm_cgroup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + goto cleanup_cgroup; + + skipcap_prog_fd1 = bpf_program__fd(skel->progs.skipcap_first); + skipcap_prog_fd2 = bpf_program__fd(skel->progs.skipcap_second); + socket_prog_fd1 = bpf_program__fd(skel->progs.socket_first); + socket_prog_fd2 = bpf_program__fd(skel->progs.socket_second); + + err = bpf_prog_attach(skipcap_prog_fd1, cgroup_fd, BPF_LSM_CGROUP, BPF_F_ALLOW_MULTI); + if (err == -ENOTSUPP) { + test__skip(); + goto cleanup_skeleton; + } + if (!ASSERT_OK(err, "attach first skipcap prog")) + goto 
cleanup_skeleton; + + err = bpf_prog_attach(skipcap_prog_fd2, cgroup_fd, BPF_LSM_CGROUP, BPF_F_ALLOW_MULTI); + if (!ASSERT_OK(err, "attach second skipcap prog")) + goto cleanup_skipcap1; + + err = bpf_prog_attach(socket_prog_fd1, cgroup_fd, BPF_LSM_CGROUP, BPF_F_ALLOW_MULTI); + if (!ASSERT_OK(err, "attach first sock_create prog")) + goto cleanup_skipcap2; + + err = bpf_prog_attach(socket_prog_fd2, cgroup_fd, BPF_LSM_CGROUP, BPF_F_ALLOW_MULTI); + if (!ASSERT_OK(err, "attach second sock_create prog")) + goto cleanup_sock_create1; + + /* trigger the bool hook by setxattr */ + err = setxattr(tmpfile, "user.test", "value", 5, 0); + if (!ASSERT_OK(err, "setxattr")) + goto cleanup_sock_create2; + + /* trigger the errno hook by creating a socket */ + fd = socket(AF_INET, SOCK_STREAM, 0); + if (!ASSERT_OK_FD(fd, "socket")) + goto cleanup_sock_create2; + close(fd); + + ASSERT_EQ(skel->data->skipcap_retval, 0, "bool_hook_retval_should_be_0"); + ASSERT_EQ(skel->data->socket_retval, -EPERM, "errno_hook_retval_should_be_EPERM"); + +cleanup_sock_create2: + bpf_prog_detach2(socket_prog_fd2, cgroup_fd, BPF_LSM_CGROUP); +cleanup_sock_create1: + bpf_prog_detach2(socket_prog_fd1, cgroup_fd, BPF_LSM_CGROUP); +cleanup_skipcap2: + bpf_prog_detach2(skipcap_prog_fd2, cgroup_fd, BPF_LSM_CGROUP); +cleanup_skipcap1: + bpf_prog_detach2(skipcap_prog_fd1, cgroup_fd, BPF_LSM_CGROUP); +cleanup_skeleton: + lsm_cgroup__destroy(skel); +cleanup_cgroup: + close(cgroup_fd); +cleanup_tmpfile: + unlink(tmpfile); +} + void test_lsm_cgroup(void) { if (test__start_subtest("functional")) test_lsm_cgroup_functional(); if (test__start_subtest("nonvoid")) test_lsm_cgroup_nonvoid(); + if (test__start_subtest("retval")) + test_lsm_cgroup_retval(); btf__free(btf); } diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c index d7598538aa2d..3bfa479104be 100644 --- a/tools/testing/selftests/bpf/progs/lsm_cgroup.c +++ 
b/tools/testing/selftests/bpf/progs/lsm_cgroup.c @@ -35,6 +35,8 @@ int called_socket_bind; int called_socket_bind2; int called_socket_alloc; int called_socket_clone; +int skipcap_retval = -4095; +int socket_retval = -4095; static __always_inline int test_local_storage(void) { @@ -190,3 +192,31 @@ int BPF_PROG(socket_clone, struct sock *newsk, const struct request_sock *req) return 1; } + +SEC("lsm_cgroup/inode_xattr_skipcap") +int BPF_PROG(skipcap_first, const char *name) +{ + return 0; +} + +SEC("lsm_cgroup/inode_xattr_skipcap") +int BPF_PROG(skipcap_second, const char *name) +{ + skipcap_retval = bpf_get_retval(); + bpf_set_retval(0); + return 1; +} + +SEC("lsm_cgroup/socket_create") +int BPF_PROG(socket_first, int family, int type, int protocol, int kern) +{ + return 0; +} + +SEC("lsm_cgroup/socket_create") +int BPF_PROG(socket_second, int family, int type, int protocol, int kern) +{ + socket_retval = bpf_get_retval(); + bpf_set_retval(0); + return 1; +} -- cgit v1.2.3 From 7bfb93e3475be9de894f1cecd3a727d3e1649b03 Mon Sep 17 00:00:00 2001 From: Woojin Ji Date: Fri, 12 Jun 2026 14:26:55 +0900 Subject: selftests/bpf: Add arena direct-value one-past-end reject test BPF_MAP_TYPE_ARENA supports direct-value pseudo loads, but unlike array maps its map value_size is zero and the valid direct-value range is the arena mmap size, max_entries * PAGE_SIZE. Commit 3ac1a467e376 ("bpf: Fix off-by-one boundary validation in arena direct-value access") fixed arena_map_direct_value_addr() to reject an offset exactly at the end of the arena mapping. Add a regression test that loads a BPF_PSEUDO_MAP_VALUE with off == arena_size and verifies that the verifier rejects it with the expected offset in the log. This is intentionally kept as a userspace raw-instruction test. I tried expressing the same BPF_PSEUDO_MAP_VALUE + off == arena_size case in verifier_arena.c with inline assembly. 
The only form that produces the desired instruction bytes uses __imm_addr(arena), but that emits R_BPF_64_NODYLD32, which the libbpf/bpftool link step rejects. Other register, immediate, and memory constraints either fail in the BPF backend or lower to a normal R_BPF_64_64 load followed by an ALU add, which does not exercise arena_map_direct_value_addr() with the boundary offset in the second ldimm64 slot. A legacy test_verifier fixture can express the raw instruction directly, but it needs arena map creation, mmap, and fixup plumbing in the legacy runner. That is more intrusive than the small prog_tests raw-instruction test. Use the userspace raw-instruction test, following the existing selftests pattern used for direct map-value pseudo loads, so insns[1].imm can be set to arena_size precisely. Assisted-by: ChatGPT:gpt-5.5 Signed-off-by: Woojin Ji Reviewed-by: Emil Tsalapatis Cc: Emil Tsalapatis Cc: Junyoung Jang Link: https://lore.kernel.org/r/20260612-arena-direct-value-v1-v4-1-b81b642f5277@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/arena_direct_value.c | 73 ++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/arena_direct_value.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/arena_direct_value.c b/tools/testing/selftests/bpf/prog_tests/arena_direct_value.c new file mode 100644 index 000000000000..4b4adb3f4b71 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/arena_direct_value.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include + +#define ARENA_PAGES 32 + +static char log_buf[16384]; + +static void test_arena_direct_value_one_past_end(void) +{ + char expected[128]; + __u32 arena_sz = ARENA_PAGES * getpagesize(); + struct bpf_insn insns[] = { + BPF_LD_IMM64_RAW(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + 
LIBBPF_OPTS(bpf_map_create_opts, map_opts); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts); + void *arena; + int map_fd, prog_fd; + + map_opts.map_flags = BPF_F_MMAPABLE; + prog_opts.log_buf = log_buf; + prog_opts.log_size = sizeof(log_buf); + prog_opts.log_level = 1; + + map_fd = bpf_map_create(BPF_MAP_TYPE_ARENA, "arena_direct_value", + 0, 0, ARENA_PAGES, &map_opts); + if (map_fd < 0) { + if (errno == EOPNOTSUPP) { + test__skip(); + return; + } + ASSERT_GE(map_fd, 0, "bpf_map_create"); + return; + } + + arena = mmap(NULL, arena_sz, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0); + if (!ASSERT_NEQ(arena, MAP_FAILED, "arena_mmap")) + goto cleanup; + + insns[0].imm = map_fd; + insns[1].imm = arena_sz; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, + "arena_direct_value", "GPL", insns, + ARRAY_SIZE(insns), &prog_opts); + if (!ASSERT_LT(prog_fd, 0, "prog_load")) { + close(prog_fd); + goto cleanup; + } + + snprintf(expected, sizeof(expected), + "invalid access to map value pointer, value_size=0 off=%u", + arena_sz); + ASSERT_HAS_SUBSTR(log_buf, expected, "verifier_log"); + +cleanup: + if (arena != MAP_FAILED) + munmap(arena, arena_sz); + close(map_fd); +} + +void test_arena_direct_value(void) +{ + if (test__start_subtest("one_past_end")) + test_arena_direct_value_one_past_end(); +} -- cgit v1.2.3 From 2148794eeaf2a898adc791e9472eb80ea55984da Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sat, 13 Jun 2026 11:07:55 -0700 Subject: bpf: Raise maximum call chain depth to 16 frames Bump MAX_CALL_FRAMES from 8 to 16 to allow deeper call chains that Rust-BPF requires and update selftests. 
Link: https://lore.kernel.org/r/20260613180755.29671-1-alexei.starovoitov@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 13 +++--- kernel/bpf/verifier.c | 15 ++++--- .../selftests/bpf/progs/test_global_func3.c | 52 +++++++++++++++++++++- .../selftests/bpf/progs/verifier_liveness_exp.c | 2 +- .../selftests/bpf/progs/verifier_scalar_ids.c | 25 +++++------ tools/testing/selftests/bpf/verifier/calls.c | 48 ++++++++++++++++++++ 6 files changed, 128 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index d57b339a8cb8..39a851e690ec 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -404,7 +404,7 @@ struct bpf_func_state { struct bpf_reg_state *stack_arg_regs; /* Outgoing on-stack arguments */ }; -#define MAX_CALL_FRAMES 8 +#define MAX_CALL_FRAMES 16 /* instruction history flags, used in bpf_jmp_history_entry.flags field. * Frame number and SPI are stored in dedicated fields of bpf_jmp_history_entry. 
@@ -421,20 +421,21 @@ enum { struct bpf_jmp_history_entry { /* insn idx can't be bigger than 1 million */ u32 idx : 20; - u32 frame : 3; /* stack access frame number */ + u32 frame : 4; /* stack access frame number */ u32 spi : 6; /* stack slot index (0..63) */ - u32 : 3; + u32 : 2; u32 prev_idx : 20; /* special INSN_F_xxx flags */ u32 flags : 4; u32 : 8; - /* additional registers that need precision tracking when this - * jump is backtracked, vector of six 10-bit records + /* + * additional registers that need precision tracking when this + * jump is backtracked, vector of five 11-bit records */ u64 linked_regs; }; -static_assert(MAX_CALL_FRAMES <= (1 << 3)); +static_assert(MAX_CALL_FRAMES <= (1 << 4)); static_assert(MAX_BPF_STACK / 8 <= (1 << 6)); /* Maximum number of bpf_reg_state objects that can exist at once */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index eb46a81a8c51..2abc79dbf281 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3144,7 +3144,7 @@ static void mark_indirect_target(struct bpf_verifier_env *env, int idx) env->insn_aux_data[idx].indirect_target = true; } -#define LR_FRAMENO_BITS 3 +#define LR_FRAMENO_BITS 4 #define LR_SPI_BITS 6 #define LR_ENTRY_BITS (LR_SPI_BITS + LR_FRAMENO_BITS + 1) #define LR_SIZE_BITS 4 @@ -3153,7 +3153,11 @@ static void mark_indirect_target(struct bpf_verifier_env *env, int idx) #define LR_SIZE_MASK ((1ull << LR_SIZE_BITS) - 1) #define LR_SPI_OFF LR_FRAMENO_BITS #define LR_IS_REG_OFF (LR_SPI_BITS + LR_FRAMENO_BITS) -#define LINKED_REGS_MAX 6 +#define LINKED_REGS_MAX 5 + +static_assert(MAX_CALL_FRAMES <= (1 << LR_FRAMENO_BITS)); +static_assert(LINKED_REGS_MAX < (1 << LR_SIZE_BITS)); +static_assert(LINKED_REGS_MAX * LR_ENTRY_BITS + LR_SIZE_BITS <= 64); struct linked_reg { u8 frameno; @@ -3177,10 +3181,11 @@ static struct linked_reg *linked_regs_push(struct linked_regs *s) return NULL; } -/* Use u64 as a vector of 6 10-bit values, use first 4-bits to track +/* + * Use u64 as a vector of 5 
11-bit values, use first 4-bits to track * number of elements currently in stack. - * Pack one history entry for linked registers as 10 bits in the following format: - * - 3-bits frameno + * Pack one history entry for linked registers as 11 bits in the following format: + * - 4-bits frameno * - 6-bits spi_or_reg * - 1-bit is_reg */ diff --git a/tools/testing/selftests/bpf/progs/test_global_func3.c b/tools/testing/selftests/bpf/progs/test_global_func3.c index 974fd8c19561..b66abb350fb0 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func3.c +++ b/tools/testing/selftests/bpf/progs/test_global_func3.c @@ -53,9 +53,57 @@ int f8(struct __sk_buff *skb) return f7(skb); } +static __attribute__ ((noinline)) +int f9(struct __sk_buff *skb) +{ + return f8(skb); +} + +static __attribute__ ((noinline)) +int f10(struct __sk_buff *skb) +{ + return f9(skb); +} + +static __attribute__ ((noinline)) +int f11(struct __sk_buff *skb) +{ + return f10(skb); +} + +static __attribute__ ((noinline)) +int f12(struct __sk_buff *skb) +{ + return f11(skb); +} + +static __attribute__ ((noinline)) +int f13(struct __sk_buff *skb) +{ + return f12(skb); +} + +static __attribute__ ((noinline)) +int f14(struct __sk_buff *skb) +{ + return f13(skb); +} + +static __attribute__ ((noinline)) +int f15(struct __sk_buff *skb) +{ + return f14(skb); +} + +static __attribute__ ((noinline)) +int f16(struct __sk_buff *skb) +{ + return f15(skb); +} + SEC("tc") -__failure __msg("the call stack of 9 frames") +__failure __msg("the call stack of 17 frames") int global_func3(struct __sk_buff *skb) { - return f8(skb); + return f16(skb); } diff --git a/tools/testing/selftests/bpf/progs/verifier_liveness_exp.c b/tools/testing/selftests/bpf/progs/verifier_liveness_exp.c index b058de623200..72646fa2745e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_liveness_exp.c +++ b/tools/testing/selftests/bpf/progs/verifier_liveness_exp.c @@ -15,7 +15,7 @@ * FP offset at each call site. 
arg_track keys on (frame, off[]), so * r1=fp-8, r1=fp-16, ... r1=fp-400 produce 50 unique cache keys per level. * - * This test chains 8 subprograms (the MAX_CALL_FRAMES limit). Each + * This test chains 8 subprograms (within the MAX_CALL_FRAMES limit). Each * intermediate function calls the next one 50 times, each time with a * different FP-relative offset in r1. * diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c index 70ae14d6084f..e38f102da45f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c +++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c @@ -372,37 +372,36 @@ __naked void precision_two_ids(void) SEC("socket") __success __log_level(2) __flag(BPF_F_TEST_STATE_FREQ) -/* check that r0 and r6 have different IDs after 'if', - * collect_linked_regs() can't tie more than 6 registers for a single insn. +/* + * check that r0 and r5 have different IDs after 'if', + * collect_linked_regs() can't tie more than 5 registers for a single insn. 
*/ -__msg("8: (25) if r0 > 0x7 goto pc+0 ; R0=scalar(id=1") -__msg("14: (bf) r6 = r6 ; R6=scalar(id=2") -/* check that r{0-5} are marked precise after 'if' */ -__msg("frame0: regs=r0 stack= before 8: (25) if r0 > 0x7 goto pc+0") -__msg("frame0: parent state regs=r0,r1,r2,r3,r4,r5 stack=:") +__msg("7: (25) if r0 > 0x7 goto pc+0 ; R0=scalar(id=1") +__msg("12: (bf) r5 = r5 ; R5=scalar(id=2") +/* check that r{0-4} are marked precise after 'if' */ +__msg("frame0: regs=r0 stack= before 7: (25) if r0 > 0x7 goto pc+0") +__msg("frame0: parent state regs=r0,r1,r2,r3,r4 stack=:") __naked void linked_regs_too_many_regs(void) { asm volatile ( /* r0 = random number up to 0xff */ "call %[bpf_ktime_get_ns];" "r0 &= 0xff;" - /* tie r{0-6} IDs */ + /* tie r{0-5} IDs */ "r1 = r0;" "r2 = r0;" "r3 = r0;" "r4 = r0;" "r5 = r0;" - "r6 = r0;" - /* propagate range for r{0-6} */ + /* propagate range for r{0-5} */ "if r0 > 7 goto +0;" - /* keep r{1-5} live */ + /* keep r{1-4} live */ "r1 = r1;" "r2 = r2;" "r3 = r3;" "r4 = r4;" + /* make r5 appear in the log */ "r5 = r5;" - /* make r6 appear in the log */ - "r6 = r6;" /* force r0 to be precise, * this would cause r{0-4} to be precise because of shared IDs */ diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 42d523a21a43..302d712e0d7e 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -1219,6 +1219,30 @@ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */ BPF_EXIT_INSN(), /* H */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call I */ + BPF_EXIT_INSN(), + /* I */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call J */ + BPF_EXIT_INSN(), + /* J */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call K */ + BPF_EXIT_INSN(), + /* K */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call L */ + BPF_EXIT_INSN(), + /* L */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call M */ + BPF_EXIT_INSN(), + /* M */ + 
BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call N */ + BPF_EXIT_INSN(), + /* N */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call O */ + BPF_EXIT_INSN(), + /* O */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call P */ + BPF_EXIT_INSN(), + /* P */ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -1257,6 +1281,30 @@ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */ BPF_EXIT_INSN(), /* H */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call I */ + BPF_EXIT_INSN(), + /* I */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call J */ + BPF_EXIT_INSN(), + /* J */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call K */ + BPF_EXIT_INSN(), + /* K */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call L */ + BPF_EXIT_INSN(), + /* L */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call M */ + BPF_EXIT_INSN(), + /* M */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call N */ + BPF_EXIT_INSN(), + /* N */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call O */ + BPF_EXIT_INSN(), + /* O */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call P */ + BPF_EXIT_INSN(), + /* P */ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, -- cgit v1.2.3 From 26330a9226417c9a3395db9fdb403f7d7371e6b7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 11 Jun 2026 13:42:26 +0200 Subject: bpf: Add support to specify uprobe_multi target via file descriptor Allow uprobe_multi link to identify the target binary by an already opened file descriptor. Adding new BPF_F_UPROBE_MULTI_PATH_FD flag and the path_fd field for the attr.link_create.uprobe_multi struct. When the flag is set, we resolve the target from path_fd, without the flag, we keep the existing string path behavior. I don't see a use case for supporting O_PATH file descriptors, because we need to read the binary first to get probes offsets, so I'm using the CLASS(fd, f), which fails for O_PATH fds. 
Assisted-by: Codex:GPT-5.4 Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260611114230.950379-4-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 7 ++++++- kernel/bpf/syscall.c | 4 ++-- kernel/trace/bpf_trace.c | 43 ++++++++++++++++++++++++++++++++++++------ tools/include/uapi/linux/bpf.h | 7 ++++++- 4 files changed, 51 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 11dd610fa5fa..89b36de5fdbb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1327,7 +1327,11 @@ enum { * BPF_TRACE_UPROBE_MULTI attach type to create return probe. */ enum { - BPF_F_UPROBE_MULTI_RETURN = (1U << 0) + /* Get return uprobe. */ + BPF_F_UPROBE_MULTI_RETURN = (1U << 0), + + /* Get path from provided path_fd. */ + BPF_F_UPROBE_MULTI_PATH_FD = (1U << 1), }; /* link_create.netfilter.flags used in LINK_CREATE command for @@ -1864,6 +1868,7 @@ union bpf_attr { __u32 cnt; __u32 flags; __u32 pid; + __u32 path_fd; } uprobe_multi; struct { union { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7ed949f70f82..b44106c8ea75 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3480,7 +3480,7 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) seq_printf(m, "link_type:\t%s\n", link->flags == BPF_F_KPROBE_MULTI_RETURN ? "kretprobe_multi" : "kprobe_multi"); else if (link->type == BPF_LINK_TYPE_UPROBE_MULTI) - seq_printf(m, "link_type:\t%s\n", link->flags == BPF_F_UPROBE_MULTI_RETURN ? + seq_printf(m, "link_type:\t%s\n", link->flags & BPF_F_UPROBE_MULTI_RETURN ? 
"uretprobe_multi" : "uprobe_multi"); else seq_printf(m, "link_type:\t%s\n", bpf_link_type_strs[type]); @@ -5840,7 +5840,7 @@ err_put: return err; } -#define BPF_LINK_CREATE_LAST_FIELD link_create.uprobe_multi.pid +#define BPF_LINK_CREATE_LAST_FIELD link_create.uprobe_multi.path_fd static int link_create(union bpf_attr *attr, bpfptr_t uattr) { struct bpf_prog *prog; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f8990bc6b64c..82f8feea6931 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -3214,6 +3215,38 @@ static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) return run_ctx->uprobe->cookie; } +static int bpf_uprobe_multi_get_path(const union bpf_attr *attr, struct path *path) +{ + void __user *upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path); + u32 path_fd = attr->link_create.uprobe_multi.path_fd; + u32 flags = attr->link_create.uprobe_multi.flags; + + if (flags & BPF_F_UPROBE_MULTI_PATH_FD) { + /* + * When BPF_F_UPROBE_MULTI_PATH_FD is set, the executable is + * identified by path_fd, upath must be NULL. + */ + if (upath) + return -EINVAL; + + CLASS(fd, f)(path_fd); + if (fd_empty(f)) + return -EBADF; + *path = fd_file(f)->f_path; + path_get(path); + return 0; + } + + /* + * When BPF_F_UPROBE_MULTI_PATH_FD is not set, the path is resolved + * relative to the cwd (AT_FDCWD) or absolute using the upath string. 
+ */ + if (!upath || path_fd) + return -EINVAL; + + return user_path_at(AT_FDCWD, upath, LOOKUP_FOLLOW, path); +} + int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct bpf_uprobe_multi_link *link = NULL; @@ -3223,7 +3256,6 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr struct task_struct *task = NULL; unsigned long __user *uoffsets; u64 __user *ucookies; - void __user *upath; unsigned long size; u32 flags, cnt, i; struct path path; @@ -3241,19 +3273,18 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr return -EINVAL; flags = attr->link_create.uprobe_multi.flags; - if (flags & ~BPF_F_UPROBE_MULTI_RETURN) + if (flags & ~(BPF_F_UPROBE_MULTI_RETURN | BPF_F_UPROBE_MULTI_PATH_FD)) return -EINVAL; /* - * path, offsets and cnt are mandatory, + * offsets and cnt are mandatory, * ref_ctr_offsets and cookies are optional */ - upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path); uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets); cnt = attr->link_create.uprobe_multi.cnt; pid = attr->link_create.uprobe_multi.pid; - if (!upath || !uoffsets || !cnt || pid < 0) + if (!uoffsets || !cnt || pid < 0) return -EINVAL; if (cnt > MAX_UPROBE_MULTI_CNT) return -E2BIG; @@ -3271,7 +3302,7 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr !access_ok(ucookies, size)) return -EFAULT; - err = user_path_at(AT_FDCWD, upath, LOOKUP_FOLLOW, &path); + err = bpf_uprobe_multi_get_path(attr, &path); if (err) return err; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 11dd610fa5fa..89b36de5fdbb 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1327,7 +1327,11 @@ enum { * BPF_TRACE_UPROBE_MULTI attach type to create return probe. */ enum { - BPF_F_UPROBE_MULTI_RETURN = (1U << 0) + /* Get return uprobe. 
*/ + BPF_F_UPROBE_MULTI_RETURN = (1U << 0), + + /* Get path from provided path_fd. */ + BPF_F_UPROBE_MULTI_PATH_FD = (1U << 1), }; /* link_create.netfilter.flags used in LINK_CREATE command for @@ -1864,6 +1868,7 @@ union bpf_attr { __u32 cnt; __u32 flags; __u32 pid; + __u32 path_fd; } uprobe_multi; struct { union { -- cgit v1.2.3 From da3a4c3ec7ed746f7060b69c49a77602931b3dd3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 11 Jun 2026 13:42:27 +0200 Subject: libbpf: Add path_fd to struct bpf_link_create_opts Adding the path_fd field to struct bpf_link_create_opts and passing it through kernel attr interface. Assisted-by: Codex:GPT-5.4 Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260611114230.950379-5-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 1 + tools/lib/bpf/bpf.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index f37e3416f61a..96819c082c77 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -842,6 +842,7 @@ int bpf_link_create(int prog_fd, int target_fd, attr.link_create.uprobe_multi.ref_ctr_offsets = ptr_to_u64(OPTS_GET(opts, uprobe_multi.ref_ctr_offsets, 0)); attr.link_create.uprobe_multi.cookies = ptr_to_u64(OPTS_GET(opts, uprobe_multi.cookies, 0)); attr.link_create.uprobe_multi.pid = OPTS_GET(opts, uprobe_multi.pid, 0); + attr.link_create.uprobe_multi.path_fd = OPTS_GET(opts, uprobe_multi.path_fd, 0); if (!OPTS_ZEROED(opts, uprobe_multi)) return libbpf_err(-EINVAL); break; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 012354131cf6..7534a593edae 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -444,6 +444,7 @@ struct bpf_link_create_opts { const unsigned long *ref_ctr_offsets; const __u64 *cookies; __u32 pid; + __u32 path_fd; } uprobe_multi; struct { __u64 cookie; @@ -477,7 +478,7 @@ struct bpf_link_create_opts { }; size_t :0; }; -#define bpf_link_create_opts__last_field 
uprobe_multi.pid +#define bpf_link_create_opts__last_field uprobe_multi.path_fd LIBBPF_API int bpf_link_create(int prog_fd, int target_fd, enum bpf_attach_type attach_type, -- cgit v1.2.3 From d5026e6bfc70daca4d88a732cae30b72451fdd0c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 11 Jun 2026 13:42:28 +0200 Subject: selftests/bpf: Add uprobe_multi path_fd test Add a uprobe_multi link API selftest that opens /proc/self/exe and passes the resulting descriptor through opts.uprobe_multi.path_fd with BPF_F_UPROBE_MULTI_PATH_FD set. Assisted-by: Codex:GPT-5.4 Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260611114230.950379-6-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/uprobe_multi_test.c | 62 ++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index 56cbea280fbd..ffcf3c92f047 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -2,6 +2,7 @@ #include #include +#include #include #include "uprobe_multi.skel.h" #include "uprobe_multi_bench.skel.h" @@ -757,6 +758,65 @@ static void test_link_api(void) __test_link_api(&child); } +static void test_link_api_path_fd(void) +{ + LIBBPF_OPTS(bpf_link_create_opts, opts); + const char *resolve_path = "/proc/self/exe"; + int prog_fd, link_fd = -1, path_fd = -1; + struct uprobe_multi *skel = NULL; + unsigned long *offsets = NULL; + const char *syms[3] = { + "uprobe_multi_func_1", + "uprobe_multi_func_2", + "uprobe_multi_func_3", + }; + int err; + + err = elf_resolve_syms_offsets(resolve_path, ARRAY_SIZE(syms), syms, + &offsets, STT_FUNC); + if (!ASSERT_OK(err, "elf_resolve_syms_offsets")) + return; + + path_fd = open(resolve_path, O_RDONLY); + if (!ASSERT_GE(path_fd, 0, "path_fd")) + goto cleanup; + + opts.uprobe_multi.path_fd = path_fd; + 
opts.uprobe_multi.offsets = offsets; + opts.uprobe_multi.cnt = ARRAY_SIZE(syms); + opts.uprobe_multi.flags = BPF_F_UPROBE_MULTI_PATH_FD; + + skel = uprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.uprobe); + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_GE(link_fd, 0, "bpf_link_create")) + goto cleanup; + + skel->bss->uprobe_multi_func_1_addr = (__u64)uprobe_multi_func_1; + skel->bss->uprobe_multi_func_2_addr = (__u64)uprobe_multi_func_2; + skel->bss->uprobe_multi_func_3_addr = (__u64)uprobe_multi_func_3; + skel->bss->pid = getpid(); + + uprobe_multi_func_1(); + uprobe_multi_func_2(); + uprobe_multi_func_3(); + + ASSERT_EQ(skel->bss->uprobe_multi_func_1_result, 1, "uprobe_multi_func_1_result"); + ASSERT_EQ(skel->bss->uprobe_multi_func_2_result, 1, "uprobe_multi_func_2_result"); + ASSERT_EQ(skel->bss->uprobe_multi_func_3_result, 1, "uprobe_multi_func_3_result"); + +cleanup: + if (link_fd >= 0) + close(link_fd); + if (path_fd >= 0) + close(path_fd); + uprobe_multi__destroy(skel); + free(offsets); +} + static struct bpf_program * get_program(struct uprobe_multi_consumers *skel, int prog) { @@ -1354,6 +1414,8 @@ void test_uprobe_multi_test(void) test_attach_api_syms(); if (test__start_subtest("link_api")) test_link_api(); + if (test__start_subtest("link_api_path_fd")) + test_link_api_path_fd(); if (test__start_subtest("bench_uprobe")) test_bench_attach_uprobe(); if (test__start_subtest("bench_usdt")) -- cgit v1.2.3 From 3229675be841932879d5f1b2fb38ba9c2777a088 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 11 Jun 2026 13:42:29 +0200 Subject: selftests/bpf: Add uprobe_multi path_fd fail tests Adding tests to attach_api_fails suite to make sure we fail wrong setup for path_fd usage. 
Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260611114230.950379-7-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/uprobe_multi_test.c | 32 +++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index ffcf3c92f047..f0baf5738b75 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -537,7 +537,37 @@ static void test_attach_api_fails(void) link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); if (!ASSERT_ERR(link_fd, "link_fd")) goto cleanup; - ASSERT_EQ(link_fd, -EINVAL, "pid_is_wrong"); + if (!ASSERT_EQ(link_fd, -EINVAL, "pid_is_wrong")) + goto cleanup; + + /* wrong path_fd */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = NULL, + .uprobe_multi.path_fd = -1, + .uprobe_multi.flags = BPF_F_UPROBE_MULTI_PATH_FD, + .uprobe_multi.offsets = (unsigned long *)&offset, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EBADF, "path_fd_is_wrong")) + goto cleanup; + + /* path and path_fd both set with BPF_F_UPROBE_MULTI_PATH_FD flag */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.path_fd = 1, + .uprobe_multi.flags = BPF_F_UPROBE_MULTI_PATH_FD, + .uprobe_multi.offsets = (unsigned long *)&offset, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + ASSERT_EQ(link_fd, -EINVAL, "path_and_path_fd_together"); cleanup: if (link_fd >= 0) -- cgit v1.2.3 From df29003c55115737a8fb4f8a60c6c2bba4c4a484 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 11 Jun 2026 13:42:30 +0200 Subject: selftests/bpf: Fix 
typo in verify_umulti_link_info We verify info.uprobe_multi.flags against the wrong kprobe-multi flag (BPF_F_KPROBE_MULTI_RETURN). It's the same value as the correct flag (BPF_F_UPROBE_MULTI_RETURN), so there's no functional change. Fixes: 147c69307bcf ("selftests/bpf: Add link_info test for uprobe_multi link") Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260611114230.950379-8-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/fill_link_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index e40114620751..f589eefbf9fb 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -469,7 +469,7 @@ verify_umulti_link_info(int fd, bool retprobe, __u64 *offsets, ASSERT_EQ(info.uprobe_multi.pid, getpid(), "info.uprobe_multi.pid"); ASSERT_EQ(info.uprobe_multi.count, 3, "info.uprobe_multi.count"); - ASSERT_EQ(info.uprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN, + ASSERT_EQ(info.uprobe_multi.flags & BPF_F_UPROBE_MULTI_RETURN, retprobe, "info.uprobe_multi.flags.retprobe"); ASSERT_EQ(info.uprobe_multi.path_size, strlen(path) + 1, "info.uprobe_multi.path_size"); ASSERT_STREQ(path_buf, path, "info.uprobe_multi.path"); -- cgit v1.2.3 From 1f24de6b2c81f71f90a7c02be516da99f00d11c7 Mon Sep 17 00:00:00 2001 From: Ethan Nelson-Moore Date: Tue, 9 Jun 2026 21:40:20 -0700 Subject: selftests/bpf: correct CONFIG_PPC64 macro name in comment A comment in tools/testing/selftests/bpf/progs/test_fill_link_info.c incorrectly refers to CONFIG_PPC6 instead of CONFIG_PPC64. Correct it. Discovered while searching for CONFIG_* symbols referenced in code but not defined in any Kconfig file. 
Signed-off-by: Ethan Nelson-Moore Link: https://lore.kernel.org/r/20260610044023.225820-1-enelsonmoore@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/test_fill_link_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c index fac33a14f200..137bd6292163 100644 --- a/tools/testing/selftests/bpf/progs/test_fill_link_info.c +++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c @@ -12,7 +12,7 @@ extern bool CONFIG_PPC64 __kconfig __weak; /* This function is here to have CONFIG_X86_KERNEL_IBT, * CONFIG_PPC_FTRACE_OUT_OF_LINE, CONFIG_KPROBES_ON_FTRACE, - * CONFIG_PPC6 used and added to object BTF. + * CONFIG_PPC64 used and added to object BTF. */ int unused(void) { -- cgit v1.2.3 From 9080b97689dbf8d2c338a9af97cd2b4a714f25bf Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 2 Jun 2026 15:47:10 +0100 Subject: bpftool: Pass host flags to bootstrap libbpf bpftool builds a bootstrap libbpf with HOSTCC, but the libbpf submake can still inherit target build flags through CFLAGS. This can break cross builds when host objects are compiled with target-only options. Since HOST_CFLAGS contains warning options that are not suitable for building libbpf, use LIBBPF_BOOTSTRAP_CFLAGS with the warning options removed to build the bootstrap libbpf. Clear EXTRA_CFLAGS so target extra flags are not mixed into the host bootstrap libbpf build. 
Signed-off-by: Leo Yan Acked-by: Quentin Monnet Link: https://lore.kernel.org/r/20260602-tools_build_fix_zero_init_bpf_only-v2-1-c76e5250ea1c@arm.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 8f50bc163bb2..c070111df22d 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -47,7 +47,8 @@ $(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_HDRS_ $(LIBBPF_BOOTSTRAP): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_BOOTSTRAP_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ DESTDIR=$(LIBBPF_BOOTSTRAP_DESTDIR:/=) prefix= \ - ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)" $@ install_headers + ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)" \ + CFLAGS="$(LIBBPF_BOOTSTRAP_CFLAGS)" EXTRA_CFLAGS= $@ install_headers $(LIBBPF_BOOTSTRAP_INTERNAL_HDRS): $(LIBBPF_BOOTSTRAP_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_BOOTSTRAP_HDRS_DIR) $(call QUIET_INSTALL, $@) @@ -92,6 +93,9 @@ HOST_CFLAGS := $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),\ $(subst $(CLANG_CROSS_FLAGS),,$(CFLAGS))) HOST_LDFLAGS := $(LDFLAGS) +# Remove warnings for libbpf bootstrap build +LIBBPF_BOOTSTRAP_CFLAGS := $(filter-out -W -Wall -Wextra -Wformat -Wformat-signedness,$(HOST_CFLAGS)) + INSTALL ?= install RM ?= rm -f -- cgit v1.2.3 From 956841cbc3d77a9e687182a8bba316e9a2665a50 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 2 Jun 2026 15:47:11 +0100 Subject: bpftool: Avoid adding EXTRA_CFLAGS to HOST_CFLAGS Prepare for future changes where EXTRA_CFLAGS may include flags not applicable to the host compiler. Move the HOST_CFLAGS assignment before appending EXTRA_CFLAGS to CFLAGS so that HOST_CFLAGS does not inherit flags from EXTRA_CFLAGS. 
Acked-by: Quentin Monnet Signed-off-by: Leo Yan Link: https://lore.kernel.org/r/20260602-tools_build_fix_zero_init_bpf_only-v2-2-c76e5250ea1c@arm.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index c070111df22d..49bae0359144 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -82,6 +82,12 @@ CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ ifneq ($(BPFTOOL_VERSION),) CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' endif + +# This must be done before appending EXTRA_CFLAGS to CFLAGS to avoid +# including flags that are not applicable to the host compiler. +HOST_CFLAGS := $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),\ + $(subst $(CLANG_CROSS_FLAGS),,$(CFLAGS))) + ifneq ($(EXTRA_CFLAGS),) CFLAGS += $(EXTRA_CFLAGS) endif @@ -89,8 +95,6 @@ ifneq ($(EXTRA_LDFLAGS),) LDFLAGS += $(EXTRA_LDFLAGS) endif -HOST_CFLAGS := $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),\ - $(subst $(CLANG_CROSS_FLAGS),,$(CFLAGS))) HOST_LDFLAGS := $(LDFLAGS) # Remove warnings for libbpf bootstrap build -- cgit v1.2.3 From 3f2fec5b02b6efa1aad3238943858234751ac0f3 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 2 Jun 2026 15:47:12 +0100 Subject: bpftool: Append extra host flags Append HOST_EXTRACFLAGS to HOST_CFLAGS so that additional flags can be applied to the host compiler. 
Acked-by: Quentin Monnet Signed-off-by: Leo Yan Link: https://lore.kernel.org/r/20260602-tools_build_fix_zero_init_bpf_only-v2-3-c76e5250ea1c@arm.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 49bae0359144..271a7dc77273 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -87,6 +87,7 @@ endif # including flags that are not applicable to the host compiler. HOST_CFLAGS := $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),\ $(subst $(CLANG_CROSS_FLAGS),,$(CFLAGS))) +HOST_CFLAGS += $(HOST_EXTRACFLAGS) ifneq ($(EXTRA_CFLAGS),) CFLAGS += $(EXTRA_CFLAGS) -- cgit v1.2.3 From f3846b3800a2cfda9c900b2e94525a1027b04424 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 2 Jun 2026 15:47:13 +0100 Subject: libbpf: Initialize CFLAGS before including Makefile.include tools/scripts/Makefile.include may expand EXTRA_CFLAGS in a future change. This could alter the initialization of CFLAGS, as the default options "-g -O2" would never be set once EXTRA_CFLAGS is expanded. Prepare for this by moving the CFLAGS initialization before including tools/scripts/Makefile.include, so it is not affected by the extended EXTRA_CFLAGS. Append EXTRA_CFLAGS to CFLAGS only after including Makefile.include and place it last so that the extra flags propagate properly and can override the default options. tools/scripts/Makefile.include already appends $(CLANG_CROSS_FLAGS) to CFLAGS, the Makefile appends $(CLANG_CROSS_FLAGS) again, remove the redundant append. 
Signed-off-by: Leo Yan Acked-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260602-tools_build_fix_zero_init_bpf_only-v2-4-c76e5250ea1c@arm.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/Makefile | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 168140f8e646..eca584fb061e 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -49,6 +49,14 @@ man_dir_SQ = '$(subst ','\'',$(man_dir))' export man_dir man_dir_SQ INSTALL export DESTDIR DESTDIR_SQ +# Defer assigning EXTRA_CFLAGS to CFLAGS until after including +# tools/scripts/Makefile.include, as it may add flags to EXTRA_CFLAGS. +ifdef EXTRA_CFLAGS + CFLAGS := +else + CFLAGS := -g -O2 +endif + include $(srctree)/tools/scripts/Makefile.include # copy a bit from Linux kbuild @@ -70,13 +78,6 @@ LIB_TARGET = libbpf.a libbpf.so.$(LIBBPF_VERSION) LIB_FILE = libbpf.a libbpf.so* PC_FILE = libbpf.pc -# Set compile option CFLAGS -ifdef EXTRA_CFLAGS - CFLAGS := $(EXTRA_CFLAGS) -else - CFLAGS := -g -O2 -endif - # Append required CFLAGS override CFLAGS += -std=gnu89 override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum @@ -84,7 +85,7 @@ override CFLAGS += -Werror -Wall override CFLAGS += $(INCLUDES) override CFLAGS += -fvisibility=hidden override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -override CFLAGS += $(CLANG_CROSS_FLAGS) +override CFLAGS += $(EXTRA_CFLAGS) # flags specific for shared library SHLIB_FLAGS := -DSHARED -fPIC -- cgit v1.2.3 From b40ba139371c2ba4beffe0533c6d85fda9bc932c Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 2 Jun 2026 15:47:14 +0100 Subject: tools/bpf: build: Append extra cflags Append EXTRA_CFLAGS to CFLAGS so that additional flags can be applied to the compiler. 
Signed-off-by: Leo Yan Acked-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260602-tools_build_fix_zero_init_bpf_only-v2-5-c76e5250ea1c@arm.com Signed-off-by: Alexei Starovoitov --- tools/bpf/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index fd2585af1252..9c19e81f3c27 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -11,6 +11,7 @@ INSTALL ?= install CFLAGS += -Wall -O2 CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi \ -I$(srctree)/tools/include +CFLAGS += $(EXTRA_CFLAGS) # This will work when bpf is built in tools env. where srctree # isn't set and when invoked from selftests build, where srctree -- cgit v1.2.3 From 55ffbe8a15b1254f44d56952fb425a10e3f15c31 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 2 Jun 2026 15:47:15 +0100 Subject: selftests/bpf: Initialize operation name before use ASAN reports stack-buffer-overflow due to the uninitialized op_name. Initialize it to fix the issue. 
Fixes: 054b6c7866c7 ("selftests/bpf: Add verifier log tests for BPF_BTF_LOAD command") Signed-off-by: Leo Yan Acked-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260602-tools_build_fix_zero_init_bpf_only-v2-6-c76e5250ea1c@arm.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/verifier_log.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_log.c b/tools/testing/selftests/bpf/prog_tests/verifier_log.c index c01c0114af1b..4542bb586d72 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier_log.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier_log.c @@ -317,6 +317,7 @@ static void verif_btf_log_subtest(bool bad_btf) res = load_btf(&opts, true); ASSERT_EQ(res, -ENOSPC, "half_log_fd"); ASSERT_EQ(strlen(logs.buf), 24, "log_fixed_25"); + strscpy(op_name, "log_fixed", sizeof(op_name)); ASSERT_STRNEQ(logs.buf, logs.reference, 24, op_name); /* validate rolling verifier log logic: try all variations of log buf -- cgit v1.2.3 From 584f3b7a352586ddf9464faaedea57ac880e0e6d Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 2 Jun 2026 15:47:16 +0100 Subject: selftests/bpf: Use common CFLAGS for urandom_read The urandom_read helper and its shared library are built with $(CLANG) directly rather than through the normal selftest $(CC) rules. The CFLAGS variable can contain flags that are specific to $(CC) and may be incompatible with $(CLANG), so those flags are not necessarily valid for the clang-only urandom_read build. Split the BPF selftest local flags into COMMON_CFLAGS and append them to CFLAGS for the normal build path. Use COMMON_CFLAGS directly for urandom_read and liburandom_read.so, while still filtering out -static as before. 
Signed-off-by: Leo Yan Link: https://lore.kernel.org/r/20260602-tools_build_fix_zero_init_bpf_only-v2-7-c76e5250ea1c@arm.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index d53b7e496ac9..302a8aed3bf9 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -57,7 +57,7 @@ srctree := $(patsubst %/,%,$(dir $(srctree))) srctree := $(patsubst %/,%,$(dir $(srctree))) endif -CFLAGS += -g $(OPT_FLAGS) -rdynamic -std=gnu11 \ +COMMON_CFLAGS = -g $(OPT_FLAGS) -rdynamic -std=gnu11 \ -Wall -Werror -fno-omit-frame-pointer \ -Wno-unused-but-set-variable \ $(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \ @@ -70,7 +70,7 @@ LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread PCAP_CFLAGS := $(shell $(PKG_CONFIG) --cflags libpcap 2>/dev/null && echo "-DTRAFFIC_MONITOR=1") PCAP_LIBS := $(shell $(PKG_CONFIG) --libs libpcap 2>/dev/null) LDLIBS += $(PCAP_LIBS) -CFLAGS += $(PCAP_CFLAGS) +CFLAGS += $(COMMON_CFLAGS) $(PCAP_CFLAGS) # Some utility functions use LLVM libraries jit_disasm_helpers.c-CFLAGS = $(LLVM_CFLAGS) @@ -267,7 +267,7 @@ endif $(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c liburandom_read.map $(call msg,LIB,,$@) $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \ - $(filter-out -static,$(CFLAGS) $(LDFLAGS)) \ + $(filter-out -static,$(COMMON_CFLAGS) $(LDFLAGS)) \ $(filter %.c,$^) $(filter-out -static,$(LDLIBS)) \ -Wno-unused-command-line-argument \ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ @@ -277,7 +277,7 @@ $(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c liburandom $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \ - $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ + $(filter-out 
-static,$(COMMON_CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ -Wno-unused-command-line-argument \ -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT) \ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ -- cgit v1.2.3 From 62617d28d9ae123c0d6ba51035caa3ca52b94f7a Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 2 Jun 2026 15:47:17 +0100 Subject: selftests/bpf: Avoid static LLVM linking for cross builds The BPF selftests prefer static LLVM linking, which works for native builds but can break cross builds. Its --link-static output may include host-only libraries that are unavailable for the cross compilation, causing link failures. Avoid static LLVM linking for cross builds and use shared LLVM libraries instead. Native builds keep the existing behavior. Signed-off-by: Leo Yan Link: https://lore.kernel.org/r/20260602-tools_build_fix_zero_init_bpf_only-v2-8-c76e5250ea1c@arm.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 302a8aed3bf9..b642ee489ea6 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -194,8 +194,15 @@ ifeq ($(feature-llvm),1) LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets # both llvm-config and lib.mk add -D_GNU_SOURCE, which ends up as conflict LLVM_CFLAGS += $(filter-out -D_GNU_SOURCE,$(shell $(LLVM_CONFIG) --cflags)) - # Prefer linking statically if it's available, otherwise fallback to shared - ifeq ($(shell $(LLVM_CONFIG) --link-static --libs >/dev/null 2>&1 && echo static),static) + # Cross compilation must use dynamic linking to avoid unresolved library + # dependencies. For native build, prefer linking statically if it's + # available, otherwise fallback to shared. 
+ ifneq ($(ARCH), $(HOSTARCH)) + LLVM_LINK_STATIC := + else + LLVM_LINK_STATIC := $(shell $(LLVM_CONFIG) --link-static --libs >/dev/null 2>&1 && echo y) + endif + ifeq ($(LLVM_LINK_STATIC),y) LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS)) LLVM_LDLIBS += $(filter-out -lxml2,$(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS))) LLVM_LDLIBS += -lstdc++ -- cgit v1.2.3 From 5cf2c21ab0900b41c0e29c925b9a640a92340d40 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Sun, 14 Jun 2026 00:24:43 +0800 Subject: selftests/bpf: Add test to verify the fix for bpf_setsockopt() helper Verify the fix by: 1. Attach cgroup sockops prog. 2. Build a tcp connection using ipv4 addr in ipv6 socket. 3. Verify the return value of bpf_setsockopt() helper. Assisted-by: Codex:gpt-5.5-xhigh Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20260613162443.60515-3-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/setget_sockopt.c | 78 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/setget_sockopt.c | 23 +++++++ 2 files changed, 101 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c index 77fe1bfb7504..4e91d9b615ce 100644 --- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c @@ -199,6 +199,83 @@ err_out: bpf_link__destroy(getsockopt_link); } +static int connect_to_v4mapped_v6_fd(int server_fd) +{ + struct sockaddr_storage addr; + struct sockaddr_in *addr4 = (void *)&addr; + socklen_t addrlen = sizeof(addr); + struct sockaddr_in6 addr6 = {}; + int fd = -1, v6only = 0, err; + + err = getsockname(server_fd, (struct sockaddr *)&addr, &addrlen); + if (!ASSERT_OK(err, "getsockname")) + return -1; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (!ASSERT_GE(fd, 0, "socket")) + return -1; + + err = 
settimeo(fd, 0); + if (!ASSERT_OK(err, "settimeo")) + goto err_out; + + err = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v6only, sizeof(v6only)); + if (!ASSERT_OK(err, "clear_v6only")) + goto err_out; + + addr6.sin6_family = AF_INET6; + addr6.sin6_port = addr4->sin_port; + addr6.sin6_addr.s6_addr[10] = 0xff; + addr6.sin6_addr.s6_addr[11] = 0xff; + memcpy(&addr6.sin6_addr.s6_addr[12], &addr4->sin_addr, sizeof(addr4->sin_addr)); + + err = connect(fd, (struct sockaddr *)&addr6, sizeof(addr6)); + if (!ASSERT_OK(err, "connect")) + goto err_out; + + return fd; + +err_out: + close(fd); + return -1; +} + +static void test_v4mapped_v6_ip_tos(void) +{ + struct setget_sockopt__bss *bss = skel->bss; + int sfd = -1, fd = -1, got = 0, exp = 0x1c; + socklen_t optlen; + + memset(bss, 0, sizeof(*bss)); + bss->v4mapped_v6_ip_tos_enable = 1; + bss->v4mapped_v6_ip_tos_ret = -1; + bss->v4mapped_v6_ip_tos_val = exp; + + sfd = start_server(AF_INET, SOCK_STREAM, addr4_str, 0, 0); + if (!ASSERT_GE(sfd, 0, "start_server")) + goto err_out; + + fd = connect_to_v4mapped_v6_fd(sfd); + if (!ASSERT_GE(fd, 0, "connect_to_v4mapped_v6_fd")) + goto err_out; + + ASSERT_GT(bss->v4mapped_v6_ip_tos_cnt, 0, "v4mapped_v6_ip_tos_cnt"); + ASSERT_EQ(bss->v4mapped_v6_ip_tos_ret, 0, "v4mapped_v6_ip_tos_ret"); + + optlen = sizeof(got); + if (!ASSERT_OK(getsockopt(fd, SOL_IP, IP_TOS, &got, &optlen), "getsockopt_ip_tos")) + goto err_out; + + ASSERT_EQ(got, exp, "ip_tos"); + +err_out: + bss->v4mapped_v6_ip_tos_enable = 0; + if (fd >= 0) + close(fd); + if (sfd >= 0) + close(sfd); +} + void test_setget_sockopt(void) { cg_fd = test__join_cgroup(CG_NAME); @@ -238,6 +315,7 @@ void test_setget_sockopt(void) test_ktls(AF_INET); test_nonstandard_opt(AF_INET); test_nonstandard_opt(AF_INET6); + test_v4mapped_v6_ip_tos(); done: setget_sockopt__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c index d330b1511979..636a7cd8e2fa 100644 --- 
a/tools/testing/selftests/bpf/progs/setget_sockopt.c +++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c @@ -387,6 +387,24 @@ int _getsockopt(struct bpf_sockopt *ctx) return 1; } +int v4mapped_v6_ip_tos_enable; +int v4mapped_v6_ip_tos_ret; +int v4mapped_v6_ip_tos_cnt; +int v4mapped_v6_ip_tos_val; + +static void test_v4mapped_v6_ip_tos(struct bpf_sock_ops *skops) +{ + int tos = v4mapped_v6_ip_tos_val; + + if (!v4mapped_v6_ip_tos_enable || skops->op != BPF_SOCK_OPS_TCP_CONNECT_CB) + return; + if (skops->family != AF_INET6) + return; + + v4mapped_v6_ip_tos_cnt++; + v4mapped_v6_ip_tos_ret = bpf_setsockopt(skops, IPPROTO_IP, IP_TOS, &tos, sizeof(tos)); +} + SEC("sockops") int skops_sockopt(struct bpf_sock_ops *skops) { @@ -401,6 +419,11 @@ int skops_sockopt(struct bpf_sock_ops *skops) if (!sk) return 1; + if (v4mapped_v6_ip_tos_enable) { + test_v4mapped_v6_ip_tos(skops); + return 1; + } + switch (skops->op) { case BPF_SOCK_OPS_TCP_LISTEN_CB: nr_listen += !(bpf_test_sockopt(skops, sk) || -- cgit v1.2.3 From 05ae621d4e3c7bfdcc0a4eef1d66eccfc789ee62 Mon Sep 17 00:00:00 2001 From: David Windsor Date: Thu, 11 Jun 2026 10:35:49 -0400 Subject: selftests/bpf: Add test for sleepable lsm_cgroup rejection Confirm the verifier rejects loading a sleepable BPF_LSM_CGROUP program, as introduced in commit 5b038319be44 ("bpf: Reject sleepable BPF_LSM_CGROUP programs at load time"). 
Signed-off-by: David Windsor Reviewed-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260611143549.703914-1-dwindsor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_lsm.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_lsm.c b/tools/testing/selftests/bpf/progs/verifier_lsm.c index 38e8e9176862..2f8103bfa14e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_lsm.c +++ b/tools/testing/selftests/bpf/progs/verifier_lsm.c @@ -188,4 +188,13 @@ int BPF_PROG(null_check, struct file *file) return 0; } +SEC("lsm_cgroup/file_open") +__description("sleepable lsm_cgroup program is rejected") +__failure __msg("Program of this type cannot be sleepable") +__flag(BPF_F_SLEEPABLE) +int BPF_PROG(sleepable_lsm_cgroup) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 1f32c0d619d996b395f36a920f58159949be922a Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Thu, 11 Jun 2026 17:07:03 +0200 Subject: selftests/bpf: Retry map update on helper_fill_hashmap() helper_fill_hashmap() is also used by the parallel and stress map tests. Those are consistently failing with ENOMEM on kernels built with PREEMPT_RT if preallocation is disabled. The failure is transient and only caused by the memory cache refill running in a preemptible irq_work, which can easily stall in case of contention. Use a retriable update in those cases to handle transient ENOMEM and make the test more stable also on PREEMPT_RT. Also fix the sign of the value printed in case of error (strerror() expects a positive errno while updates return it negative). 
Signed-off-by: Gabriele Monaco Reviewed-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260611150704.95133-1-gmonaco@redhat.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_maps.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index ccc5acd55ff9..c32da7bd8be2 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -260,6 +260,16 @@ static void test_hashmap_percpu(unsigned int task, void *data) close(fd); } +#define MAP_RETRIES 20 + +static bool can_retry(int err) +{ + return (err == EAGAIN || err == EBUSY || + ((err == ENOMEM || err == E2BIG) && + map_opts.map_flags == BPF_F_NO_PREALLOC)); +} + + #define VALUE_SIZE 3 static int helper_fill_hashmap(int max_entries) { @@ -274,10 +284,11 @@ static int helper_fill_hashmap(int max_entries) for (i = 0; i < max_entries; i++) { key = i; value[0] = key; - ret = bpf_map_update_elem(fd, &key, value, BPF_NOEXIST); + ret = map_update_retriable(fd, &key, value, BPF_NOEXIST, + MAP_RETRIES, can_retry); CHECK(ret != 0, "can't update hashmap", - "err: %s\n", strerror(ret)); + "err: %s\n", strerror(-ret)); } return fd; @@ -1392,17 +1403,9 @@ static void test_map_stress(void) #define DO_UPDATE 1 #define DO_DELETE 0 -#define MAP_RETRIES 20 #define MAX_DELAY_US 50000 #define MIN_DELAY_RANGE_US 5000 -static bool can_retry(int err) -{ - return (err == EAGAIN || err == EBUSY || - ((err == ENOMEM || err == E2BIG) && - map_opts.map_flags == BPF_F_NO_PREALLOC)); -} - int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, retry_for_error_fn need_retry) { -- cgit v1.2.3 From 70b139d0483cd42808326c36c4b63d5be4a3cccb Mon Sep 17 00:00:00 2001 From: Sechang Lim Date: Mon, 15 Jun 2026 10:19:59 +0800 Subject: selftests/bpf: add test for bpf_msg_pop_data() overflow Add a test in 
sockmap_basic.c that calls bpf_msg_pop_data() with a length close to U32_MAX, which overflows the start + len bounds check. The sk_msg program records the return value over a sendmsg and the test checks that the call is rejected with -EINVAL. Reviewed-by: Jiayuan Chen Reviewed-by: Emil Tsalapatis Cc: Jiayuan Chen Signed-off-by: Sechang Lim Signed-off-by: Jiayuan Chen Link: https://lore.kernel.org/r/20260615021959.140010-7-jiayuan.chen@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/sockmap_basic.c | 48 ++++++++++++++++++++++ .../bpf/progs/test_sockmap_msg_pop_data.c | 27 ++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/test_sockmap_msg_pop_data.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index d2846579285f..cb3229711f93 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -14,6 +14,7 @@ #include "test_sockmap_pass_prog.skel.h" #include "test_sockmap_drop_prog.skel.h" #include "test_sockmap_change_tail.skel.h" +#include "test_sockmap_msg_pop_data.skel.h" #include "bpf_iter_sockmap.skel.h" #include "sockmap_helpers.h" @@ -666,6 +667,51 @@ out: test_sockmap_change_tail__destroy(skel); } +static void test_sockmap_msg_verdict_pop_data(void) +{ + struct test_sockmap_msg_pop_data *skel; + int err, map, verdict; + int c1 = -1, p1 = -1, sent; + int zero = 0; + char *buf; + const size_t len = 32 * 1024; + + skel = test_sockmap_msg_pop_data__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + verdict = bpf_program__fd(skel->progs.prog_msg_pop_data); + map = bpf_map__fd(skel->maps.sock_map); + + err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out; + + err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1); + if (!ASSERT_OK(err, 
"create_pair")) + goto out; + + err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto out_close; + + buf = calloc(len, 1); + if (!ASSERT_OK_PTR(buf, "calloc")) + goto out_close; + + sent = xsend(c1, buf, len, 0); + ASSERT_EQ(sent, (ssize_t)len, "xsend"); + ASSERT_EQ(skel->data->pop_data_ret, -EINVAL, "pop_data_rejects overflow"); + + free(buf); + +out_close: + close(c1); + close(p1); +out: + test_sockmap_msg_pop_data__destroy(skel); +} + static void test_sockmap_skb_verdict_peek_helper(int map) { int err, c1, p1, zero = 0, sent, recvd, avail; @@ -1373,6 +1419,8 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_fionread(false); if (test__start_subtest("sockmap skb_verdict change tail")) test_sockmap_skb_verdict_change_tail(); + if (test__start_subtest("sockmap msg_verdict pop_data overflow")) + test_sockmap_msg_verdict_pop_data(); if (test__start_subtest("sockmap skb_verdict msg_f_peek")) test_sockmap_skb_verdict_peek(); if (test__start_subtest("sockmap skb_verdict msg_f_peek with link")) diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_msg_pop_data.c b/tools/testing/selftests/bpf/progs/test_sockmap_msg_pop_data.c new file mode 100644 index 000000000000..301e65b95256 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_msg_pop_data.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} sock_map SEC(".maps"); + +#define POP_START 0x48a3 +#define POP_LEN 0xfffffffd + +long pop_data_ret = 1; + +SEC("sk_msg") +int prog_msg_pop_data(struct sk_msg_md *msg) +{ + if (msg->size <= POP_START) + return SK_PASS; + + pop_data_ret = bpf_msg_pop_data(msg, POP_START, POP_LEN, 0); + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From e4287bf34f97a88c7d9322f5bde828724c073a6b Mon Sep 17 00:00:00 2001 From: Alexei 
Starovoitov Date: Mon, 15 Jun 2026 00:17:58 -0700 Subject: selftests/bpf: Work around llvm stack overflow in crypto progs clang 23 fails to build crypto_bench.c and crypto_sanity.c with "BPF stack limit exceeded". The progs fill a 408-byte bpf_crypto_params on the stack and pass it to bpf_crypto_ctx_create(). clang 23 copies the byte-aligned cipher/key globals into it one byte at a time through the stack, and keeps more than one copy of the struct around. Together that blows the 512-byte limit. Align the source arrays to 8 bytes so the copy is word-wise, and move params off the stack into a static .bss var. static keeps it out of the skeleton, where bpf_crypto_params is an incomplete type. Either change alone is not enough. Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/crypto_bench.c | 21 ++++++++++++++------- tools/testing/selftests/bpf/progs/crypto_sanity.c | 21 ++++++++++++++------- 2 files changed, 28 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/crypto_bench.c b/tools/testing/selftests/bpf/progs/crypto_bench.c index 4ac956b26240..4c0a09aa1e6c 100644 --- a/tools/testing/selftests/bpf/progs/crypto_bench.c +++ b/tools/testing/selftests/bpf/progs/crypto_bench.c @@ -11,10 +11,19 @@ #include "crypto_common.h" const volatile unsigned int len = 16; -char cipher[128] = {}; +/* + * cipher[] and key[] are 8-byte aligned and 'params' is kept off the stack to + * work around an LLVM code generation bug. clang lowers the memcpy() of these + * byte-aligned globals into a per-byte load/store sequence staged on the stack, + * and additionally materializes the on-stack 'struct bpf_crypto_params' twice. + * Both blow the 512-byte BPF stack limit. Aligning the sources lets clang copy + * word-wise, and a global 'params' removes the large object from the stack. 
+ */ +char cipher[128] __attribute__((aligned(8))) = {}; u32 key_len, authsize; char dst[256] = {}; -u8 key[256] = {}; +u8 key[256] __attribute__((aligned(8))) = {}; +static struct bpf_crypto_params params; long hits = 0; int status; @@ -22,11 +31,6 @@ SEC("syscall") int crypto_setup(void *args) { struct bpf_crypto_ctx *cctx; - struct bpf_crypto_params params = { - .type = "skcipher", - .key_len = key_len, - .authsize = authsize, - }; int err = 0; status = 0; @@ -36,6 +40,9 @@ int crypto_setup(void *args) return 0; } + __builtin_memcpy(&params.type, "skcipher", sizeof("skcipher")); + params.key_len = key_len; + params.authsize = authsize; __builtin_memcpy(&params.algo, cipher, sizeof(cipher)); __builtin_memcpy(&params.key, key, sizeof(key)); cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err); diff --git a/tools/testing/selftests/bpf/progs/crypto_sanity.c b/tools/testing/selftests/bpf/progs/crypto_sanity.c index dfd8a258f14a..e81f5ac3b1ae 100644 --- a/tools/testing/selftests/bpf/progs/crypto_sanity.c +++ b/tools/testing/selftests/bpf/progs/crypto_sanity.c @@ -10,11 +10,20 @@ #include "bpf_kfuncs.h" #include "crypto_common.h" -unsigned char key[256] = {}; +/* + * key[] and algo[] are 8-byte aligned and 'params' is kept off the stack to + * work around an LLVM code generation bug. clang lowers the memcpy() of these + * byte-aligned globals into a per-byte load/store sequence staged on the stack, + * and additionally materializes the on-stack 'struct bpf_crypto_params' twice. + * Both blow the 512-byte BPF stack limit. Aligning the sources lets clang copy + * word-wise, and a global 'params' removes the large object from the stack. 
+ */ +unsigned char key[256] __attribute__((aligned(8))) = {}; u16 udp_test_port = 7777; u32 authsize, key_len; -char algo[128] = {}; +char algo[128] __attribute__((aligned(8))) = {}; char dst[16] = {}, dst_bad[8] = {}; +static struct bpf_crypto_params params; int status; static int skb_dynptr_validate(struct __sk_buff *skb, struct bpf_dynptr *psrc) @@ -53,11 +62,6 @@ static int skb_dynptr_validate(struct __sk_buff *skb, struct bpf_dynptr *psrc) SEC("syscall") int skb_crypto_setup(void *ctx) { - struct bpf_crypto_params params = { - .type = "skcipher", - .key_len = key_len, - .authsize = authsize, - }; struct bpf_crypto_ctx *cctx; int err; @@ -67,6 +71,9 @@ int skb_crypto_setup(void *ctx) return 0; } + __builtin_memcpy(&params.type, "skcipher", sizeof("skcipher")); + params.key_len = key_len; + params.authsize = authsize; __builtin_memcpy(&params.algo, algo, sizeof(algo)); __builtin_memcpy(&params.key, key, sizeof(key)); -- cgit v1.2.3