author	Linus Torvalds <torvalds@linux-foundation.org>	2026-04-14 18:04:04 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2026-04-14 18:04:04 -0700
commit	f5ad4101009e7f5f5984ffea6923d4fcd470932a (patch)
tree	c9c25bb09794d372c3028113d1f89f2a2cbcbca4
parent	e997ac58ad0b47141c62c79cde8356fe5633287a (diff)
parent	71b500afd2f7336f5b6c6026f2af546fc079be26 (diff)
download	lwn-f5ad4101009e7f5f5984ffea6923d4fcd470932a.tar.gz
	lwn-f5ad4101009e7f5f5984ffea6923d4fcd470932a.zip
Merge tag 'bpf-next-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf updates from Alexei Starovoitov:

 - Welcome new BPF maintainers Kumar Kartikeya Dwivedi and Eduard
   Zingerman, while Martin KaFai Lau reduced his load to Reviewer

 - Lots of fixes everywhere from many first-time contributors. Thank
   you all

 - The diffstat is dominated by a mechanical split of verifier.c into
   multiple components:

     - backtrack.c: backtracking logic and jump history
     - states.c: state equivalence
     - cfg.c: control flow graph, postorder, strongly connected
       components
     - liveness.c: register and stack liveness
     - fixups.c: post-verification passes: instruction patching, dead
       code removal, bpf_loop inlining, fastcall finalization

   About 8k lines were moved; verifier.c still stands at 20k lines.
   Further refactoring is planned for the next release

 - Replace dynamic stack liveness with static stack liveness based on
   data flow analysis. This improved verification time by 2x for some
   programs and equally reduced memory consumption. The new logic is
   in liveness.c, supported by constant folding in const_fold.c
   (Eduard Zingerman, Alexei Starovoitov)

 - Introduce BTF layout to ease addition of new BTF kinds (Alan Maguire)

 - Use kmalloc_nolock() universally in BPF local storage (Amery Hung)

 - Fix several bugs in linked-register delta tracking (Daniel Borkmann)

 - Improve verifier support of arena pointers (Emil Tsalapatis)

 - Improve verifier tracking of register bounds in the min/max and tnum
   domains (Harishankar Vishwanathan, Paul Chaignon, Hao Sun)

 - Further extend support for implicit arguments in the verifier (Ihor
   Solodrai)

 - Add support for the nop,nop5 instruction combo for USDT probes in
   libbpf (Jiri Olsa)

 - Support merging multiple module BTFs (Josef Bacik)

 - Extend the applicability of bpf_kptr_xchg() (Kaitao Cheng)

 - Retire rcu_trace_implies_rcu_gp() (Kumar Kartikeya Dwivedi)

 - Support variable-offset context access for 'syscall' programs (Kumar
   Kartikeya Dwivedi)

 - Migrate bpf_task_work and dynptr to kmalloc_nolock() (Mykyta
   Yatsenko)

 - Fix UAF in the open-coded task_vma iterator (Puranjay Mohan)

* tag 'bpf-next-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (241 commits)
  selftests/bpf: cover short IPv4/IPv6 inputs with adjust_room
  bpf: reject short IPv4/IPv6 inputs in bpf_prog_test_run_skb
  selftests/bpf: Use memfd_create instead of shm_open in cgroup_iter_memcg
  selftests/bpf: Add test for cgroup storage OOB read
  bpf: Fix OOB in pcpu_init_value
  selftests/bpf: Fix reg_bounds to match new tnum-based refinement
  selftests/bpf: Add tests for non-arena/arena operations
  bpf: Allow instructions with arena source and non-arena dest registers
  bpftool: add missing fsession to the usage and docs of bpftool
  docs/bpf: add missing fsession attach type to docs
  bpf: add missing fsession to the verifier log
  bpf: Move BTF checking logic into check_btf.c
  bpf: Move backtracking logic to backtrack.c
  bpf: Move state equivalence logic to states.c
  bpf: Move check_cfg() into cfg.c
  bpf: Move compute_insn_live_regs() into liveness.c
  bpf: Move fixup/post-processing logic from verifier.c into fixups.c
  bpf: Simplify do_check_insn()
  bpf: Move checks for reserved fields out of the main pass
  bpf: Delete unused variable
  ...
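For background, the static stack liveness mentioned above is textbook backward data-flow analysis: each instruction's live-in set is use | (live_out & ~def), with live_out being the union of its successors' live-in sets, iterated to a fixed point. A minimal, self-contained C sketch of that general scheme follows; all names are illustrative and this is not the kernel's liveness.c:

#include <stdbool.h>
#include <stdint.h>

#define MAX_SUCCS 2

struct insn_flow {
	uint64_t use;		/* stack slots read by the insn */
	uint64_t def;		/* stack slots fully overwritten */
	int succ[MAX_SUCCS];	/* successor instruction indices */
	int nsucc;
};

/*
 * Backward liveness to a fixed point. Caller must zero-initialize
 * live_in[]. live_in[i] = use[i] | (live_out[i] & ~def[i]).
 */
static void compute_live_stack(const struct insn_flow *g, int n,
			       uint64_t *live_in)
{
	bool changed = true;

	while (changed) {
		changed = false;
		for (int i = n - 1; i >= 0; i--) {	/* reverse order converges faster */
			uint64_t live_out = 0, in;

			for (int s = 0; s < g[i].nsucc; s++)
				live_out |= live_in[g[i].succ[s]];
			in = g[i].use | (live_out & ~g[i].def);
			if (in != live_in[i]) {
				live_in[i] = in;
				changed = true;
			}
		}
	}
}

The speedup described in the log comes from computing these sets once per program up front, instead of propagating liveness dynamically through every explored verifier state.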
-rw-r--r--Documentation/bpf/drgn.rst4
-rw-r--r--Documentation/bpf/libbpf/program_types.rst4
-rw-r--r--MAINTAINERS7
-rw-r--r--arch/arc/net/bpf_jit_arcv2.c8
-rw-r--r--arch/arm64/net/bpf_jit.h4
-rw-r--r--arch/s390/kernel/Makefile2
-rw-r--r--arch/s390/kernel/bpf.c12
-rw-r--r--arch/s390/net/bpf_jit_comp.c157
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/jit.c18
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/verifier.c12
-rw-r--r--include/linux/bpf-cgroup.h2
-rw-r--r--include/linux/bpf.h10
-rw-r--r--include/linux/bpf_local_storage.h15
-rw-r--r--include/linux/bpf_verifier.h423
-rw-r--r--include/linux/btf_ids.h2
-rw-r--r--include/linux/filter.h1
-rw-r--r--include/linux/rcupdate.h9
-rw-r--r--include/uapi/linux/bpf.h4
-rw-r--r--include/uapi/linux/btf.h12
-rw-r--r--kernel/bpf/Makefile3
-rw-r--r--kernel/bpf/arraymap.c4
-rw-r--r--kernel/bpf/backtrack.c934
-rw-r--r--kernel/bpf/bpf_cgrp_storage.c11
-rw-r--r--kernel/bpf/bpf_inode_storage.c11
-rw-r--r--kernel/bpf/bpf_local_storage.c157
-rw-r--r--kernel/bpf/bpf_lsm.c7
-rw-r--r--kernel/bpf/bpf_task_storage.c11
-rw-r--r--kernel/bpf/btf.c86
-rw-r--r--kernel/bpf/cfg.c872
-rw-r--r--kernel/bpf/check_btf.c463
-rw-r--r--kernel/bpf/const_fold.c396
-rw-r--r--kernel/bpf/core.c95
-rw-r--r--kernel/bpf/cpumap.c5
-rw-r--r--kernel/bpf/devmap.c5
-rw-r--r--kernel/bpf/fixups.c2457
-rw-r--r--kernel/bpf/hashtab.c6
-rw-r--r--kernel/bpf/helpers.c243
-rw-r--r--kernel/bpf/liveness.c2400
-rw-r--r--kernel/bpf/local_storage.c2
-rw-r--r--kernel/bpf/log.c62
-rw-r--r--kernel/bpf/memalloc.c33
-rw-r--r--kernel/bpf/offload.c10
-rw-r--r--kernel/bpf/states.c1563
-rw-r--r--kernel/bpf/syscall.c44
-rw-r--r--kernel/bpf/task_iter.c151
-rw-r--r--kernel/bpf/tnum.c46
-rw-r--r--kernel/bpf/verifier.c10000
-rw-r--r--kernel/trace/trace_kprobe.c8
-rw-r--r--net/bpf/test_run.c49
-rw-r--r--net/core/bpf_sk_storage.c23
-rw-r--r--net/ipv6/addrconf.c1
-rw-r--r--net/xdp/xsk.c18
-rw-r--r--net/xdp/xsk_queue.h16
-rw-r--r--scripts/Makefile.btf2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-btf.rst11
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst2
-rw-r--r--tools/bpf/bpftool/Makefile30
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool8
-rw-r--r--tools/bpf/bpftool/btf.c121
-rw-r--r--tools/bpf/bpftool/jit_disasm.c11
-rw-r--r--tools/bpf/bpftool/main.c7
-rw-r--r--tools/bpf/bpftool/main.h14
-rw-r--r--tools/bpf/bpftool/prog.c2
-rw-r--r--tools/bpf/resolve_btfids/main.c1
-rw-r--r--tools/include/uapi/linux/bpf.h4
-rw-r--r--tools/include/uapi/linux/btf.h12
-rw-r--r--tools/lib/bpf/btf.c623
-rw-r--r--tools/lib/bpf/btf.h20
-rw-r--r--tools/lib/bpf/features.c53
-rw-r--r--tools/lib/bpf/libbpf.c240
-rw-r--r--tools/lib/bpf/libbpf.h44
-rw-r--r--tools/lib/bpf/libbpf.map6
-rw-r--r--tools/lib/bpf/libbpf_internal.h9
-rw-r--r--tools/lib/bpf/libbpf_probes.c40
-rw-r--r--tools/lib/bpf/libbpf_version.h2
-rw-r--r--tools/lib/bpf/relo_core.c2
-rw-r--r--tools/lib/bpf/usdt.c47
-rw-r--r--tools/testing/selftests/bpf/.gitignore2
-rw-r--r--tools/testing/selftests/bpf/Makefile17
-rw-r--r--tools/testing/selftests/bpf/bench.c4
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage_create.c21
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c60
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_uprobes.sh2
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h174
-rw-r--r--tools/testing/selftests/bpf/bpftool_helpers.c15
-rw-r--r--tools/testing/selftests/bpf/cgroup_iter_memcg.h2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/access_variable_array.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/align.c712
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c213
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_gotox.c123
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_kind.c226
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_sanitize.c97
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_write.c111
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c35
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_storage.c47
-rw-r--r--tools/testing/selftests/bpf/prog_tests/clone_attach_btf_id.c78
-rw-r--r--tools/testing/selftests/bpf/prog_tests/connect_force_port.c26
-rw-r--r--tools/testing/selftests/bpf/prog_tests/empty_skb.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/exceptions.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c26
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_func_args_test.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/htab_reuse.c169
-rw-r--r--tools/testing/selftests/bpf/prog_tests/iter_buf_null_fail.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c108
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_list.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lsm_bdev.c221
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_misc.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/modify_return.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_attach.c173
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_link.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/probe_user.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rbtree.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reg_bounds.c53
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spin_lock.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/summarization.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_data.h104
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_storage.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_global_funcs.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_args.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_task_local_data.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trampoline_count.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/usdt.c92
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier_log.c6
-rw-r--r--tools/testing/selftests/bpf/progs/bench_local_storage_create.c11
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_gotox.c31
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h68
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_smc.c28
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c18
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_storage.c43
-rw-r--r--tools/testing/selftests/bpf/progs/clone_attach_btf_id.c13
-rw-r--r--tools/testing/selftests/bpf/progs/connect_force_port4.c10
-rw-r--r--tools/testing/selftests/bpf/progs/connect_force_port6.c10
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c115
-rw-r--r--tools/testing/selftests/bpf/progs/empty_skb.c7
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions.c14
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_assert.c6
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_fail.c34
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_int_with_void.c11
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_void.c10
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_args_fsession_test.c37
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_args_test.c38
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_ip_fsession_test.c21
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_ip_test.c23
-rw-r--r--tools/testing/selftests/bpf/progs/htab_reuse.c16
-rw-r--r--tools/testing/selftests/bpf/progs/irq.c4
-rw-r--r--tools/testing/selftests/bpf/progs/iter_buf_null_fail.c39
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c6
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test.c98
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_session.c10
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_sleepable.c25
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_write_ctx.c19
-rw-r--r--tools/testing/selftests/bpf/progs/kptr_xchg_inline.c4
-rw-r--r--tools/testing/selftests/bpf/progs/lsm_bdev.c96
-rw-r--r--tools/testing/selftests/bpf/progs/lwt_misc.c22
-rw-r--r--tools/testing/selftests/bpf/progs/map_ptr_kern.c17
-rw-r--r--tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c2
-rw-r--r--tools/testing/selftests/bpf/progs/modify_return.c13
-rw-r--r--tools/testing/selftests/bpf/progs/percpu_alloc_fail.c4
-rw-r--r--tools/testing/selftests/bpf/progs/preempt_lock.c6
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_search_kptr.c290
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr.c4
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c2
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_multi_args.c35
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall3.c5
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_data.bpf.h4
-rw-r--r--tools/testing/selftests/bpf/progs/test_access_variable_array.c19
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func3.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func7.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func_deep_stack.c95
-rw-r--r--tools/testing/selftests/bpf/progs/test_module_attach.c63
-rw-r--r--tools/testing/selftests/bpf/progs/test_probe_user.c13
-rw-r--r--tools/testing/selftests/bpf/progs/test_trampoline_count.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt.c12
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c10
-rw-r--r--tools/testing/selftests/bpf/progs/uninit_stack.c1
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_align.c581
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena.c130
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_async_cb_context.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds.c87
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ctx.c569
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ctx_ptr_param.c68
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c65
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_div_mod_bounds.c18
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_subprogs.c114
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_gotox.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c2
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_helper_value_access.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_int_ptr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_jeq_infer_not_null.c54
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ld_ind.c142
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_linked_scalars.c175
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_live_stack.c2411
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_liveness_exp.c139
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_loops1.c3
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_meta_access.c2
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_private_stack.c8
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_scalar_ids.c46
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spill_fill.c96
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_stack_ptr.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_precision.c8
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_topo.c226
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subreg.c165
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_unpriv.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c10
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c64
-rw-r--r--tools/testing/selftests/bpf/test_bpftool.py174
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool.sh11
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.c81
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.h3
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h1
-rw-r--r--tools/testing/selftests/bpf/test_loader.c248
-rw-r--r--tools/testing/selftests/bpf/test_progs.c45
-rw-r--r--tools/testing/selftests/bpf/test_progs.h1
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c9
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/uprobe_multi.c19
-rw-r--r--tools/testing/selftests/bpf/uprobe_multi.ld4
-rw-r--r--tools/testing/selftests/bpf/usdt.h2
-rw-r--r--tools/testing/selftests/bpf/usdt_1.c18
-rw-r--r--tools/testing/selftests/bpf/usdt_2.c16
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c9
-rw-r--r--tools/testing/selftests/bpf/verifier/junk_insn.c6
-rw-r--r--tools/testing/selftests/bpf/verifier/sleepable.c2
-rw-r--r--tools/testing/selftests/bpf/veristat.c103
233 files changed, 21723 insertions, 11236 deletions
diff --git a/Documentation/bpf/drgn.rst b/Documentation/bpf/drgn.rst
index 41f223c3161e..cabf702eb75a 100644
--- a/Documentation/bpf/drgn.rst
+++ b/Documentation/bpf/drgn.rst
@@ -26,8 +26,8 @@ about these objects, including id, type and name.
The main use-case `bpf_inspect.py`_ covers is to show BPF programs of types
``BPF_PROG_TYPE_EXT`` and ``BPF_PROG_TYPE_TRACING`` attached to other BPF
-programs via ``freplace``/``fentry``/``fexit`` mechanisms, since there is no
-user-space API to get this information.
+programs via ``freplace``/``fentry``/``fexit``/``fsession`` mechanisms, since
+there is no user-space API to get this information.
Getting started
===============
diff --git a/Documentation/bpf/libbpf/program_types.rst b/Documentation/bpf/libbpf/program_types.rst
index 3b837522834b..3a07ce3b7f79 100644
--- a/Documentation/bpf/libbpf/program_types.rst
+++ b/Documentation/bpf/libbpf/program_types.rst
@@ -207,6 +207,10 @@ described in more detail in the footnotes.
+ + +----------------------------------+-----------+
| | | ``fexit.s+`` [#fentry]_ | Yes |
+ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TRACE_FSESSION`` | ``fsession+`` [#fentry]_ | |
++ + +----------------------------------+-----------+
+| | | ``fsession.s+`` [#fentry]_ | Yes |
++ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TRACE_ITER`` | ``iter+`` [#iter]_ | |
+ + +----------------------------------+-----------+
| | | ``iter.s+`` [#iter]_ | Yes |
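Given the new section names documented above, a hedged sketch of what an fsession program could look like. The target function is an arbitrary placeholder, and the assumption that fsession reuses the session kfuncs known from kprobe sessions (bpf_session_is_return(), bpf_session_cookie()) is an inference, not something this diff confirms:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

/* Assumption: the session kfuncs from kprobe sessions also apply here. */
extern bool bpf_session_is_return(void) __ksym;
extern __u64 *bpf_session_cookie(void) __ksym;

SEC("fsession/do_unlinkat")	/* placeholder target function */
int BPF_PROG(unlink_session, int dfd, struct filename *name)
{
	__u64 *ts = bpf_session_cookie();

	if (!ts)
		return 0;
	if (!bpf_session_is_return()) {
		*ts = bpf_ktime_get_ns();	/* entry: stash a timestamp */
		return 0;
	}
	/* exit: same program runs again with the cookie preserved */
	bpf_printk("do_unlinkat took %llu ns", bpf_ktime_get_ns() - *ts);
	return 0;
}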
diff --git a/MAINTAINERS b/MAINTAINERS
index 92eb723573aa..88639cc09822 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4815,13 +4815,10 @@ M: Alexei Starovoitov <ast@kernel.org>
M: Daniel Borkmann <daniel@iogearbox.net>
M: Andrii Nakryiko <andrii@kernel.org>
R: Martin KaFai Lau <martin.lau@linux.dev>
-R: Eduard Zingerman <eddyz87@gmail.com>
+M: Eduard Zingerman <eddyz87@gmail.com>
+M: Kumar Kartikeya Dwivedi <memxor@gmail.com>
R: Song Liu <song@kernel.org>
R: Yonghong Song <yonghong.song@linux.dev>
-R: John Fastabend <john.fastabend@gmail.com>
-R: KP Singh <kpsingh@kernel.org>
-R: Stanislav Fomichev <sdf@fomichev.me>
-R: Hao Luo <haoluo@google.com>
R: Jiri Olsa <jolsa@kernel.org>
L: bpf@vger.kernel.org
S: Supported
diff --git a/arch/arc/net/bpf_jit_arcv2.c b/arch/arc/net/bpf_jit_arcv2.c
index 6d989b6d88c6..7ee50aeae5a4 100644
--- a/arch/arc/net/bpf_jit_arcv2.c
+++ b/arch/arc/net/bpf_jit_arcv2.c
@@ -2427,7 +2427,7 @@ u8 arc_prologue(u8 *buf, u32 usage, u16 frame_size)
#ifdef ARC_BPF_JIT_DEBUG
if ((usage & BIT(ARC_R_FP)) && frame_size == 0) {
- pr_err("FP is being saved while there is no frame.");
+ pr_err("FP is being saved while there is no frame.\n");
BUG();
}
#endif
@@ -2454,7 +2454,7 @@ u8 arc_epilogue(u8 *buf, u32 usage, u16 frame_size)
#ifdef ARC_BPF_JIT_DEBUG
if ((usage & BIT(ARC_R_FP)) && frame_size == 0) {
- pr_err("FP is being saved while there is no frame.");
+ pr_err("FP is being saved while there is no frame.\n");
BUG();
}
#endif
@@ -2868,7 +2868,7 @@ u8 gen_jmp_64(u8 *buf, u8 rd, u8 rs, u8 cond, u32 curr_off, u32 targ_off)
break;
default:
#ifdef ARC_BPF_JIT_DEBUG
- pr_err("64-bit jump condition is not known.");
+ pr_err("64-bit jump condition is not known.\n");
BUG();
#endif
}
@@ -2948,7 +2948,7 @@ u8 gen_jmp_32(u8 *buf, u8 rd, u8 rs, u8 cond, u32 curr_off, u32 targ_off)
*/
if (cond >= ARC_CC_LAST) {
#ifdef ARC_BPF_JIT_DEBUG
- pr_err("32-bit jump condition is not known.");
+ pr_err("32-bit jump condition is not known.\n");
BUG();
#endif
return 0;
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index bbea4f36f9f2..d13de4222cfb 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -187,7 +187,9 @@
/* Rn - imm12; set condition flags */
#define A64_CMP_I(sf, Rn, imm12) A64_SUBS_I(sf, A64_ZR, Rn, imm12)
/* Rd = Rn */
-#define A64_MOV(sf, Rd, Rn) A64_ADD_I(sf, Rd, Rn, 0)
+#define A64_MOV(sf, Rd, Rn) \
+ (((Rd) == A64_SP || (Rn) == A64_SP) ? A64_ADD_I(sf, Rd, Rn, 0) : \
+ aarch64_insn_gen_move_reg(Rd, Rn, A64_VARIANT(sf)))
/* Bitfield move */
#define A64_BITFIELD(sf, Rd, Rn, immr, imms, type) \
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 42c83d60d6fa..b522c6649fcc 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -80,5 +80,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_pai.o
obj-$(CONFIG_TRACEPOINTS) += trace.o
+obj-$(CONFIG_BPF_SYSCALL) += bpf.o
+
# vdso
obj-y += vdso/
diff --git a/arch/s390/kernel/bpf.c b/arch/s390/kernel/bpf.c
new file mode 100644
index 000000000000..713337fae626
--- /dev/null
+++ b/arch/s390/kernel/bpf.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/lowcore.h>
+#include <linux/btf.h>
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc struct lowcore *bpf_get_lowcore(void)
+{
+ return get_lowcore();
+}
+
+__bpf_kfunc_end_defs();
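A brief usage sketch for the new s390 kfunc, assuming it is registered for tracing programs; the attach point below is an arbitrary placeholder:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

extern struct lowcore *bpf_get_lowcore(void) __ksym;

SEC("fentry/do_unlinkat")	/* placeholder attach point */
int BPF_PROG(peek_lowcore)
{
	struct lowcore *lc = bpf_get_lowcore();

	if (lc)
		bpf_printk("s390 lowcore at %p", lc);
	return 0;
}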
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index bf92964246eb..d08d159b6319 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -830,25 +830,34 @@ static int bpf_jit_probe_post(struct bpf_jit *jit, struct bpf_prog *fp,
}
/*
- * Sign-extend the register if necessary
+ * Sign- or zero-extend the register if necessary
*/
-static int sign_extend(struct bpf_jit *jit, int r, u8 size, u8 flags)
+static int sign_zero_extend(struct bpf_jit *jit, int r, u8 size, u8 flags)
{
- if (!(flags & BTF_FMODEL_SIGNED_ARG))
- return 0;
-
switch (size) {
case 1:
- /* lgbr %r,%r */
- EMIT4(0xb9060000, r, r);
+ if (flags & BTF_FMODEL_SIGNED_ARG)
+ /* lgbr %r,%r */
+ EMIT4(0xb9060000, r, r);
+ else
+ /* llgcr %r,%r */
+ EMIT4(0xb9840000, r, r);
return 0;
case 2:
- /* lghr %r,%r */
- EMIT4(0xb9070000, r, r);
+ if (flags & BTF_FMODEL_SIGNED_ARG)
+ /* lghr %r,%r */
+ EMIT4(0xb9070000, r, r);
+ else
+ /* llghr %r,%r */
+ EMIT4(0xb9850000, r, r);
return 0;
case 4:
- /* lgfr %r,%r */
- EMIT4(0xb9140000, r, r);
+ if (flags & BTF_FMODEL_SIGNED_ARG)
+ /* lgfr %r,%r */
+ EMIT4(0xb9140000, r, r);
+ else
+ /* llgfr %r,%r */
+ EMIT4(0xb9160000, r, r);
return 0;
case 8:
return 0;
@@ -1798,9 +1807,9 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
return -1;
for (j = 0; j < m->nr_args; j++) {
- if (sign_extend(jit, BPF_REG_1 + j,
- m->arg_size[j],
- m->arg_flags[j]))
+ if (sign_zero_extend(jit, BPF_REG_1 + j,
+ m->arg_size[j],
+ m->arg_flags[j]))
return -1;
}
}
@@ -1862,20 +1871,21 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
jit->prg);
/*
- * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
+ * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
* goto out;
+ *
+ * tail_call_cnt is read into %w0, which needs to be preserved
+ * until it's incremented and flushed.
*/
off = jit->frame_off +
offsetof(struct prog_frame, tail_call_cnt);
- /* lhi %w0,1 */
- EMIT4_IMM(0xa7080000, REG_W0, 1);
- /* laal %w1,%w0,off(%r15) */
- EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
- /* clij %w1,MAX_TAIL_CALL_CNT-1,0x2,out */
+ /* ly %w0,off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0058, REG_W0, REG_0, REG_15, off);
+ /* clij %w0,MAX_TAIL_CALL_CNT,0xa,out */
patch_2_clij = jit->prg;
- EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W1, MAX_TAIL_CALL_CNT - 1,
- 2, jit->prg);
+ EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W0, MAX_TAIL_CALL_CNT,
+ 0xa, jit->prg);
/*
* prog = array->ptrs[index];
@@ -1894,6 +1904,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
patch_3_brc = jit->prg;
EMIT4_PCREL_RIC(0xa7040000, 8, jit->prg);
+ /* tail_call_cnt++; */
+ /* ahi %w0,1 */
+ EMIT4_IMM(0xa70a0000, REG_W0, 1);
+ /* sty %w0,off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, REG_0, REG_15, off);
+
/*
* Restore registers before calling function
*/
@@ -2480,8 +2496,8 @@ struct bpf_tramp_jit {
int ip_off; /* For bpf_get_func_ip(), has to be at
* (ctx - 16)
*/
- int arg_cnt_off; /* For bpf_get_func_arg_cnt(), has to be at
- * (ctx - 8)
+ int func_meta_off; /* For bpf_get_func_arg_cnt()/fsession, has
+ * to be at (ctx - 8)
*/
int bpf_args_off; /* Offset of BPF_PROG context, which consists
* of BPF arguments followed by return value
@@ -2506,6 +2522,13 @@ static void load_imm64(struct bpf_jit *jit, int dst_reg, u64 val)
EMIT6_IMM(0xc00d0000, dst_reg, val);
}
+static void emit_store_stack_imm64(struct bpf_jit *jit, int tmp_reg, int stack_off, u64 imm)
+{
+ load_imm64(jit, tmp_reg, imm);
+ /* stg %tmp_reg,stack_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, tmp_reg, REG_0, REG_15, stack_off);
+}
+
static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
const struct btf_func_model *m,
struct bpf_tramp_link *tlink, bool save_ret)
@@ -2520,10 +2543,7 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
* run_ctx.cookie = tlink->cookie;
*/
- /* %r0 = tlink->cookie */
- load_imm64(jit, REG_W0, tlink->cookie);
- /* stg %r0,cookie_off(%r15) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, REG_0, REG_15, cookie_off);
+ emit_store_stack_imm64(jit, REG_W0, cookie_off, tlink->cookie);
/*
* if ((start = __bpf_prog_enter(p, &run_ctx)) == 0)
@@ -2555,7 +2575,7 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, p->bpf_func);
/* stg %r2,retval_off(%r15) */
if (save_ret) {
- if (sign_extend(jit, REG_2, m->ret_size, m->ret_flags))
+ if (sign_zero_extend(jit, REG_2, m->ret_size, m->ret_flags))
return -1;
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
tjit->retval_off);
@@ -2581,6 +2601,28 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
return 0;
}
+static int invoke_bpf(struct bpf_tramp_jit *tjit,
+ const struct btf_func_model *m,
+ struct bpf_tramp_links *tl, bool save_ret,
+ u64 func_meta, int cookie_off)
+{
+ int i, cur_cookie = (tjit->bpf_args_off - cookie_off) / sizeof(u64);
+ struct bpf_jit *jit = &tjit->common;
+
+ for (i = 0; i < tl->nr_links; i++) {
+ if (bpf_prog_calls_session_cookie(tl->links[i])) {
+ u64 meta = func_meta | ((u64)cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT);
+
+ emit_store_stack_imm64(jit, REG_0, tjit->func_meta_off, meta);
+ cur_cookie--;
+ }
+ if (invoke_bpf_prog(tjit, m, tl->links[i], save_ret))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int alloc_stack(struct bpf_tramp_jit *tjit, size_t size)
{
int stack_offset = tjit->stack_size;
@@ -2610,8 +2652,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
int nr_bpf_args, nr_reg_args, nr_stack_args;
+ int cookie_cnt, cookie_off, fsession_cnt;
struct bpf_jit *jit = &tjit->common;
int arg, bpf_arg_off;
+ u64 func_meta;
int i, j;
/* Support as many stack arguments as "mvc" instruction can handle. */
@@ -2643,6 +2687,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
return -ENOTSUPP;
}
+ cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
+ fsession_cnt = bpf_fsession_cnt(tlinks);
+
/*
* Calculate the stack layout.
*/
@@ -2655,8 +2702,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
tjit->backchain_off = tjit->stack_size - sizeof(u64);
tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64));
tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64));
+ cookie_off = alloc_stack(tjit, cookie_cnt * sizeof(u64));
tjit->ip_off = alloc_stack(tjit, sizeof(u64));
- tjit->arg_cnt_off = alloc_stack(tjit, sizeof(u64));
+ tjit->func_meta_off = alloc_stack(tjit, sizeof(u64));
tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64));
tjit->retval_off = alloc_stack(tjit, sizeof(u64));
tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64));
@@ -2743,18 +2791,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
* arg_cnt = m->nr_args;
*/
- if (flags & BPF_TRAMP_F_IP_ARG) {
- /* %r0 = func_addr */
- load_imm64(jit, REG_0, (u64)func_addr);
- /* stg %r0,ip_off(%r15) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
- tjit->ip_off);
- }
- /* lghi %r0,nr_bpf_args */
- EMIT4_IMM(0xa7090000, REG_0, nr_bpf_args);
- /* stg %r0,arg_cnt_off(%r15) */
+ if (flags & BPF_TRAMP_F_IP_ARG)
+ emit_store_stack_imm64(jit, REG_0, tjit->ip_off, (u64)func_addr);
+ func_meta = nr_bpf_args;
+ /* lghi %r0,func_meta */
+ EMIT4_IMM(0xa7090000, REG_0, func_meta);
+ /* stg %r0,func_meta_off(%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
- tjit->arg_cnt_off);
+ tjit->func_meta_off);
if (flags & BPF_TRAMP_F_CALL_ORIG) {
/*
@@ -2767,10 +2811,17 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, __bpf_tramp_enter);
}
- for (i = 0; i < fentry->nr_links; i++)
- if (invoke_bpf_prog(tjit, m, fentry->links[i],
- flags & BPF_TRAMP_F_RET_FENTRY_RET))
- return -EINVAL;
+ if (fsession_cnt) {
+ /* Clear all the session cookies' value. */
+ for (i = 0; i < cookie_cnt; i++)
+ emit_store_stack_imm64(jit, REG_0, cookie_off + 8 * i, 0);
+ /* Clear the return value to make sure fentry always gets 0. */
+ emit_store_stack_imm64(jit, REG_0, tjit->retval_off, 0);
+ }
+
+ if (invoke_bpf(tjit, m, fentry, flags & BPF_TRAMP_F_RET_FENTRY_RET,
+ func_meta, cookie_off))
+ return -EINVAL;
if (fmod_ret->nr_links) {
/*
@@ -2847,11 +2898,16 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
EMIT6_PCREL_RILC(0xc0040000, 0, (u64)im->ip_epilogue);
}
+ /* Set the "is_return" flag for fsession. */
+ func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT);
+ if (fsession_cnt)
+ emit_store_stack_imm64(jit, REG_W0, tjit->func_meta_off,
+ func_meta);
+
/* do_fexit: */
tjit->do_fexit = jit->prg;
- for (i = 0; i < fexit->nr_links; i++)
- if (invoke_bpf_prog(tjit, m, fexit->links[i], false))
- return -EINVAL;
+ if (invoke_bpf(tjit, m, fexit, false, func_meta, cookie_off))
+ return -EINVAL;
if (flags & BPF_TRAMP_F_CALL_ORIG) {
im->ip_epilogue = jit->prg_buf + jit->prg;
@@ -2956,6 +3012,11 @@ bool bpf_jit_supports_arena(void)
return true;
}
+bool bpf_jit_supports_fsession(void)
+{
+ return true;
+}
+
bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
{
if (!in_arena)
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 3a02eef58cc6..c9ab55abfd4c 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1731,7 +1731,7 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
/* We only have to reload LM0 if the key is not at start of stack */
lm_off = nfp_prog->stack_frame_depth;
- lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off;
+ lm_off += meta->arg2.reg.var_off.value;
load_lm_ptr = meta->arg2.var_off || lm_off;
/* Set LM0 to start of key */
@@ -2874,8 +2874,7 @@ mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
}
if (meta->ptr.type == PTR_TO_STACK)
- return mem_ldx_stack(nfp_prog, meta, size,
- meta->ptr.off + meta->ptr.var_off.value);
+ return mem_ldx_stack(nfp_prog, meta, size, meta->ptr.var_off.value);
if (meta->ptr.type == PTR_TO_MAP_VALUE)
return mem_ldx_emem(nfp_prog, meta, size);
@@ -2985,8 +2984,7 @@ mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
return mem_stx_data(nfp_prog, meta, size);
if (meta->ptr.type == PTR_TO_STACK)
- return mem_stx_stack(nfp_prog, meta, size,
- meta->ptr.off + meta->ptr.var_off.value);
+ return mem_stx_stack(nfp_prog, meta, size, meta->ptr.var_off.value);
return -EOPNOTSUPP;
}
@@ -4153,9 +4151,9 @@ cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta,
/* Canonicalize the offsets. Turn all of them against the original
* base register.
*/
- head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off;
- head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off;
- ld_off = ld->off + head_ld_meta->ptr.off;
+ head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.var_off.value;
+ head_st_off = head_st_meta->insn.off + head_st_meta->ptr.var_off.value;
+ ld_off = ld->off + head_ld_meta->ptr.var_off.value;
/* Ascending order cross. */
if (ld_off > head_ld_off &&
@@ -4326,7 +4324,7 @@ static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog)
* support this.
*/
if (meta->ptr.id == range_ptr_id &&
- meta->ptr.off == range_ptr_off) {
+ meta->ptr.var_off.value == range_ptr_off) {
s16 new_start = range_start;
s16 end, off = insn->off;
s16 new_end = range_end;
@@ -4361,7 +4359,7 @@ start_new:
range_node = meta;
range_node->pkt_cache.do_init = true;
range_ptr_id = range_node->ptr.id;
- range_ptr_off = range_node->ptr.off;
+ range_ptr_off = range_node->ptr.var_off.value;
range_start = insn->off;
range_end = insn->off + BPF_LDST_BYTES(insn);
}
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index bc594860e2b5..70368fe7c510 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -98,7 +98,7 @@ static bool nfp_bpf_map_update_value_ok(struct bpf_verifier_env *env)
offmap = map_to_offmap(reg1->map_ptr);
nfp_map = offmap->dev_priv;
- off = reg3->off + reg3->var_off.value;
+ off = reg3->var_off.value;
for (i = 0; i < offmap->map.value_size; i++) {
struct bpf_stack_state *stack_entry;
@@ -137,7 +137,7 @@ nfp_bpf_stack_arg_ok(const char *fname, struct bpf_verifier_env *env,
return false;
}
- off = reg->var_off.value + reg->off;
+ off = reg->var_off.value;
if (-off % 4) {
pr_vlog(env, "%s: unaligned stack pointer %lld\n", fname, -off);
return false;
@@ -147,7 +147,7 @@ nfp_bpf_stack_arg_ok(const char *fname, struct bpf_verifier_env *env,
if (!old_arg)
return true;
- old_off = old_arg->reg.var_off.value + old_arg->reg.off;
+ old_off = old_arg->reg.var_off.value;
old_arg->var_off |= off != old_off;
return true;
@@ -358,8 +358,8 @@ nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog,
if (meta->ptr.type == NOT_INIT)
return 0;
- old_off = meta->ptr.off + meta->ptr.var_off.value;
- new_off = reg->off + reg->var_off.value;
+ old_off = meta->ptr.var_off.value;
+ new_off = reg->var_off.value;
meta->ptr_not_const |= old_off != new_off;
@@ -428,7 +428,7 @@ nfp_bpf_map_mark_used(struct bpf_verifier_env *env, struct nfp_insn_meta *meta,
return -EOPNOTSUPP;
}
- off = reg->var_off.value + meta->insn.off + reg->off;
+ off = reg->var_off.value + meta->insn.off;
size = BPF_LDST_BYTES(&meta->insn);
offmap = map_to_offmap(reg->map_ptr);
nfp_map = offmap->dev_priv;
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 2f535331f926..b2e79c2b41d5 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -184,7 +184,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
struct bpf_prog_array *array;
array = rcu_access_pointer(cgrp->bpf.effective[type]);
- return array != &bpf_empty_prog_array.hdr;
+ return array != &bpf_empty_prog_array;
}
/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 35b1e25bd104..0136a108d083 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2369,18 +2369,13 @@ struct bpf_prog_array {
struct bpf_prog_array_item items[];
};
-struct bpf_empty_prog_array {
- struct bpf_prog_array hdr;
- struct bpf_prog *null_prog;
-};
-
/* to avoid allocating empty bpf_prog_array for cgroups that
* don't have bpf program attached use one global 'bpf_empty_prog_array'
* It will not be modified the caller of bpf_prog_array_alloc()
* (since caller requested prog_cnt == 0)
* that pointer should be 'freed' by bpf_prog_array_free()
*/
-extern struct bpf_empty_prog_array bpf_empty_prog_array;
+extern struct bpf_prog_array bpf_empty_prog_array;
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
void bpf_prog_array_free(struct bpf_prog_array *progs);
@@ -3950,6 +3945,9 @@ static inline bool bpf_is_subprog(const struct bpf_prog *prog)
return prog->aux->func_idx != 0;
}
+const struct bpf_line_info *bpf_find_linfo(const struct bpf_prog *prog, u32 insn_off);
+void bpf_get_linfo_file_line(struct btf *btf, const struct bpf_line_info *linfo,
+ const char **filep, const char **linep, int *nump);
int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char **filep,
const char **linep, int *nump);
struct bpf_prog *bpf_prog_find_from_stack(void);
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 8157e8da61d4..9e4f5c45c974 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -54,7 +54,6 @@ struct bpf_local_storage_map {
u32 bucket_log;
u16 elem_size;
u16 cache_idx;
- bool use_kmalloc_nolock;
};
struct bpf_local_storage_data {
@@ -86,8 +85,7 @@ struct bpf_local_storage_elem {
*/
};
atomic_t state;
- bool use_kmalloc_nolock;
- /* 3 bytes hole */
+ /* 4 bytes hole */
/* The data is stored in another cacheline to minimize
* the number of cachelines access during a cache hit.
*/
@@ -104,7 +102,6 @@ struct bpf_local_storage {
rqspinlock_t lock; /* Protect adding/removing from the "list" */
u64 mem_charge; /* Copy of mem charged to owner. Protected by "lock" */
refcount_t owner_refcnt;/* Used to pin owner when map_free is uncharging */
- bool use_kmalloc_nolock;
};
/* U16_MAX is much more than enough for sk local storage
@@ -137,8 +134,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
- struct bpf_local_storage_cache *cache,
- bool use_kmalloc_nolock);
+ struct bpf_local_storage_cache *cache);
void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
struct bpf_local_storage_map *smap,
@@ -192,7 +188,7 @@ int bpf_selem_link_map(struct bpf_local_storage_map *smap,
struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
- bool swap_uptrs, gfp_t gfp_flags);
+ bool swap_uptrs);
void bpf_selem_free(struct bpf_local_storage_elem *selem,
bool reuse_now);
@@ -200,12 +196,11 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
int
bpf_local_storage_alloc(void *owner,
struct bpf_local_storage_map *smap,
- struct bpf_local_storage_elem *first_selem,
- gfp_t gfp_flags);
+ struct bpf_local_storage_elem *first_selem);
struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
- void *value, u64 map_flags, bool swap_uptrs, gfp_t gfp_flags);
+ void *value, u64 map_flags, bool swap_uptrs);
u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index ef8e45a362d9..53e8664cb566 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -38,10 +38,9 @@ struct bpf_reg_state {
/* Ordering of fields matters. See states_equal() */
enum bpf_reg_type type;
/*
- * Fixed part of pointer offset, pointer types only.
- * Or constant delta between "linked" scalars with the same ID.
+ * Constant delta between "linked" scalars with the same ID.
*/
- s32 off;
+ s32 delta;
union {
/* valid when type == PTR_TO_PACKET */
int range;
@@ -146,9 +145,9 @@ struct bpf_reg_state {
* Upper bit of ID is used to remember relationship between "linked"
* registers. Example:
* r1 = r2; both will have r1->id == r2->id == N
- * r1 += 10; r1->id == N | BPF_ADD_CONST and r1->off == 10
+ * r1 += 10; r1->id == N | BPF_ADD_CONST and r1->delta == 10
* r3 = r2; both will have r3->id == r2->id == N
- * w3 += 10; r3->id == N | BPF_ADD_CONST32 and r3->off == 10
+ * w3 += 10; r3->id == N | BPF_ADD_CONST32 and r3->delta == 10
*/
#define BPF_ADD_CONST64 (1U << 31)
#define BPF_ADD_CONST32 (1U << 30)
@@ -221,14 +220,67 @@ enum bpf_stack_slot_type {
STACK_DYNPTR,
STACK_ITER,
STACK_IRQ_FLAG,
+ STACK_POISON,
};
#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
+/* 4-byte stack slot granularity for liveness analysis */
+#define BPF_HALF_REG_SIZE 4
+#define STACK_SLOT_SZ 4
+#define STACK_SLOTS (MAX_BPF_STACK / BPF_HALF_REG_SIZE) /* 128 */
+
+typedef struct {
+ u64 v[2];
+} spis_t;
+
+#define SPIS_ZERO ((spis_t){})
+#define SPIS_ALL ((spis_t){{ U64_MAX, U64_MAX }})
+
+static inline bool spis_is_zero(spis_t s)
+{
+ return s.v[0] == 0 && s.v[1] == 0;
+}
+
+static inline bool spis_equal(spis_t a, spis_t b)
+{
+ return a.v[0] == b.v[0] && a.v[1] == b.v[1];
+}
+
+static inline spis_t spis_or(spis_t a, spis_t b)
+{
+ return (spis_t){{ a.v[0] | b.v[0], a.v[1] | b.v[1] }};
+}
+
+static inline spis_t spis_and(spis_t a, spis_t b)
+{
+ return (spis_t){{ a.v[0] & b.v[0], a.v[1] & b.v[1] }};
+}
+
+static inline spis_t spis_not(spis_t s)
+{
+ return (spis_t){{ ~s.v[0], ~s.v[1] }};
+}
+
+static inline bool spis_test_bit(spis_t s, u32 slot)
+{
+ return s.v[slot / 64] & BIT_ULL(slot % 64);
+}
+
+static inline void spis_or_range(spis_t *mask, u32 lo, u32 hi)
+{
+ u32 w;
+
+ for (w = lo; w <= hi && w < STACK_SLOTS; w++)
+ mask->v[w / 64] |= BIT_ULL(w % 64);
+}
+
#define BPF_REGMASK_ARGS ((1 << BPF_REG_1) | (1 << BPF_REG_2) | \
(1 << BPF_REG_3) | (1 << BPF_REG_4) | \
(1 << BPF_REG_5))
+#define BPF_MAIN_FUNC (-1)
+
#define BPF_DYNPTR_SIZE sizeof(struct bpf_dynptr_kern)
#define BPF_DYNPTR_NR_SLOTS (BPF_DYNPTR_SIZE / BPF_REG_SIZE)
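The spis_t helpers introduced above are self-contained, so a short illustrative composition (not part of the patch) shows how they fit together; slot indices are 4-byte stack-slot numbers in [0, STACK_SLOTS):

static inline bool spis_demo(void)
{
	spis_t live = SPIS_ZERO;
	spis_t killed = SPIS_ZERO;

	spis_or_range(&live, 0, 3);			/* slots 0-3 become live */
	spis_or_range(&killed, 2, 2);			/* slot 2 is overwritten */
	live = spis_and(live, spis_not(killed));	/* kill slot 2 */

	return spis_test_bit(live, 0) && !spis_test_bit(live, 2);
}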
@@ -266,6 +318,7 @@ struct bpf_reference_state {
struct bpf_retval_range {
s32 minval;
s32 maxval;
+ bool return_32bit;
};
/* state of the program:
@@ -424,7 +477,6 @@ struct bpf_verifier_state {
bool speculative;
bool in_sleepable;
- bool cleaned;
/* first and last insn idx of this verifier state */
u32 first_insn_idx;
@@ -595,6 +647,18 @@ struct bpf_insn_aux_data {
u32 scc;
/* registers alive before this instruction. */
u16 live_regs_before;
+ /*
+ * Bitmask of R0-R9 that hold known values at this instruction.
+ * const_reg_mask: scalar constants that fit in 32 bits.
+ * const_reg_map_mask: map pointers, val is map_index into used_maps[].
+ * const_reg_subprog_mask: subprog pointers, val is subprog number.
+ * const_reg_vals[i] holds the 32-bit value for register i.
+ * Populated by compute_const_regs() pre-pass.
+ */
+ u16 const_reg_mask;
+ u16 const_reg_map_mask;
+ u16 const_reg_subprog_mask;
+ u32 const_reg_vals[10];
};
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
@@ -652,7 +716,7 @@ enum priv_stack_mode {
};
struct bpf_subprog_info {
- /* 'start' has to be the first field otherwise find_subprog() won't work */
+ const char *name; /* name extracted from BTF */
u32 start; /* insn idx of function entry point */
u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
u32 postorder_start; /* The idx to the env->cfg.insn_postorder */
@@ -787,6 +851,8 @@ struct bpf_verifier_env {
const struct bpf_line_info *prev_linfo;
struct bpf_verifier_log log;
struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 2]; /* max + 2 for the fake and exception subprogs */
+ /* subprog indices sorted in topological order: leaves first, callers last */
+ int subprog_topo_order[BPF_MAX_SUBPROGS + 2];
union {
struct bpf_idmap idmap_scratch;
struct bpf_idset idset_scratch;
@@ -805,6 +871,8 @@ struct bpf_verifier_env {
} cfg;
struct backtrack_state bt;
struct bpf_jmp_history_entry *cur_hist_ent;
+ /* Per-callsite copy of parent's converged at_stack_in for cross-frame fills. */
+ struct arg_track **callsite_at_stack;
u32 pass_cnt; /* number of times do_check() was called */
u32 subprog_cnt;
/* number of instructions analyzed by the verifier */
@@ -837,7 +905,9 @@ struct bpf_verifier_env {
u64 scratched_stack_slots;
u64 prev_log_pos, prev_insn_print_pos;
/* buffer used to temporary hold constants as scalar registers */
- struct bpf_reg_state fake_reg[2];
+ struct bpf_reg_state fake_reg[1];
+ /* buffers used to save updated reg states while simulating branches */
+ struct bpf_reg_state true_reg1, true_reg2, false_reg1, false_reg2;
/* buffer used to generate temporary string representations,
* e.g., in reg_type_str() to generate reg_type string
*/
@@ -863,6 +933,30 @@ static inline struct bpf_subprog_info *subprog_info(struct bpf_verifier_env *env
return &env->subprog_info[subprog];
}
+struct bpf_call_summary {
+ u8 num_params;
+ bool is_void;
+ bool fastcall;
+};
+
+static inline bool bpf_helper_call(const struct bpf_insn *insn)
+{
+ return insn->code == (BPF_JMP | BPF_CALL) &&
+ insn->src_reg == 0;
+}
+
+static inline bool bpf_pseudo_call(const struct bpf_insn *insn)
+{
+ return insn->code == (BPF_JMP | BPF_CALL) &&
+ insn->src_reg == BPF_PSEUDO_CALL;
+}
+
+static inline bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
+{
+ return insn->code == (BPF_JMP | BPF_CALL) &&
+ insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
+}
+
__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
const char *fmt, va_list args);
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
@@ -891,6 +985,41 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env,
bpf_log(&env->log, "verifier bug: " fmt "\n", ##args); \
})
+static inline void mark_prune_point(struct bpf_verifier_env *env, int idx)
+{
+ env->insn_aux_data[idx].prune_point = true;
+}
+
+static inline bool bpf_is_prune_point(struct bpf_verifier_env *env, int insn_idx)
+{
+ return env->insn_aux_data[insn_idx].prune_point;
+}
+
+static inline void mark_force_checkpoint(struct bpf_verifier_env *env, int idx)
+{
+ env->insn_aux_data[idx].force_checkpoint = true;
+}
+
+static inline bool bpf_is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
+{
+ return env->insn_aux_data[insn_idx].force_checkpoint;
+}
+
+static inline void mark_calls_callback(struct bpf_verifier_env *env, int idx)
+{
+ env->insn_aux_data[idx].calls_callback = true;
+}
+
+static inline bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx)
+{
+ return env->insn_aux_data[insn_idx].calls_callback;
+}
+
+static inline void mark_jmp_point(struct bpf_verifier_env *env, int idx)
+{
+ env->insn_aux_data[idx].jmp_point = true;
+}
+
static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
{
struct bpf_verifier_state *cur = env->cur_state;
@@ -932,6 +1061,11 @@ static inline void bpf_trampoline_unpack_key(u64 key, u32 *obj_id, u32 *btf_id)
*btf_id = key & 0x7FFFFFFF;
}
+int bpf_check_btf_info_early(struct bpf_verifier_env *env,
+ const union bpf_attr *attr, bpfptr_t uattr);
+int bpf_check_btf_info(struct bpf_verifier_env *env,
+ const union bpf_attr *attr, bpfptr_t uattr);
+
int bpf_check_attach_target(struct bpf_verifier_log *log,
const struct bpf_prog *prog,
const struct bpf_prog *tgt_prog,
@@ -941,6 +1075,93 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab);
int mark_chain_precision(struct bpf_verifier_env *env, int regno);
+int bpf_is_state_visited(struct bpf_verifier_env *env, int insn_idx);
+int bpf_update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st);
+
+void bpf_clear_jmp_history(struct bpf_verifier_state *state);
+int bpf_copy_verifier_state(struct bpf_verifier_state *dst_state,
+ const struct bpf_verifier_state *src);
+struct list_head *bpf_explored_state(struct bpf_verifier_env *env, int idx);
+void bpf_free_verifier_state(struct bpf_verifier_state *state, bool free_self);
+void bpf_free_backedges(struct bpf_scc_visit *visit);
+int bpf_push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
+ int insn_flags, u64 linked_regs);
+void bpf_bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist);
+void bpf_mark_reg_not_init(const struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg);
+void bpf_mark_reg_unknown_imprecise(struct bpf_reg_state *reg);
+void bpf_mark_all_scalars_precise(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st);
+void bpf_clear_singular_ids(struct bpf_verifier_env *env, struct bpf_verifier_state *st);
+int bpf_mark_chain_precision(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *starting_state,
+ int regno, bool *changed);
+
+static inline int bpf_get_spi(s32 off)
+{
+ return (-off - 1) / BPF_REG_SIZE;
+}
+
+static inline struct bpf_func_state *bpf_func(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg)
+{
+ struct bpf_verifier_state *cur = env->cur_state;
+
+ return cur->frame[reg->frameno];
+}
+
+/* Return IP for a given frame in a call stack */
+static inline u32 bpf_frame_insn_idx(struct bpf_verifier_state *st, u32 frame)
+{
+ return frame == st->curframe
+ ? st->insn_idx
+ : st->frame[frame + 1]->callsite;
+}
+
+static inline bool bpf_is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
+{
+ return env->insn_aux_data[insn_idx].jmp_point;
+}
+
+static inline bool bpf_is_spilled_reg(const struct bpf_stack_state *stack)
+{
+ return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
+}
+
+static inline bool bpf_is_spilled_scalar_reg(const struct bpf_stack_state *stack)
+{
+ return bpf_is_spilled_reg(stack) && stack->spilled_ptr.type == SCALAR_VALUE;
+}
+
+static inline bool bpf_register_is_null(struct bpf_reg_state *reg)
+{
+ return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
+}
+
+static inline void bpf_bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
+{
+ bt->reg_masks[frame] |= 1 << reg;
+}
+
+static inline void bpf_bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
+{
+ bt->stack_masks[frame] |= 1ull << slot;
+}
+
+static inline bool bt_is_frame_reg_set(struct backtrack_state *bt, u32 frame, u32 reg)
+{
+ return bt->reg_masks[frame] & (1 << reg);
+}
+
+static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot)
+{
+ return bt->stack_masks[frame] & (1ull << slot);
+}
+
+bool bpf_map_is_rdonly(const struct bpf_map *map);
+int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
+ bool is_ldsx);
+
#define BPF_BASE_TYPE_MASK GENMASK(BPF_BASE_TYPE_BITS - 1, 0)
/* extract base type from bpf_{arg, return, reg}_type. */
@@ -1077,22 +1298,194 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
u32 frameno, bool print_all);
void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate,
u32 frameno);
+u32 bpf_vlog_alignment(u32 pos);
struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off);
int bpf_jmp_offset(struct bpf_insn *insn);
struct bpf_iarray *bpf_insn_successors(struct bpf_verifier_env *env, u32 idx);
void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask);
-bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx);
+bool bpf_subprog_is_global(const struct bpf_verifier_env *env, int subprog);
+
+int bpf_find_subprog(struct bpf_verifier_env *env, int off);
+int bpf_compute_const_regs(struct bpf_verifier_env *env);
+int bpf_prune_dead_branches(struct bpf_verifier_env *env);
+int bpf_check_cfg(struct bpf_verifier_env *env);
+int bpf_compute_postorder(struct bpf_verifier_env *env);
+int bpf_compute_scc(struct bpf_verifier_env *env);
+
+struct bpf_map_desc {
+ struct bpf_map *ptr;
+ int uid;
+};
+
+struct bpf_kfunc_call_arg_meta {
+ /* In parameters */
+ struct btf *btf;
+ u32 func_id;
+ u32 kfunc_flags;
+ const struct btf_type *func_proto;
+ const char *func_name;
+ /* Out parameters */
+ u32 ref_obj_id;
+ u8 release_regno;
+ bool r0_rdonly;
+ u32 ret_btf_id;
+ u64 r0_size;
+ u32 subprogno;
+ struct {
+ u64 value;
+ bool found;
+ } arg_constant;
+
+ /* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
+ * generally to pass info about user-defined local kptr types to later
+ * verification logic
+ * bpf_obj_drop/bpf_percpu_obj_drop
+ * Record the local kptr type to be drop'd
+ * bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
+ * Record the local kptr type to be refcount_incr'd and use
+ * arg_owning_ref to determine whether refcount_acquire should be
+ * fallible
+ */
+ struct btf *arg_btf;
+ u32 arg_btf_id;
+ bool arg_owning_ref;
+ bool arg_prog;
+
+ struct {
+ struct btf_field *field;
+ } arg_list_head;
+ struct {
+ struct btf_field *field;
+ } arg_rbtree_root;
+ struct {
+ enum bpf_dynptr_type type;
+ u32 id;
+ u32 ref_obj_id;
+ } initialized_dynptr;
+ struct {
+ u8 spi;
+ u8 frameno;
+ } iter;
+ struct bpf_map_desc map;
+ u64 mem_size;
+};
+
+int bpf_get_helper_proto(struct bpf_verifier_env *env, int func_id,
+ const struct bpf_func_proto **ptr);
+int bpf_fetch_kfunc_arg_meta(struct bpf_verifier_env *env, s32 func_id,
+ s16 offset, struct bpf_kfunc_call_arg_meta *meta);
+bool bpf_is_async_callback_calling_insn(struct bpf_insn *insn);
+bool bpf_is_sync_callback_calling_insn(struct bpf_insn *insn);
+static inline bool bpf_is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->kfunc_flags & KF_ITER_NEXT;
+}
+
+static inline bool bpf_is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->kfunc_flags & KF_SLEEPABLE;
+}
+bool bpf_is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta);
+struct bpf_iarray *bpf_iarray_realloc(struct bpf_iarray *old, size_t n_elem);
+int bpf_copy_insn_array_uniq(struct bpf_map *map, u32 start, u32 end, u32 *off);
+bool bpf_insn_is_cond_jump(u8 code);
+bool bpf_is_may_goto_insn(struct bpf_insn *insn);
+
+void bpf_verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn);
+bool bpf_get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
+ struct bpf_call_summary *cs);
+s64 bpf_helper_stack_access_bytes(struct bpf_verifier_env *env,
+ struct bpf_insn *insn, int arg,
+ int insn_idx);
+s64 bpf_kfunc_stack_access_bytes(struct bpf_verifier_env *env,
+ struct bpf_insn *insn, int arg,
+ int insn_idx);
+int bpf_compute_subprog_arg_access(struct bpf_verifier_env *env);
int bpf_stack_liveness_init(struct bpf_verifier_env *env);
void bpf_stack_liveness_free(struct bpf_verifier_env *env);
-int bpf_update_live_stack(struct bpf_verifier_env *env);
-int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frameno, u32 insn_idx, u64 mask);
-void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frameno, u64 mask);
-int bpf_reset_stack_write_marks(struct bpf_verifier_env *env, u32 insn_idx);
-int bpf_commit_stack_write_marks(struct bpf_verifier_env *env);
int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_state *st);
bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 spi);
-void bpf_reset_live_stack_callchain(struct bpf_verifier_env *env);
+int bpf_compute_live_registers(struct bpf_verifier_env *env);
+
+#define BPF_MAP_KEY_POISON (1ULL << 63)
+#define BPF_MAP_KEY_SEEN (1ULL << 62)
+
+static inline bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
+{
+ return aux->map_ptr_state.poison;
+}
+
+static inline bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
+{
+ return aux->map_ptr_state.unpriv;
+}
+
+static inline bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
+{
+ return aux->map_key_state & BPF_MAP_KEY_POISON;
+}
+
+static inline bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
+{
+ return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
+}
+
+static inline u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
+{
+ return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
+}
+
+#define MAX_PACKET_OFF 0xffff
+#define CALLER_SAVED_REGS 6
+
+enum bpf_reg_arg_type {
+ SRC_OP, /* register is used as source operand */
+ DST_OP, /* register is used as destination operand */
+ DST_OP_NO_MARK /* same as above, check only, don't mark */
+};
+
+#define MAX_KFUNC_DESCS 256
+
+struct bpf_kfunc_desc {
+ struct btf_func_model func_model;
+ u32 func_id;
+ s32 imm;
+ u16 offset;
+ unsigned long addr;
+};
+
+struct bpf_kfunc_desc_tab {
+ /* Sorted by func_id (BTF ID) and offset (fd_array offset) during
+ * verification. JITs do lookups by bpf_insn, where func_id may not be
+ * available, therefore at the end of verification do_misc_fixups()
+ * sorts this by imm and offset.
+ */
+ struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
+ u32 nr_descs;
+};
+
+/* Functions exported from verifier.c, used by fixups.c */
+bool bpf_is_reg64(struct bpf_insn *insn, u32 regno, struct bpf_reg_state *reg, enum bpf_reg_arg_type t);
+void bpf_clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len);
+void bpf_mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog);
+bool bpf_allow_tail_call_in_subprogs(struct bpf_verifier_env *env);
+bool bpf_verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm);
+int bpf_add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, u16 offset);
+int bpf_fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ struct bpf_insn *insn_buf, int insn_idx, int *cnt);
+
+/* Functions in fixups.c, called from bpf_check() */
+int bpf_remove_fastcall_spills_fills(struct bpf_verifier_env *env);
+int bpf_optimize_bpf_loop(struct bpf_verifier_env *env);
+void bpf_opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env);
+int bpf_opt_remove_dead_code(struct bpf_verifier_env *env);
+int bpf_opt_remove_nops(struct bpf_verifier_env *env);
+int bpf_opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, const union bpf_attr *attr);
+int bpf_convert_ctx_accesses(struct bpf_verifier_env *env);
+int bpf_jit_subprogs(struct bpf_verifier_env *env);
+int bpf_fixup_call_args(struct bpf_verifier_env *env);
+int bpf_do_misc_fixups(struct bpf_verifier_env *env);
#endif /* _LINUX_BPF_VERIFIER_H */
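
The map-key helpers above pack three facts into one u64: bit 63 poisons the
key, bit 62 records that a constant key was seen at all, and the low bits
carry the key's immediate value, which bpf_map_key_immediate() recovers. A
minimal user-space sketch of that encoding (encode_const_key() is
hypothetical, for illustration only; the poison-on-conflict rule is an
assumption about how the tracking is used):

#include <stdint.h>
#include <stdio.h>

#define BPF_MAP_KEY_POISON (1ULL << 63)
#define BPF_MAP_KEY_SEEN   (1ULL << 62)

/* hypothetical: fold one more constant-key observation into the state */
static uint64_t encode_const_key(uint64_t key_state, uint64_t imm)
{
	if ((key_state & BPF_MAP_KEY_SEEN) &&
	    (key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON)) != imm)
		return key_state | BPF_MAP_KEY_POISON; /* conflicting keys */
	return BPF_MAP_KEY_SEEN | imm;
}

int main(void)
{
	uint64_t st = encode_const_key(0, 42);

	st = encode_const_key(st, 42); /* same key again: still clean */
	st = encode_const_key(st, 7);  /* different key: poisoned */
	printf("imm=%llu poisoned=%d\n",
	       (unsigned long long)(st & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON)),
	       !!(st & BPF_MAP_KEY_POISON));
	return 0;
}
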
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 139bdececdcf..af011db39ab3 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -217,7 +217,7 @@ BTF_SET8_END(name)
#else
-#define BTF_ID_LIST(name) static u32 __maybe_unused name[64];
+#define BTF_ID_LIST(name) static u32 __maybe_unused name[128];
#define BTF_ID(prefix, name)
#define BTF_ID_FLAGS(prefix, name, ...)
#define BTF_ID_UNUSED
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 44d7ae95ddbc..e40d4071a345 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -21,7 +21,6 @@
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>
#include <linux/sockptr.h>
-#include <crypto/sha1.h>
#include <linux/u64_stats_sync.h>
#include <net/sch_generic.h>
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 18a85c30fd4f..bfa765132de8 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -206,15 +206,6 @@ static inline void exit_tasks_rcu_finish(void) { }
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
/**
- * rcu_trace_implies_rcu_gp - does an RCU Tasks Trace grace period imply an RCU grace period?
- *
- * Now that RCU Tasks Trace is implemented in terms of SRCU-fast, a
- * call to synchronize_rcu_tasks_trace() is guaranteed to imply at least
- * one call to synchronize_rcu().
- */
-static inline bool rcu_trace_implies_rcu_gp(void) { return true; }
-
-/**
* cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
*
* This macro resembles cond_resched(), except that it is defined to
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c8d400b7680a..552bc5d9afbd 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4645,7 +4645,9 @@ union bpf_attr {
* Description
* Discard reserved ring buffer sample, pointed to by *data*.
* If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification
- * of new data availability is sent.
+ * of new data availability is sent. Discarded records remain in
+ * the ring buffer until consumed by user space, so a later submit
+ * using adaptive wakeup might not wake up the consumer.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
* If **0** is specified in *flags*, an adaptive notification
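
A BPF-side sketch of the caveat documented above: a program that mostly
discards with BPF_RB_NO_WAKEUP can pair its occasional submit with
BPF_RB_FORCE_WAKEUP so earlier discards never leave the consumer asleep.
This assumes the usual vmlinux.h/bpf_helpers.h includes, a
BPF_MAP_TYPE_RINGBUF map named rb, and a hypothetical want_event() filter:

struct event { __u64 ts; };

SEC("tracepoint/syscalls/sys_enter_getpid")
int sample(void *ctx)
{
	struct event *e;

	e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
	if (!e)
		return 0;
	e->ts = bpf_ktime_get_ns();
	if (!want_event(e))
		/* discarded records still occupy the buffer until the
		 * consumer walks past them */
		bpf_ringbuf_discard(e, BPF_RB_NO_WAKEUP);
	else
		/* force a notification so the preceding discards don't
		 * suppress the adaptive wakeup */
		bpf_ringbuf_submit(e, BPF_RB_FORCE_WAKEUP);
	return 0;
}
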
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 266d4ffa6c07..638615ebddc2 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -8,6 +8,16 @@
#define BTF_MAGIC 0xeB9F
#define BTF_VERSION 1
+/*
+ * BTF layout section consists of a struct btf_layout for each known
+ * kind at BTF encoding time.
+ */
+struct btf_layout {
+ __u8 info_sz; /* size of singular element after btf_type */
+ __u8 elem_sz; /* size of each of btf_vlen(t) elements */
+ __u16 flags; /* currently unused */
+};
+
struct btf_header {
__u16 magic;
__u8 version;
@@ -19,6 +29,8 @@ struct btf_header {
__u32 type_len; /* length of type section */
__u32 str_off; /* offset of string section */
__u32 str_len; /* length of string section */
+ __u32 layout_off; /* offset of layout section */
+ __u32 layout_len; /* length of layout section */
};
/* Max # of type identifier */
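
With per-kind info_sz and elem_sz available, a BTF consumer can size any
known type record generically instead of switching on the kind: the record
is the fixed struct btf_type, plus info_sz bytes of singular trailer, plus
vlen elements of elem_sz bytes each. A hedged sketch against the uapi above
(type_record_size() is illustrative, not a kernel API):

#include <linux/btf.h>

/* illustrative: byte length of one type record, given the table parsed
 * from the new layout section, indexed by kind */
static unsigned int type_record_size(const struct btf_type *t,
				     const struct btf_layout *layouts)
{
	const struct btf_layout *l = &layouts[BTF_INFO_KIND(t->info)];

	return sizeof(*t) + l->info_sz +
	       BTF_INFO_VLEN(t->info) * l->elem_sz;
}
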
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 79cf22860a99..399007b67a92 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -6,11 +6,12 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
endif
CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy)
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o liveness.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o liveness.o const_fold.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_insn_array.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
+obj-$(CONFIG_BPF_SYSCALL) += fixups.o cfg.o states.o backtrack.o check_btf.o
obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o
obj-$(CONFIG_BPF_JIT) += trampoline.o
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 33de68c95ad8..5e25e0353509 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -1015,8 +1015,10 @@ static void bpf_fd_array_map_clear(struct bpf_map *map, bool need_defer)
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
- for (i = 0; i < array->map.max_entries; i++)
+ for (i = 0; i < array->map.max_entries; i++) {
__fd_array_map_delete_elem(map, &i, need_defer);
+ cond_resched();
+ }
}
static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
diff --git a/kernel/bpf/backtrack.c b/kernel/bpf/backtrack.c
new file mode 100644
index 000000000000..854731dc93fe
--- /dev/null
+++ b/kernel/bpf/backtrack.c
@@ -0,0 +1,933 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/filter.h>
+#include <linux/bitmap.h>
+
+#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
+
+/* for any branch, call, exit record the history of jmps in the given state */
+int bpf_push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
+ int insn_flags, u64 linked_regs)
+{
+ u32 cnt = cur->jmp_history_cnt;
+ struct bpf_jmp_history_entry *p;
+ size_t alloc_size;
+
+ /* combine instruction flags if we already recorded this instruction */
+ if (env->cur_hist_ent) {
+ /* atomic instructions push insn_flags twice, for READ and
+ * WRITE sides, but they should agree on stack slot
+ */
+ verifier_bug_if((env->cur_hist_ent->flags & insn_flags) &&
+ (env->cur_hist_ent->flags & insn_flags) != insn_flags,
+ env, "insn history: insn_idx %d cur flags %x new flags %x",
+ env->insn_idx, env->cur_hist_ent->flags, insn_flags);
+ env->cur_hist_ent->flags |= insn_flags;
+ verifier_bug_if(env->cur_hist_ent->linked_regs != 0, env,
+ "insn history: insn_idx %d linked_regs: %#llx",
+ env->insn_idx, env->cur_hist_ent->linked_regs);
+ env->cur_hist_ent->linked_regs = linked_regs;
+ return 0;
+ }
+
+ cnt++;
+ alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
+ p = krealloc(cur->jmp_history, alloc_size, GFP_KERNEL_ACCOUNT);
+ if (!p)
+ return -ENOMEM;
+ cur->jmp_history = p;
+
+ p = &cur->jmp_history[cnt - 1];
+ p->idx = env->insn_idx;
+ p->prev_idx = env->prev_insn_idx;
+ p->flags = insn_flags;
+ p->linked_regs = linked_regs;
+ cur->jmp_history_cnt = cnt;
+ env->cur_hist_ent = p;
+
+ return 0;
+}
+
+static bool is_atomic_load_insn(const struct bpf_insn *insn)
+{
+ return BPF_CLASS(insn->code) == BPF_STX &&
+ BPF_MODE(insn->code) == BPF_ATOMIC &&
+ insn->imm == BPF_LOAD_ACQ;
+}
+
+static bool is_atomic_fetch_insn(const struct bpf_insn *insn)
+{
+ return BPF_CLASS(insn->code) == BPF_STX &&
+ BPF_MODE(insn->code) == BPF_ATOMIC &&
+ (insn->imm & BPF_FETCH);
+}
+
+static int insn_stack_access_spi(int insn_flags)
+{
+ return (insn_flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK;
+}
+
+static int insn_stack_access_frameno(int insn_flags)
+{
+ return insn_flags & INSN_F_FRAMENO_MASK;
+}
+
+/* Backtrack one insn at a time. If idx is not at the top of recorded
+ * history then previous instruction came from straight line execution.
+ * Return -ENOENT if we exhausted all instructions within given state.
+ *
+ * It's legal to have a bit of looping with the same starting and ending
+ * insn index within the same state, e.g.: 3->4->5->3, so just because current
+ * instruction index is the same as state's first_idx doesn't mean we are
+ * done. If there is still some jump history left, we should keep going. We
+ * need to take into account that we might have a jump history between given
+ * state's parent and itself, due to checkpointing. In this case, we'll have
+ * history entry recording a jump from last instruction of parent state and
+ * first instruction of given state.
+ */
+static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
+ u32 *history)
+{
+ u32 cnt = *history;
+
+ if (i == st->first_insn_idx) {
+ if (cnt == 0)
+ return -ENOENT;
+ if (cnt == 1 && st->jmp_history[0].idx == i)
+ return -ENOENT;
+ }
+
+ if (cnt && st->jmp_history[cnt - 1].idx == i) {
+ i = st->jmp_history[cnt - 1].prev_idx;
+ (*history)--;
+ } else {
+ i--;
+ }
+ return i;
+}
+
+static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st,
+ u32 hist_end, int insn_idx)
+{
+ if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx)
+ return &st->jmp_history[hist_end - 1];
+ return NULL;
+}
+
+static inline void bt_init(struct backtrack_state *bt, u32 frame)
+{
+ bt->frame = frame;
+}
+
+static inline void bt_reset(struct backtrack_state *bt)
+{
+ struct bpf_verifier_env *env = bt->env;
+
+ memset(bt, 0, sizeof(*bt));
+ bt->env = env;
+}
+
+static inline u32 bt_empty(struct backtrack_state *bt)
+{
+ u64 mask = 0;
+ int i;
+
+ for (i = 0; i <= bt->frame; i++)
+ mask |= bt->reg_masks[i] | bt->stack_masks[i];
+
+ return mask == 0;
+}
+
+static inline int bt_subprog_enter(struct backtrack_state *bt)
+{
+ if (bt->frame == MAX_CALL_FRAMES - 1) {
+ verifier_bug(bt->env, "subprog enter from frame %d", bt->frame);
+ return -EFAULT;
+ }
+ bt->frame++;
+ return 0;
+}
+
+static inline int bt_subprog_exit(struct backtrack_state *bt)
+{
+ if (bt->frame == 0) {
+ verifier_bug(bt->env, "subprog exit from frame 0");
+ return -EFAULT;
+ }
+ bt->frame--;
+ return 0;
+}
+
+static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
+{
+ bt->reg_masks[frame] &= ~(1 << reg);
+}
+
+static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
+{
+ bpf_bt_set_frame_reg(bt, bt->frame, reg);
+}
+
+static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
+{
+ bt_clear_frame_reg(bt, bt->frame, reg);
+}
+
+static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
+{
+ bt->stack_masks[frame] &= ~(1ull << slot);
+}
+
+static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
+{
+ return bt->reg_masks[frame];
+}
+
+static inline u32 bt_reg_mask(struct backtrack_state *bt)
+{
+ return bt->reg_masks[bt->frame];
+}
+
+static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame)
+{
+ return bt->stack_masks[frame];
+}
+
+static inline u64 bt_stack_mask(struct backtrack_state *bt)
+{
+ return bt->stack_masks[bt->frame];
+}
+
+static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
+{
+ return bt->reg_masks[bt->frame] & (1 << reg);
+}
+
+/* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
+static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
+{
+ DECLARE_BITMAP(mask, 64);
+ bool first = true;
+ int i, n;
+
+ buf[0] = '\0';
+
+ bitmap_from_u64(mask, reg_mask);
+ for_each_set_bit(i, mask, 32) {
+ n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i);
+ first = false;
+ buf += n;
+ buf_sz -= n;
+ if (buf_sz < 0)
+ break;
+ }
+}
+
+/* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
+void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
+{
+ DECLARE_BITMAP(mask, 64);
+ bool first = true;
+ int i, n;
+
+ buf[0] = '\0';
+
+ bitmap_from_u64(mask, stack_mask);
+ for_each_set_bit(i, mask, 64) {
+ n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8);
+ first = false;
+ buf += n;
+ buf_sz -= n;
+ if (buf_sz < 0)
+ break;
+ }
+}
+
+/* For given verifier state backtrack_insn() is called from the last insn to
+ * the first insn. Its purpose is to compute a bitmask of registers and
+ * stack slots that need precision in the parent verifier state.
+ *
+ * @idx is an index of the instruction we are currently processing;
+ * @subseq_idx is an index of the subsequent instruction that:
+ * - *would be* executed next, if jump history is viewed in forward order;
+ * - *was* processed previously during backtracking.
+ */
+static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
+ struct bpf_jmp_history_entry *hist, struct backtrack_state *bt)
+{
+ struct bpf_insn *insn = env->prog->insnsi + idx;
+ u8 class = BPF_CLASS(insn->code);
+ u8 opcode = BPF_OP(insn->code);
+ u8 mode = BPF_MODE(insn->code);
+ u32 dreg = insn->dst_reg;
+ u32 sreg = insn->src_reg;
+ u32 spi, i, fr;
+
+ if (insn->code == 0)
+ return 0;
+ if (env->log.level & BPF_LOG_LEVEL2) {
+ fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt));
+ verbose(env, "mark_precise: frame%d: regs=%s ",
+ bt->frame, env->tmp_str_buf);
+ bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
+ verbose(env, "stack=%s before ", env->tmp_str_buf);
+ verbose(env, "%d: ", idx);
+ bpf_verbose_insn(env, insn);
+ }
+
+ /* If there is a history record that some registers gained range at this insn,
+ * propagate precision marks to those registers, so that bt_is_reg_set()
+ * accounts for these registers.
+ */
+ bpf_bt_sync_linked_regs(bt, hist);
+
+ if (class == BPF_ALU || class == BPF_ALU64) {
+ if (!bt_is_reg_set(bt, dreg))
+ return 0;
+ if (opcode == BPF_END || opcode == BPF_NEG) {
+ /* sreg is reserved and unused
+ * dreg still needs precision before this insn
+ */
+ return 0;
+ } else if (opcode == BPF_MOV) {
+ if (BPF_SRC(insn->code) == BPF_X) {
+ /* dreg = sreg or dreg = (s8, s16, s32)sreg
+ * dreg needs precision after this insn
+ * sreg needs precision before this insn
+ */
+ bt_clear_reg(bt, dreg);
+ if (sreg != BPF_REG_FP)
+ bt_set_reg(bt, sreg);
+ } else {
+ /* dreg = K
+ * dreg needs precision after this insn.
+ * Corresponding register is already marked
+ * as precise=true in this verifier state.
+ * No further markings in parent are necessary
+ */
+ bt_clear_reg(bt, dreg);
+ }
+ } else {
+ if (BPF_SRC(insn->code) == BPF_X) {
+ /* dreg += sreg
+ * both dreg and sreg need precision
+ * before this insn
+ */
+ if (sreg != BPF_REG_FP)
+ bt_set_reg(bt, sreg);
+ } /* else dreg += K
+ * dreg still needs precision before this insn
+ */
+ }
+ } else if (class == BPF_LDX ||
+ is_atomic_load_insn(insn) ||
+ is_atomic_fetch_insn(insn)) {
+ u32 load_reg = dreg;
+
+ /*
+ * Atomic fetch operation writes the old value into
+ * a register (sreg or r0) and if it was tracked for
+ * precision, propagate to the stack slot like we do
+ * in regular ldx.
+ */
+ if (is_atomic_fetch_insn(insn))
+ load_reg = insn->imm == BPF_CMPXCHG ?
+ BPF_REG_0 : sreg;
+
+ if (!bt_is_reg_set(bt, load_reg))
+ return 0;
+ bt_clear_reg(bt, load_reg);
+
+ /* scalars can only be spilled into stack w/o losing precision.
+ * Load from any other memory can be zero extended.
+ * The desire to keep that precision is already indicated
+ * by 'precise' mark in corresponding register of this state.
+ * No further tracking necessary.
+ */
+ if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
+ return 0;
+ /* dreg = *(u64 *)[fp - off] was a fill from the stack.
+ * that [fp - off] slot contains scalar that needs to be
+ * tracked with precision
+ */
+ spi = insn_stack_access_spi(hist->flags);
+ fr = insn_stack_access_frameno(hist->flags);
+ bpf_bt_set_frame_slot(bt, fr, spi);
+ } else if (class == BPF_STX || class == BPF_ST) {
+ if (bt_is_reg_set(bt, dreg))
+ /* stx & st shouldn't be using _scalar_ dst_reg
+ * to access memory. It means backtracking
+ * encountered a case of pointer subtraction.
+ */
+ return -ENOTSUPP;
+ /* scalars can only be spilled into stack */
+ if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
+ return 0;
+ spi = insn_stack_access_spi(hist->flags);
+ fr = insn_stack_access_frameno(hist->flags);
+ if (!bt_is_frame_slot_set(bt, fr, spi))
+ return 0;
+ bt_clear_frame_slot(bt, fr, spi);
+ if (class == BPF_STX)
+ bt_set_reg(bt, sreg);
+ } else if (class == BPF_JMP || class == BPF_JMP32) {
+ if (bpf_pseudo_call(insn)) {
+ int subprog_insn_idx, subprog;
+
+ subprog_insn_idx = idx + insn->imm + 1;
+ subprog = bpf_find_subprog(env, subprog_insn_idx);
+ if (subprog < 0)
+ return -EFAULT;
+
+ if (bpf_subprog_is_global(env, subprog)) {
+ /* check that jump history doesn't have any
+ * extra instructions from subprog; the next
+ * instruction after call to global subprog
+ * should be literally next instruction in
+ * caller program
+ */
+ verifier_bug_if(idx + 1 != subseq_idx, env,
+ "extra insn from subprog");
+ /* r1-r5 are invalidated after subprog call,
+ * so for global func call it shouldn't be set
+ * anymore
+ */
+ if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
+ verifier_bug(env, "global subprog unexpected regs %x",
+ bt_reg_mask(bt));
+ return -EFAULT;
+ }
+ /* global subprog always sets R0 */
+ bt_clear_reg(bt, BPF_REG_0);
+ return 0;
+ } else {
+ /* static subprog call instruction, which
+ * means that we are exiting current subprog,
+ * so only r1-r5 could be still requested as
+ * precise, r0 and r6-r10 or any stack slot in
+ * the current frame should be zero by now
+ */
+ if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
+ verifier_bug(env, "static subprog unexpected regs %x",
+ bt_reg_mask(bt));
+ return -EFAULT;
+ }
+ /* we are now tracking register spills correctly,
+ * so any instance of leftover slots is a bug
+ */
+ if (bt_stack_mask(bt) != 0) {
+ verifier_bug(env,
+ "static subprog leftover stack slots %llx",
+ bt_stack_mask(bt));
+ return -EFAULT;
+ }
+ /* propagate r1-r5 to the caller */
+ for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
+ if (bt_is_reg_set(bt, i)) {
+ bt_clear_reg(bt, i);
+ bpf_bt_set_frame_reg(bt, bt->frame - 1, i);
+ }
+ }
+ if (bt_subprog_exit(bt))
+ return -EFAULT;
+ return 0;
+ }
+ } else if (bpf_is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
+ /* exit from callback subprog to callback-calling helper or
+ * kfunc call. Use idx/subseq_idx check to discern it from
+ * straight line code backtracking.
+ * Unlike the subprog call handling above, we shouldn't
+ * propagate precision of r1-r5 (if any requested), as they are
+ * not actually arguments passed directly to callback subprogs
+ */
+ if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
+ verifier_bug(env, "callback unexpected regs %x",
+ bt_reg_mask(bt));
+ return -EFAULT;
+ }
+ if (bt_stack_mask(bt) != 0) {
+ verifier_bug(env, "callback leftover stack slots %llx",
+ bt_stack_mask(bt));
+ return -EFAULT;
+ }
+ /* clear r1-r5 in callback subprog's mask */
+ for (i = BPF_REG_1; i <= BPF_REG_5; i++)
+ bt_clear_reg(bt, i);
+ if (bt_subprog_exit(bt))
+ return -EFAULT;
+ return 0;
+ } else if (opcode == BPF_CALL) {
+ /* kfunc with imm==0 is invalid and fixup_kfunc_call will
+ * catch this error later. Make backtracking conservative
+ * with ENOTSUPP.
+ */
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
+ return -ENOTSUPP;
+ /* regular helper call sets R0 */
+ bt_clear_reg(bt, BPF_REG_0);
+ if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
+ /* if backtracking was looking for registers R1-R5
+ * they should have been found already.
+ */
+ verifier_bug(env, "backtracking call unexpected regs %x",
+ bt_reg_mask(bt));
+ return -EFAULT;
+ }
+ if (insn->src_reg == BPF_REG_0 && insn->imm == BPF_FUNC_tail_call
+ && subseq_idx - idx != 1) {
+ if (bt_subprog_enter(bt))
+ return -EFAULT;
+ }
+ } else if (opcode == BPF_EXIT) {
+ bool r0_precise;
+
+ /* Backtracking to a nested function call, 'idx' is a part of
+ * the inner frame, 'subseq_idx' is a part of the outer frame.
+ * In case of a regular function call, instructions giving
+ * precision to registers R1-R5 should have been found already.
+ * In case of a callback, it is ok to have R1-R5 marked for
+ * backtracking, as these registers are set by the function
+ * invoking callback.
+ */
+ if (subseq_idx >= 0 && bpf_calls_callback(env, subseq_idx))
+ for (i = BPF_REG_1; i <= BPF_REG_5; i++)
+ bt_clear_reg(bt, i);
+ if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
+ verifier_bug(env, "backtracking exit unexpected regs %x",
+ bt_reg_mask(bt));
+ return -EFAULT;
+ }
+
+ /* BPF_EXIT in subprog or callback always returns
+ * right after the call instruction, so by checking
+ * whether the instruction at subseq_idx-1 is subprog
+ * call or not we can distinguish actual exit from
+ * *subprog* from exit from *callback*. In the former
+ * case, we need to propagate r0 precision, if
+ * necessary. In the latter case, we never do that.
+ */
+ r0_precise = subseq_idx - 1 >= 0 &&
+ bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) &&
+ bt_is_reg_set(bt, BPF_REG_0);
+
+ bt_clear_reg(bt, BPF_REG_0);
+ if (bt_subprog_enter(bt))
+ return -EFAULT;
+
+ if (r0_precise)
+ bt_set_reg(bt, BPF_REG_0);
+ /* r6-r9 and stack slots will stay set in caller frame
+ * bitmasks until we return back from callee(s)
+ */
+ return 0;
+ } else if (BPF_SRC(insn->code) == BPF_X) {
+ if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg))
+ return 0;
+ /* dreg <cond> sreg
+ * Both dreg and sreg need precision before
+ * this insn. If only sreg was marked precise
+ * before it would be equally necessary to
+ * propagate it to dreg.
+ */
+ if (!hist || !(hist->flags & INSN_F_SRC_REG_STACK))
+ bt_set_reg(bt, sreg);
+ if (!hist || !(hist->flags & INSN_F_DST_REG_STACK))
+ bt_set_reg(bt, dreg);
+ } else if (BPF_SRC(insn->code) == BPF_K) {
+ /* dreg <cond> K
+ * Only dreg still needs precision before
+ * this insn, so for the K-based conditional
+ * there is nothing new to be marked.
+ */
+ }
+ } else if (class == BPF_LD) {
+ if (!bt_is_reg_set(bt, dreg))
+ return 0;
+ bt_clear_reg(bt, dreg);
+ /* It's ld_imm64 or ld_abs or ld_ind.
+ * For ld_imm64 no further tracking of precision
+ * into parent is necessary
+ */
+ if (mode == BPF_IND || mode == BPF_ABS)
+ /* to be analyzed */
+ return -ENOTSUPP;
+ }
+ /* Propagate precision marks to linked registers, to account for
+ * registers marked as precise in this function.
+ */
+ bpf_bt_sync_linked_regs(bt, hist);
+ return 0;
+}
+
+/* the scalar precision tracking algorithm:
+ * . at the start all registers have precise=false.
+ * . scalar ranges are tracked as normal through alu and jmp insns.
+ * . once precise value of the scalar register is used in:
+ * . ptr + scalar alu
+ * . if (scalar cond K|scalar)
+ * . helper_call(.., scalar, ...) where ARG_CONST is expected
+ * backtrack through the verifier states and mark all registers and
+ * stack slots with spilled constants that these scalar registers
+ * should be precise.
+ * . during state pruning two registers (or spilled stack slots)
+ * are equivalent if both are not precise.
+ *
+ * Note the verifier cannot simply walk register parentage chain,
+ * since many different registers and stack slots could have been
+ * used to compute single precise scalar.
+ *
+ * The approach of starting with precise=true for all registers and then
+ * backtracking to mark a register as not precise when the verifier detects
+ * that the program doesn't care about a specific value (e.g., when a helper
+ * takes a register as an ARG_ANYTHING parameter) is not safe.
+ *
+ * It's ok to walk single parentage chain of the verifier states.
+ * It's possible that this backtracking will go all the way till 1st insn.
+ * All other branches will be explored for needing precision later.
+ *
+ * The backtracking needs to deal with cases like:
+ * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
+ * r9 -= r8
+ * r5 = r9
+ * if r5 > 0x79f goto pc+7
+ * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
+ * r5 += 1
+ * ...
+ * call bpf_perf_event_output#25
+ * where .arg5_type = ARG_CONST_SIZE_OR_ZERO
+ *
+ * and this case:
+ * r6 = 1
+ * call foo // uses callee's r6 inside to compute r0
+ * r0 += r6
+ * if r0 == 0 goto
+ *
+ * To track the above, reg_mask/stack_mask need to be independent for each frame.
+ *
+ * Also if parent's curframe > frame where backtracking started,
+ * the verifier needs to mark registers in both frames, otherwise callees
+ * may incorrectly prune callers. This is similar to
+ * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
+ *
+ * For now backtracking falls back into conservative marking.
+ */
+void bpf_mark_all_scalars_precise(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st)
+{
+ struct bpf_func_state *func;
+ struct bpf_reg_state *reg;
+ int i, j;
+
+ if (env->log.level & BPF_LOG_LEVEL2) {
+ verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n",
+ st->curframe);
+ }
+
+ /* big hammer: mark all scalars precise in this path.
+ * pop_stack may still get !precise scalars.
+ * We also skip current state and go straight to first parent state,
+ * because precision markings in current non-checkpointed state are
+ * not needed. See why in the comment in bpf_mark_chain_precision below.
+ */
+ for (st = st->parent; st; st = st->parent) {
+ for (i = 0; i <= st->curframe; i++) {
+ func = st->frame[i];
+ for (j = 0; j < BPF_REG_FP; j++) {
+ reg = &func->regs[j];
+ if (reg->type != SCALAR_VALUE || reg->precise)
+ continue;
+ reg->precise = true;
+ if (env->log.level & BPF_LOG_LEVEL2) {
+ verbose(env, "force_precise: frame%d: forcing r%d to be precise\n",
+ i, j);
+ }
+ }
+ for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
+ if (!bpf_is_spilled_reg(&func->stack[j]))
+ continue;
+ reg = &func->stack[j].spilled_ptr;
+ if (reg->type != SCALAR_VALUE || reg->precise)
+ continue;
+ reg->precise = true;
+ if (env->log.level & BPF_LOG_LEVEL2) {
+ verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n",
+ i, -(j + 1) * 8);
+ }
+ }
+ }
+ }
+}
+
+/*
+ * bpf_mark_chain_precision() backtracks BPF program instruction sequence and
+ * chain of verifier states making sure that register *regno* (if regno >= 0)
+ * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
+ * SCALARS, as well as any other registers and slots that contribute to
+ * a tracked state of given registers/stack slots, depending on specific BPF
+ * assembly instructions (see backtrack_insn() for exact instruction handling
+ * logic). This backtracking relies on recorded jmp_history and is able to
+ * traverse entire chain of parent states. This process ends only when all the
+ * necessary registers/slots and their transitive dependencies are marked as
+ * precise.
+ *
+ * One important and subtle aspect is that precise marks *do not matter* in
+ * the currently verified state (current state). It is important to understand
+ * why this is the case.
+ *
+ * First, note that current state is the state that is not yet "checkpointed",
+ * i.e., it is not yet put into env->explored_states, and it has no children
+ * states as well. It's ephemeral, and can end up either a) being discarded if
+ * compatible explored state is found at some point or BPF_EXIT instruction is
+ * reached or b) checkpointed and put into env->explored_states, branching out
+ * into one or more children states.
+ *
+ * In the former case, precise markings in current state are completely
+ * ignored by state comparison code (see regsafe() for details). Only
+ * checkpointed ("old") state precise markings are important, and if old
+ * state's register/slot is precise, regsafe() assumes current state's
+ * register/slot as precise and checks value ranges exactly and precisely. If
+ * states turn out to be compatible, current state's necessary precise
+ * markings and any required parent states' precise markings are enforced
+ * after the fact with propagate_precision() logic. But it's
+ * important to realize that in this case, even after marking current state
+ * registers/slots as precise, we immediately discard current state. So what
+ * actually matters is any of the precise markings propagated into current
+ * state's parent states, which are always checkpointed (due to b) case above).
+ * As such, for scenario a) it doesn't matter if current state has precise
+ * markings set or not.
+ *
+ * Now, for the scenario b), checkpointing and forking into child(ren)
+ * state(s). Note that before current state gets to checkpointing step, any
+ * processed instruction always assumes precise SCALAR register/slot
+ * knowledge: if precise value or range is useful to prune jump branch, BPF
+ * verifier takes this opportunity enthusiastically. Similarly, when
+ * register's value is used to calculate offset or memory address, exact
+ * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
+ * what we mentioned above about state comparison ignoring precise markings
+ * during state comparison, BPF verifier ignores and also assumes precise
+ * markings *at will* during instruction verification process. But as verifier
+ * assumes precision, it also propagates any precision dependencies across
+ * parent states, which are not yet finalized, so can be further restricted
+ * based on new knowledge gained from restrictions enforced by their children
+ * states. This is so that once those parent states are finalized, i.e., when
+ * they have no more active children state, state comparison logic in
+ * is_state_visited() would enforce strict and precise SCALAR ranges, if
+ * required for correctness.
+ *
+ * To build a bit more intuition, note also that once a state is checkpointed,
+ * the path we took to get to that state is not important. This is crucial
+ * property for state pruning. When state is checkpointed and finalized at
+ * some instruction index, it can be correctly and safely used to "short
+ * circuit" any *compatible* state that reaches exactly the same instruction
+ * index. I.e., if we jumped to that instruction from a completely different
+ * code path than original finalized state was derived from, it doesn't
+ * matter, current state can be discarded because from that instruction
+ * forward having a compatible state will ensure we will safely reach the
+ * exit. States describe preconditions for further exploration, but completely
+ * forget the history of how we got here.
+ *
+ * This also means that even if we needed precise SCALAR range to get to
+ * finalized state, but from that point forward *that same* SCALAR register is
+ * never used in a precise context (i.e., it's precise value is not needed for
+ * correctness), it's correct and safe to mark such register as "imprecise"
+ * (i.e., precise marking set to false). This is what we rely on when we do
+ * not set precise marking in current state. If no child state requires
+ * precision for any given SCALAR register, it's safe to dictate that it can
+ * be imprecise. If any child state does require this register to be precise,
+ * we'll mark it precise later retroactively during precise markings
+ * propagation from child state to parent states.
+ *
+ * Skipping precise marking setting in current state is a mild version of
+ * relying on the above observation. But we can utilize this property even
+ * more aggressively by proactively forgetting any precise marking in the
+ * current state (which we inherited from the parent state), right before we
+ * checkpoint it and branch off into new child state. This is done by
+ * mark_all_scalars_imprecise() to hopefully get more permissive and generic
+ * finalized states which help in short circuiting more future states.
+ */
+int bpf_mark_chain_precision(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *starting_state,
+ int regno,
+ bool *changed)
+{
+ struct bpf_verifier_state *st = starting_state;
+ struct backtrack_state *bt = &env->bt;
+ int first_idx = st->first_insn_idx;
+ int last_idx = starting_state->insn_idx;
+ int subseq_idx = -1;
+ struct bpf_func_state *func;
+ bool tmp, skip_first = true;
+ struct bpf_reg_state *reg;
+ int i, fr, err;
+
+ if (!env->bpf_capable)
+ return 0;
+
+ changed = changed ?: &tmp;
+ /* set frame number from which we are starting to backtrack */
+ bt_init(bt, starting_state->curframe);
+
+ /* Do sanity checks against current state of register and/or stack
+ * slot, but don't set precise flag in current state, as precision
+ * tracking in the current state is unnecessary.
+ */
+ func = st->frame[bt->frame];
+ if (regno >= 0) {
+ reg = &func->regs[regno];
+ if (reg->type != SCALAR_VALUE) {
+ verifier_bug(env, "backtracking misuse");
+ return -EFAULT;
+ }
+ bt_set_reg(bt, regno);
+ }
+
+ if (bt_empty(bt))
+ return 0;
+
+ for (;;) {
+ DECLARE_BITMAP(mask, 64);
+ u32 history = st->jmp_history_cnt;
+ struct bpf_jmp_history_entry *hist;
+
+ if (env->log.level & BPF_LOG_LEVEL2) {
+ verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
+ bt->frame, last_idx, first_idx, subseq_idx);
+ }
+
+ if (last_idx < 0) {
+ /* we are at the entry into subprog, which
+ * is expected for global funcs, but only if
+ * requested precise registers are R1-R5
+ * (which are global func's input arguments)
+ */
+ if (st->curframe == 0 &&
+ st->frame[0]->subprogno > 0 &&
+ st->frame[0]->callsite == BPF_MAIN_FUNC &&
+ bt_stack_mask(bt) == 0 &&
+ (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) {
+ bitmap_from_u64(mask, bt_reg_mask(bt));
+ for_each_set_bit(i, mask, 32) {
+ reg = &st->frame[0]->regs[i];
+ bt_clear_reg(bt, i);
+ if (reg->type == SCALAR_VALUE) {
+ reg->precise = true;
+ *changed = true;
+ }
+ }
+ return 0;
+ }
+
+ verifier_bug(env, "backtracking func entry subprog %d reg_mask %x stack_mask %llx",
+ st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
+ return -EFAULT;
+ }
+
+ for (i = last_idx;;) {
+ if (skip_first) {
+ err = 0;
+ skip_first = false;
+ } else {
+ hist = get_jmp_hist_entry(st, history, i);
+ err = backtrack_insn(env, i, subseq_idx, hist, bt);
+ }
+ if (err == -ENOTSUPP) {
+ bpf_mark_all_scalars_precise(env, starting_state);
+ bt_reset(bt);
+ return 0;
+ } else if (err) {
+ return err;
+ }
+ if (bt_empty(bt))
+ /* Found assignment(s) into tracked register in this state.
+ * Since this state is already marked, just return.
+ * Nothing to be tracked further in the parent state.
+ */
+ return 0;
+ subseq_idx = i;
+ i = get_prev_insn_idx(st, i, &history);
+ if (i == -ENOENT)
+ break;
+ if (i >= env->prog->len) {
+ /* This can happen if backtracking reached insn 0
+ * and there are still reg_mask or stack_mask
+ * to backtrack.
+ * It means the backtracking missed the spot where
+ * particular register was initialized with a constant.
+ */
+ verifier_bug(env, "backtracking idx %d", i);
+ return -EFAULT;
+ }
+ }
+ st = st->parent;
+ if (!st)
+ break;
+
+ for (fr = bt->frame; fr >= 0; fr--) {
+ func = st->frame[fr];
+ bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
+ for_each_set_bit(i, mask, 32) {
+ reg = &func->regs[i];
+ if (reg->type != SCALAR_VALUE) {
+ bt_clear_frame_reg(bt, fr, i);
+ continue;
+ }
+ if (reg->precise) {
+ bt_clear_frame_reg(bt, fr, i);
+ } else {
+ reg->precise = true;
+ *changed = true;
+ }
+ }
+
+ bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
+ for_each_set_bit(i, mask, 64) {
+ if (verifier_bug_if(i >= func->allocated_stack / BPF_REG_SIZE,
+ env, "stack slot %d, total slots %d",
+ i, func->allocated_stack / BPF_REG_SIZE))
+ return -EFAULT;
+
+ if (!bpf_is_spilled_scalar_reg(&func->stack[i])) {
+ bt_clear_frame_slot(bt, fr, i);
+ continue;
+ }
+ reg = &func->stack[i].spilled_ptr;
+ if (reg->precise) {
+ bt_clear_frame_slot(bt, fr, i);
+ } else {
+ reg->precise = true;
+ *changed = true;
+ }
+ }
+ if (env->log.level & BPF_LOG_LEVEL2) {
+ fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
+ bt_frame_reg_mask(bt, fr));
+ verbose(env, "mark_precise: frame%d: parent state regs=%s ",
+ fr, env->tmp_str_buf);
+ bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
+ bt_frame_stack_mask(bt, fr));
+ verbose(env, "stack=%s: ", env->tmp_str_buf);
+ print_verifier_state(env, st, fr, true);
+ }
+ }
+
+ if (bt_empty(bt))
+ return 0;
+
+ subseq_idx = first_idx;
+ last_idx = st->last_insn_idx;
+ first_idx = st->first_insn_idx;
+ }
+
+ /* if we still have requested precise regs or slots, we missed
+ * something (e.g., stack access through non-r10 register), so
+ * fall back to marking all precise
+ */
+ if (!bt_empty(bt)) {
+ bpf_mark_all_scalars_precise(env, starting_state);
+ bt_reset(bt);
+ }
+
+ return 0;
+}
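
A note on the mask plumbing used throughout backtrack.c: per-frame register
masks are bitmaps indexed by register number (1 << reg), and stack masks by
slot index (1ull << spi), where slot spi corresponds to frame offset
-(spi + 1) * 8. That is why fmt_reg_mask() renders 0x15 as "r0,r2,r4" while
bpf_fmt_stack_mask() renders the same pattern as "-8,-24,-40". A standalone
sketch of the slot-to-offset mapping:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t stack_mask = 0x15; /* slots 0, 2 and 4 */
	int spi;

	for (spi = 0; spi < 64; spi++)
		if (stack_mask & (1ULL << spi))
			/* mirrors bpf_fmt_stack_mask(): slot spi lives at
			 * frame pointer offset -(spi + 1) * 8 */
			printf("%d\n", -(spi + 1) * 8);
	return 0; /* prints -8, -24 and -40 */
}
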
diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c
index c2a2ead1f466..c76e9b0fabba 100644
--- a/kernel/bpf/bpf_cgrp_storage.c
+++ b/kernel/bpf/bpf_cgrp_storage.c
@@ -76,7 +76,7 @@ static long bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key,
return PTR_ERR(cgroup);
sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map,
- value, map_flags, false, GFP_ATOMIC);
+ value, map_flags, false);
cgroup_put(cgroup);
return PTR_ERR_OR_ZERO(sdata);
}
@@ -114,7 +114,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
- return bpf_local_storage_map_alloc(attr, &cgroup_cache, true);
+ return bpf_local_storage_map_alloc(attr, &cgroup_cache);
}
static void cgroup_storage_map_free(struct bpf_map *map)
@@ -122,9 +122,8 @@ static void cgroup_storage_map_free(struct bpf_map *map)
bpf_local_storage_map_free(map, &cgroup_cache);
}
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
- void *, value, u64, flags, gfp_t, gfp_flags)
+BPF_CALL_4(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
+ void *, value, u64, flags)
{
struct bpf_local_storage_data *sdata;
@@ -143,7 +142,7 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
if (!percpu_ref_is_dying(&cgroup->self.refcnt) &&
(flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map,
- value, BPF_NOEXIST, false, gfp_flags);
+ value, BPF_NOEXIST, false);
out:
return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data;
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index e86734609f3d..0da8d923e39d 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -98,7 +98,7 @@ static long bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
sdata = bpf_local_storage_update(file_inode(fd_file(f)),
(struct bpf_local_storage_map *)map,
- value, map_flags, false, GFP_ATOMIC);
+ value, map_flags, false);
return PTR_ERR_OR_ZERO(sdata);
}
@@ -122,9 +122,8 @@ static long bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
return inode_storage_delete(file_inode(fd_file(f)), map);
}
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
- void *, value, u64, flags, gfp_t, gfp_flags)
+BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
+ void *, value, u64, flags)
{
struct bpf_local_storage_data *sdata;
@@ -150,7 +149,7 @@ BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
sdata = bpf_local_storage_update(
inode, (struct bpf_local_storage_map *)map, value,
- BPF_NOEXIST, false, gfp_flags);
+ BPF_NOEXIST, false);
return IS_ERR(sdata) ? (unsigned long)NULL :
(unsigned long)sdata->data;
}
@@ -179,7 +178,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
{
- return bpf_local_storage_map_alloc(attr, &inode_cache, false);
+ return bpf_local_storage_map_alloc(attr, &inode_cache);
}
static void inode_storage_map_free(struct bpf_map *map)
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 9c96a4477f81..6fc6a4b672b5 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -68,25 +68,19 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
- void *value, bool swap_uptrs, gfp_t gfp_flags)
+ void *value, bool swap_uptrs)
{
struct bpf_local_storage_elem *selem;
if (mem_charge(smap, owner, smap->elem_size))
return NULL;
- if (smap->use_kmalloc_nolock) {
- selem = bpf_map_kmalloc_nolock(&smap->map, smap->elem_size,
- __GFP_ZERO, NUMA_NO_NODE);
- } else {
- selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
- gfp_flags | __GFP_NOWARN);
- }
+ selem = bpf_map_kmalloc_nolock(&smap->map, smap->elem_size,
+ __GFP_ZERO, NUMA_NO_NODE);
if (selem) {
RCU_INIT_POINTER(SDATA(selem)->smap, smap);
atomic_set(&selem->state, 0);
- selem->use_kmalloc_nolock = smap->use_kmalloc_nolock;
if (value) {
/* No need to call check_and_init_map_value as memory is zero init */
@@ -102,8 +96,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
return NULL;
}
-/* rcu tasks trace callback for use_kmalloc_nolock == false */
-static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
+static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
{
struct bpf_local_storage *local_storage;
@@ -115,47 +108,14 @@ static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
kfree(local_storage);
}
-/* Handle use_kmalloc_nolock == false */
-static void __bpf_local_storage_free(struct bpf_local_storage *local_storage,
- bool vanilla_rcu)
-{
- if (vanilla_rcu)
- kfree_rcu(local_storage, rcu);
- else
- call_rcu_tasks_trace(&local_storage->rcu,
- __bpf_local_storage_free_trace_rcu);
-}
-
-static void bpf_local_storage_free_rcu(struct rcu_head *rcu)
-{
- struct bpf_local_storage *local_storage;
-
- local_storage = container_of(rcu, struct bpf_local_storage, rcu);
- kfree_nolock(local_storage);
-}
-
-static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
-{
- /*
- * RCU Tasks Trace grace period implies RCU grace period, do
- * kfree() directly.
- */
- bpf_local_storage_free_rcu(rcu);
-}
-
static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
bool reuse_now)
{
if (!local_storage)
return;
- if (!local_storage->use_kmalloc_nolock) {
- __bpf_local_storage_free(local_storage, reuse_now);
- return;
- }
-
if (reuse_now) {
- call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu);
+ kfree_rcu(local_storage, rcu);
return;
}
@@ -163,42 +123,7 @@ static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
bpf_local_storage_free_trace_rcu);
}
-/* rcu callback for use_kmalloc_nolock == false */
-static void __bpf_selem_free_rcu(struct rcu_head *rcu)
-{
- struct bpf_local_storage_elem *selem;
- struct bpf_local_storage_map *smap;
-
- selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
- /* bpf_selem_unlink_nofail may have already cleared smap and freed fields. */
- smap = rcu_dereference_check(SDATA(selem)->smap, 1);
-
- if (smap)
- bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
- kfree(selem);
-}
-
-/* rcu tasks trace callback for use_kmalloc_nolock == false */
-static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
-{
- /*
- * RCU Tasks Trace grace period implies RCU grace period, do
- * kfree() directly.
- */
- __bpf_selem_free_rcu(rcu);
-}
-
-/* Handle use_kmalloc_nolock == false */
-static void __bpf_selem_free(struct bpf_local_storage_elem *selem,
- bool vanilla_rcu)
-{
- if (vanilla_rcu)
- call_rcu(&selem->rcu, __bpf_selem_free_rcu);
- else
- call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu);
-}
-
-static void bpf_selem_free_rcu(struct rcu_head *rcu)
+static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
{
struct bpf_local_storage_elem *selem;
struct bpf_local_storage_map *smap;
@@ -209,37 +134,24 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
if (smap)
bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
- kfree_nolock(selem);
-}
-
-static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
-{
/*
* RCU Tasks Trace grace period implies RCU grace period, do
* kfree() directly.
*/
- bpf_selem_free_rcu(rcu);
+ kfree(selem);
}
void bpf_selem_free(struct bpf_local_storage_elem *selem,
bool reuse_now)
{
- if (!selem->use_kmalloc_nolock) {
- /*
- * No uptr will be unpin even when reuse_now == false since uptr
- * is only supported in task local storage, where
- * smap->use_kmalloc_nolock == true.
- */
- __bpf_selem_free(selem, reuse_now);
- return;
- }
+ struct bpf_local_storage_map *smap;
+
+ smap = rcu_dereference_check(SDATA(selem)->smap, 1);
if (reuse_now) {
- /*
- * While it is okay to call bpf_obj_free_fields() that unpins uptr when
- * reuse_now == true, keep it in bpf_selem_free_rcu() for simplicity.
- */
- call_rcu(&selem->rcu, bpf_selem_free_rcu);
+ if (smap)
+ bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+ kfree_rcu(selem, rcu);
return;
}
@@ -393,6 +305,9 @@ int bpf_selem_unlink(struct bpf_local_storage_elem *selem)
unsigned long flags;
int err;
+ if (in_nmi())
+ return -EOPNOTSUPP;
+
if (unlikely(!selem_linked_to_storage_lockless(selem)))
/* selem has already been unlinked from sk */
return 0;
@@ -494,6 +409,14 @@ static void bpf_selem_unlink_nofail(struct bpf_local_storage_elem *selem,
}
raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
}
+ /*
+ * Highly unlikely scenario: memory leak
+ *
+ * When destroy() fails to acquire local_storage->lock and initializes
+ * selem->local_storage to NULL before any racing map_free() sees the same
+ * selem, no one will free the local storage.
+ */
+ WARN_ON_ONCE(err && !in_map_free);
if (!err || !in_map_free)
RCU_INIT_POINTER(selem->local_storage, NULL);
}
@@ -552,8 +475,7 @@ static int check_flags(const struct bpf_local_storage_data *old_sdata,
int bpf_local_storage_alloc(void *owner,
struct bpf_local_storage_map *smap,
- struct bpf_local_storage_elem *first_selem,
- gfp_t gfp_flags)
+ struct bpf_local_storage_elem *first_selem)
{
struct bpf_local_storage *prev_storage, *storage;
struct bpf_local_storage **owner_storage_ptr;
@@ -565,12 +487,8 @@ int bpf_local_storage_alloc(void *owner,
if (err)
return err;
- if (smap->use_kmalloc_nolock)
- storage = bpf_map_kmalloc_nolock(&smap->map, sizeof(*storage),
- __GFP_ZERO, NUMA_NO_NODE);
- else
- storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
- gfp_flags | __GFP_NOWARN);
+ storage = bpf_map_kmalloc_nolock(&smap->map, sizeof(*storage),
+ __GFP_ZERO, NUMA_NO_NODE);
if (!storage) {
err = -ENOMEM;
goto uncharge;
@@ -580,7 +498,6 @@ int bpf_local_storage_alloc(void *owner,
raw_res_spin_lock_init(&storage->lock);
storage->owner = owner;
storage->mem_charge = sizeof(*storage);
- storage->use_kmalloc_nolock = smap->use_kmalloc_nolock;
refcount_set(&storage->owner_refcnt, 1);
bpf_selem_link_storage_nolock(storage, first_selem);
@@ -628,7 +545,7 @@ uncharge:
*/
struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
- void *value, u64 map_flags, bool swap_uptrs, gfp_t gfp_flags)
+ void *value, u64 map_flags, bool swap_uptrs)
{
struct bpf_local_storage_data *old_sdata = NULL;
struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
@@ -645,9 +562,6 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
!btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)))
return ERR_PTR(-EINVAL);
- if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
- return ERR_PTR(-EINVAL);
-
local_storage = rcu_dereference_check(*owner_storage(smap, owner),
bpf_rcu_lock_held());
if (!local_storage || hlist_empty(&local_storage->list)) {
@@ -656,11 +570,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
if (err)
return ERR_PTR(err);
- selem = bpf_selem_alloc(smap, owner, value, swap_uptrs, gfp_flags);
+ selem = bpf_selem_alloc(smap, owner, value, swap_uptrs);
if (!selem)
return ERR_PTR(-ENOMEM);
- err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
+ err = bpf_local_storage_alloc(owner, smap, selem);
if (err) {
bpf_selem_free(selem, true);
mem_uncharge(smap, owner, smap->elem_size);
@@ -690,7 +604,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
/* A lookup has just been done before and concluded a new selem is
* needed. The chance of an unnecessary alloc is unlikely.
*/
- alloc_selem = selem = bpf_selem_alloc(smap, owner, value, swap_uptrs, gfp_flags);
+ alloc_selem = selem = bpf_selem_alloc(smap, owner, value, swap_uptrs);
if (!alloc_selem)
return ERR_PTR(-ENOMEM);
@@ -857,8 +771,7 @@ u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
- struct bpf_local_storage_cache *cache,
- bool use_kmalloc_nolock)
+ struct bpf_local_storage_cache *cache)
{
struct bpf_local_storage_map *smap;
unsigned int i;
@@ -890,12 +803,6 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
smap->elem_size = offsetof(struct bpf_local_storage_elem,
sdata.data[attr->value_size]);
- /* In PREEMPT_RT, kmalloc(GFP_ATOMIC) is still not safe in non
- * preemptible context. Thus, enforce all storages to use
- * kmalloc_nolock() when CONFIG_PREEMPT_RT is enabled.
- */
- smap->use_kmalloc_nolock = IS_ENABLED(CONFIG_PREEMPT_RT) ? true : use_kmalloc_nolock;
-
smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
return &smap->map;
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 0c4a0c8e6f70..c5c925f00202 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -359,8 +359,6 @@ BTF_ID(func, bpf_lsm_sb_umount)
BTF_ID(func, bpf_lsm_settime)
#ifdef CONFIG_SECURITY_NETWORK
-BTF_ID(func, bpf_lsm_inet_conn_established)
-
BTF_ID(func, bpf_lsm_socket_accept)
BTF_ID(func, bpf_lsm_socket_bind)
BTF_ID(func, bpf_lsm_socket_connect)
@@ -381,8 +379,9 @@ BTF_ID(func, bpf_lsm_syslog)
BTF_ID(func, bpf_lsm_task_alloc)
BTF_ID(func, bpf_lsm_task_prctl)
BTF_ID(func, bpf_lsm_task_setscheduler)
-BTF_ID(func, bpf_lsm_task_to_inode)
BTF_ID(func, bpf_lsm_userns_create)
+BTF_ID(func, bpf_lsm_bdev_alloc_security)
+BTF_ID(func, bpf_lsm_bdev_setintegrity)
BTF_SET_END(sleepable_lsm_hooks)
BTF_SET_START(untrusted_lsm_hooks)
@@ -395,6 +394,8 @@ BTF_ID(func, bpf_lsm_sk_alloc_security)
BTF_ID(func, bpf_lsm_sk_free_security)
#endif /* CONFIG_SECURITY_NETWORK */
BTF_ID(func, bpf_lsm_task_free)
+BTF_ID(func, bpf_lsm_bdev_alloc_security)
+BTF_ID(func, bpf_lsm_bdev_free_security)
BTF_SET_END(untrusted_lsm_hooks)
bool bpf_lsm_is_sleepable_hook(u32 btf_id)
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 605506792b5b..4b342be29eac 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -118,7 +118,7 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value, map_flags,
- true, GFP_ATOMIC);
+ true);
err = PTR_ERR_OR_ZERO(sdata);
out:
@@ -165,9 +165,8 @@ out:
return err;
}
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
- task, void *, value, u64, flags, gfp_t, gfp_flags)
+BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+ task, void *, value, u64, flags)
{
struct bpf_local_storage_data *sdata;
@@ -184,7 +183,7 @@ BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
(flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) {
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value,
- BPF_NOEXIST, false, gfp_flags);
+ BPF_NOEXIST, false);
return IS_ERR(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data;
}
@@ -212,7 +211,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
{
- return bpf_local_storage_map_alloc(attr, &task_cache, true);
+ return bpf_local_storage_map_alloc(attr, &task_cache);
}
static void task_storage_map_free(struct bpf_map *map)
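
Dropping the hidden gfp argument leaves the BPF-side calling convention
untouched: bpf_task_storage_get() still takes the map, the task, an optional
init value and flags. A minimal sketch of the usual create-on-first-use
pattern (assumes vmlinux.h, bpf_helpers.h, bpf_tracing.h and a
BPF_MAP_TYPE_TASK_STORAGE map named task_map declared elsewhere):

SEC("lsm.s/file_open")
int BPF_PROG(count_opens, struct file *file)
{
	struct task_struct *task = bpf_get_current_task_btf();
	__u64 *cnt;

	/* look up this task's slot, creating a zeroed one if absent */
	cnt = bpf_task_storage_get(&task_map, task, 0,
				   BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (cnt)
		__sync_fetch_and_add(cnt, 1);
	return 0;
}
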
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 71f9143fe90f..a62d78581207 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -270,6 +270,7 @@ struct btf {
struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab;
struct btf_struct_metas *struct_meta_tab;
struct btf_struct_ops_tab *struct_ops_tab;
+ struct btf_layout *layout;
/* split BTF support */
struct btf *base_btf;
@@ -1707,6 +1708,11 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env,
__btf_verifier_log(log, "type_len: %u\n", hdr->type_len);
__btf_verifier_log(log, "str_off: %u\n", hdr->str_off);
__btf_verifier_log(log, "str_len: %u\n", hdr->str_len);
+ if (hdr->hdr_len >= sizeof(struct btf_header) &&
+ btf_data_size >= hdr->hdr_len) {
+ __btf_verifier_log(log, "layout_off: %u\n", hdr->layout_off);
+ __btf_verifier_log(log, "layout_len: %u\n", hdr->layout_len);
+ }
__btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size);
}
@@ -5526,7 +5532,8 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
start = btf->nohdr_data + hdr->str_off;
end = start + hdr->str_len;
- if (end != btf->data + btf->data_size) {
+ if (hdr->hdr_len < sizeof(struct btf_header) &&
+ end != btf->data + btf->data_size) {
btf_verifier_log(env, "String section is not at the end");
return -EINVAL;
}
@@ -5547,9 +5554,46 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
return 0;
}
+static int btf_parse_layout_sec(struct btf_verifier_env *env)
+{
+ const struct btf_header *hdr = &env->btf->hdr;
+ struct btf *btf = env->btf;
+ void *start, *end;
+
+ if (hdr->hdr_len < sizeof(struct btf_header) ||
+ hdr->layout_len == 0)
+ return 0;
+
+ /* Layout section must be aligned to 4 bytes */
+ if (hdr->layout_off & (sizeof(u32) - 1)) {
+ btf_verifier_log(env, "Unaligned layout_off");
+ return -EINVAL;
+ }
+ start = btf->nohdr_data + hdr->layout_off;
+ end = start + hdr->layout_len;
+
+ if (hdr->layout_len < sizeof(struct btf_layout)) {
+ btf_verifier_log(env, "Layout section is too small");
+ return -EINVAL;
+ }
+ if (hdr->layout_len % sizeof(struct btf_layout) != 0) {
+ btf_verifier_log(env, "layout_len is not multiple of %zu",
+ sizeof(struct btf_layout));
+ return -EINVAL;
+ }
+ if (end > btf->data + btf->data_size) {
+ btf_verifier_log(env, "Layout section is too big");
+ return -EINVAL;
+ }
+ btf->layout = start;
+
+ return 0;
+}
+
static const size_t btf_sec_info_offset[] = {
offsetof(struct btf_header, type_off),
offsetof(struct btf_header, str_off),
+ offsetof(struct btf_header, layout_off)
};
static int btf_sec_info_cmp(const void *a, const void *b)
@@ -5565,24 +5609,28 @@ static int btf_check_sec_info(struct btf_verifier_env *env,
{
struct btf_sec_info secs[ARRAY_SIZE(btf_sec_info_offset)];
u32 total, expected_total, i;
+ u32 nr_secs = ARRAY_SIZE(btf_sec_info_offset);
const struct btf_header *hdr;
const struct btf *btf;
btf = env->btf;
hdr = &btf->hdr;
+ if (hdr->hdr_len < sizeof(struct btf_header) || hdr->layout_len == 0)
+ nr_secs--;
+
/* Populate the secs from hdr */
- for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++)
+ for (i = 0; i < nr_secs; i++)
secs[i] = *(struct btf_sec_info *)((void *)hdr +
btf_sec_info_offset[i]);
- sort(secs, ARRAY_SIZE(btf_sec_info_offset),
+ sort(secs, nr_secs,
sizeof(struct btf_sec_info), btf_sec_info_cmp, NULL);
/* Check for gaps and overlap among sections */
total = 0;
expected_total = btf_data_size - hdr->hdr_len;
- for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++) {
+ for (i = 0; i < nr_secs; i++) {
if (expected_total < secs[i].off) {
btf_verifier_log(env, "Invalid section offset");
return -EINVAL;
@@ -5938,6 +5986,10 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat
if (err)
goto errout;
+ err = btf_parse_layout_sec(env);
+ if (err)
+ goto errout;
+
err = btf_parse_type_sec(env);
if (err)
goto errout;
@@ -6517,13 +6569,6 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
return prog->aux->attach_btf;
}
-static bool is_void_or_int_ptr(struct btf *btf, const struct btf_type *t)
-{
- /* skip modifiers */
- t = btf_type_skip_modifiers(btf, t->type, NULL);
- return btf_type_is_void(t) || btf_type_is_int(t);
-}
-
u32 btf_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto,
int off)
{
@@ -6912,10 +6957,14 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
/*
- * If it's a pointer to void, it's the same as scalar from the verifier
- * safety POV. Either way, no futher pointer walking is allowed.
+ * If it's a single or multilevel pointer, except a pointer
+ * to a structure, it's the same as scalar from the verifier
+ * safety POV. Multilevel pointers to structures are treated as
+ * scalars. The verifier lacks the context to infer the size of
+ * their target memory regions. Either way, no further pointer
+ * walking is allowed.
*/
- if (is_void_or_int_ptr(btf, t))
+ if (!btf_type_is_struct_ptr(btf, t))
return true;
/* this is a pointer to another type */
@@ -7845,15 +7894,16 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
tname, nargs, MAX_BPF_FUNC_REG_ARGS);
return -EINVAL;
}
- /* check that function returns int, exception cb also requires this */
+ /* check that function is void or returns int, exception cb also requires this */
t = btf_type_by_id(btf, t->type);
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (!btf_type_is_int(t) && !btf_is_any_enum(t)) {
+ if (!btf_type_is_void(t) && !btf_type_is_int(t) && !btf_is_any_enum(t)) {
if (!is_global)
return -EINVAL;
bpf_log(log,
- "Global function %s() doesn't return scalar. Only those are supported.\n",
+ "Global function %s() return value not void or scalar. "
+ "Only those are supported.\n",
tname);
return -EINVAL;
}
@@ -9019,7 +9069,7 @@ static int btf_check_dtor_kfuncs(struct btf *btf, const struct btf_id_dtor_kfunc
if (!t || !btf_type_is_ptr(t))
return -EINVAL;
- if (IS_ENABLED(CONFIG_CFI_CLANG)) {
+ if (IS_ENABLED(CONFIG_CFI)) {
/* Ensure the destructor kfunc type matches btf_dtor_kfunc_t */
t = btf_type_by_id(btf, t->type);
if (!btf_type_is_void(t))
diff --git a/kernel/bpf/cfg.c b/kernel/bpf/cfg.c
new file mode 100644
index 000000000000..998f42a8189a
--- /dev/null
+++ b/kernel/bpf/cfg.c
@@ -0,0 +1,872 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/filter.h>
+#include <linux/sort.h>
+
+#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
+
+/* non-recursive DFS pseudo code
+ * 1 procedure DFS-iterative(G,v):
+ * 2 label v as discovered
+ * 3 let S be a stack
+ * 4 S.push(v)
+ * 5 while S is not empty
+ * 6 t <- S.peek()
+ * 7 if t is what we're looking for:
+ * 8 return t
+ * 9 for all edges e in G.adjacentEdges(t) do
+ * 10 if edge e is already labelled
+ * 11 continue with the next edge
+ * 12 w <- G.adjacentVertex(t,e)
+ * 13 if vertex w is not discovered and not explored
+ * 14 label e as tree-edge
+ * 15 label w as discovered
+ * 16 S.push(w)
+ * 17 continue at 5
+ * 18 else if vertex w is discovered
+ * 19 label e as back-edge
+ * 20 else
+ * 21 // vertex w is explored
+ * 22 label e as forward- or cross-edge
+ * 23 label t as explored
+ * 24 S.pop()
+ *
+ * convention:
+ * 0x10 - discovered
+ * 0x11 - discovered and fall-through edge labelled
+ * 0x12 - discovered and fall-through and branch edges labelled
+ * 0x20 - explored
+ */
+
+enum {
+ DISCOVERED = 0x10,
+ EXPLORED = 0x20,
+ FALLTHROUGH = 1,
+ BRANCH = 2,
+};
+
+static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off)
+{
+ struct bpf_subprog_info *subprog;
+
+ subprog = bpf_find_containing_subprog(env, off);
+ subprog->changes_pkt_data = true;
+}
+
+static void mark_subprog_might_sleep(struct bpf_verifier_env *env, int off)
+{
+ struct bpf_subprog_info *subprog;
+
+ subprog = bpf_find_containing_subprog(env, off);
+ subprog->might_sleep = true;
+}
+
+/* 't' is an index of a call-site.
+ * 'w' is a callee entry point.
+ * Eventually this function will be called when env->cfg.insn_state[w] == EXPLORED.
+ * Rely on the DFS traversal order and the absence of recursive calls to
+ * guarantee that the callee's changes_pkt_data marks are correct at that moment.
+ */
+static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w)
+{
+ struct bpf_subprog_info *caller, *callee;
+
+ caller = bpf_find_containing_subprog(env, t);
+ callee = bpf_find_containing_subprog(env, w);
+ caller->changes_pkt_data |= callee->changes_pkt_data;
+ caller->might_sleep |= callee->might_sleep;
+}
+
+enum {
+ DONE_EXPLORING = 0,
+ KEEP_EXPLORING = 1,
+};
+
+/* t, w, e - match pseudo-code above:
+ * t - index of current instruction
+ * w - next instruction
+ * e - edge
+ */
+static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
+{
+ int *insn_stack = env->cfg.insn_stack;
+ int *insn_state = env->cfg.insn_state;
+
+ if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
+ return DONE_EXPLORING;
+
+ if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
+ return DONE_EXPLORING;
+
+ if (w < 0 || w >= env->prog->len) {
+ verbose_linfo(env, t, "%d: ", t);
+ verbose(env, "jump out of range from insn %d to %d\n", t, w);
+ return -EINVAL;
+ }
+
+ if (e == BRANCH) {
+ /* mark branch target for state pruning */
+ mark_prune_point(env, w);
+ mark_jmp_point(env, w);
+ }
+
+ if (insn_state[w] == 0) {
+ /* tree-edge */
+ insn_state[t] = DISCOVERED | e;
+ insn_state[w] = DISCOVERED;
+ if (env->cfg.cur_stack >= env->prog->len)
+ return -E2BIG;
+ insn_stack[env->cfg.cur_stack++] = w;
+ return KEEP_EXPLORING;
+ } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
+ if (env->bpf_capable)
+ return DONE_EXPLORING;
+ verbose_linfo(env, t, "%d: ", t);
+ verbose_linfo(env, w, "%d: ", w);
+ verbose(env, "back-edge from insn %d to %d\n", t, w);
+ return -EINVAL;
+ } else if (insn_state[w] == EXPLORED) {
+ /* forward- or cross-edge */
+ insn_state[t] = DISCOVERED | e;
+ } else {
+ verifier_bug(env, "insn state internal bug");
+ return -EFAULT;
+ }
+ return DONE_EXPLORING;
+}
+
+static int visit_func_call_insn(int t, struct bpf_insn *insns,
+ struct bpf_verifier_env *env,
+ bool visit_callee)
+{
+ int ret, insn_sz;
+ int w;
+
+ insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1;
+ ret = push_insn(t, t + insn_sz, FALLTHROUGH, env);
+ if (ret)
+ return ret;
+
+ mark_prune_point(env, t + insn_sz);
+ /* when we exit from subprog, we need to record non-linear history */
+ mark_jmp_point(env, t + insn_sz);
+
+ if (visit_callee) {
+ w = t + insns[t].imm + 1;
+ mark_prune_point(env, t);
+ merge_callee_effects(env, t, w);
+ ret = push_insn(t, w, BRANCH, env);
+ }
+ return ret;
+}
+
+struct bpf_iarray *bpf_iarray_realloc(struct bpf_iarray *old, size_t n_elem)
+{
+ size_t new_size = sizeof(struct bpf_iarray) + n_elem * sizeof(old->items[0]);
+ struct bpf_iarray *new;
+
+ new = kvrealloc(old, new_size, GFP_KERNEL_ACCOUNT);
+ if (!new) {
+ /* this is what callers always want, so simplify the call site */
+ kvfree(old);
+ return NULL;
+ }
+
+ new->cnt = n_elem;
+ return new;
+}
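+
+/* Callers rely on the free-on-failure contract above, e.g.:
+ *
+ *	arr = bpf_iarray_realloc(arr, new_cnt);
+ *	if (!arr)
+ *		return -ENOMEM;
+ *
+ * with no separate kvfree() needed for the old array on the error path.
+ */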
+
+static int copy_insn_array(struct bpf_map *map, u32 start, u32 end, u32 *items)
+{
+ struct bpf_insn_array_value *value;
+ u32 i;
+
+ for (i = start; i <= end; i++) {
+ value = map->ops->map_lookup_elem(map, &i);
+ /*
+ * map_lookup_elem of an array map will never return an error,
+ * but not checking it makes some static analysers worry
+ */
+ if (IS_ERR(value))
+ return PTR_ERR(value);
+ else if (!value)
+ return -EINVAL;
+ items[i - start] = value->xlated_off;
+ }
+ return 0;
+}
+
+static int cmp_ptr_to_u32(const void *a, const void *b)
+{
+ return *(u32 *)a - *(u32 *)b;
+}
+
+static int sort_insn_array_uniq(u32 *items, int cnt)
+{
+ int unique = 1;
+ int i;
+
+ sort(items, cnt, sizeof(items[0]), cmp_ptr_to_u32, NULL);
+
+ for (i = 1; i < cnt; i++)
+ if (items[i] != items[unique - 1])
+ items[unique++] = items[i];
+
+ return unique;
+}
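+
+/* Example: items = {7, 3, 9, 3, 7} is sorted to {3, 3, 7, 7, 9},
+ * compacted in place to {3, 7, 9, ...}, and the function returns 3.
+ */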
+
+/*
+ * sort_unique({map[start], ..., map[end]}) into off
+ */
+int bpf_copy_insn_array_uniq(struct bpf_map *map, u32 start, u32 end, u32 *off)
+{
+ u32 n = end - start + 1;
+ int err;
+
+ err = copy_insn_array(map, start, end, off);
+ if (err)
+ return err;
+
+ return sort_insn_array_uniq(off, n);
+}
+
+/*
+ * Copy all unique offsets from the map
+ */
+static struct bpf_iarray *jt_from_map(struct bpf_map *map)
+{
+ struct bpf_iarray *jt;
+ int err;
+ int n;
+
+ jt = bpf_iarray_realloc(NULL, map->max_entries);
+ if (!jt)
+ return ERR_PTR(-ENOMEM);
+
+ n = bpf_copy_insn_array_uniq(map, 0, map->max_entries - 1, jt->items);
+ if (n < 0) {
+ err = n;
+ goto err_free;
+ }
+ if (n == 0) {
+ err = -EINVAL;
+ goto err_free;
+ }
+ jt->cnt = n;
+ return jt;
+
+err_free:
+ kvfree(jt);
+ return ERR_PTR(err);
+}
+
+/*
+ * Find and collect all maps whose targets fit in the subprog. Return the
+ * result as one combined jump table in jt->items (allocated via
+ * bpf_iarray_realloc())
+ */
+static struct bpf_iarray *jt_from_subprog(struct bpf_verifier_env *env,
+ int subprog_start, int subprog_end)
+{
+ struct bpf_iarray *jt = NULL;
+ struct bpf_map *map;
+ struct bpf_iarray *jt_cur;
+ int i;
+
+ for (i = 0; i < env->insn_array_map_cnt; i++) {
+ /*
+ * TODO (when needed): collect only jump tables, not static keys
+ * or maps for indirect calls
+ */
+ map = env->insn_array_maps[i];
+
+ jt_cur = jt_from_map(map);
+ if (IS_ERR(jt_cur)) {
+ kvfree(jt);
+ return jt_cur;
+ }
+
+ /*
+ * It is enough to check one element here. The full table is
+ * checked to fit inside the subprog later in create_jt()
+ */
+ if (jt_cur->items[0] >= subprog_start && jt_cur->items[0] < subprog_end) {
+ u32 old_cnt = jt ? jt->cnt : 0;
+ jt = bpf_iarray_realloc(jt, old_cnt + jt_cur->cnt);
+ if (!jt) {
+ kvfree(jt_cur);
+ return ERR_PTR(-ENOMEM);
+ }
+ memcpy(jt->items + old_cnt, jt_cur->items, jt_cur->cnt << 2);
+ }
+
+ kvfree(jt_cur);
+ }
+
+ if (!jt) {
+ verbose(env, "no jump tables found for subprog starting at %u\n", subprog_start);
+ return ERR_PTR(-EINVAL);
+ }
+
+ jt->cnt = sort_insn_array_uniq(jt->items, jt->cnt);
+ return jt;
+}
+
+static struct bpf_iarray *
+create_jt(int t, struct bpf_verifier_env *env)
+{
+ struct bpf_subprog_info *subprog;
+ int subprog_start, subprog_end;
+ struct bpf_iarray *jt;
+ int i;
+
+ subprog = bpf_find_containing_subprog(env, t);
+ subprog_start = subprog->start;
+ subprog_end = (subprog + 1)->start;
+ jt = jt_from_subprog(env, subprog_start, subprog_end);
+ if (IS_ERR(jt))
+ return jt;
+
+ /* Check that every element of the jump table fits within the given subprogram */
+ for (i = 0; i < jt->cnt; i++) {
+ if (jt->items[i] < subprog_start || jt->items[i] >= subprog_end) {
+ verbose(env, "jump table for insn %d points outside of the subprog [%u,%u]\n",
+ t, subprog_start, subprog_end);
+ kvfree(jt);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ return jt;
+}
+
+/* "conditional jump with N edges" */
+static int visit_gotox_insn(int t, struct bpf_verifier_env *env)
+{
+ int *insn_stack = env->cfg.insn_stack;
+ int *insn_state = env->cfg.insn_state;
+ bool keep_exploring = false;
+ struct bpf_iarray *jt;
+ int i, w;
+
+ jt = env->insn_aux_data[t].jt;
+ if (!jt) {
+ jt = create_jt(t, env);
+ if (IS_ERR(jt))
+ return PTR_ERR(jt);
+
+ env->insn_aux_data[t].jt = jt;
+ }
+
+ mark_prune_point(env, t);
+ for (i = 0; i < jt->cnt; i++) {
+ w = jt->items[i];
+ if (w < 0 || w >= env->prog->len) {
+ verbose(env, "indirect jump out of range from insn %d to %d\n", t, w);
+ return -EINVAL;
+ }
+
+ mark_jmp_point(env, w);
+
+ /* EXPLORED || DISCOVERED */
+ if (insn_state[w])
+ continue;
+
+ if (env->cfg.cur_stack >= env->prog->len)
+ return -E2BIG;
+
+ insn_stack[env->cfg.cur_stack++] = w;
+ insn_state[w] |= DISCOVERED;
+ keep_exploring = true;
+ }
+
+ return keep_exploring ? KEEP_EXPLORING : DONE_EXPLORING;
+}
+
+/*
+ * Instructions that can abnormally return from a subprog (tail_call
+ * upon success, ld_{abs,ind} upon load failure) have a hidden exit
+ * that the verifier must account for.
+ */
+static int visit_abnormal_return_insn(struct bpf_verifier_env *env, int t)
+{
+ struct bpf_subprog_info *subprog;
+ struct bpf_iarray *jt;
+
+ if (env->insn_aux_data[t].jt)
+ return 0;
+
+ jt = bpf_iarray_realloc(NULL, 2);
+ if (!jt)
+ return -ENOMEM;
+
+ subprog = bpf_find_containing_subprog(env, t);
+ jt->items[0] = t + 1;
+ jt->items[1] = subprog->exit_idx;
+ env->insn_aux_data[t].jt = jt;
+ return 0;
+}
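+
+/* E.g. for a tail_call at insn t the synthesized jump table above is
+ * {t + 1, exit_idx}: the fall-through edge for the failure case and the
+ * subprog exit for the successful tail call.
+ */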
+
+/* Visits the instruction at index t and returns one of the following:
+ * < 0 - an error occurred
+ * DONE_EXPLORING - the instruction was fully explored
+ * KEEP_EXPLORING - there is still work to be done before it is fully explored
+ */
+static int visit_insn(int t, struct bpf_verifier_env *env)
+{
+ struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
+ int ret, off, insn_sz;
+
+ if (bpf_pseudo_func(insn))
+ return visit_func_call_insn(t, insns, env, true);
+
+ /* All non-branch instructions have a single fall-through edge. */
+ if (BPF_CLASS(insn->code) != BPF_JMP &&
+ BPF_CLASS(insn->code) != BPF_JMP32) {
+ if (BPF_CLASS(insn->code) == BPF_LD &&
+ (BPF_MODE(insn->code) == BPF_ABS ||
+ BPF_MODE(insn->code) == BPF_IND)) {
+ ret = visit_abnormal_return_insn(env, t);
+ if (ret)
+ return ret;
+ }
+ insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
+ return push_insn(t, t + insn_sz, FALLTHROUGH, env);
+ }
+
+ switch (BPF_OP(insn->code)) {
+ case BPF_EXIT:
+ return DONE_EXPLORING;
+
+ case BPF_CALL:
+ if (bpf_is_async_callback_calling_insn(insn))
+ /* Mark this call insn as a prune point to trigger
+ * is_state_visited() check before call itself is
+ * processed by __check_func_call(). Otherwise new
+ * async state will be pushed for further exploration.
+ */
+ mark_prune_point(env, t);
+ /* For functions that invoke callbacks it is not known how many
+ * times the callback would be called. The verifier models
+ * callback-calling functions by repeatedly visiting callback
+ * bodies and returning to the origin call instruction.
+ * In order to stop such iteration the verifier needs to identify
+ * when a state identical to some state from a previous iteration
+ * is reached. The check below forces creation of a checkpoint
+ * before the callback-calling instruction to allow search for
+ * such identical states.
+ */
+ if (bpf_is_sync_callback_calling_insn(insn)) {
+ mark_calls_callback(env, t);
+ mark_force_checkpoint(env, t);
+ mark_prune_point(env, t);
+ mark_jmp_point(env, t);
+ }
+ if (bpf_helper_call(insn)) {
+ const struct bpf_func_proto *fp;
+
+ ret = bpf_get_helper_proto(env, insn->imm, &fp);
+ /* If called in a non-sleepable context, the program will
+ * be rejected anyway, so we should end up with precise
+ * sleepable marks on subprogs, except for dead code
+ * elimination.
+ */
+ if (ret == 0 && fp->might_sleep)
+ mark_subprog_might_sleep(env, t);
+ if (bpf_helper_changes_pkt_data(insn->imm))
+ mark_subprog_changes_pkt_data(env, t);
+ if (insn->imm == BPF_FUNC_tail_call) {
+ ret = visit_abnormal_return_insn(env, t);
+ if (ret)
+ return ret;
+ }
+ } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+ struct bpf_kfunc_call_arg_meta meta;
+
+ ret = bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
+ if (ret == 0 && bpf_is_iter_next_kfunc(&meta)) {
+ mark_prune_point(env, t);
+ /* Checking and saving state checkpoints at iter_next() call
+ * is crucial for fast convergence of open-coded iterator loop
+ * logic, so we need to force it. If we don't do that,
+ * is_state_visited() might skip saving a checkpoint, causing
+ * unnecessarily long sequence of not checkpointed
+ * instructions and jumps, leading to exhaustion of jump
+ * history buffer, and potentially other undesired outcomes.
+ * It is expected that with correct open-coded iterators
+ * convergence will happen quickly, so we don't run a risk of
+ * exhausting memory.
+ */
+ mark_force_checkpoint(env, t);
+ }
+ /* Same as for helpers: if called in a non-sleepable context,
+ * the program will be rejected anyway, so we should end up
+ * with precise sleepable marks on subprogs, except for
+ * dead code elimination.
+ */
+ if (ret == 0 && bpf_is_kfunc_sleepable(&meta))
+ mark_subprog_might_sleep(env, t);
+ if (ret == 0 && bpf_is_kfunc_pkt_changing(&meta))
+ mark_subprog_changes_pkt_data(env, t);
+ }
+ return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
+
+ case BPF_JA:
+ if (BPF_SRC(insn->code) == BPF_X)
+ return visit_gotox_insn(t, env);
+
+ if (BPF_CLASS(insn->code) == BPF_JMP)
+ off = insn->off;
+ else
+ off = insn->imm;
+
+ /* unconditional jump with single edge */
+ ret = push_insn(t, t + off + 1, FALLTHROUGH, env);
+ if (ret)
+ return ret;
+
+ mark_prune_point(env, t + off + 1);
+ mark_jmp_point(env, t + off + 1);
+
+ return ret;
+
+ default:
+ /* conditional jump with two edges */
+ mark_prune_point(env, t);
+ if (bpf_is_may_goto_insn(insn))
+ mark_force_checkpoint(env, t);
+
+ ret = push_insn(t, t + 1, FALLTHROUGH, env);
+ if (ret)
+ return ret;
+
+ return push_insn(t, t + insn->off + 1, BRANCH, env);
+ }
+}
+
+/* non-recursive depth-first-search to detect loops in BPF program
+ * loop == back-edge in directed graph
+ */
+int bpf_check_cfg(struct bpf_verifier_env *env)
+{
+ int insn_cnt = env->prog->len;
+ int *insn_stack, *insn_state;
+ int ex_insn_beg, i, ret = 0;
+
+ insn_state = env->cfg.insn_state = kvzalloc_objs(int, insn_cnt,
+ GFP_KERNEL_ACCOUNT);
+ if (!insn_state)
+ return -ENOMEM;
+
+ insn_stack = env->cfg.insn_stack = kvzalloc_objs(int, insn_cnt,
+ GFP_KERNEL_ACCOUNT);
+ if (!insn_stack) {
+ kvfree(insn_state);
+ return -ENOMEM;
+ }
+
+ ex_insn_beg = env->exception_callback_subprog
+ ? env->subprog_info[env->exception_callback_subprog].start
+ : 0;
+
+ insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
+ insn_stack[0] = 0; /* 0 is the first instruction */
+ env->cfg.cur_stack = 1;
+
+walk_cfg:
+ while (env->cfg.cur_stack > 0) {
+ int t = insn_stack[env->cfg.cur_stack - 1];
+
+ ret = visit_insn(t, env);
+ switch (ret) {
+ case DONE_EXPLORING:
+ insn_state[t] = EXPLORED;
+ env->cfg.cur_stack--;
+ break;
+ case KEEP_EXPLORING:
+ break;
+ default:
+ if (ret > 0) {
+ verifier_bug(env, "visit_insn internal bug");
+ ret = -EFAULT;
+ }
+ goto err_free;
+ }
+ }
+
+ if (env->cfg.cur_stack < 0) {
+ verifier_bug(env, "pop stack internal bug");
+ ret = -EFAULT;
+ goto err_free;
+ }
+
+ if (ex_insn_beg && insn_state[ex_insn_beg] != EXPLORED) {
+ insn_state[ex_insn_beg] = DISCOVERED;
+ insn_stack[0] = ex_insn_beg;
+ env->cfg.cur_stack = 1;
+ goto walk_cfg;
+ }
+
+ for (i = 0; i < insn_cnt; i++) {
+ struct bpf_insn *insn = &env->prog->insnsi[i];
+
+ if (insn_state[i] != EXPLORED) {
+ verbose(env, "unreachable insn %d\n", i);
+ ret = -EINVAL;
+ goto err_free;
+ }
+ if (bpf_is_ldimm64(insn)) {
+ if (insn_state[i + 1] != 0) {
+ verbose(env, "jump into the middle of ldimm64 insn %d\n", i);
+ ret = -EINVAL;
+ goto err_free;
+ }
+ i++; /* skip second half of ldimm64 */
+ }
+ }
+ ret = 0; /* cfg looks good */
+ env->prog->aux->changes_pkt_data = env->subprog_info[0].changes_pkt_data;
+ env->prog->aux->might_sleep = env->subprog_info[0].might_sleep;
+
+err_free:
+ kvfree(insn_state);
+ kvfree(insn_stack);
+ env->cfg.insn_state = env->cfg.insn_stack = NULL;
+ return ret;
+}
+
+/*
+ * For each subprogram 'i' fill the env->cfg.insn_postorder sub-range
+ * [env->subprog_info[i].postorder_start, env->subprog_info[i+1].postorder_start)
+ * with the indices of subprogram 'i' instructions in postorder.
+ */
+int bpf_compute_postorder(struct bpf_verifier_env *env)
+{
+ u32 cur_postorder, i, top, stack_sz, s;
+ int *stack = NULL, *postorder = NULL, *state = NULL;
+ struct bpf_iarray *succ;
+
+ postorder = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT);
+ state = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT);
+ stack = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT);
+ if (!postorder || !state || !stack) {
+ kvfree(postorder);
+ kvfree(state);
+ kvfree(stack);
+ return -ENOMEM;
+ }
+ cur_postorder = 0;
+ for (i = 0; i < env->subprog_cnt; i++) {
+ env->subprog_info[i].postorder_start = cur_postorder;
+ stack[0] = env->subprog_info[i].start;
+ stack_sz = 1;
+ do {
+ top = stack[stack_sz - 1];
+ state[top] |= DISCOVERED;
+ if (state[top] & EXPLORED) {
+ postorder[cur_postorder++] = top;
+ stack_sz--;
+ continue;
+ }
+ succ = bpf_insn_successors(env, top);
+ for (s = 0; s < succ->cnt; ++s) {
+ if (!state[succ->items[s]]) {
+ stack[stack_sz++] = succ->items[s];
+ state[succ->items[s]] |= DISCOVERED;
+ }
+ }
+ state[top] |= EXPLORED;
+ } while (stack_sz);
+ }
+ env->subprog_info[i].postorder_start = cur_postorder;
+ env->cfg.insn_postorder = postorder;
+ env->cfg.cur_postorder = cur_postorder;
+ kvfree(stack);
+ kvfree(state);
+ return 0;
+}
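+
+/* For a straight-line subprog with insns {0, 1, 2} the emitted order is
+ * {2, 1, 0}; passes that need reverse postorder iterate
+ * env->cfg.insn_postorder backwards.
+ */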
+
+/*
+ * Compute strongly connected components (SCCs) on the CFG.
+ * Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc.
+ * If instruction is a sole member of its SCC and there are no self edges,
+ * assign it SCC number of zero.
+ * Uses a non-recursive adaptation of Tarjan's algorithm for SCC computation.
+ */
+int bpf_compute_scc(struct bpf_verifier_env *env)
+{
+ const u32 NOT_ON_STACK = U32_MAX;
+
+ struct bpf_insn_aux_data *aux = env->insn_aux_data;
+ const u32 insn_cnt = env->prog->len;
+ int stack_sz, dfs_sz, err = 0;
+ u32 *stack, *pre, *low, *dfs;
+ u32 i, j, t, w;
+ u32 next_preorder_num;
+ u32 next_scc_id;
+ bool assign_scc;
+ struct bpf_iarray *succ;
+
+ next_preorder_num = 1;
+ next_scc_id = 1;
+ /*
+ * - 'stack' accumulates vertices in DFS order, see invariant comment below;
+ * - 'pre[t] == p' => preorder number of vertex 't' is 'p';
+ * - 'low[t] == n' => smallest preorder number of the vertex reachable from 't' is 'n';
+ * - 'dfs' DFS traversal stack, used to emulate explicit recursion.
+ */
+ stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
+ pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
+ low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
+ dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL_ACCOUNT);
+ if (!stack || !pre || !low || !dfs) {
+ err = -ENOMEM;
+ goto exit;
+ }
+ /*
+ * References:
+ * [1] R. Tarjan "Depth-First Search and Linear Graph Algorithms"
+ * [2] D. J. Pearce "A Space-Efficient Algorithm for Finding Strongly Connected Components"
+ *
+ * The algorithm maintains the following invariant:
+ * - suppose there is a path 'u' ~> 'v', such that 'pre[v] < pre[u]';
+ * - then, vertex 'u' remains on stack while vertex 'v' is on stack.
+ *
+ * Consequently:
+ * - If 'low[v] < pre[v]', there is a path from 'v' to some vertex 'u',
+ * such that 'pre[u] == low[v]'; vertex 'u' is currently on the stack,
+ * and thus there is an SCC (loop) containing both 'u' and 'v'.
+ * - If 'low[v] == pre[v]', loops containing 'v' have been explored,
+ * and 'v' can be considered the root of some SCC.
+ *
+ * Here is a pseudo-code for an explicitly recursive version of the algorithm:
+ *
+ * NOT_ON_STACK = insn_cnt + 1
+ * pre = [0] * insn_cnt
+ * low = [0] * insn_cnt
+ * scc = [0] * insn_cnt
+ * stack = []
+ *
+ * next_preorder_num = 1
+ * next_scc_id = 1
+ *
+ * def recur(w):
+ * nonlocal next_preorder_num
+ * nonlocal next_scc_id
+ *
+ * pre[w] = next_preorder_num
+ * low[w] = next_preorder_num
+ * next_preorder_num += 1
+ * stack.append(w)
+ * for s in successors(w):
+ * # Note: for classic algorithm the block below should look as:
+ * #
+ * # if pre[s] == 0:
+ * # recur(s)
+ * # low[w] = min(low[w], low[s])
+ * # elif low[s] != NOT_ON_STACK:
+ * # low[w] = min(low[w], pre[s])
+ * #
+ * # But replacing both 'min' instructions with 'low[w] = min(low[w], low[s])'
+ * # does not break the invariant and makes iterative version of the algorithm
+ * # simpler. See 'Algorithm #3' from [2].
+ *
+ * # 's' not yet visited
+ * if pre[s] == 0:
+ * recur(s)
+ * # if 's' is on stack, pick lowest reachable preorder number from it;
+ * # if 's' is not on stack 'low[s] == NOT_ON_STACK > low[w]',
+ * # so 'min' would be a noop.
+ * low[w] = min(low[w], low[s])
+ *
+ * if low[w] == pre[w]:
+ * # 'w' is the root of an SCC, pop all vertices
+ * # below 'w' on stack and assign same SCC to them.
+ * while True:
+ * t = stack.pop()
+ * low[t] = NOT_ON_STACK
+ * scc[t] = next_scc_id
+ * if t == w:
+ * break
+ * next_scc_id += 1
+ *
+ * for i in range(0, insn_cnt):
+ * if pre[i] == 0:
+ * recur(i)
+ *
+ * Below implementation replaces explicit recursion with array 'dfs'.
+ */
+ for (i = 0; i < insn_cnt; i++) {
+ if (pre[i])
+ continue;
+ stack_sz = 0;
+ dfs_sz = 1;
+ dfs[0] = i;
+dfs_continue:
+ while (dfs_sz) {
+ w = dfs[dfs_sz - 1];
+ if (pre[w] == 0) {
+ low[w] = next_preorder_num;
+ pre[w] = next_preorder_num;
+ next_preorder_num++;
+ stack[stack_sz++] = w;
+ }
+ /* Visit 'w' successors */
+ succ = bpf_insn_successors(env, w);
+ for (j = 0; j < succ->cnt; ++j) {
+ if (pre[succ->items[j]]) {
+ low[w] = min(low[w], low[succ->items[j]]);
+ } else {
+ dfs[dfs_sz++] = succ->items[j];
+ goto dfs_continue;
+ }
+ }
+ /*
+ * Preserve the invariant: if some vertex above in the stack
+ * is reachable from 'w', keep 'w' on the stack.
+ */
+ if (low[w] < pre[w]) {
+ dfs_sz--;
+ goto dfs_continue;
+ }
+ /*
+ * Assign SCC number only if component has two or more elements,
+ * or if component has a self reference, or if instruction is a
+ * callback calling function (implicit loop).
+ */
+ assign_scc = stack[stack_sz - 1] != w; /* two or more elements? */
+ for (j = 0; j < succ->cnt; ++j) { /* self reference? */
+ if (succ->items[j] == w) {
+ assign_scc = true;
+ break;
+ }
+ }
+ if (bpf_calls_callback(env, w)) /* implicit loop? */
+ assign_scc = true;
+ /* Pop component elements from stack */
+ do {
+ t = stack[--stack_sz];
+ low[t] = NOT_ON_STACK;
+ if (assign_scc)
+ aux[t].scc = next_scc_id;
+ } while (t != w);
+ if (assign_scc)
+ next_scc_id++;
+ dfs_sz--;
+ }
+ }
+ env->scc_info = kvzalloc_objs(*env->scc_info, next_scc_id,
+ GFP_KERNEL_ACCOUNT);
+ if (!env->scc_info) {
+ err = -ENOMEM;
+ goto exit;
+ }
+ env->scc_cnt = next_scc_id;
+exit:
+ kvfree(stack);
+ kvfree(pre);
+ kvfree(low);
+ kvfree(dfs);
+ return err;
+}
diff --git a/kernel/bpf/check_btf.c b/kernel/bpf/check_btf.c
new file mode 100644
index 000000000000..93bebe6fe12e
--- /dev/null
+++ b/kernel/bpf/check_btf.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/filter.h>
+#include <linux/btf.h>
+
+#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
+
+static int check_abnormal_return(struct bpf_verifier_env *env)
+{
+ int i;
+
+ for (i = 1; i < env->subprog_cnt; i++) {
+ if (env->subprog_info[i].has_ld_abs) {
+ verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
+ return -EINVAL;
+ }
+ if (env->subprog_info[i].has_tail_call) {
+ verbose(env, "tail_call is not allowed in subprogs without BTF\n");
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+/* The minimum supported BTF func info size */
+#define MIN_BPF_FUNCINFO_SIZE 8
+#define MAX_FUNCINFO_REC_SIZE 252
+
+static int check_btf_func_early(struct bpf_verifier_env *env,
+ const union bpf_attr *attr,
+ bpfptr_t uattr)
+{
+ u32 krec_size = sizeof(struct bpf_func_info);
+ const struct btf_type *type, *func_proto;
+ u32 i, nfuncs, urec_size, min_size;
+ struct bpf_func_info *krecord;
+ struct bpf_prog *prog;
+ const struct btf *btf;
+ u32 prev_offset = 0;
+ bpfptr_t urecord;
+ int ret = -ENOMEM;
+
+ nfuncs = attr->func_info_cnt;
+ if (!nfuncs) {
+ if (check_abnormal_return(env))
+ return -EINVAL;
+ return 0;
+ }
+
+ urec_size = attr->func_info_rec_size;
+ if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
+ urec_size > MAX_FUNCINFO_REC_SIZE ||
+ urec_size % sizeof(u32)) {
+ verbose(env, "invalid func info rec size %u\n", urec_size);
+ return -EINVAL;
+ }
+
+ prog = env->prog;
+ btf = prog->aux->btf;
+
+ urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
+ min_size = min_t(u32, krec_size, urec_size);
+
+ krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+ if (!krecord)
+ return -ENOMEM;
+
+ for (i = 0; i < nfuncs; i++) {
+ ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
+ if (ret) {
+ if (ret == -E2BIG) {
+ verbose(env, "nonzero tailing record in func info");
+ /* set the size kernel expects so loader can zero
+ * out the rest of the record.
+ */
+ if (copy_to_bpfptr_offset(uattr,
+ offsetof(union bpf_attr, func_info_rec_size),
+ &min_size, sizeof(min_size)))
+ ret = -EFAULT;
+ }
+ goto err_free;
+ }
+
+ if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
+ ret = -EFAULT;
+ goto err_free;
+ }
+
+ /* check insn_off */
+ ret = -EINVAL;
+ if (i == 0) {
+ if (krecord[i].insn_off) {
+ verbose(env,
+ "nonzero insn_off %u for the first func info record",
+ krecord[i].insn_off);
+ goto err_free;
+ }
+ } else if (krecord[i].insn_off <= prev_offset) {
+ verbose(env,
+ "same or smaller insn offset (%u) than previous func info record (%u)",
+ krecord[i].insn_off, prev_offset);
+ goto err_free;
+ }
+
+ /* check type_id */
+ type = btf_type_by_id(btf, krecord[i].type_id);
+ if (!type || !btf_type_is_func(type)) {
+ verbose(env, "invalid type id %d in func info",
+ krecord[i].type_id);
+ goto err_free;
+ }
+
+ func_proto = btf_type_by_id(btf, type->type);
+ if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
+ /* btf_func_check() already verified it during BTF load */
+ goto err_free;
+
+ prev_offset = krecord[i].insn_off;
+ bpfptr_add(&urecord, urec_size);
+ }
+
+ prog->aux->func_info = krecord;
+ prog->aux->func_info_cnt = nfuncs;
+ return 0;
+
+err_free:
+ kvfree(krecord);
+ return ret;
+}
+
+static int check_btf_func(struct bpf_verifier_env *env,
+ const union bpf_attr *attr,
+ bpfptr_t uattr)
+{
+ const struct btf_type *type, *func_proto, *ret_type;
+ u32 i, nfuncs, urec_size;
+ struct bpf_func_info *krecord;
+ struct bpf_func_info_aux *info_aux = NULL;
+ struct bpf_prog *prog;
+ const struct btf *btf;
+ bpfptr_t urecord;
+ bool scalar_return;
+ int ret = -ENOMEM;
+
+ nfuncs = attr->func_info_cnt;
+ if (!nfuncs) {
+ if (check_abnormal_return(env))
+ return -EINVAL;
+ return 0;
+ }
+ if (nfuncs != env->subprog_cnt) {
+ verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
+ return -EINVAL;
+ }
+
+ urec_size = attr->func_info_rec_size;
+
+ prog = env->prog;
+ btf = prog->aux->btf;
+
+ urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
+
+ krecord = prog->aux->func_info;
+ info_aux = kzalloc_objs(*info_aux, nfuncs,
+ GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+ if (!info_aux)
+ return -ENOMEM;
+
+ for (i = 0; i < nfuncs; i++) {
+ /* check insn_off */
+ ret = -EINVAL;
+
+ if (env->subprog_info[i].start != krecord[i].insn_off) {
+ verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
+ goto err_free;
+ }
+
+ /* Already checked type_id */
+ type = btf_type_by_id(btf, krecord[i].type_id);
+ info_aux[i].linkage = BTF_INFO_VLEN(type->info);
+ /* Already checked func_proto */
+ func_proto = btf_type_by_id(btf, type->type);
+
+ ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
+ scalar_return =
+ btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
+ if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
+ verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
+ goto err_free;
+ }
+ if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
+ verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
+ goto err_free;
+ }
+
+ env->subprog_info[i].name = btf_name_by_offset(btf, type->name_off);
+ bpfptr_add(&urecord, urec_size);
+ }
+
+ prog->aux->func_info_aux = info_aux;
+ return 0;
+
+err_free:
+ kfree(info_aux);
+ return ret;
+}
+
+#define MIN_BPF_LINEINFO_SIZE offsetofend(struct bpf_line_info, line_col)
+#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
+
+static int check_btf_line(struct bpf_verifier_env *env,
+ const union bpf_attr *attr,
+ bpfptr_t uattr)
+{
+ u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
+ struct bpf_subprog_info *sub;
+ struct bpf_line_info *linfo;
+ struct bpf_prog *prog;
+ const struct btf *btf;
+ bpfptr_t ulinfo;
+ int err;
+
+ nr_linfo = attr->line_info_cnt;
+ if (!nr_linfo)
+ return 0;
+ if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
+ return -EINVAL;
+
+ rec_size = attr->line_info_rec_size;
+ if (rec_size < MIN_BPF_LINEINFO_SIZE ||
+ rec_size > MAX_LINEINFO_REC_SIZE ||
+ rec_size & (sizeof(u32) - 1))
+ return -EINVAL;
+
+ /* Need to zero it in case userspace passes in
+ * a smaller bpf_line_info object.
+ */
+ linfo = kvzalloc_objs(struct bpf_line_info, nr_linfo,
+ GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+ if (!linfo)
+ return -ENOMEM;
+
+ prog = env->prog;
+ btf = prog->aux->btf;
+
+ s = 0;
+ sub = env->subprog_info;
+ ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
+ expected_size = sizeof(struct bpf_line_info);
+ ncopy = min_t(u32, expected_size, rec_size);
+ for (i = 0; i < nr_linfo; i++) {
+ err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
+ if (err) {
+ if (err == -E2BIG) {
+ verbose(env, "nonzero tailing record in line_info");
+ if (copy_to_bpfptr_offset(uattr,
+ offsetof(union bpf_attr, line_info_rec_size),
+ &expected_size, sizeof(expected_size)))
+ err = -EFAULT;
+ }
+ goto err_free;
+ }
+
+ if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
+ err = -EFAULT;
+ goto err_free;
+ }
+
+ /*
+ * Check insn_off to ensure
+ * 1) strictly increasing AND
+ * 2) bounded by prog->len
+ *
+ * The linfo[0].insn_off == 0 check logically falls into
+ * the later "missing bpf_line_info for func..." case
+ * because linfo[0].insn_off must match the start of the
+ * first subprog, and the first subprog must have
+ * subprog_info[0].start == 0.
+ */
+ if ((i && linfo[i].insn_off <= prev_offset) ||
+ linfo[i].insn_off >= prog->len) {
+ verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
+ i, linfo[i].insn_off, prev_offset,
+ prog->len);
+ err = -EINVAL;
+ goto err_free;
+ }
+
+ if (!prog->insnsi[linfo[i].insn_off].code) {
+ verbose(env,
+ "Invalid insn code at line_info[%u].insn_off\n",
+ i);
+ err = -EINVAL;
+ goto err_free;
+ }
+
+ if (!btf_name_by_offset(btf, linfo[i].line_off) ||
+ !btf_name_by_offset(btf, linfo[i].file_name_off)) {
+ verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
+ err = -EINVAL;
+ goto err_free;
+ }
+
+ if (s != env->subprog_cnt) {
+ if (linfo[i].insn_off == sub[s].start) {
+ sub[s].linfo_idx = i;
+ s++;
+ } else if (sub[s].start < linfo[i].insn_off) {
+ verbose(env, "missing bpf_line_info for func#%u\n", s);
+ err = -EINVAL;
+ goto err_free;
+ }
+ }
+
+ prev_offset = linfo[i].insn_off;
+ bpfptr_add(&ulinfo, rec_size);
+ }
+
+ if (s != env->subprog_cnt) {
+ verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
+ env->subprog_cnt - s, s);
+ err = -EINVAL;
+ goto err_free;
+ }
+
+ prog->aux->linfo = linfo;
+ prog->aux->nr_linfo = nr_linfo;
+
+ return 0;
+
+err_free:
+ kvfree(linfo);
+ return err;
+}
+
+#define MIN_CORE_RELO_SIZE sizeof(struct bpf_core_relo)
+#define MAX_CORE_RELO_SIZE MAX_FUNCINFO_REC_SIZE
+
+static int check_core_relo(struct bpf_verifier_env *env,
+ const union bpf_attr *attr,
+ bpfptr_t uattr)
+{
+ u32 i, nr_core_relo, ncopy, expected_size, rec_size;
+ struct bpf_core_relo core_relo = {};
+ struct bpf_prog *prog = env->prog;
+ const struct btf *btf = prog->aux->btf;
+ struct bpf_core_ctx ctx = {
+ .log = &env->log,
+ .btf = btf,
+ };
+ bpfptr_t u_core_relo;
+ int err;
+
+ nr_core_relo = attr->core_relo_cnt;
+ if (!nr_core_relo)
+ return 0;
+ if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
+ return -EINVAL;
+
+ rec_size = attr->core_relo_rec_size;
+ if (rec_size < MIN_CORE_RELO_SIZE ||
+ rec_size > MAX_CORE_RELO_SIZE ||
+ rec_size % sizeof(u32))
+ return -EINVAL;
+
+ u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
+ expected_size = sizeof(struct bpf_core_relo);
+ ncopy = min_t(u32, expected_size, rec_size);
+
+ /* Unlike func_info and line_info, copy and apply each CO-RE
+ * relocation record one at a time.
+ */
+ for (i = 0; i < nr_core_relo; i++) {
+ /* future proofing when sizeof(bpf_core_relo) changes */
+ err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
+ if (err) {
+ if (err == -E2BIG) {
+ verbose(env, "nonzero tailing record in core_relo");
+ if (copy_to_bpfptr_offset(uattr,
+ offsetof(union bpf_attr, core_relo_rec_size),
+ &expected_size, sizeof(expected_size)))
+ err = -EFAULT;
+ }
+ break;
+ }
+
+ if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
+ err = -EFAULT;
+ break;
+ }
+
+ if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
+ verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
+ i, core_relo.insn_off, prog->len);
+ err = -EINVAL;
+ break;
+ }
+
+ err = bpf_core_apply(&ctx, &core_relo, i,
+ &prog->insnsi[core_relo.insn_off / 8]);
+ if (err)
+ break;
+ bpfptr_add(&u_core_relo, rec_size);
+ }
+ return err;
+}
+
+int bpf_check_btf_info_early(struct bpf_verifier_env *env,
+ const union bpf_attr *attr,
+ bpfptr_t uattr)
+{
+ struct btf *btf;
+ int err;
+
+ if (!attr->func_info_cnt && !attr->line_info_cnt) {
+ if (check_abnormal_return(env))
+ return -EINVAL;
+ return 0;
+ }
+
+ btf = btf_get_by_fd(attr->prog_btf_fd);
+ if (IS_ERR(btf))
+ return PTR_ERR(btf);
+ if (btf_is_kernel(btf)) {
+ btf_put(btf);
+ return -EACCES;
+ }
+ env->prog->aux->btf = btf;
+
+ err = check_btf_func_early(env, attr, uattr);
+ if (err)
+ return err;
+ return 0;
+}
+
+int bpf_check_btf_info(struct bpf_verifier_env *env,
+ const union bpf_attr *attr,
+ bpfptr_t uattr)
+{
+ int err;
+
+ if (!attr->func_info_cnt && !attr->line_info_cnt) {
+ if (check_abnormal_return(env))
+ return -EINVAL;
+ return 0;
+ }
+
+ err = check_btf_func(env, attr, uattr);
+ if (err)
+ return err;
+
+ err = check_btf_line(env, attr, uattr);
+ if (err)
+ return err;
+
+ err = check_core_relo(env, attr, uattr);
+ if (err)
+ return err;
+
+ return 0;
+}
diff --git a/kernel/bpf/const_fold.c b/kernel/bpf/const_fold.c
new file mode 100644
index 000000000000..db73c4740b1e
--- /dev/null
+++ b/kernel/bpf/const_fold.c
@@ -0,0 +1,396 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf_verifier.h>
+
+/*
+ * Forward dataflow analysis to determine constant register values at every
+ * instruction. Tracks 64-bit constant values in R0-R9 through the program,
+ * using a fixed-point iteration in reverse postorder. Records which registers
+ * hold known constants and their values in
+ * env->insn_aux_data[].{const_reg_mask, const_reg_vals}.
+ */
+
+enum const_arg_state {
+ CONST_ARG_UNVISITED, /* instruction not yet reached */
+ CONST_ARG_UNKNOWN, /* register value not a known constant */
+ CONST_ARG_CONST, /* register holds a known 64-bit constant */
+ CONST_ARG_MAP_PTR, /* register holds a map pointer, map_index is set */
+ CONST_ARG_MAP_VALUE, /* register points to map value data, val is offset */
+ CONST_ARG_SUBPROG, /* register holds a subprog pointer, val is subprog number */
+};
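+
+/* Example: after "r1 = 5" the tracked state of r1 is CONST_ARG_CONST
+ * with val == 5; after loading a map value pointer and "r1 += 8" it is
+ * CONST_ARG_MAP_VALUE with val == map_off + 8.
+ */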
+
+struct const_arg_info {
+ enum const_arg_state state;
+ u32 map_index;
+ u64 val;
+};
+
+static bool ci_is_unvisited(const struct const_arg_info *ci)
+{
+ return ci->state == CONST_ARG_UNVISITED;
+}
+
+static bool ci_is_unknown(const struct const_arg_info *ci)
+{
+ return ci->state == CONST_ARG_UNKNOWN;
+}
+
+static bool ci_is_const(const struct const_arg_info *ci)
+{
+ return ci->state == CONST_ARG_CONST;
+}
+
+static bool ci_is_map_value(const struct const_arg_info *ci)
+{
+ return ci->state == CONST_ARG_MAP_VALUE;
+}
+
+/* Transfer function: compute output register state from instruction. */
+static void const_reg_xfer(struct bpf_verifier_env *env, struct const_arg_info *ci_out,
+ struct bpf_insn *insn, struct bpf_insn *insns, int idx)
+{
+ struct const_arg_info unknown = { .state = CONST_ARG_UNKNOWN, .val = 0 };
+ struct const_arg_info *dst = &ci_out[insn->dst_reg];
+ struct const_arg_info *src = &ci_out[insn->src_reg];
+ u8 class = BPF_CLASS(insn->code);
+ u8 mode = BPF_MODE(insn->code);
+ u8 opcode = BPF_OP(insn->code) | BPF_SRC(insn->code);
+ int r;
+
+ switch (class) {
+ case BPF_ALU:
+ case BPF_ALU64:
+ switch (opcode) {
+ case BPF_MOV | BPF_K:
+ dst->state = CONST_ARG_CONST;
+ dst->val = (s64)insn->imm;
+ break;
+ case BPF_MOV | BPF_X:
+ *dst = *src;
+ if (!insn->off)
+ break;
+ if (!ci_is_const(dst)) {
+ *dst = unknown;
+ break;
+ }
+ switch (insn->off) {
+ case 8: dst->val = (s8)dst->val; break;
+ case 16: dst->val = (s16)dst->val; break;
+ case 32: dst->val = (s32)dst->val; break;
+ default: *dst = unknown; break;
+ }
+ break;
+ case BPF_ADD | BPF_K:
+ if (!ci_is_const(dst) && !ci_is_map_value(dst)) {
+ *dst = unknown;
+ break;
+ }
+ dst->val += insn->imm;
+ break;
+ case BPF_SUB | BPF_K:
+ if (!ci_is_const(dst) && !ci_is_map_value(dst)) {
+ *dst = unknown;
+ break;
+ }
+ dst->val -= insn->imm;
+ break;
+ case BPF_AND | BPF_K:
+ if (!ci_is_const(dst)) {
+ if (!insn->imm) {
+ dst->state = CONST_ARG_CONST;
+ dst->val = 0;
+ } else {
+ *dst = unknown;
+ }
+ break;
+ }
+ dst->val &= (s64)insn->imm;
+ break;
+ case BPF_AND | BPF_X:
+ if (ci_is_const(dst) && dst->val == 0)
+ break; /* 0 & x == 0 */
+ if (ci_is_const(src) && src->val == 0) {
+ dst->state = CONST_ARG_CONST;
+ dst->val = 0;
+ break;
+ }
+ if (!ci_is_const(dst) || !ci_is_const(src)) {
+ *dst = unknown;
+ break;
+ }
+ dst->val &= src->val;
+ break;
+ default:
+ *dst = unknown;
+ break;
+ }
+ if (class == BPF_ALU) {
+ if (ci_is_const(dst))
+ dst->val = (u32)dst->val;
+ else if (!ci_is_unknown(dst))
+ *dst = unknown;
+ }
+ break;
+ case BPF_LD:
+ if (mode == BPF_ABS || mode == BPF_IND)
+ goto process_call;
+ if (mode != BPF_IMM || BPF_SIZE(insn->code) != BPF_DW)
+ break;
+ if (insn->src_reg == BPF_PSEUDO_FUNC) {
+ int subprog = bpf_find_subprog(env, idx + insn->imm + 1);
+
+ if (subprog >= 0) {
+ dst->state = CONST_ARG_SUBPROG;
+ dst->val = subprog;
+ } else {
+ *dst = unknown;
+ }
+ } else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
+ insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
+ dst->state = CONST_ARG_MAP_VALUE;
+ dst->map_index = env->insn_aux_data[idx].map_index;
+ dst->val = env->insn_aux_data[idx].map_off;
+ } else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
+ insn->src_reg == BPF_PSEUDO_MAP_IDX) {
+ dst->state = CONST_ARG_MAP_PTR;
+ dst->map_index = env->insn_aux_data[idx].map_index;
+ } else if (insn->src_reg == 0) {
+ dst->state = CONST_ARG_CONST;
+ dst->val = (u64)(u32)insn->imm | ((u64)(u32)insns[idx + 1].imm << 32);
+ } else {
+ *dst = unknown;
+ }
+ break;
+ case BPF_LDX:
+ if (!ci_is_map_value(src)) {
+ *dst = unknown;
+ break;
+ }
+ struct bpf_map *map = env->used_maps[src->map_index];
+ int size = bpf_size_to_bytes(BPF_SIZE(insn->code));
+ bool is_ldsx = mode == BPF_MEMSX;
+ int off = src->val + insn->off;
+ u64 val = 0;
+
+ if (!bpf_map_is_rdonly(map) || !map->ops->map_direct_value_addr ||
+ map->map_type == BPF_MAP_TYPE_INSN_ARRAY ||
+ off < 0 || off + size > map->value_size ||
+ bpf_map_direct_read(map, off, size, &val, is_ldsx)) {
+ *dst = unknown;
+ break;
+ }
+ dst->state = CONST_ARG_CONST;
+ dst->val = val;
+ break;
+ case BPF_JMP:
+ if (opcode != BPF_CALL)
+ break;
+process_call:
+ for (r = BPF_REG_0; r <= BPF_REG_5; r++)
+ ci_out[r] = unknown;
+ break;
+ case BPF_STX:
+ if (mode != BPF_ATOMIC)
+ break;
+ if (insn->imm == BPF_CMPXCHG)
+ ci_out[BPF_REG_0] = unknown;
+ else if (insn->imm == BPF_LOAD_ACQ)
+ *dst = unknown;
+ else if (insn->imm & BPF_FETCH)
+ *src = unknown;
+ break;
+ }
+}
+
+/* Join function: merge output state into a successor's input state. */
+static bool const_reg_join(struct const_arg_info *ci_target,
+ struct const_arg_info *ci_out)
+{
+ bool changed = false;
+ int r;
+
+ for (r = 0; r < MAX_BPF_REG; r++) {
+ struct const_arg_info *old = &ci_target[r];
+ struct const_arg_info *new = &ci_out[r];
+
+ if (ci_is_unvisited(old) && !ci_is_unvisited(new)) {
+ ci_target[r] = *new;
+ changed = true;
+ } else if (!ci_is_unknown(old) && !ci_is_unvisited(old) &&
+ (new->state != old->state || new->val != old->val ||
+ new->map_index != old->map_index)) {
+ old->state = CONST_ARG_UNKNOWN;
+ changed = true;
+ }
+ }
+ return changed;
+}
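+
+/* The join implements a small lattice: an UNVISITED target adopts the
+ * incoming state, and any disagreement in state, value, or map index
+ * demotes the register to CONST_ARG_UNKNOWN. E.g. merging r1 == 5 from
+ * one predecessor with r1 == 7 from another leaves r1 unknown.
+ */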
+
+int bpf_compute_const_regs(struct bpf_verifier_env *env)
+{
+ struct const_arg_info unknown = { .state = CONST_ARG_UNKNOWN, .val = 0 };
+ struct bpf_insn_aux_data *insn_aux = env->insn_aux_data;
+ struct bpf_insn *insns = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+ struct const_arg_info (*ci_in)[MAX_BPF_REG];
+ struct const_arg_info ci_out[MAX_BPF_REG];
+ struct bpf_iarray *succ;
+ bool changed;
+ int i, r;
+
+ /* kvzalloc zeroes memory, so all entries start as CONST_ARG_UNVISITED (0) */
+ ci_in = kvzalloc_objs(*ci_in, insn_cnt, GFP_KERNEL_ACCOUNT);
+ if (!ci_in)
+ return -ENOMEM;
+
+ /* Subprogram entries (including main at subprog 0): all registers unknown */
+ for (i = 0; i < env->subprog_cnt; i++) {
+ int start = env->subprog_info[i].start;
+
+ for (r = 0; r < MAX_BPF_REG; r++)
+ ci_in[start][r] = unknown;
+ }
+
+redo:
+ changed = false;
+ for (i = env->cfg.cur_postorder - 1; i >= 0; i--) {
+ int idx = env->cfg.insn_postorder[i];
+ struct bpf_insn *insn = &insns[idx];
+ struct const_arg_info *ci = ci_in[idx];
+
+ memcpy(ci_out, ci, sizeof(ci_out));
+
+ const_reg_xfer(env, ci_out, insn, insns, idx);
+
+ succ = bpf_insn_successors(env, idx);
+ for (int s = 0; s < succ->cnt; s++)
+ changed |= const_reg_join(ci_in[succ->items[s]], ci_out);
+ }
+ if (changed)
+ goto redo;
+
+ /* Save computed constants into insn_aux[] if they fit into 32 bits */
+ for (i = 0; i < insn_cnt; i++) {
+ u16 mask = 0, map_mask = 0, subprog_mask = 0;
+ struct bpf_insn_aux_data *aux = &insn_aux[i];
+ struct const_arg_info *ci = ci_in[i];
+
+ for (r = BPF_REG_0; r < ARRAY_SIZE(aux->const_reg_vals); r++) {
+ struct const_arg_info *c = &ci[r];
+
+ switch (c->state) {
+ case CONST_ARG_CONST: {
+ u64 val = c->val;
+
+ if (val != (u32)val)
+ break;
+ mask |= BIT(r);
+ aux->const_reg_vals[r] = val;
+ break;
+ }
+ case CONST_ARG_MAP_PTR:
+ map_mask |= BIT(r);
+ aux->const_reg_vals[r] = c->map_index;
+ break;
+ case CONST_ARG_SUBPROG:
+ subprog_mask |= BIT(r);
+ aux->const_reg_vals[r] = c->val;
+ break;
+ default:
+ break;
+ }
+ }
+ aux->const_reg_mask = mask;
+ aux->const_reg_map_mask = map_mask;
+ aux->const_reg_subprog_mask = subprog_mask;
+ }
+
+ kvfree(ci_in);
+ return 0;
+}
+
+static int eval_const_branch(u8 opcode, u64 dst_val, u64 src_val)
+{
+ switch (BPF_OP(opcode)) {
+ case BPF_JEQ: return dst_val == src_val;
+ case BPF_JNE: return dst_val != src_val;
+ case BPF_JGT: return dst_val > src_val;
+ case BPF_JGE: return dst_val >= src_val;
+ case BPF_JLT: return dst_val < src_val;
+ case BPF_JLE: return dst_val <= src_val;
+ case BPF_JSGT: return (s64)dst_val > (s64)src_val;
+ case BPF_JSGE: return (s64)dst_val >= (s64)src_val;
+ case BPF_JSLT: return (s64)dst_val < (s64)src_val;
+ case BPF_JSLE: return (s64)dst_val <= (s64)src_val;
+ case BPF_JSET: return (bool)(dst_val & src_val);
+ default: return -1;
+ }
+}
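+
+/* E.g. for BPF_JGE with dst_val == src_val == 3 the result is 1
+ * (branch taken); an unhandled opcode yields -1 so the caller can
+ * report it as a verifier bug.
+ */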
+
+/*
+ * Rewrite conditional branches with constant outcomes into unconditional
+ * jumps using register values resolved by bpf_compute_const_regs() pass.
+ * This eliminates dead edges from the CFG so that compute_live_registers()
+ * doesn't propagate liveness through dead code.
+ */
+int bpf_prune_dead_branches(struct bpf_verifier_env *env)
+{
+ struct bpf_insn_aux_data *insn_aux = env->insn_aux_data;
+ struct bpf_insn *insns = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+ bool changed = false;
+ int i;
+
+ for (i = 0; i < insn_cnt; i++) {
+ struct bpf_insn_aux_data *aux = &insn_aux[i];
+ struct bpf_insn *insn = &insns[i];
+ u8 class = BPF_CLASS(insn->code);
+ u64 dst_val, src_val;
+ int taken;
+
+ if (!bpf_insn_is_cond_jump(insn->code))
+ continue;
+ if (bpf_is_may_goto_insn(insn))
+ continue;
+
+ if (!(aux->const_reg_mask & BIT(insn->dst_reg)))
+ continue;
+ dst_val = aux->const_reg_vals[insn->dst_reg];
+
+ if (BPF_SRC(insn->code) == BPF_K) {
+ src_val = insn->imm;
+ } else {
+ if (!(aux->const_reg_mask & BIT(insn->src_reg)))
+ continue;
+ src_val = aux->const_reg_vals[insn->src_reg];
+ }
+
+ if (class == BPF_JMP32) {
+ /*
+ * The (s32) cast maps the 32-bit range into two u64 sub-ranges:
+ * [0x00000000, 0x7FFFFFFF] -> [0x0000000000000000, 0x000000007FFFFFFF]
+ * [0x80000000, 0xFFFFFFFF] -> [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
+ * The ordering is preserved within each sub-range, and
+ * the second sub-range is above the first as u64.
+ */
+ dst_val = (s32)dst_val;
+ src_val = (s32)src_val;
+ }
+
+ taken = eval_const_branch(insn->code, dst_val, src_val);
+ if (taken < 0) {
+ bpf_log(&env->log, "Unknown conditional jump %x\n", insn->code);
+ return -EFAULT;
+ }
+ *insn = BPF_JMP_A(taken ? insn->off : 0);
+ changed = true;
+ }
+
+ if (!changed)
+ return 0;
+ /* recompute postorder, since CFG has changed */
+ kvfree(env->cfg.insn_postorder);
+ env->cfg.insn_postorder = NULL;
+ return bpf_compute_postorder(env);
+}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7b675a451ec8..066b86e7233c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -18,7 +18,6 @@
*/
#include <uapi/linux/btf.h>
-#include <crypto/sha1.h>
#include <linux/filter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
@@ -1487,6 +1486,8 @@ void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other)
* know whether fp here is the clone or the original.
*/
fp->aux->prog = fp;
+ if (fp->aux->offload)
+ fp->aux->offload->prog = fp;
bpf_prog_clone_free(fp_other);
}
@@ -2087,12 +2088,12 @@ select_insn:
if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT))
goto out;
- tail_call_cnt++;
-
prog = READ_ONCE(array->ptrs[index]);
if (!prog)
goto out;
+ tail_call_cnt++;
+
/* ARG1 at this point is guaranteed to point to CTX from
* the verifier side due to the fact that the tail call is
* handled like a helper, that is, bpf_tail_call_proto,
@@ -2613,8 +2614,10 @@ static struct bpf_prog_dummy {
},
};
-struct bpf_empty_prog_array bpf_empty_prog_array = {
- .null_prog = NULL,
+struct bpf_prog_array bpf_empty_prog_array = {
+ .items = {
+ { .prog = NULL },
+ },
};
EXPORT_SYMBOL(bpf_empty_prog_array);
@@ -2625,14 +2628,14 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
if (prog_cnt)
p = kzalloc_flex(*p, items, prog_cnt + 1, flags);
else
- p = &bpf_empty_prog_array.hdr;
+ p = &bpf_empty_prog_array;
return p;
}
void bpf_prog_array_free(struct bpf_prog_array *progs)
{
- if (!progs || progs == &bpf_empty_prog_array.hdr)
+ if (!progs || progs == &bpf_empty_prog_array)
return;
kfree_rcu(progs, rcu);
}
@@ -2641,19 +2644,17 @@ static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu)
{
struct bpf_prog_array *progs;
- /* If RCU Tasks Trace grace period implies RCU grace period, there is
- * no need to call kfree_rcu(), just call kfree() directly.
+ /*
+ * An RCU Tasks Trace grace period implies an RCU grace period, so there
+ * is no need to call kfree_rcu(); just call kfree() directly.
*/
progs = container_of(rcu, struct bpf_prog_array, rcu);
- if (rcu_trace_implies_rcu_gp())
- kfree(progs);
- else
- kfree_rcu(progs, rcu);
+ kfree(progs);
}
void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs)
{
- if (!progs || progs == &bpf_empty_prog_array.hdr)
+ if (!progs || progs == &bpf_empty_prog_array)
return;
call_rcu_tasks_trace(&progs->rcu, __bpf_prog_array_free_sleepable_cb);
}
@@ -3314,6 +3315,63 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_bulk_tx);
#ifdef CONFIG_BPF_SYSCALL
+void bpf_get_linfo_file_line(struct btf *btf, const struct bpf_line_info *linfo,
+ const char **filep, const char **linep, int *nump)
+{
+ /* Get base component of the file path. */
+ if (filep) {
+ *filep = btf_name_by_offset(btf, linfo->file_name_off);
+ *filep = kbasename(*filep);
+ }
+
+ /* Obtain the source line, and strip whitespace in prefix. */
+ if (linep) {
+ *linep = btf_name_by_offset(btf, linfo->line_off);
+ while (isspace(**linep))
+ *linep += 1;
+ }
+
+ if (nump)
+ *nump = BPF_LINE_INFO_LINE_NUM(linfo->line_col);
+}
+
+const struct bpf_line_info *bpf_find_linfo(const struct bpf_prog *prog, u32 insn_off)
+{
+ const struct bpf_line_info *linfo;
+ u32 nr_linfo;
+ int l, r, m;
+
+ nr_linfo = prog->aux->nr_linfo;
+ if (!nr_linfo || insn_off >= prog->len)
+ return NULL;
+
+ linfo = prog->aux->linfo;
+ /* Loop invariant: linfo[l].insn_off <= insn_off.
+ * linfo[0].insn_off == 0 which always satisfies above condition.
+ * Binary search is searching for rightmost linfo entry that satisfies
+ * the above invariant, giving us the desired record that covers given
+ * instruction offset.
+ */
+ l = 0;
+ r = nr_linfo - 1;
+ while (l < r) {
+ /* (r - l + 1) / 2 means we break a tie to the right, so if:
+ * l=1, r=2, linfo[l].insn_off <= insn_off, linfo[r].insn_off > insn_off,
+ * then m=2, we see that linfo[m].insn_off > insn_off, and so
+ * r becomes 1 and we exit the loop with correct l==1.
+ * If the tie was broken to the left, m=1 would end us up in
+ * an endless loop where l and m stay at 1 and r stays at 2.
+ */
+ m = l + (r - l + 1) / 2;
+ if (linfo[m].insn_off <= insn_off)
+ l = m;
+ else
+ r = m - 1;
+ }
+
+ return &linfo[l];
+}
+
int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char **filep,
const char **linep, int *nump)
{
@@ -3348,14 +3406,7 @@ int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char *
if (idx == -1)
return -ENOENT;
- /* Get base component of the file path. */
- *filep = btf_name_by_offset(btf, linfo[idx].file_name_off);
- *filep = kbasename(*filep);
- /* Obtain the source line, and strip whitespace in prefix. */
- *linep = btf_name_by_offset(btf, linfo[idx].line_off);
- while (isspace(**linep))
- *linep += 1;
- *nump = BPF_LINE_INFO_LINE_NUM(linfo[idx].line_col);
+ bpf_get_linfo_file_line(btf, &linfo[idx], filep, linep, nump);
return 0;
}
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 32b43cb9061b..5e59ab896f05 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -223,7 +223,10 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
}
break;
default:
- bpf_warn_invalid_xdp_action(NULL, rcpu->prog, act);
+ bpf_warn_invalid_xdp_action(xdpf->dev_rx, rcpu->prog, act);
+ fallthrough;
+ case XDP_ABORTED:
+ trace_xdp_exception(xdpf->dev_rx, rcpu->prog, act);
fallthrough;
case XDP_DROP:
xdp_return_frame(xdpf);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 3d619d01088e..cc0a43ebab6b 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -665,7 +665,7 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
for (i = 0; i < dtab->n_buckets; i++) {
head = dev_map_index_hash(dtab, i);
hlist_for_each_entry_rcu(dst, head, index_hlist,
- lockdep_is_held(&dtab->index_lock)) {
+ rcu_read_lock_bh_held()) {
if (!is_valid_dst(dst, xdpf))
continue;
@@ -747,7 +747,6 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
struct bpf_dtab_netdev *dst, *last_dst = NULL;
int excluded_devices[1+MAX_NEST_DEV];
struct hlist_head *head;
- struct hlist_node *next;
int num_excluded = 0;
unsigned int i;
int err;
@@ -787,7 +786,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
} else { /* BPF_MAP_TYPE_DEVMAP_HASH */
for (i = 0; i < dtab->n_buckets; i++) {
head = dev_map_index_hash(dtab, i);
- hlist_for_each_entry_safe(dst, next, head, index_hlist) {
+ hlist_for_each_entry_rcu(dst, head, index_hlist, rcu_read_lock_bh_held()) {
if (is_ifindex_excluded(excluded_devices, num_excluded,
dst->dev->ifindex))
continue;
diff --git a/kernel/bpf/fixups.c b/kernel/bpf/fixups.c
new file mode 100644
index 000000000000..67c9b28767e1
--- /dev/null
+++ b/kernel/bpf/fixups.c
@@ -0,0 +1,2457 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/filter.h>
+#include <linux/vmalloc.h>
+#include <linux/bsearch.h>
+#include <linux/sort.h>
+#include <linux/perf_event.h>
+#include <net/xdp.h>
+#include "disasm.h"
+
+#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
+
+static bool is_cmpxchg_insn(const struct bpf_insn *insn)
+{
+ return BPF_CLASS(insn->code) == BPF_STX &&
+ BPF_MODE(insn->code) == BPF_ATOMIC &&
+ insn->imm == BPF_CMPXCHG;
+}
+
+/* Return the regno defined by the insn, or -1. */
+static int insn_def_regno(const struct bpf_insn *insn)
+{
+ switch (BPF_CLASS(insn->code)) {
+ case BPF_JMP:
+ case BPF_JMP32:
+ case BPF_ST:
+ return -1;
+ case BPF_STX:
+ if (BPF_MODE(insn->code) == BPF_ATOMIC ||
+ BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
+ if (insn->imm == BPF_CMPXCHG)
+ return BPF_REG_0;
+ else if (insn->imm == BPF_LOAD_ACQ)
+ return insn->dst_reg;
+ else if (insn->imm & BPF_FETCH)
+ return insn->src_reg;
+ }
+ return -1;
+ default:
+ return insn->dst_reg;
+ }
+}
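+
+/* E.g. an atomic BPF_CMPXCHG always defines BPF_REG_0, a fetching
+ * atomic such as BPF_ADD | BPF_FETCH writes the old value into src_reg,
+ * and plain stores and jumps define no register at all.
+ */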
+
+/* Return TRUE if INSN has defined any 32-bit value explicitly. */
+static bool insn_has_def32(struct bpf_insn *insn)
+{
+ int dst_reg = insn_def_regno(insn);
+
+ if (dst_reg == -1)
+ return false;
+
+ return !bpf_is_reg64(insn, dst_reg, NULL, DST_OP);
+}
+
+static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
+{
+ const struct bpf_kfunc_desc *d0 = a;
+ const struct bpf_kfunc_desc *d1 = b;
+
+ if (d0->imm != d1->imm)
+ return d0->imm < d1->imm ? -1 : 1;
+ if (d0->offset != d1->offset)
+ return d0->offset < d1->offset ? -1 : 1;
+ return 0;
+}
+
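+/* Look up the func model for a kfunc call insn. The desc table is kept
+ * sorted by (imm, offset) in sort_kfunc_descs_by_imm_off(), which is what
+ * makes this bsearch() valid.
+ */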
+const struct btf_func_model *
+bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
+ const struct bpf_insn *insn)
+{
+ const struct bpf_kfunc_desc desc = {
+ .imm = insn->imm,
+ .offset = insn->off,
+ };
+ const struct bpf_kfunc_desc *res;
+ struct bpf_kfunc_desc_tab *tab;
+
+ tab = prog->aux->kfunc_tab;
+ res = bsearch(&desc, tab->descs, tab->nr_descs,
+ sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
+
+ return res ? &res->func_model : NULL;
+}
+
+static int set_kfunc_desc_imm(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc)
+{
+ unsigned long call_imm;
+
+ if (bpf_jit_supports_far_kfunc_call()) {
+ call_imm = desc->func_id;
+ } else {
+ call_imm = BPF_CALL_IMM(desc->addr);
+ /* Check whether the relative offset overflows desc->imm */
+ if ((unsigned long)(s32)call_imm != call_imm) {
+ verbose(env, "address of kernel func_id %u is out of range\n",
+ desc->func_id);
+ return -EINVAL;
+ }
+ }
+ desc->imm = call_imm;
+ return 0;
+}
+
+static int sort_kfunc_descs_by_imm_off(struct bpf_verifier_env *env)
+{
+ struct bpf_kfunc_desc_tab *tab;
+ int i, err;
+
+ tab = env->prog->aux->kfunc_tab;
+ if (!tab)
+ return 0;
+
+ for (i = 0; i < tab->nr_descs; i++) {
+ err = set_kfunc_desc_imm(env, &tab->descs[i]);
+ if (err)
+ return err;
+ }
+
+ sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
+ kfunc_desc_cmp_by_imm_off, NULL);
+ return 0;
+}
+
+static int add_kfunc_in_insns(struct bpf_verifier_env *env,
+ struct bpf_insn *insn, int cnt)
+{
+ int i, ret;
+
+ for (i = 0; i < cnt; i++, insn++) {
+ if (bpf_pseudo_kfunc_call(insn)) {
+ ret = bpf_add_kfunc_call(env, insn->imm, insn->off);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+static int get_callee_stack_depth(struct bpf_verifier_env *env,
+ const struct bpf_insn *insn, int idx)
+{
+ int start = idx + insn->imm + 1, subprog;
+
+ subprog = bpf_find_subprog(env, start);
+ if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start))
+ return -EFAULT;
+ return env->subprog_info[subprog].stack_depth;
+}
+#endif
+
+/* A single env->prog->insnsi[off] instruction was replaced with the range
+ * insnsi[off, off + cnt). Adjust the corresponding insn_aux_data by copying
+ * [0, off) and [off, end) to new locations, so the patched range stays zeroed
+ */
+static void adjust_insn_aux_data(struct bpf_verifier_env *env,
+ struct bpf_prog *new_prog, u32 off, u32 cnt)
+{
+ struct bpf_insn_aux_data *data = env->insn_aux_data;
+ struct bpf_insn *insn = new_prog->insnsi;
+ u32 old_seen = data[off].seen;
+ u32 prog_len;
+ int i;
+
+ /* aux info at OFF always needs adjustment, no matter whether the fast
+ * path (cnt == 1) is taken or not. There is no guarantee that the insn
+ * at OFF is the original insn from the old prog.
+ */
+ data[off].zext_dst = insn_has_def32(insn + off + cnt - 1);
+
+ if (cnt == 1)
+ return;
+ prog_len = new_prog->len;
+
+ memmove(data + off + cnt - 1, data + off,
+ sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+ memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1));
+ for (i = off; i < off + cnt - 1; i++) {
+ /* Expand insnsi[off]'s seen count to the patched range. */
+ data[i].seen = old_seen;
+ data[i].zext_dst = insn_has_def32(insn + i);
+ }
+}
+
+static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
+{
+ int i;
+
+ if (len == 1)
+ return;
+ /* NOTE: fake 'exit' subprog should be updated as well. */
+ for (i = 0; i <= env->subprog_cnt; i++) {
+ if (env->subprog_info[i].start <= off)
+ continue;
+ env->subprog_info[i].start += len - 1;
+ }
+}
+
+static void adjust_insn_arrays(struct bpf_verifier_env *env, u32 off, u32 len)
+{
+ int i;
+
+ if (len == 1)
+ return;
+
+ for (i = 0; i < env->insn_array_map_cnt; i++)
+ bpf_insn_array_adjust(env->insn_array_maps[i], off, len);
+}
+
+static void adjust_insn_arrays_after_remove(struct bpf_verifier_env *env, u32 off, u32 len)
+{
+ int i;
+
+ for (i = 0; i < env->insn_array_map_cnt; i++)
+ bpf_insn_array_adjust_after_remove(env->insn_array_maps[i], off, len);
+}
+
+static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
+{
+ struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
+ int i, sz = prog->aux->size_poke_tab;
+ struct bpf_jit_poke_descriptor *desc;
+
+ for (i = 0; i < sz; i++) {
+ desc = &tab[i];
+ if (desc->insn_idx <= off)
+ continue;
+ desc->insn_idx += len - 1;
+ }
+}
+
+static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
+ const struct bpf_insn *patch, u32 len)
+{
+ struct bpf_prog *new_prog;
+ struct bpf_insn_aux_data *new_data = NULL;
+
+ if (len > 1) {
+ new_data = vrealloc(env->insn_aux_data,
+ array_size(env->prog->len + len - 1,
+ sizeof(struct bpf_insn_aux_data)),
+ GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ if (!new_data)
+ return NULL;
+
+ env->insn_aux_data = new_data;
+ }
+
+ new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
+ if (IS_ERR(new_prog)) {
+ if (PTR_ERR(new_prog) == -ERANGE)
+ verbose(env,
+ "insn %d cannot be patched due to 16-bit range\n",
+ env->insn_aux_data[off].orig_idx);
+ return NULL;
+ }
+ adjust_insn_aux_data(env, new_prog, off, len);
+ adjust_subprog_starts(env, off, len);
+ adjust_insn_arrays(env, off, len);
+ adjust_poke_descs(new_prog, off, len);
+ return new_prog;
+}
+
+/*
+ * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
+ * jump offset by 'delta'.
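+ *
+ * Example: with tgt_idx == 0 and delta == 2 (two prologue insns inserted at
+ * the start), a 'goto' at insn 6 with off == -7, which targeted insn 0,
+ * becomes off == -5 and now targets insn 2, i.e. the original first insn.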
+ */
+static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
+{
+ struct bpf_insn *insn = prog->insnsi;
+ u32 insn_cnt = prog->len, i;
+ s32 imm;
+ s16 off;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ u8 code = insn->code;
+
+ if (tgt_idx <= i && i < tgt_idx + delta)
+ continue;
+
+ if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
+ BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
+ continue;
+
+ if (insn->code == (BPF_JMP32 | BPF_JA)) {
+ if (i + 1 + insn->imm != tgt_idx)
+ continue;
+ if (check_add_overflow(insn->imm, delta, &imm))
+ return -ERANGE;
+ insn->imm = imm;
+ } else {
+ if (i + 1 + insn->off != tgt_idx)
+ continue;
+ if (check_add_overflow(insn->off, delta, &off))
+ return -ERANGE;
+ insn->off = off;
+ }
+ }
+ return 0;
+}
+
+static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
+ u32 off, u32 cnt)
+{
+ int i, j;
+
+ /* find first prog starting at or after off (first to remove) */
+ for (i = 0; i < env->subprog_cnt; i++)
+ if (env->subprog_info[i].start >= off)
+ break;
+ /* find first prog starting at or after off + cnt (first to stay) */
+ for (j = i; j < env->subprog_cnt; j++)
+ if (env->subprog_info[j].start >= off + cnt)
+ break;
+ /* if j doesn't start exactly at off + cnt, we are just removing
+ * the front of the previous prog
+ */
+ if (env->subprog_info[j].start != off + cnt)
+ j--;
+
+ if (j > i) {
+ struct bpf_prog_aux *aux = env->prog->aux;
+ int move;
+
+ /* move fake 'exit' subprog as well */
+ move = env->subprog_cnt + 1 - j;
+
+ memmove(env->subprog_info + i,
+ env->subprog_info + j,
+ sizeof(*env->subprog_info) * move);
+ env->subprog_cnt -= j - i;
+
+ /* remove func_info */
+ if (aux->func_info) {
+ move = aux->func_info_cnt - j;
+
+ memmove(aux->func_info + i,
+ aux->func_info + j,
+ sizeof(*aux->func_info) * move);
+ aux->func_info_cnt -= j - i;
+ /* func_info->insn_off is set after all code rewrites,
+ * in adjust_btf_func() - no need to adjust
+ */
+ }
+ } else {
+ /* convert i from "first prog to remove" to "first to adjust" */
+ if (env->subprog_info[i].start == off)
+ i++;
+ }
+
+ /* update fake 'exit' subprog as well */
+ for (; i <= env->subprog_cnt; i++)
+ env->subprog_info[i].start -= cnt;
+
+ return 0;
+}
+
+static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
+ u32 cnt)
+{
+ struct bpf_prog *prog = env->prog;
+ u32 i, l_off, l_cnt, nr_linfo;
+ struct bpf_line_info *linfo;
+
+ nr_linfo = prog->aux->nr_linfo;
+ if (!nr_linfo)
+ return 0;
+
+ linfo = prog->aux->linfo;
+
+ /* find first line info to remove, count lines to be removed */
+ for (i = 0; i < nr_linfo; i++)
+ if (linfo[i].insn_off >= off)
+ break;
+
+ l_off = i;
+ l_cnt = 0;
+ for (; i < nr_linfo; i++)
+ if (linfo[i].insn_off < off + cnt)
+ l_cnt++;
+ else
+ break;
+
+ /* If the first live insn doesn't match the first live linfo, it needs to
+ * "inherit" the last removed linfo. The prog is already modified, so
+ * prog->len == off means no live instructions remain after it (the tail
+ * of the program was removed).
+ */
+ if (prog->len != off && l_cnt &&
+ (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
+ l_cnt--;
+ linfo[--i].insn_off = off + cnt;
+ }
+
+ /* remove the line info entries that refer to the removed instructions */
+ if (l_cnt) {
+ memmove(linfo + l_off, linfo + i,
+ sizeof(*linfo) * (nr_linfo - i));
+
+ prog->aux->nr_linfo -= l_cnt;
+ nr_linfo = prog->aux->nr_linfo;
+ }
+
+ /* pull all linfo[i].insn_off >= off + cnt in by cnt */
+ for (i = l_off; i < nr_linfo; i++)
+ linfo[i].insn_off -= cnt;
+
+ /* fix up all subprogs (incl. 'exit') which start >= off */
+ for (i = 0; i <= env->subprog_cnt; i++)
+ if (env->subprog_info[i].linfo_idx > l_off) {
+ /* program may have started in the removed region but
+ * may not be fully removed
+ */
+ if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
+ env->subprog_info[i].linfo_idx -= l_cnt;
+ else
+ env->subprog_info[i].linfo_idx = l_off;
+ }
+
+ return 0;
+}
+
+/*
+ * Clean up dynamically allocated fields of aux data for instructions
+ * [start, start + len)
+ */
+void bpf_clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len)
+{
+ struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+ struct bpf_insn *insns = env->prog->insnsi;
+ int end = start + len;
+ int i;
+
+ for (i = start; i < end; i++) {
+ if (aux_data[i].jt) {
+ kvfree(aux_data[i].jt);
+ aux_data[i].jt = NULL;
+ }
+
+ if (bpf_is_ldimm64(&insns[i]))
+ i++;
+ }
+}
+
+static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
+{
+ struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+ unsigned int orig_prog_len = env->prog->len;
+ int err;
+
+ if (bpf_prog_is_offloaded(env->prog->aux))
+ bpf_prog_offload_remove_insns(env, off, cnt);
+
+ /* Should be called before bpf_remove_insns, as it uses prog->insnsi */
+ bpf_clear_insn_aux_data(env, off, cnt);
+
+ err = bpf_remove_insns(env->prog, off, cnt);
+ if (err)
+ return err;
+
+ err = adjust_subprog_starts_after_remove(env, off, cnt);
+ if (err)
+ return err;
+
+ err = bpf_adj_linfo_after_remove(env, off, cnt);
+ if (err)
+ return err;
+
+ adjust_insn_arrays_after_remove(env, off, cnt);
+
+ memmove(aux_data + off, aux_data + off + cnt,
+ sizeof(*aux_data) * (orig_prog_len - off - cnt));
+
+ return 0;
+}
+
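+/* Both 'goto +0' and 'may_goto +0' fall through unconditionally, so they can
+ * be deleted outright by bpf_opt_remove_nops() below.
+ */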
+static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
+static const struct bpf_insn MAY_GOTO_0 = BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0, 0);
+
+bool bpf_insn_is_cond_jump(u8 code)
+{
+ u8 op;
+
+ op = BPF_OP(code);
+ if (BPF_CLASS(code) == BPF_JMP32)
+ return op != BPF_JA;
+
+ if (BPF_CLASS(code) != BPF_JMP)
+ return false;
+
+ return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
+}
+
+void bpf_opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
+{
+ struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+ struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
+ struct bpf_insn *insn = env->prog->insnsi;
+ const int insn_cnt = env->prog->len;
+ int i;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (!bpf_insn_is_cond_jump(insn->code))
+ continue;
+
+ if (!aux_data[i + 1].seen)
+ ja.off = insn->off;
+ else if (!aux_data[i + 1 + insn->off].seen)
+ ja.off = 0;
+ else
+ continue;
+
+ if (bpf_prog_is_offloaded(env->prog->aux))
+ bpf_prog_offload_replace_insn(env, i, &ja);
+
+ memcpy(insn, &ja, sizeof(ja));
+ }
+}
+
+int bpf_opt_remove_dead_code(struct bpf_verifier_env *env)
+{
+ struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+ int insn_cnt = env->prog->len;
+ int i, err;
+
+ for (i = 0; i < insn_cnt; i++) {
+ int j;
+
+ j = 0;
+ while (i + j < insn_cnt && !aux_data[i + j].seen)
+ j++;
+ if (!j)
+ continue;
+
+ err = verifier_remove_insns(env, i, j);
+ if (err)
+ return err;
+ insn_cnt = env->prog->len;
+ }
+
+ return 0;
+}
+
+int bpf_opt_remove_nops(struct bpf_verifier_env *env)
+{
+ struct bpf_insn *insn = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+ bool is_may_goto_0, is_ja;
+ int i, err;
+
+ for (i = 0; i < insn_cnt; i++) {
+ is_may_goto_0 = !memcmp(&insn[i], &MAY_GOTO_0, sizeof(MAY_GOTO_0));
+ is_ja = !memcmp(&insn[i], &NOP, sizeof(NOP));
+
+ if (!is_may_goto_0 && !is_ja)
+ continue;
+
+ err = verifier_remove_insns(env, i, 1);
+ if (err)
+ return err;
+ insn_cnt--;
+ /* Go back one insn to catch a may_goto +1; may_goto +0 sequence */
+ i -= (is_may_goto_0 && i > 0) ? 2 : 1;
+ }
+
+ return 0;
+}
+
+int bpf_opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
+ const union bpf_attr *attr)
+{
+ struct bpf_insn *patch;
+ /* use env->insn_buf as two independent buffers */
+ struct bpf_insn *zext_patch = env->insn_buf;
+ struct bpf_insn *rnd_hi32_patch = &env->insn_buf[2];
+ struct bpf_insn_aux_data *aux = env->insn_aux_data;
+ int i, patch_len, delta = 0, len = env->prog->len;
+ struct bpf_insn *insns = env->prog->insnsi;
+ struct bpf_prog *new_prog;
+ bool rnd_hi32;
+
+ rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
+ zext_patch[1] = BPF_ZEXT_REG(0);
+ rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
+ rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
+ rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
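+ /* zext_patch: <insn>; w(dst) = w(dst) (mov32 zero-extends).
+ * rnd_hi32_patch: <insn>; AX = random u32; AX <<= 32; dst |= AX,
+ * i.e. poison the high 32 bits the program must not rely on.
+ */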
+ for (i = 0; i < len; i++) {
+ int adj_idx = i + delta;
+ struct bpf_insn insn;
+ int load_reg;
+
+ insn = insns[adj_idx];
+ load_reg = insn_def_regno(&insn);
+ if (!aux[adj_idx].zext_dst) {
+ u8 code, class;
+ u32 imm_rnd;
+
+ if (!rnd_hi32)
+ continue;
+
+ code = insn.code;
+ class = BPF_CLASS(code);
+ if (load_reg == -1)
+ continue;
+
+ /* NOTE: arg "reg" (the fourth one) is only used for
+ * BPF_STX + SRC_OP, so it is safe to pass NULL
+ * here.
+ */
+ if (bpf_is_reg64(&insn, load_reg, NULL, DST_OP)) {
+ if (class == BPF_LD &&
+ BPF_MODE(code) == BPF_IMM)
+ i++;
+ continue;
+ }
+
+ /* A ctx load may be transformed into a wider load. */
+ if (class == BPF_LDX &&
+ aux[adj_idx].ptr_type == PTR_TO_CTX)
+ continue;
+
+ imm_rnd = get_random_u32();
+ rnd_hi32_patch[0] = insn;
+ rnd_hi32_patch[1].imm = imm_rnd;
+ rnd_hi32_patch[3].dst_reg = load_reg;
+ patch = rnd_hi32_patch;
+ patch_len = 4;
+ goto apply_patch_buffer;
+ }
+
+ /* Add in a zero-extend instruction if a) the JIT has requested
+ * it or b) it's a CMPXCHG.
+ *
+ * The latter is because: BPF_CMPXCHG always loads a value into
+ * R0, therefore always zero-extends. However some archs'
+ * equivalent instruction only does this load when the
+ * comparison is successful. This detail of CMPXCHG is
+ * orthogonal to the general zero-extension behaviour of the
+ * CPU, so it's treated independently of bpf_jit_needs_zext.
+ */
+ if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
+ continue;
+
+ /* Zero-extension is done by the caller. */
+ if (bpf_pseudo_kfunc_call(&insn))
+ continue;
+
+ if (verifier_bug_if(load_reg == -1, env,
+ "zext_dst is set, but no reg is defined"))
+ return -EFAULT;
+
+ zext_patch[0] = insn;
+ zext_patch[1].dst_reg = load_reg;
+ zext_patch[1].src_reg = load_reg;
+ patch = zext_patch;
+ patch_len = 2;
+apply_patch_buffer:
+ new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
+ if (!new_prog)
+ return -ENOMEM;
+ env->prog = new_prog;
+ insns = new_prog->insnsi;
+ aux = env->insn_aux_data;
+ delta += patch_len - 1;
+ }
+
+ return 0;
+}
+
+/* convert load instructions that access fields of a context type into a
+ * sequence of instructions that access fields of the underlying structure:
+ * struct __sk_buff -> struct sk_buff
+ * struct bpf_sock_ops -> struct sock
+ */
+int bpf_convert_ctx_accesses(struct bpf_verifier_env *env)
+{
+ struct bpf_subprog_info *subprogs = env->subprog_info;
+ const struct bpf_verifier_ops *ops = env->ops;
+ int i, cnt, size, ctx_field_size, ret, delta = 0, epilogue_cnt = 0;
+ const int insn_cnt = env->prog->len;
+ struct bpf_insn *epilogue_buf = env->epilogue_buf;
+ struct bpf_insn *insn_buf = env->insn_buf;
+ struct bpf_insn *insn;
+ u32 target_size, size_default, off;
+ struct bpf_prog *new_prog;
+ enum bpf_access_type type;
+ bool is_narrower_load;
+ int epilogue_idx = 0;
+
+ if (ops->gen_epilogue) {
+ epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
+ -(subprogs[0].stack_depth + 8));
+ if (epilogue_cnt >= INSN_BUF_SIZE) {
+ verifier_bug(env, "epilogue is too long");
+ return -EFAULT;
+ } else if (epilogue_cnt) {
+ /* Save the ARG_PTR_TO_CTX for the epilogue to use */
+ cnt = 0;
+ subprogs[0].stack_depth += 8;
+ insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
+ -subprogs[0].stack_depth);
+ insn_buf[cnt++] = env->prog->insnsi[0];
+ new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+ env->prog = new_prog;
+ delta += cnt - 1;
+
+ ret = add_kfunc_in_insns(env, epilogue_buf, epilogue_cnt - 1);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ if (ops->gen_prologue || env->seen_direct_write) {
+ if (!ops->gen_prologue) {
+ verifier_bug(env, "gen_prologue is null");
+ return -EFAULT;
+ }
+ cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
+ env->prog);
+ if (cnt >= INSN_BUF_SIZE) {
+ verifier_bug(env, "prologue is too long");
+ return -EFAULT;
+ } else if (cnt) {
+ new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ env->prog = new_prog;
+ delta += cnt - 1;
+
+ ret = add_kfunc_in_insns(env, insn_buf, cnt - 1);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ if (delta)
+ WARN_ON(adjust_jmp_off(env->prog, 0, delta));
+
+ if (bpf_prog_is_offloaded(env->prog->aux))
+ return 0;
+
+ insn = env->prog->insnsi + delta;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ bpf_convert_ctx_access_t convert_ctx_access;
+ u8 mode;
+
+ if (env->insn_aux_data[i + delta].nospec) {
+ WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state);
+ struct bpf_insn *patch = insn_buf;
+
+ *patch++ = BPF_ST_NOSPEC();
+ *patch++ = *insn;
+ cnt = patch - insn_buf;
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ /* This cannot be easily merged with the
+ * nospec_result-case, because an insn may require a
+ * nospec before and after itself. Therefore also do not
+ * 'continue' here but potentially apply further
+ * patching to insn. *insn should equal patch[1] now.
+ */
+ }
+
+ if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
+ insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
+ insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
+ insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
+ insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
+ insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
+ type = BPF_READ;
+ } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
+ type = BPF_WRITE;
+ } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
+ insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
+ insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
+ insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
+ env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
+ insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
+ env->prog->aux->num_exentries++;
+ continue;
+ } else if (insn->code == (BPF_JMP | BPF_EXIT) &&
+ epilogue_cnt &&
+ i + delta < subprogs[1].start) {
+ /* Generate epilogue for the main prog */
+ if (epilogue_idx) {
+ /* jump back to the earlier generated epilogue */
+ insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
+ cnt = 1;
+ } else {
+ memcpy(insn_buf, epilogue_buf,
+ epilogue_cnt * sizeof(*epilogue_buf));
+ cnt = epilogue_cnt;
+ /* epilogue_idx cannot be 0: the prog must
+ * have at least one ctx-ptr-saving insn
+ * before the epilogue.
+ */
+ epilogue_idx = i + delta;
+ }
+ goto patch_insn_buf;
+ } else {
+ continue;
+ }
+
+ if (type == BPF_WRITE &&
+ env->insn_aux_data[i + delta].nospec_result) {
+ /* nospec_result is only used to mitigate Spectre v4 and
+ * to limit verification-time for Spectre v1.
+ */
+ struct bpf_insn *patch = insn_buf;
+
+ *patch++ = *insn;
+ *patch++ = BPF_ST_NOSPEC();
+ cnt = patch - insn_buf;
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ continue;
+ }
+
+ switch ((int)env->insn_aux_data[i + delta].ptr_type) {
+ case PTR_TO_CTX:
+ if (!ops->convert_ctx_access)
+ continue;
+ convert_ctx_access = ops->convert_ctx_access;
+ break;
+ case PTR_TO_SOCKET:
+ case PTR_TO_SOCK_COMMON:
+ convert_ctx_access = bpf_sock_convert_ctx_access;
+ break;
+ case PTR_TO_TCP_SOCK:
+ convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
+ break;
+ case PTR_TO_XDP_SOCK:
+ convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
+ break;
+ case PTR_TO_BTF_ID:
+ case PTR_TO_BTF_ID | PTR_UNTRUSTED:
+ /* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
+ * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
+ * be said once it is marked PTR_UNTRUSTED, hence we must handle
+ * any faults for loads into such types. BPF_WRITE is disallowed
+ * for this case.
+ */
+ case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
+ case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
+ if (type == BPF_READ) {
+ if (BPF_MODE(insn->code) == BPF_MEM)
+ insn->code = BPF_LDX | BPF_PROBE_MEM |
+ BPF_SIZE((insn)->code);
+ else
+ insn->code = BPF_LDX | BPF_PROBE_MEMSX |
+ BPF_SIZE((insn)->code);
+ env->prog->aux->num_exentries++;
+ }
+ continue;
+ case PTR_TO_ARENA:
+ if (BPF_MODE(insn->code) == BPF_MEMSX) {
+ if (!bpf_jit_supports_insn(insn, true)) {
+ verbose(env, "sign extending loads from arena are not supported yet\n");
+ return -EOPNOTSUPP;
+ }
+ insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32SX | BPF_SIZE(insn->code);
+ } else {
+ insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
+ }
+ env->prog->aux->num_exentries++;
+ continue;
+ default:
+ continue;
+ }
+
+ ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
+ size = BPF_LDST_BYTES(insn);
+ mode = BPF_MODE(insn->code);
+
+ /* If the read access is a narrower load of the field,
+ * convert it to a 4/8-byte load, to minimize program-type-specific
+ * convert_ctx_access changes. If the conversion is successful,
+ * we will apply the proper mask to the result.
+ */
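+ /* E.g. on little-endian, a 1-byte read at offset 1 of a 4-byte
+ * field becomes a 4-byte read at offset 0 followed by
+ * 'dst >>= 8; dst &= 0xff'; the shift amount is computed by
+ * bpf_ctx_narrow_access_offset() below.
+ */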
+ is_narrower_load = size < ctx_field_size;
+ size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
+ off = insn->off;
+ if (is_narrower_load) {
+ u8 size_code;
+
+ if (type == BPF_WRITE) {
+ verifier_bug(env, "narrow ctx access misconfigured");
+ return -EFAULT;
+ }
+
+ size_code = BPF_H;
+ if (ctx_field_size == 4)
+ size_code = BPF_W;
+ else if (ctx_field_size == 8)
+ size_code = BPF_DW;
+
+ insn->off = off & ~(size_default - 1);
+ insn->code = BPF_LDX | BPF_MEM | size_code;
+ }
+
+ target_size = 0;
+ cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
+ &target_size);
+ if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
+ (ctx_field_size && !target_size)) {
+ verifier_bug(env, "error during ctx access conversion (%d)", cnt);
+ return -EFAULT;
+ }
+
+ if (is_narrower_load && size < target_size) {
+ u8 shift = bpf_ctx_narrow_access_offset(
+ off, size, size_default) * 8;
+ if (shift && cnt + 1 >= INSN_BUF_SIZE) {
+ verifier_bug(env, "narrow ctx load misconfigured");
+ return -EFAULT;
+ }
+ if (ctx_field_size <= 4) {
+ if (shift)
+ insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
+ insn->dst_reg,
+ shift);
+ insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
+ (1 << size * 8) - 1);
+ } else {
+ if (shift)
+ insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
+ insn->dst_reg,
+ shift);
+ insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
+ (1ULL << size * 8) - 1);
+ }
+ }
+ if (mode == BPF_MEMSX)
+ insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
+ insn->dst_reg, insn->dst_reg,
+ size * 8, 0);
+
+patch_insn_buf:
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+
+ /* keep walking new program and skip insns we just inserted */
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ }
+
+ return 0;
+}
+
+int bpf_jit_subprogs(struct bpf_verifier_env *env)
+{
+ struct bpf_prog *prog = env->prog, **func, *tmp;
+ int i, j, subprog_start, subprog_end = 0, len, subprog;
+ struct bpf_map *map_ptr;
+ struct bpf_insn *insn;
+ void *old_bpf_func;
+ int err, num_exentries;
+ int old_len, subprog_start_adjustment = 0;
+
+ if (env->subprog_cnt <= 1)
+ return 0;
+
+ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+ if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
+ continue;
+
+ /* Upon error here we cannot fall back to interpreter but
+ * need a hard reject of the program. Thus -EFAULT is
+ * propagated in any case.
+ */
+ subprog = bpf_find_subprog(env, i + insn->imm + 1);
+ if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d",
+ i + insn->imm + 1))
+ return -EFAULT;
+ /* temporarily remember subprog id inside insn instead of
+ * aux_data, since next loop will split up all insns into funcs
+ */
+ insn->off = subprog;
+ /* remember the original imm in case the JIT fails and a
+ * fallback to the interpreter is needed
+ */
+ env->insn_aux_data[i].call_imm = insn->imm;
+ /* point imm to __bpf_call_base+1 from the JIT's point of view */
+ insn->imm = 1;
+ if (bpf_pseudo_func(insn)) {
+#if defined(MODULES_VADDR)
+ u64 addr = MODULES_VADDR;
+#else
+ u64 addr = VMALLOC_START;
+#endif
+ /* The JIT (e.g. x86_64) may emit fewer instructions
+ * if it learns that a u32 imm equals a u64 imm.
+ * Set it close to a plausible prog address.
+ */
+ insn[0].imm = (u32)addr;
+ insn[1].imm = addr >> 32;
+ }
+ }
+
+ err = bpf_prog_alloc_jited_linfo(prog);
+ if (err)
+ goto out_undo_insn;
+
+ err = -ENOMEM;
+ func = kzalloc_objs(prog, env->subprog_cnt);
+ if (!func)
+ goto out_undo_insn;
+
+ for (i = 0; i < env->subprog_cnt; i++) {
+ subprog_start = subprog_end;
+ subprog_end = env->subprog_info[i + 1].start;
+
+ len = subprog_end - subprog_start;
+ /* bpf_prog_run() doesn't call subprogs directly,
+ * hence main prog stats include the runtime of subprogs.
+ * subprogs don't have IDs and are not reachable via prog_get_next_id,
+ * so func[i]->stats will never be accessed and stays NULL
+ */
+ func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
+ if (!func[i])
+ goto out_free;
+ memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
+ len * sizeof(struct bpf_insn));
+ func[i]->type = prog->type;
+ func[i]->len = len;
+ if (bpf_prog_calc_tag(func[i]))
+ goto out_free;
+ func[i]->is_func = 1;
+ func[i]->sleepable = prog->sleepable;
+ func[i]->aux->func_idx = i;
+ /* Below members will be freed only at prog->aux */
+ func[i]->aux->btf = prog->aux->btf;
+ func[i]->aux->subprog_start = subprog_start + subprog_start_adjustment;
+ func[i]->aux->func_info = prog->aux->func_info;
+ func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
+ func[i]->aux->poke_tab = prog->aux->poke_tab;
+ func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
+ func[i]->aux->main_prog_aux = prog->aux;
+
+ for (j = 0; j < prog->aux->size_poke_tab; j++) {
+ struct bpf_jit_poke_descriptor *poke;
+
+ poke = &prog->aux->poke_tab[j];
+ if (poke->insn_idx < subprog_end &&
+ poke->insn_idx >= subprog_start)
+ poke->aux = func[i]->aux;
+ }
+
+ func[i]->aux->name[0] = 'F';
+ func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
+ if (env->subprog_info[i].priv_stack_mode == PRIV_STACK_ADAPTIVE)
+ func[i]->aux->jits_use_priv_stack = true;
+
+ func[i]->jit_requested = 1;
+ func[i]->blinding_requested = prog->blinding_requested;
+ func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
+ func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
+ func[i]->aux->linfo = prog->aux->linfo;
+ func[i]->aux->nr_linfo = prog->aux->nr_linfo;
+ func[i]->aux->jited_linfo = prog->aux->jited_linfo;
+ func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
+ func[i]->aux->arena = prog->aux->arena;
+ func[i]->aux->used_maps = env->used_maps;
+ func[i]->aux->used_map_cnt = env->used_map_cnt;
+ num_exentries = 0;
+ insn = func[i]->insnsi;
+ for (j = 0; j < func[i]->len; j++, insn++) {
+ if (BPF_CLASS(insn->code) == BPF_LDX &&
+ (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
+ BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
+ BPF_MODE(insn->code) == BPF_PROBE_MEM32SX ||
+ BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
+ num_exentries++;
+ if ((BPF_CLASS(insn->code) == BPF_STX ||
+ BPF_CLASS(insn->code) == BPF_ST) &&
+ BPF_MODE(insn->code) == BPF_PROBE_MEM32)
+ num_exentries++;
+ if (BPF_CLASS(insn->code) == BPF_STX &&
+ BPF_MODE(insn->code) == BPF_PROBE_ATOMIC)
+ num_exentries++;
+ }
+ func[i]->aux->num_exentries = num_exentries;
+ func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
+ func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
+ func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data;
+ func[i]->aux->might_sleep = env->subprog_info[i].might_sleep;
+ if (!i)
+ func[i]->aux->exception_boundary = env->seen_exception;
+
+ /*
+ * To pass the absolute subprog start to the JIT correctly,
+ * all instruction adjustments must be accumulated
+ */
+ old_len = func[i]->len;
+ func[i] = bpf_int_jit_compile(func[i]);
+ subprog_start_adjustment += func[i]->len - old_len;
+
+ if (!func[i]->jited) {
+ err = -ENOTSUPP;
+ goto out_free;
+ }
+ cond_resched();
+ }
+
+ /* at this point all bpf functions were successfully JITed
+ * now populate all bpf_calls with correct addresses and
+ * run last pass of JIT
+ */
+ for (i = 0; i < env->subprog_cnt; i++) {
+ insn = func[i]->insnsi;
+ for (j = 0; j < func[i]->len; j++, insn++) {
+ if (bpf_pseudo_func(insn)) {
+ subprog = insn->off;
+ insn[0].imm = (u32)(long)func[subprog]->bpf_func;
+ insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
+ continue;
+ }
+ if (!bpf_pseudo_call(insn))
+ continue;
+ subprog = insn->off;
+ insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
+ }
+
+ /* we use the aux data to keep a list of the start addresses
+ * of the JITed images for each function in the program
+ *
+ * for some architectures, such as powerpc64, the imm field
+ * might not be large enough to hold the offset of the start
+ * address of the callee's JITed image from __bpf_call_base
+ *
+ * in such cases, we can lookup the start address of a callee
+ * by using its subprog id, available from the off field of
+ * the call instruction, as an index for this list
+ */
+ func[i]->aux->func = func;
+ func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
+ func[i]->aux->real_func_cnt = env->subprog_cnt;
+ }
+ for (i = 0; i < env->subprog_cnt; i++) {
+ old_bpf_func = func[i]->bpf_func;
+ tmp = bpf_int_jit_compile(func[i]);
+ if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
+ verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
+ err = -ENOTSUPP;
+ goto out_free;
+ }
+ cond_resched();
+ }
+
+ /*
+ * Clean up func[i]->aux fields which aren't required
+ * or can become invalid in the future
+ */
+ for (i = 0; i < env->subprog_cnt; i++) {
+ func[i]->aux->used_maps = NULL;
+ func[i]->aux->used_map_cnt = 0;
+ }
+
+ /* finally lock prog and jit images for all functions and
+ * populate kallsyms. Begin at the first subprogram, since
+ * bpf_prog_load will add the kallsyms for the main program.
+ */
+ for (i = 1; i < env->subprog_cnt; i++) {
+ err = bpf_prog_lock_ro(func[i]);
+ if (err)
+ goto out_free;
+ }
+
+ for (i = 1; i < env->subprog_cnt; i++)
+ bpf_prog_kallsyms_add(func[i]);
+
+ /* Last step: make the now-unused interpreter insns from the main
+ * prog consistent for later dump requests, so they can
+ * later look the same as if they had only been interpreted.
+ */
+ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+ if (bpf_pseudo_func(insn)) {
+ insn[0].imm = env->insn_aux_data[i].call_imm;
+ insn[1].imm = insn->off;
+ insn->off = 0;
+ continue;
+ }
+ if (!bpf_pseudo_call(insn))
+ continue;
+ insn->off = env->insn_aux_data[i].call_imm;
+ subprog = bpf_find_subprog(env, i + insn->off + 1);
+ insn->imm = subprog;
+ }
+
+ prog->jited = 1;
+ prog->bpf_func = func[0]->bpf_func;
+ prog->jited_len = func[0]->jited_len;
+ prog->aux->extable = func[0]->aux->extable;
+ prog->aux->num_exentries = func[0]->aux->num_exentries;
+ prog->aux->func = func;
+ prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
+ prog->aux->real_func_cnt = env->subprog_cnt;
+ prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
+ prog->aux->exception_boundary = func[0]->aux->exception_boundary;
+ bpf_prog_jit_attempt_done(prog);
+ return 0;
+out_free:
+ /* We failed JIT'ing, so at this point we need to unregister poke
+ * descriptors from subprogs, so that the kernel does not attempt to
+ * patch them anymore as we're freeing the subprog JIT memory.
+ */
+ for (i = 0; i < prog->aux->size_poke_tab; i++) {
+ map_ptr = prog->aux->poke_tab[i].tail_call.map;
+ map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
+ }
+ /* At this point we're guaranteed that poke descriptors are not
+ * live anymore. We can just unlink its descriptor table as it's
+ * released with the main prog.
+ */
+ for (i = 0; i < env->subprog_cnt; i++) {
+ if (!func[i])
+ continue;
+ func[i]->aux->poke_tab = NULL;
+ bpf_jit_free(func[i]);
+ }
+ kfree(func);
+out_undo_insn:
+ /* cleanup main prog to be interpreted */
+ prog->jit_requested = 0;
+ prog->blinding_requested = 0;
+ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+ if (!bpf_pseudo_call(insn))
+ continue;
+ insn->off = 0;
+ insn->imm = env->insn_aux_data[i].call_imm;
+ }
+ bpf_prog_jit_attempt_done(prog);
+ return err;
+}
+
+int bpf_fixup_call_args(struct bpf_verifier_env *env)
+{
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+ struct bpf_prog *prog = env->prog;
+ struct bpf_insn *insn = prog->insnsi;
+ bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
+ int i, depth;
+#endif
+ int err = 0;
+
+ if (env->prog->jit_requested &&
+ !bpf_prog_is_offloaded(env->prog->aux)) {
+ err = bpf_jit_subprogs(env);
+ if (err == 0)
+ return 0;
+ if (err == -EFAULT)
+ return err;
+ }
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+ if (has_kfunc_call) {
+ verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
+ return -EINVAL;
+ }
+ if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
+ /* When JIT fails the progs with bpf2bpf calls and tail_calls
+ * have to be rejected, since interpreter doesn't support them yet.
+ */
+ verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
+ return -EINVAL;
+ }
+ for (i = 0; i < prog->len; i++, insn++) {
+ if (bpf_pseudo_func(insn)) {
+ /* When JIT fails the progs with callback calls
+ * have to be rejected, since interpreter doesn't support them yet.
+ */
+ verbose(env, "callbacks are not allowed in non-JITed programs\n");
+ return -EINVAL;
+ }
+
+ if (!bpf_pseudo_call(insn))
+ continue;
+ depth = get_callee_stack_depth(env, insn, i);
+ if (depth < 0)
+ return depth;
+ bpf_patch_call_args(insn, depth);
+ }
+ err = 0;
+#endif
+ return err;
+}
+
+/* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */
+static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
+{
+ struct bpf_subprog_info *info = env->subprog_info;
+ int cnt = env->subprog_cnt;
+ struct bpf_prog *prog;
+
+ /* We only reserve one slot for hidden subprogs in subprog_info. */
+ if (env->hidden_subprog_cnt) {
+ verifier_bug(env, "only one hidden subprog supported");
+ return -EFAULT;
+ }
+ /* We're not patching any existing instruction, just appending the new
+ * ones for the hidden subprog. Hence all of the adjustment operations
+ * in bpf_patch_insn_data are no-ops.
+ */
+ prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
+ if (!prog)
+ return -ENOMEM;
+ env->prog = prog;
+ info[cnt + 1].start = info[cnt].start;
+ info[cnt].start = prog->len - len + 1;
+ env->subprog_cnt++;
+ env->hidden_subprog_cnt++;
+ return 0;
+}
+
+/* Do various post-verification rewrites in a single program pass.
+ * These rewrites simplify JIT and interpreter implementations.
+ */
+int bpf_do_misc_fixups(struct bpf_verifier_env *env)
+{
+ struct bpf_prog *prog = env->prog;
+ enum bpf_attach_type eatype = prog->expected_attach_type;
+ enum bpf_prog_type prog_type = resolve_prog_type(prog);
+ struct bpf_insn *insn = prog->insnsi;
+ const struct bpf_func_proto *fn;
+ const int insn_cnt = prog->len;
+ const struct bpf_map_ops *ops;
+ struct bpf_insn_aux_data *aux;
+ struct bpf_insn *insn_buf = env->insn_buf;
+ struct bpf_prog *new_prog;
+ struct bpf_map *map_ptr;
+ int i, ret, cnt, delta = 0, cur_subprog = 0;
+ struct bpf_subprog_info *subprogs = env->subprog_info;
+ u16 stack_depth = subprogs[cur_subprog].stack_depth;
+ u16 stack_depth_extra = 0;
+
+ if (env->seen_exception && !env->exception_callback_subprog) {
+ struct bpf_insn *patch = insn_buf;
+
+ *patch++ = env->prog->insnsi[insn_cnt - 1];
+ *patch++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
+ *patch++ = BPF_EXIT_INSN();
+ ret = add_hidden_subprog(env, insn_buf, patch - insn_buf);
+ if (ret < 0)
+ return ret;
+ prog = env->prog;
+ insn = prog->insnsi;
+
+ env->exception_callback_subprog = env->subprog_cnt - 1;
+ /* Don't update insn_cnt, as add_hidden_subprog always appends insns */
+ bpf_mark_subprog_exc_cb(env, env->exception_callback_subprog);
+ }
+
+ for (i = 0; i < insn_cnt;) {
+ if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) {
+ if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) ||
+ (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
+ /* convert to 32-bit mov that clears upper 32-bit */
+ insn->code = BPF_ALU | BPF_MOV | BPF_X;
+ /* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
+ insn->off = 0;
+ insn->imm = 0;
+ } /* cast from as(0) to as(1) should be handled by JIT */
+ goto next_insn;
+ }
+
+ if (env->insn_aux_data[i + delta].needs_zext)
+ /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
+ insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
+
+ /* Make sdiv/smod divide-by-minus-one exceptions impossible. */
+ if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
+ insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
+ insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
+ insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
+ insn->off == 1 && insn->imm == -1) {
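+ /* x sdiv -1 is rewritten to a negation of x; x smod -1 to a mov of 0. */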
+ bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+ bool isdiv = BPF_OP(insn->code) == BPF_DIV;
+ struct bpf_insn *patch = insn_buf;
+
+ if (isdiv)
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_NEG | BPF_K, insn->dst_reg,
+ 0, 0, 0);
+ else
+ *patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
+
+ cnt = patch - insn_buf;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+ /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
+ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
+ insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
+ insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
+ insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
+ bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+ bool isdiv = BPF_OP(insn->code) == BPF_DIV;
+ bool is_sdiv = isdiv && insn->off == 1;
+ bool is_smod = !isdiv && insn->off == 1;
+ struct bpf_insn *patch = insn_buf;
+
+ if (is_sdiv) {
+ /* [R,W]x sdiv 0 -> 0
+ * LLONG_MIN sdiv -1 -> LLONG_MIN
+ * INT_MIN sdiv -1 -> INT_MIN
+ */
+ *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_ADD | BPF_K, BPF_REG_AX,
+ 0, 0, 1);
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JGT | BPF_K, BPF_REG_AX,
+ 0, 4, 1);
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JEQ | BPF_K, BPF_REG_AX,
+ 0, 1, 0);
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_MOV | BPF_K, insn->dst_reg,
+ 0, 0, 0);
+ /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_NEG | BPF_K, insn->dst_reg,
+ 0, 0, 0);
+ *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *patch++ = *insn;
+ cnt = patch - insn_buf;
+ } else if (is_smod) {
+ /* [R,W]x mod 0 -> [R,W]x */
+ /* [R,W]x mod -1 -> 0 */
+ *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_ADD | BPF_K, BPF_REG_AX,
+ 0, 0, 1);
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JGT | BPF_K, BPF_REG_AX,
+ 0, 3, 1);
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JEQ | BPF_K, BPF_REG_AX,
+ 0, 3 + (is64 ? 0 : 1), 1);
+ *patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
+ *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *patch++ = *insn;
+
+ if (!is64) {
+ *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
+ }
+ cnt = patch - insn_buf;
+ } else if (isdiv) {
+ /* [R,W]x div 0 -> 0 */
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JNE | BPF_K, insn->src_reg,
+ 0, 2, 0);
+ *patch++ = BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg);
+ *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *patch++ = *insn;
+ cnt = patch - insn_buf;
+ } else {
+ /* [R,W]x mod 0 -> [R,W]x */
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JEQ | BPF_K, insn->src_reg,
+ 0, 1 + (is64 ? 0 : 1), 0);
+ *patch++ = *insn;
+
+ if (!is64) {
+ *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
+ }
+ cnt = patch - insn_buf;
+ }
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+ /* Make it impossible to de-reference a userspace address */
+ if (BPF_CLASS(insn->code) == BPF_LDX &&
+ (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
+ BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
+ struct bpf_insn *patch = insn_buf;
+ u64 uaddress_limit = bpf_arch_uaddress_limit();
+
+ if (!uaddress_limit)
+ goto next_insn;
+
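+ /* If the upper 32 bits of (src + off) fall at or below
+ * uaddress_limit >> 32, the address is a userspace one: skip
+ * the load and return 0 in dst instead of faulting.
+ */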
+ *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
+ if (insn->off)
+ *patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
+ *patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
+ *patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
+ *patch++ = *insn;
+ *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);
+
+ cnt = patch - insn_buf;
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+ /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
+ if (BPF_CLASS(insn->code) == BPF_LD &&
+ (BPF_MODE(insn->code) == BPF_ABS ||
+ BPF_MODE(insn->code) == BPF_IND)) {
+ cnt = env->ops->gen_ld_abs(insn, insn_buf);
+ if (cnt == 0 || cnt >= INSN_BUF_SIZE) {
+ verifier_bug(env, "%d insns generated for ld_abs", cnt);
+ return -EFAULT;
+ }
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+ /* Rewrite pointer arithmetic to mitigate speculation attacks. */
+ if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
+ insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
+ const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
+ const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
+ struct bpf_insn *patch = insn_buf;
+ bool issrc, isneg, isimm;
+ u32 off_reg;
+
+ aux = &env->insn_aux_data[i + delta];
+ if (!aux->alu_state ||
+ aux->alu_state == BPF_ALU_NON_POINTER)
+ goto next_insn;
+
+ isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
+ issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
+ BPF_ALU_SANITIZE_SRC;
+ isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
+
+ off_reg = issrc ? insn->src_reg : insn->dst_reg;
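+ /* Mask the offset register: AX = alu_limit - off; AX |= off
+ * (sign bit set iff off is negative or exceeds the limit);
+ * AX = -AX; AX s>>= 63 (AX is all-ones iff the offset was in
+ * range, 0 otherwise); off &= AX. A speculatively
+ * out-of-bounds offset is thus forced to 0.
+ */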
+ if (isimm) {
+ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
+ } else {
+ if (isneg)
+ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
+ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
+ *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
+ *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
+ *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
+ *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
+ *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
+ }
+ if (!issrc)
+ *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
+ insn->src_reg = BPF_REG_AX;
+ if (isneg)
+ insn->code = insn->code == code_add ?
+ code_sub : code_add;
+ *patch++ = *insn;
+ if (issrc && isneg && !isimm)
+ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
+ cnt = patch - insn_buf;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+ if (bpf_is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) {
+ int stack_off_cnt = -stack_depth - 16;
+
+ /*
+ * Two 8 byte slots, depth-16 stores the count, and
+ * depth-8 stores the start timestamp of the loop.
+ *
+ * The starting value of count is BPF_MAX_TIMED_LOOPS
+ * (0xffff). Every iteration loads it and subs it by 1,
+ * until the value becomes 0 in AX (thus, 1 in stack),
+ * after which we call arch_bpf_timed_may_goto, which
+ * either sets AX to 0xffff to keep looping, or to 0
+ * upon timeout. AX is then stored into the stack. In
+ * the next iteration, we either see 0 and break out, or
+ * continue iterating until the next time value is 0
+ * after subtraction, rinse and repeat.
+ */
+ stack_depth_extra = 16;
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off_cnt);
+ if (insn->off >= 0)
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 5);
+ else
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
+ insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
+ insn_buf[3] = BPF_JMP_IMM(BPF_JNE, BPF_REG_AX, 0, 2);
+ /*
+ * AX is used as an argument to pass in stack_off_cnt
+ * (to add to r10/fp), and also as the return value of
+ * the call to arch_bpf_timed_may_goto.
+ */
+ insn_buf[4] = BPF_MOV64_IMM(BPF_REG_AX, stack_off_cnt);
+ insn_buf[5] = BPF_EMIT_CALL(arch_bpf_timed_may_goto);
+ insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off_cnt);
+ cnt = 7;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ } else if (bpf_is_may_goto_insn(insn)) {
+ int stack_off = -stack_depth - 8;
+
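+ /* Lower may_goto into: load the iteration count from the
+ * stack slot at depth-8; if it already hit zero, take the
+ * branch; otherwise decrement the count and store it back.
+ */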
+ stack_depth_extra = 8;
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
+ if (insn->off >= 0)
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
+ else
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
+ insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
+ insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
+ cnt = 4;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+ if (insn->code != (BPF_JMP | BPF_CALL))
+ goto next_insn;
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ goto next_insn;
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+ ret = bpf_fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
+ if (ret)
+ return ret;
+ if (cnt == 0)
+ goto next_insn;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+ /* Skip inlining the helper call if the JIT does it. */
+ if (bpf_jit_inlines_helper_call(insn->imm))
+ goto next_insn;
+
+ if (insn->imm == BPF_FUNC_get_route_realm)
+ prog->dst_needed = 1;
+ if (insn->imm == BPF_FUNC_get_prandom_u32)
+ bpf_user_rnd_init_once();
+ if (insn->imm == BPF_FUNC_override_return)
+ prog->kprobe_override = 1;
+ if (insn->imm == BPF_FUNC_tail_call) {
+ /* If we tail call into other programs, we
+ * cannot make any assumptions since they can
+ * be replaced dynamically during runtime in
+ * the program array.
+ */
+ prog->cb_access = 1;
+ if (!bpf_allow_tail_call_in_subprogs(env))
+ prog->aux->stack_depth = MAX_BPF_STACK;
+ prog->aux->max_pkt_offset = MAX_PACKET_OFF;
+
+ /* mark bpf_tail_call as different opcode to avoid
+ * conditional branch in the interpreter for every normal
+ * call and to prevent accidental JITing by JIT compiler
+ * that doesn't support bpf_tail_call yet
+ */
+ insn->imm = 0;
+ insn->code = BPF_JMP | BPF_TAIL_CALL;
+
+ aux = &env->insn_aux_data[i + delta];
+ if (env->bpf_capable && !prog->blinding_requested &&
+ prog->jit_requested &&
+ !bpf_map_key_poisoned(aux) &&
+ !bpf_map_ptr_poisoned(aux) &&
+ !bpf_map_ptr_unpriv(aux)) {
+ struct bpf_jit_poke_descriptor desc = {
+ .reason = BPF_POKE_REASON_TAIL_CALL,
+ .tail_call.map = aux->map_ptr_state.map_ptr,
+ .tail_call.key = bpf_map_key_immediate(aux),
+ .insn_idx = i + delta,
+ };
+
+ ret = bpf_jit_add_poke_descriptor(prog, &desc);
+ if (ret < 0) {
+ verbose(env, "adding tail call poke descriptor failed\n");
+ return ret;
+ }
+
+ insn->imm = ret + 1;
+ goto next_insn;
+ }
+
+ if (!bpf_map_ptr_unpriv(aux))
+ goto next_insn;
+
+ /* instead of changing every JIT dealing with tail_call
+ * emit two extra insns:
+ * if (index >= max_entries) goto out;
+ * index &= array->index_mask;
+ * to avoid out-of-bounds cpu speculation
+ */
+ if (bpf_map_ptr_poisoned(aux)) {
+ verbose(env, "tail_call abusing map_ptr\n");
+ return -EINVAL;
+ }
+
+ map_ptr = aux->map_ptr_state.map_ptr;
+ insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
+ map_ptr->max_entries, 2);
+ insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
+ container_of(map_ptr,
+ struct bpf_array,
+ map)->index_mask);
+ insn_buf[2] = *insn;
+ cnt = 3;
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+ if (insn->imm == BPF_FUNC_timer_set_callback) {
+ /* The verifier will process callback_fn as many times as necessary
+ * with different maps and the register states prepared by
+ * set_timer_callback_state will be accurate.
+ *
+ * The following use case is valid:
+ * map1 is shared by prog1, prog2, prog3.
+ * prog1 calls bpf_timer_init for some map1 elements
+ * prog2 calls bpf_timer_set_callback for some map1 elements.
+ * Those that were not bpf_timer_init-ed will return -EINVAL.
+ * prog3 calls bpf_timer_start for some map1 elements.
+ * Those that were not both bpf_timer_init-ed and
+ * bpf_timer_set_callback-ed will return -EINVAL.
+ */
+ struct bpf_insn ld_addrs[2] = {
+ BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
+ };
+
+ insn_buf[0] = ld_addrs[0];
+ insn_buf[1] = ld_addrs[1];
+ insn_buf[2] = *insn;
+ cnt = 3;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto patch_call_imm;
+ }
+
+ /* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
+ if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
+ /* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
+ * bpf_mem_alloc() returns a ptr to the percpu data ptr.
+ */
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
+ insn_buf[1] = *insn;
+ cnt = 2;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto patch_call_imm;
+ }
+
+ /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
+ * and other inlining handlers are currently limited to 64 bit
+ * only.
+ */
+ if (prog->jit_requested && BITS_PER_LONG == 64 &&
+ (insn->imm == BPF_FUNC_map_lookup_elem ||
+ insn->imm == BPF_FUNC_map_update_elem ||
+ insn->imm == BPF_FUNC_map_delete_elem ||
+ insn->imm == BPF_FUNC_map_push_elem ||
+ insn->imm == BPF_FUNC_map_pop_elem ||
+ insn->imm == BPF_FUNC_map_peek_elem ||
+ insn->imm == BPF_FUNC_redirect_map ||
+ insn->imm == BPF_FUNC_for_each_map_elem ||
+ insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
+ aux = &env->insn_aux_data[i + delta];
+ if (bpf_map_ptr_poisoned(aux))
+ goto patch_call_imm;
+
+ map_ptr = aux->map_ptr_state.map_ptr;
+ ops = map_ptr->ops;
+ if (insn->imm == BPF_FUNC_map_lookup_elem &&
+ ops->map_gen_lookup) {
+ cnt = ops->map_gen_lookup(map_ptr, insn_buf);
+ if (cnt == -EOPNOTSUPP)
+ goto patch_map_ops_generic;
+ if (cnt <= 0 || cnt >= INSN_BUF_SIZE) {
+ verifier_bug(env, "%d insns generated for map lookup", cnt);
+ return -EFAULT;
+ }
+
+ new_prog = bpf_patch_insn_data(env, i + delta,
+ insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+ BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
+ (void *(*)(struct bpf_map *map, void *key))NULL));
+ BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
+ (long (*)(struct bpf_map *map, void *key))NULL));
+ BUILD_BUG_ON(!__same_type(ops->map_update_elem,
+ (long (*)(struct bpf_map *map, void *key, void *value,
+ u64 flags))NULL));
+ BUILD_BUG_ON(!__same_type(ops->map_push_elem,
+ (long (*)(struct bpf_map *map, void *value,
+ u64 flags))NULL));
+ BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
+ (long (*)(struct bpf_map *map, void *value))NULL));
+ BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
+ (long (*)(struct bpf_map *map, void *value))NULL));
+ BUILD_BUG_ON(!__same_type(ops->map_redirect,
+ (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
+ BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
+ (long (*)(struct bpf_map *map,
+ bpf_callback_t callback_fn,
+ void *callback_ctx,
+ u64 flags))NULL));
+ BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
+ (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
+
+patch_map_ops_generic:
+ switch (insn->imm) {
+ case BPF_FUNC_map_lookup_elem:
+ insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
+ goto next_insn;
+ case BPF_FUNC_map_update_elem:
+ insn->imm = BPF_CALL_IMM(ops->map_update_elem);
+ goto next_insn;
+ case BPF_FUNC_map_delete_elem:
+ insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
+ goto next_insn;
+ case BPF_FUNC_map_push_elem:
+ insn->imm = BPF_CALL_IMM(ops->map_push_elem);
+ goto next_insn;
+ case BPF_FUNC_map_pop_elem:
+ insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
+ goto next_insn;
+ case BPF_FUNC_map_peek_elem:
+ insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
+ goto next_insn;
+ case BPF_FUNC_redirect_map:
+ insn->imm = BPF_CALL_IMM(ops->map_redirect);
+ goto next_insn;
+ case BPF_FUNC_for_each_map_elem:
+ insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
+ goto next_insn;
+ case BPF_FUNC_map_lookup_percpu_elem:
+ insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
+ goto next_insn;
+ }
+
+ goto patch_call_imm;
+ }
+
+ /* Implement bpf_jiffies64 inline. */
+ if (prog->jit_requested && BITS_PER_LONG == 64 &&
+ insn->imm == BPF_FUNC_jiffies64) {
+ struct bpf_insn ld_jiffies_addr[2] = {
+ BPF_LD_IMM64(BPF_REG_0,
+ (unsigned long)&jiffies),
+ };
+
+ insn_buf[0] = ld_jiffies_addr[0];
+ insn_buf[1] = ld_jiffies_addr[1];
+ insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
+ BPF_REG_0, 0);
+ cnt = 3;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
+ cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
+ /* Implement bpf_get_smp_processor_id() inline. */
+ if (insn->imm == BPF_FUNC_get_smp_processor_id &&
+ bpf_verifier_inlines_helper_call(env, insn->imm)) {
+ /* BPF_FUNC_get_smp_processor_id inlining is an
+ * optimization, so if cpu_number is ever changed in some
+ * incompatible and hard-to-support way, it's fine to back
+ * out this inlining logic.
+ */
+#ifdef CONFIG_SMP
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&cpu_number);
+ insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
+ insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0);
+ cnt = 3;
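+ /*
+ * I.e. this_cpu_read(cpu_number): take the address of the
+ * per-CPU variable, rebase it to the current CPU, then load
+ * its 32-bit value.
+ */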
+#else
+ insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
+ cnt = 1;
+#endif
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+ /* Implement bpf_get_current_task() and bpf_get_current_task_btf() inline. */
+ if ((insn->imm == BPF_FUNC_get_current_task || insn->imm == BPF_FUNC_get_current_task_btf) &&
+ bpf_verifier_inlines_helper_call(env, insn->imm)) {
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&current_task);
+ insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
+ insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
+ cnt = 3;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+#endif
+ /* Implement bpf_get_func_arg inline. */
+ if (prog_type == BPF_PROG_TYPE_TRACING &&
+ insn->imm == BPF_FUNC_get_func_arg) {
+ if (eatype == BPF_TRACE_RAW_TP) {
+ int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
+
+ /* skip 'void *__data' in btf_trace_##name() and save to reg0 */
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
+ cnt = 1;
+ } else {
+ /* Load nr_args from ctx - 8 */
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+ insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
+ cnt = 2;
+ }
+ insn_buf[cnt++] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
+ insn_buf[cnt++] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
+ insn_buf[cnt++] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
+ insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
+ insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
+ insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, 0);
+ insn_buf[cnt++] = BPF_JMP_A(1);
+ insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
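+ /*
+ * With the arg index n in R2 and the output pointer in R3,
+ * the sequence above is roughly:
+ * if (n >= nr_args)
+ * return -EINVAL;
+ * *value = ((u64 *)ctx)[n];
+ * return 0;
+ */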
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+ /* Implement bpf_get_func_ret inline. */
+ if (prog_type == BPF_PROG_TYPE_TRACING &&
+ insn->imm == BPF_FUNC_get_func_ret) {
+ if (eatype == BPF_TRACE_FEXIT ||
+ eatype == BPF_TRACE_FSESSION ||
+ eatype == BPF_MODIFY_RETURN) {
+ /* Load nr_args from ctx - 8 */
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+ insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
+ insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
+ insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
+ insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
+ insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
+ insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
+ cnt = 7;
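+ /*
+ * Roughly: *value = ((u64 *)ctx)[nr_args]; return 0;
+ * where nr_args is the low byte stored at ctx - 8.
+ */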
+ } else {
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
+ cnt = 1;
+ }
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+ /* Implement bpf_get_func_arg_cnt inline. */
+ if (prog_type == BPF_PROG_TYPE_TRACING &&
+ insn->imm == BPF_FUNC_get_func_arg_cnt) {
+ if (eatype == BPF_TRACE_RAW_TP) {
+ int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
+
+ /* skip 'void *__data' in btf_trace_##name() and save to reg0 */
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
+ cnt = 1;
+ } else {
+ /* Load nr_args from ctx - 8 */
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+ insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
+ cnt = 2;
+ }
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+ /* Implement bpf_get_func_ip inline. */
+ if (prog_type == BPF_PROG_TYPE_TRACING &&
+ insn->imm == BPF_FUNC_get_func_ip) {
+ /* Load IP address from ctx - 16 */
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
+ if (!new_prog)
+ return -ENOMEM;
+
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+ /* Implement bpf_get_branch_snapshot inline. */
+ if (IS_ENABLED(CONFIG_PERF_EVENTS) &&
+ prog->jit_requested && BITS_PER_LONG == 64 &&
+ insn->imm == BPF_FUNC_get_branch_snapshot) {
+ /* We are dealing with the following func protos:
+ * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags);
+ * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt);
+ */
+ const u32 br_entry_size = sizeof(struct perf_branch_entry);
+
+ /* struct perf_branch_entry is part of UAPI and is
+ * used as an array element, so it is extremely unlikely
+ * to ever grow or shrink.
+ */
+ BUILD_BUG_ON(br_entry_size != 24);
+
+ /* if (unlikely(flags)) return -EINVAL */
+ insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 7);
+
+ /* Transform size (bytes) into number of entries (cnt = size / 24).
+ * But to avoid expensive division instruction, we implement
+ * divide-by-3 through multiplication, followed by further
+ * division by 8 through 3-bit right shift.
+ * Refer to book "Hacker's Delight, 2nd ed." by Henry S. Warren, Jr.,
+ * p. 227, chapter "Unsigned Division by 3" for details and proofs.
+ *
+ * N / 3 <=> M * N / 2^33, where M = (2^33 + 1) / 3 = 0xaaaaaaab.
+ */
+ insn_buf[1] = BPF_MOV32_IMM(BPF_REG_0, 0xaaaaaaab);
+ insn_buf[2] = BPF_ALU64_REG(BPF_MUL, BPF_REG_2, BPF_REG_0);
+ insn_buf[3] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36);
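+ /*
+ * Worked example: size = 48 gives 48 * 0xaaaaaaab =
+ * 0x2000000010, and 0x2000000010 >> 36 == 2 == 48 / 24.
+ */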
+
+ /* call perf_snapshot_branch_stack implementation */
+ insn_buf[4] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack));
+ /* if (entry_cnt == 0) return -ENOENT */
+ insn_buf[5] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4);
+ /* return entry_cnt * sizeof(struct perf_branch_entry) */
+ insn_buf[6] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size);
+ insn_buf[7] = BPF_JMP_A(3);
+ /* return -EINVAL; */
+ insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
+ insn_buf[9] = BPF_JMP_A(1);
+ /* return -ENOENT; */
+ insn_buf[10] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT);
+ cnt = 11;
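+ /*
+ * The sequence above is, in effect:
+ * if (flags) return -EINVAL;
+ * cnt = perf_snapshot_branch_stack(buf, size / 24);
+ * return cnt ? cnt * 24 : -ENOENT;
+ */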
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+ /* Implement bpf_kptr_xchg inline */
+ if (prog->jit_requested && BITS_PER_LONG == 64 &&
+ insn->imm == BPF_FUNC_kptr_xchg &&
+ bpf_jit_supports_ptr_xchg()) {
+ insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
+ insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
+ cnt = 2;
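+ /* I.e. the kernel's xchg(): store the new kptr, return the old one. */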
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+patch_call_imm:
+ fn = env->ops->get_func_proto(insn->imm, env->prog);
+ /* All functions that have a prototype and that the verifier
+ * allowed programs to call must be real in-kernel functions.
+ */
+ if (!fn->func) {
+ verifier_bug(env,
+ "not inlined functions %s#%d is missing func",
+ func_id_name(insn->imm), insn->imm);
+ return -EFAULT;
+ }
+ insn->imm = fn->func - __bpf_call_base;
+next_insn:
+ if (subprogs[cur_subprog + 1].start == i + delta + 1) {
+ subprogs[cur_subprog].stack_depth += stack_depth_extra;
+ subprogs[cur_subprog].stack_extra = stack_depth_extra;
+
+ stack_depth = subprogs[cur_subprog].stack_depth;
+ if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) {
+ verbose(env, "stack size %d(extra %d) is too large\n",
+ stack_depth, stack_depth_extra);
+ return -EINVAL;
+ }
+ cur_subprog++;
+ stack_depth = subprogs[cur_subprog].stack_depth;
+ stack_depth_extra = 0;
+ }
+ i++;
+ insn++;
+ }
+
+ env->prog->aux->stack_depth = subprogs[0].stack_depth;
+ for (i = 0; i < env->subprog_cnt; i++) {
+ int delta = bpf_jit_supports_timed_may_goto() ? 2 : 1;
+ int subprog_start = subprogs[i].start;
+ int stack_slots = subprogs[i].stack_extra / 8;
+ int slots = delta, cnt = 0;
+
+ if (!stack_slots)
+ continue;
+ /* We need two slots in case timed may_goto is supported. */
+ if (stack_slots > slots) {
+ verifier_bug(env, "stack_slots supports may_goto only");
+ return -EFAULT;
+ }
+
+ stack_depth = subprogs[i].stack_depth;
+ if (bpf_jit_supports_timed_may_goto()) {
+ insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
+ BPF_MAX_TIMED_LOOPS);
+ insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth + 8, 0);
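+ /*
+ * The timed variant needs two slots: the remaining loop
+ * count, plus a second slot (zeroed here) where the timed
+ * may_goto runtime keeps its start timestamp.
+ */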
+ } else {
+ /* Add ST insn to subprog prologue to init extra stack */
+ insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
+ BPF_MAX_LOOPS);
+ }
+ /* Copy first actual insn to preserve it */
+ insn_buf[cnt++] = env->prog->insnsi[subprog_start];
+
+ new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+ env->prog = prog = new_prog;
+ /*
+ * If may_goto is the first insn of a prog, there could be a
+ * jmp insn that points to it; hence adjust all such jmps to
+ * point to the insn after the BPF_ST that inits the may_goto
+ * count. The adjustment will succeed because
+ * bpf_patch_insn_data() didn't fail.
+ */
+ WARN_ON(adjust_jmp_off(env->prog, subprog_start, delta));
+ }
+
+ /* Since poke tab is now finalized, publish aux to tracker. */
+ for (i = 0; i < prog->aux->size_poke_tab; i++) {
+ map_ptr = prog->aux->poke_tab[i].tail_call.map;
+ if (!map_ptr->ops->map_poke_track ||
+ !map_ptr->ops->map_poke_untrack ||
+ !map_ptr->ops->map_poke_run) {
+ verifier_bug(env, "poke tab is misconfigured");
+ return -EFAULT;
+ }
+
+ ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
+ if (ret < 0) {
+ verbose(env, "tracking tail call prog failed\n");
+ return ret;
+ }
+ }
+
+ ret = sort_kfunc_descs_by_imm_off(env);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
+ int position,
+ s32 stack_base,
+ u32 callback_subprogno,
+ u32 *total_cnt)
+{
+ s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
+ s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
+ s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
+ int reg_loop_max = BPF_REG_6;
+ int reg_loop_cnt = BPF_REG_7;
+ int reg_loop_ctx = BPF_REG_8;
+
+ struct bpf_insn *insn_buf = env->insn_buf;
+ struct bpf_prog *new_prog;
+ u32 callback_start;
+ u32 call_insn_offset;
+ s32 callback_offset;
+ u32 cnt = 0;
+
+ /* This is an inlined version of bpf_iter.c:bpf_loop;
+ * keep the two implementations in sync when modifying either.
+ */
+
+ /* Return error and jump to the end of the patch if
+ * expected number of iterations is too big.
+ */
+ insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2);
+ insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG);
+ insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16);
+ /* spill R6, R7, R8 to use these as loop vars */
+ insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset);
+ insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset);
+ insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset);
+ /* initialize loop vars */
+ insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1);
+ insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0);
+ insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3);
+ /* loop header,
+ * if reg_loop_cnt >= reg_loop_max skip the loop body
+ */
+ insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5);
+ /* callback call,
+ * correct callback offset would be set after patching
+ */
+ insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt);
+ insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx);
+ insn_buf[cnt++] = BPF_CALL_REL(0);
+ /* increment loop counter */
+ insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1);
+ /* jump to loop header if callback returned 0 */
+ insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6);
+ /* return value of bpf_loop,
+ * set R0 to the number of iterations
+ */
+ insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt);
+ /* restore original values of R6, R7, R8 */
+ insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset);
+ insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset);
+ insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset);
+
+ *total_cnt = cnt;
+ new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt);
+ if (!new_prog)
+ return new_prog;
+
+ /* callback start is known only after patching */
+ callback_start = env->subprog_info[callback_subprogno].start;
+ /* Note: insn_buf[12] is the offset of the BPF_CALL_REL instruction */
+ call_insn_offset = position + 12;
+ callback_offset = callback_start - call_insn_offset - 1;
+ new_prog->insnsi[call_insn_offset].imm = callback_offset;
+
+ return new_prog;
+}
+
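+/*
+ * For reference, the emitted sequence behaves like this C sketch
+ * (names are illustrative only):
+ *
+ *	long inlined_loop(u64 nr, long (*cb)(u64 i, void *ctx), void *ctx)
+ *	{
+ *		u64 i = 0;
+ *
+ *		if (nr > BPF_MAX_LOOPS)
+ *			return -E2BIG;
+ *		while (i < nr) {
+ *			long ret = cb(i, ctx);
+ *
+ *			i++;
+ *			if (ret)
+ *				break;
+ *		}
+ *		return i;
+ *	}
+ */
+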
+static bool is_bpf_loop_call(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_JMP | BPF_CALL) &&
+ insn->src_reg == 0 &&
+ insn->imm == BPF_FUNC_loop;
+}
+
+/* For all sub-programs in the program (including main) check
+ * insn_aux_data to see if there are bpf_loop calls that require
+ * inlining. If such calls are found, they are replaced with the
+ * instruction sequence produced by inline_bpf_loop() and the
+ * subprog stack_depth is increased by the size of three registers.
+ * This stack space is used to spill R6, R7 and R8, which hold the
+ * loop bound, counter and context variables.
+ */
+int bpf_optimize_bpf_loop(struct bpf_verifier_env *env)
+{
+ struct bpf_subprog_info *subprogs = env->subprog_info;
+ int i, cur_subprog = 0, cnt, delta = 0;
+ struct bpf_insn *insn = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+ u16 stack_depth = subprogs[cur_subprog].stack_depth;
+ u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+ u16 stack_depth_extra = 0;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ struct bpf_loop_inline_state *inline_state =
+ &env->insn_aux_data[i + delta].loop_inline_state;
+
+ if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
+ struct bpf_prog *new_prog;
+
+ stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
+ new_prog = inline_bpf_loop(env,
+ i + delta,
+ -(stack_depth + stack_depth_extra),
+ inline_state->callback_subprogno,
+ &cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ }
+
+ if (subprogs[cur_subprog + 1].start == i + delta + 1) {
+ subprogs[cur_subprog].stack_depth += stack_depth_extra;
+ cur_subprog++;
+ stack_depth = subprogs[cur_subprog].stack_depth;
+ stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+ stack_depth_extra = 0;
+ }
+ }
+
+ env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
+
+ return 0;
+}
+
+/* Remove unnecessary spill/fill pairs that are members of the
+ * fastcall pattern, and adjust subprogram stack depth when possible.
+ */
+int bpf_remove_fastcall_spills_fills(struct bpf_verifier_env *env)
+{
+ struct bpf_subprog_info *subprog = env->subprog_info;
+ struct bpf_insn_aux_data *aux = env->insn_aux_data;
+ struct bpf_insn *insn = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+ u32 spills_num;
+ bool modified = false;
+ int i, j;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (aux[i].fastcall_spills_num > 0) {
+ spills_num = aux[i].fastcall_spills_num;
+ /* NOPs would be removed by opt_remove_nops() */
+ for (j = 1; j <= spills_num; ++j) {
+ *(insn - j) = NOP;
+ *(insn + j) = NOP;
+ }
+ modified = true;
+ }
+ if ((subprog + 1)->start == i + 1) {
+ if (modified && !subprog->keep_fastcall_stack)
+ subprog->stack_depth = -subprog->fastcall_stack_off;
+ subprog++;
+ modified = false;
+ }
+ }
+
+ return 0;
+}
+
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index bc6bc8bb871d..3dd9b4924ae4 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1056,7 +1056,7 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
for_each_possible_cpu(cpu) {
if (cpu == current_cpu)
- copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value);
+ copy_map_value(&htab->map, per_cpu_ptr(pptr, cpu), value);
else /* Since elem is preallocated, we cannot touch special fields */
zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu));
}
@@ -1138,6 +1138,10 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
} else if (fd_htab_map_needs_adjust(htab)) {
size = round_up(size, 8);
memcpy(htab_elem_value(l_new, key_size), value, size);
+ } else if (map_flags & BPF_F_LOCK) {
+ copy_map_value_locked(&htab->map,
+ htab_elem_value(l_new, key_size),
+ value, false);
} else {
copy_map_value(&htab->map, htab_elem_value(l_new, key_size), value);
}
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 6eb6c82ed2ee..bb95e287b0dc 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1272,7 +1272,7 @@ static void bpf_async_cb_rcu_tasks_trace_free(struct rcu_head *rcu)
return;
}
- /* rcu_trace_implies_rcu_gp() is true and will remain so */
+ /* RCU Tasks Trace grace period implies RCU grace period. */
bpf_async_cb_rcu_free(rcu);
}
@@ -2302,9 +2302,20 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
__bpf_kfunc_start_defs();
-__bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
+/**
+ * bpf_obj_new() - allocate an object described by program BTF
+ * @local_type_id__k: type ID in program BTF
+ * @meta: verifier-supplied struct metadata
+ *
+ * Allocate an object of the type identified by @local_type_id__k and
+ * initialize its special fields. BPF programs can use
+ * bpf_core_type_id_local() to provide @local_type_id__k. The verifier
+ * rewrites @meta; BPF programs do not set it.
+ *
+ * Return: Pointer to the allocated object, or %NULL on failure.
+ */
+__bpf_kfunc void *bpf_obj_new(u64 local_type_id__k, struct btf_struct_meta *meta)
{
- struct btf_struct_meta *meta = meta__ign;
u64 size = local_type_id__k;
void *p;
@@ -2313,17 +2324,39 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
return NULL;
if (meta)
bpf_obj_init(meta->record, p);
+
return p;
}
-__bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign)
+__bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
+{
+ return bpf_obj_new(local_type_id__k, meta__ign);
+}
+
+/**
+ * bpf_percpu_obj_new() - allocate a percpu object described by program BTF
+ * @local_type_id__k: type ID in program BTF
+ * @meta: verifier-supplied struct metadata
+ *
+ * Allocate a percpu object of the type identified by @local_type_id__k. BPF
+ * programs can use bpf_core_type_id_local() to provide @local_type_id__k.
+ * The verifier rewrites @meta; BPF programs do not set it.
+ *
+ * Return: Pointer to the allocated percpu object, or %NULL on failure.
+ */
+__bpf_kfunc void *bpf_percpu_obj_new(u64 local_type_id__k, struct btf_struct_meta *meta)
{
u64 size = local_type_id__k;
- /* The verifier has ensured that meta__ign must be NULL */
+ /* The verifier has ensured that meta is NULL */
return bpf_mem_alloc(&bpf_global_percpu_ma, size);
}
+__bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign)
+{
+ return bpf_percpu_obj_new(local_type_id__k, meta__ign);
+}
+
/* Must be called under migrate_disable(), as required by bpf_mem_free */
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu)
{
@@ -2347,23 +2380,56 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu)
bpf_mem_free_rcu(ma, p);
}
-__bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
+/**
+ * bpf_obj_drop() - drop a previously allocated object
+ * @p__alloc: object to free
+ * @meta: verifier-supplied struct metadata
+ *
+ * Destroy special fields in @p__alloc as needed and free the object. The
+ * verifier rewrites @meta; BPF programs do not set it.
+ */
+__bpf_kfunc void bpf_obj_drop(void *p__alloc, struct btf_struct_meta *meta)
{
- struct btf_struct_meta *meta = meta__ign;
void *p = p__alloc;
__bpf_obj_drop_impl(p, meta ? meta->record : NULL, false);
}
-__bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign)
+__bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
{
- /* The verifier has ensured that meta__ign must be NULL */
+ return bpf_obj_drop(p__alloc, meta__ign);
+}
+
+/**
+ * bpf_percpu_obj_drop() - drop a previously allocated percpu object
+ * @p__alloc: percpu object to free
+ * @meta: verifier-supplied struct metadata
+ *
+ * Free @p__alloc. The verifier rewrites @meta; BPF programs do not set it.
+ */
+__bpf_kfunc void bpf_percpu_obj_drop(void *p__alloc, struct btf_struct_meta *meta)
+{
+ /* The verifier has ensured that meta is NULL */
bpf_mem_free_rcu(&bpf_global_percpu_ma, p__alloc);
}
-__bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
+__bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign)
+{
+ bpf_percpu_obj_drop(p__alloc, meta__ign);
+}
+
+/**
+ * bpf_refcount_acquire() - turn a local kptr into an owning reference
+ * @p__refcounted_kptr: non-owning local kptr
+ * @meta: verifier-supplied struct metadata
+ *
+ * Increment the refcount for @p__refcounted_kptr. The verifier rewrites
+ * @meta; BPF programs do not set it.
+ *
+ * Return: Owning reference to @p__refcounted_kptr, or %NULL on failure.
+ */
+__bpf_kfunc void *bpf_refcount_acquire(void *p__refcounted_kptr, struct btf_struct_meta *meta)
{
- struct btf_struct_meta *meta = meta__ign;
struct bpf_refcount *ref;
/* Could just cast directly to refcount_t *, but need some code using
@@ -2379,6 +2445,11 @@ __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta
return (void *)p__refcounted_kptr;
}
+__bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
+{
+ return bpf_refcount_acquire(p__refcounted_kptr, meta__ign);
+}
+
static int __bpf_list_add(struct bpf_list_node_kern *node,
struct bpf_list_head *head,
bool tail, struct btf_record *rec, u64 off)
@@ -2406,24 +2477,62 @@ static int __bpf_list_add(struct bpf_list_node_kern *node,
return 0;
}
+/**
+ * bpf_list_push_front() - add a node to the front of a BPF linked list
+ * @head: list head
+ * @node: node to insert
+ * @meta: verifier-supplied struct metadata
+ * @off: verifier-supplied offset of @node within the containing object
+ *
+ * Insert @node at the front of @head. The verifier rewrites @meta and @off;
+ * BPF programs do not set them.
+ *
+ * Return: 0 on success, or %-EINVAL if @node is already linked.
+ */
+__bpf_kfunc int bpf_list_push_front(struct bpf_list_head *head,
+ struct bpf_list_node *node,
+ struct btf_struct_meta *meta,
+ u64 off)
+{
+ struct bpf_list_node_kern *n = (void *)node;
+
+ return __bpf_list_add(n, head, false, meta ? meta->record : NULL, off);
+}
+
__bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head,
struct bpf_list_node *node,
void *meta__ign, u64 off)
{
+ return bpf_list_push_front(head, node, meta__ign, off);
+}
+
+/**
+ * bpf_list_push_back() - add a node to the back of a BPF linked list
+ * @head: list head
+ * @node: node to insert
+ * @meta: verifier-supplied struct metadata
+ * @off: verifier-supplied offset of @node within the containing object
+ *
+ * Insert @node at the back of @head. The verifier rewrites @meta and @off;
+ * BPF programs do not set them.
+ *
+ * Return: 0 on success, or %-EINVAL if @node is already linked.
+ */
+__bpf_kfunc int bpf_list_push_back(struct bpf_list_head *head,
+ struct bpf_list_node *node,
+ struct btf_struct_meta *meta,
+ u64 off)
+{
struct bpf_list_node_kern *n = (void *)node;
- struct btf_struct_meta *meta = meta__ign;
- return __bpf_list_add(n, head, false, meta ? meta->record : NULL, off);
+ return __bpf_list_add(n, head, true, meta ? meta->record : NULL, off);
}
__bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head,
struct bpf_list_node *node,
void *meta__ign, u64 off)
{
- struct bpf_list_node_kern *n = (void *)node;
- struct btf_struct_meta *meta = meta__ign;
-
- return __bpf_list_add(n, head, true, meta ? meta->record : NULL, off);
+ return bpf_list_push_back(head, node, meta__ign, off);
}
static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
@@ -2535,16 +2644,37 @@ static int __bpf_rbtree_add(struct bpf_rb_root *root,
return 0;
}
-__bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
- bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
- void *meta__ign, u64 off)
+/**
+ * bpf_rbtree_add() - add a node to a BPF rbtree
+ * @root: tree root
+ * @node: node to insert
+ * @less: comparator used to order nodes
+ * @meta: verifier-supplied struct metadata
+ * @off: verifier-supplied offset of @node within the containing object
+ *
+ * Insert @node into @root using @less. The verifier rewrites @meta and @off;
+ * BPF programs do not set them.
+ *
+ * Return: 0 on success, or %-EINVAL if @node is already linked in a tree.
+ */
+__bpf_kfunc int bpf_rbtree_add(struct bpf_rb_root *root,
+ struct bpf_rb_node *node,
+ bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
+ struct btf_struct_meta *meta,
+ u64 off)
{
- struct btf_struct_meta *meta = meta__ign;
struct bpf_rb_node_kern *n = (void *)node;
return __bpf_rbtree_add(root, n, (void *)less, meta ? meta->record : NULL, off);
}
+__bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
+ bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
+ void *meta__ign, u64 off)
+{
+ return bpf_rbtree_add(root, node, less, meta__ign, off);
+}
+
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
{
struct rb_root_cached *r = (struct rb_root_cached *)root;
@@ -4165,17 +4295,25 @@ static bool bpf_task_work_ctx_tryget(struct bpf_task_work_ctx *ctx)
return refcount_inc_not_zero(&ctx->refcnt);
}
+static void bpf_task_work_destroy(struct irq_work *irq_work)
+{
+ struct bpf_task_work_ctx *ctx = container_of(irq_work, struct bpf_task_work_ctx, irq_work);
+
+ bpf_task_work_ctx_reset(ctx);
+ kfree_rcu(ctx, rcu);
+}
+
static void bpf_task_work_ctx_put(struct bpf_task_work_ctx *ctx)
{
if (!refcount_dec_and_test(&ctx->refcnt))
return;
- bpf_task_work_ctx_reset(ctx);
-
- /* bpf_mem_free expects migration to be disabled */
- migrate_disable();
- bpf_mem_free(&bpf_global_ma, ctx);
- migrate_enable();
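+ /* Teardown is deferred via irq_work when IRQs are disabled. */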
+ if (irqs_disabled()) {
+ ctx->irq_work = IRQ_WORK_INIT(bpf_task_work_destroy);
+ irq_work_queue(&ctx->irq_work);
+ } else {
+ bpf_task_work_destroy(&ctx->irq_work);
+ }
}
static void bpf_task_work_cancel(struct bpf_task_work_ctx *ctx)
@@ -4229,7 +4367,7 @@ static void bpf_task_work_irq(struct irq_work *irq_work)
enum bpf_task_work_state state;
int err;
- guard(rcu_tasks_trace)();
+ guard(rcu)();
if (cmpxchg(&ctx->state, BPF_TW_PENDING, BPF_TW_SCHEDULING) != BPF_TW_PENDING) {
bpf_task_work_ctx_put(ctx);
@@ -4251,9 +4389,9 @@ static void bpf_task_work_irq(struct irq_work *irq_work)
/*
* It's technically possible for just scheduled task_work callback to
* complete running by now, going SCHEDULING -> RUNNING and then
- * dropping its ctx refcount. Instead of capturing extra ref just to
- * protected below ctx->state access, we rely on RCU protection to
- * perform below SCHEDULING -> SCHEDULED attempt.
+ * dropping its ctx refcount. Instead of taking an extra ref just to
+ * protect the ctx->state access below, we rely on the rcu_read_lock
+ * above to prevent kfree_rcu from freeing ctx before we return.
*/
state = cmpxchg(&ctx->state, BPF_TW_SCHEDULING, BPF_TW_SCHEDULED);
if (state == BPF_TW_FREED)
@@ -4270,7 +4408,7 @@ static struct bpf_task_work_ctx *bpf_task_work_fetch_ctx(struct bpf_task_work *t
if (ctx)
return ctx;
- ctx = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_task_work_ctx));
+ ctx = bpf_map_kmalloc_nolock(map, sizeof(*ctx), 0, NUMA_NO_NODE);
if (!ctx)
return ERR_PTR(-ENOMEM);
@@ -4284,7 +4422,7 @@ static struct bpf_task_work_ctx *bpf_task_work_fetch_ctx(struct bpf_task_work *t
* tw->ctx is set by concurrent BPF program, release allocated
* memory and try to reuse already set context.
*/
- bpf_mem_free(&bpf_global_ma, ctx);
+ kfree_nolock(ctx);
return old_ctx;
}
@@ -4296,13 +4434,23 @@ static struct bpf_task_work_ctx *bpf_task_work_acquire_ctx(struct bpf_task_work
{
struct bpf_task_work_ctx *ctx;
- ctx = bpf_task_work_fetch_ctx(tw, map);
- if (IS_ERR(ctx))
- return ctx;
-
- /* try to get ref for task_work callback to hold */
- if (!bpf_task_work_ctx_tryget(ctx))
- return ERR_PTR(-EBUSY);
+ /*
+ * Sleepable BPF programs hold rcu_read_lock_trace but not
+ * regular rcu_read_lock. Since kfree_rcu waits for regular
+ * RCU GP, the ctx can be freed while we're between reading
+ * the pointer and incrementing the refcount. Take regular
+ * rcu_read_lock to prevent kfree_rcu from freeing the ctx
+ * before we can tryget it.
+ */
+ scoped_guard(rcu) {
+ ctx = bpf_task_work_fetch_ctx(tw, map);
+ if (IS_ERR(ctx))
+ return ctx;
+
+ /* try to get ref for task_work callback to hold */
+ if (!bpf_task_work_ctx_tryget(ctx))
+ return ERR_PTR(-EBUSY);
+ }
if (cmpxchg(&ctx->state, BPF_TW_STANDBY, BPF_TW_PENDING) != BPF_TW_STANDBY) {
/* lost acquiring race or map_release_uref() stole it from us, put ref and bail */
@@ -4417,7 +4565,7 @@ static int make_file_dynptr(struct file *file, u32 flags, bool may_sleep,
return -EINVAL;
}
- state = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_dynptr_file_impl));
+ state = kmalloc_nolock(sizeof(*state), 0, NUMA_NO_NODE);
if (!state) {
bpf_dynptr_set_null(ptr);
return -ENOMEM;
@@ -4449,7 +4597,7 @@ __bpf_kfunc int bpf_dynptr_file_discard(struct bpf_dynptr *dynptr)
return 0;
freader_cleanup(&df->freader);
- bpf_mem_free(&bpf_global_ma, df);
+ kfree_nolock(df);
bpf_dynptr_set_null(ptr);
return 0;
}
@@ -4536,12 +4684,19 @@ BTF_KFUNCS_START(generic_btf_ids)
#ifdef CONFIG_CRASH_DUMP
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
+BTF_ID_FLAGS(func, bpf_obj_new, KF_ACQUIRE | KF_RET_NULL | KF_IMPLICIT_ARGS)
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_percpu_obj_new, KF_ACQUIRE | KF_RET_NULL | KF_IMPLICIT_ARGS)
BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_obj_drop, KF_RELEASE | KF_IMPLICIT_ARGS)
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_percpu_obj_drop, KF_RELEASE | KF_IMPLICIT_ARGS)
BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_refcount_acquire, KF_ACQUIRE | KF_RET_NULL | KF_RCU | KF_IMPLICIT_ARGS)
BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL | KF_RCU)
+BTF_ID_FLAGS(func, bpf_list_push_front, KF_IMPLICIT_ARGS)
BTF_ID_FLAGS(func, bpf_list_push_front_impl)
+BTF_ID_FLAGS(func, bpf_list_push_back, KF_IMPLICIT_ARGS)
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
@@ -4550,6 +4705,7 @@ BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_rbtree_add, KF_IMPLICIT_ARGS)
BTF_ID_FLAGS(func, bpf_rbtree_add_impl)
BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_root, KF_RET_NULL)
@@ -4578,6 +4734,9 @@ BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE)
#endif
#endif
+#ifdef CONFIG_S390
+BTF_ID_FLAGS(func, bpf_get_lowcore)
+#endif
BTF_KFUNCS_END(generic_btf_ids)
static const struct btf_kfunc_id_set generic_kfunc_set = {
diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c
index 998986853c61..1fb4c511db5a 100644
--- a/kernel/bpf/liveness.c
+++ b/kernel/bpf/liveness.c
@@ -2,217 +2,119 @@
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
#include <linux/bpf_verifier.h>
+#include <linux/btf.h>
#include <linux/hashtable.h>
#include <linux/jhash.h>
#include <linux/slab.h>
+#include <linux/sort.h>
-/*
- * This file implements live stack slots analysis. After accumulating
- * stack usage data, the analysis answers queries about whether a
- * particular stack slot may be read by an instruction or any of it's
- * successors. This data is consumed by the verifier states caching
- * mechanism to decide which stack slots are important when looking for a
- * visited state corresponding to the current state.
- *
- * The analysis is call chain sensitive, meaning that data is collected
- * and queried for tuples (call chain, subprogram instruction index).
- * Such sensitivity allows identifying if some subprogram call always
- * leads to writes in the caller's stack.
- *
- * The basic idea is as follows:
- * - As the verifier accumulates a set of visited states, the analysis instance
- * accumulates a conservative estimate of stack slots that can be read
- * or must be written for each visited tuple (call chain, instruction index).
- * - If several states happen to visit the same instruction with the same
- * call chain, stack usage information for the corresponding tuple is joined:
- * - "may_read" set represents a union of all possibly read slots
- * (any slot in "may_read" set might be read at or after the instruction);
- * - "must_write" set represents an intersection of all possibly written slots
- * (any slot in "must_write" set is guaranteed to be written by the instruction).
- * - The analysis is split into two phases:
- * - read and write marks accumulation;
- * - read and write marks propagation.
- * - The propagation phase is a textbook live variable data flow analysis:
- *
- * state[cc, i].live_after = U [state[cc, s].live_before for s in bpf_insn_successors(i)]
- * state[cc, i].live_before =
- * (state[cc, i].live_after / state[cc, i].must_write) U state[i].may_read
- *
- * Where:
- * - `U` stands for set union
- * - `/` stands for set difference;
- * - `cc` stands for a call chain;
- * - `i` and `s` are instruction indexes;
- *
- * The above equations are computed for each call chain and instruction
- * index until state stops changing.
- * - Additionally, in order to transfer "must_write" information from a
- * subprogram to call instructions invoking this subprogram,
- * the "must_write_acc" set is tracked for each (cc, i) tuple.
- * A set of stack slots that are guaranteed to be written by this
- * instruction or any of its successors (within the subprogram).
- * The equation for "must_write_acc" propagation looks as follows:
- *
- * state[cc, i].must_write_acc =
- * ∩ [state[cc, s].must_write_acc for s in bpf_insn_successors(i)]
- * U state[cc, i].must_write
- *
- * (An intersection of all "must_write_acc" for instruction successors
- * plus all "must_write" slots for the instruction itself).
- * - After the propagation phase completes for a subprogram, information from
- * (cc, 0) tuple (subprogram entry) is transferred to the caller's call chain:
- * - "must_write_acc" set is intersected with the call site's "must_write" set;
- * - "may_read" set is added to the call site's "may_read" set.
- * - Any live stack queries must be taken after the propagation phase.
- * - Accumulation and propagation phases can be entered multiple times,
- * at any point in time:
- * - "may_read" set only grows;
- * - "must_write" set only shrinks;
- * - for each visited verifier state with zero branches, all relevant
- * read and write marks are already recorded by the analysis instance.
- *
- * Technically, the analysis is facilitated by the following data structures:
- * - Call chain: for given verifier state, the call chain is a tuple of call
- * instruction indexes leading to the current subprogram plus the subprogram
- * entry point index.
- * - Function instance: for a given call chain, for each instruction in
- * the current subprogram, a mapping between instruction index and a
- * set of "may_read", "must_write" and other marks accumulated for this
- * instruction.
- * - A hash table mapping call chains to function instances.
- */
-
-struct callchain {
- u32 callsites[MAX_CALL_FRAMES]; /* instruction pointer for each frame */
- /* cached subprog_info[*].start for functions owning the frames:
- * - sp_starts[curframe] used to get insn relative index within current function;
- * - sp_starts[0..current-1] used for fast callchain_frame_up().
- */
- u32 sp_starts[MAX_CALL_FRAMES];
- u32 curframe; /* depth of callsites and sp_starts arrays */
-};
+#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
struct per_frame_masks {
- u64 may_read; /* stack slots that may be read by this instruction */
- u64 must_write; /* stack slots written by this instruction */
- u64 must_write_acc; /* stack slots written by this instruction and its successors */
- u64 live_before; /* stack slots that may be read by this insn and its successors */
+ spis_t may_read; /* stack slots that may be read by this instruction */
+ spis_t must_write; /* stack slots written by this instruction */
+ spis_t live_before; /* stack slots that may be read by this insn and its successors */
};
/*
- * A function instance created for a specific callchain.
+ * A function instance keyed by (callsite, depth).
* Encapsulates read and write marks for each instruction in the function.
- * Marks are tracked for each frame in the callchain.
+ * Marks are tracked for each frame up to @depth.
*/
struct func_instance {
struct hlist_node hl_node;
- struct callchain callchain;
+ u32 callsite; /* call insn that invoked this subprog (subprog_start for depth 0) */
+ u32 depth; /* call depth (0 = entry subprog) */
+ u32 subprog; /* subprog index */
+ u32 subprog_start; /* cached env->subprog_info[subprog].start */
u32 insn_cnt; /* cached number of insns in the function */
- bool updated;
- bool must_write_dropped;
/* Per frame, per instruction masks, frames allocated lazily. */
struct per_frame_masks *frames[MAX_CALL_FRAMES];
- /* For each instruction a flag telling if "must_write" had been initialized for it. */
- bool *must_write_set;
+ bool must_write_initialized;
};
struct live_stack_query {
struct func_instance *instances[MAX_CALL_FRAMES]; /* valid in range [0..curframe] */
+ u32 callsites[MAX_CALL_FRAMES]; /* callsites[i] = insn calling frame i+1 */
u32 curframe;
u32 insn_idx;
};
struct bpf_liveness {
- DECLARE_HASHTABLE(func_instances, 8); /* maps callchain to func_instance */
+ DECLARE_HASHTABLE(func_instances, 8); /* maps (depth, callsite) to func_instance */
struct live_stack_query live_stack_query; /* cache to avoid repetitive ht lookups */
- /* Cached instance corresponding to env->cur_state, avoids per-instruction ht lookup */
- struct func_instance *cur_instance;
- /*
- * Below fields are used to accumulate stack write marks for instruction at
- * @write_insn_idx before submitting the marks to @cur_instance.
- */
- u64 write_masks_acc[MAX_CALL_FRAMES];
- u32 write_insn_idx;
+ u32 subprog_calls; /* analyze_subprog() invocations */
};
-/* Compute callchain corresponding to state @st at depth @frameno */
-static void compute_callchain(struct bpf_verifier_env *env, struct bpf_verifier_state *st,
- struct callchain *callchain, u32 frameno)
+/*
+ * Hash/compare key for func_instance: (depth, callsite).
+ * For depth == 0 (entry subprog), @callsite is the subprog start insn.
+ * For depth > 0, @callsite is the call instruction index that invoked the subprog.
+ */
+static u32 instance_hash(u32 callsite, u32 depth)
{
- struct bpf_subprog_info *subprog_info = env->subprog_info;
- u32 i;
+ u32 key[2] = { depth, callsite };
- memset(callchain, 0, sizeof(*callchain));
- for (i = 0; i <= frameno; i++) {
- callchain->sp_starts[i] = subprog_info[st->frame[i]->subprogno].start;
- if (i < st->curframe)
- callchain->callsites[i] = st->frame[i + 1]->callsite;
- }
- callchain->curframe = frameno;
- callchain->callsites[callchain->curframe] = callchain->sp_starts[callchain->curframe];
-}
-
-static u32 hash_callchain(struct callchain *callchain)
-{
- return jhash2(callchain->callsites, callchain->curframe, 0);
+ return jhash2(key, 2, 0);
}
-static bool same_callsites(struct callchain *a, struct callchain *b)
+static struct func_instance *find_instance(struct bpf_verifier_env *env,
+ u32 callsite, u32 depth)
{
- int i;
+ struct bpf_liveness *liveness = env->liveness;
+ struct func_instance *f;
+ u32 key = instance_hash(callsite, depth);
- if (a->curframe != b->curframe)
- return false;
- for (i = a->curframe; i >= 0; i--)
- if (a->callsites[i] != b->callsites[i])
- return false;
- return true;
+ hash_for_each_possible(liveness->func_instances, f, hl_node, key)
+ if (f->depth == depth && f->callsite == callsite)
+ return f;
+ return NULL;
}
-/*
- * Find existing or allocate new function instance corresponding to @callchain.
- * Instances are accumulated in env->liveness->func_instances and persist
- * until the end of the verification process.
- */
-static struct func_instance *__lookup_instance(struct bpf_verifier_env *env,
- struct callchain *callchain)
+static struct func_instance *call_instance(struct bpf_verifier_env *env,
+ struct func_instance *caller,
+ u32 callsite, int subprog)
{
- struct bpf_liveness *liveness = env->liveness;
- struct bpf_subprog_info *subprog;
- struct func_instance *result;
- u32 subprog_sz, size, key;
-
- key = hash_callchain(callchain);
- hash_for_each_possible(liveness->func_instances, result, hl_node, key)
- if (same_callsites(&result->callchain, callchain))
- return result;
-
- subprog = bpf_find_containing_subprog(env, callchain->sp_starts[callchain->curframe]);
- subprog_sz = (subprog + 1)->start - subprog->start;
- size = sizeof(struct func_instance);
- result = kvzalloc(size, GFP_KERNEL_ACCOUNT);
- if (!result)
+ u32 depth = caller ? caller->depth + 1 : 0;
+ u32 subprog_start = env->subprog_info[subprog].start;
+ u32 lookup_key = depth > 0 ? callsite : subprog_start;
+ struct func_instance *f;
+ u32 hash;
+
+ f = find_instance(env, lookup_key, depth);
+ if (f)
+ return f;
+
+ f = kvzalloc(sizeof(*f), GFP_KERNEL_ACCOUNT);
+ if (!f)
return ERR_PTR(-ENOMEM);
- result->must_write_set = kvzalloc_objs(*result->must_write_set,
- subprog_sz, GFP_KERNEL_ACCOUNT);
- if (!result->must_write_set) {
- kvfree(result);
- return ERR_PTR(-ENOMEM);
- }
- memcpy(&result->callchain, callchain, sizeof(*callchain));
- result->insn_cnt = subprog_sz;
- hash_add(liveness->func_instances, &result->hl_node, key);
- return result;
+ f->callsite = lookup_key;
+ f->depth = depth;
+ f->subprog = subprog;
+ f->subprog_start = subprog_start;
+ f->insn_cnt = (env->subprog_info + subprog + 1)->start - subprog_start;
+ hash = instance_hash(lookup_key, depth);
+ hash_add(env->liveness->func_instances, &f->hl_node, hash);
+ return f;
}
static struct func_instance *lookup_instance(struct bpf_verifier_env *env,
struct bpf_verifier_state *st,
u32 frameno)
{
- struct callchain callchain;
-
- compute_callchain(env, st, &callchain, frameno);
- return __lookup_instance(env, &callchain);
+ u32 callsite, subprog_start;
+ struct func_instance *f;
+ u32 key, depth;
+
+ subprog_start = env->subprog_info[st->frame[frameno]->subprogno].start;
+ callsite = frameno > 0 ? st->frame[frameno]->callsite : subprog_start;
+
+ for (depth = frameno; ; depth--) {
+ key = depth > 0 ? callsite : subprog_start;
+ f = find_instance(env, key, depth);
+ if (f || depth == 0)
+ return f;
+ }
}
int bpf_stack_liveness_init(struct bpf_verifier_env *env)
@@ -233,9 +135,8 @@ void bpf_stack_liveness_free(struct bpf_verifier_env *env)
if (!env->liveness)
return;
hash_for_each_safe(env->liveness->func_instances, bkt, tmp, instance, hl_node) {
- for (i = 0; i <= instance->callchain.curframe; i++)
+ for (i = 0; i <= instance->depth; i++)
kvfree(instance->frames[i]);
- kvfree(instance->must_write_set);
kvfree(instance);
}
kvfree(env->liveness);
@@ -247,7 +148,7 @@ void bpf_stack_liveness_free(struct bpf_verifier_env *env)
*/
static int relative_idx(struct func_instance *instance, u32 insn_idx)
{
- return insn_idx - instance->callchain.sp_starts[instance->callchain.curframe];
+ return insn_idx - instance->subprog_start;
}
static struct per_frame_masks *get_frame_masks(struct func_instance *instance,
@@ -259,8 +160,7 @@ static struct per_frame_masks *get_frame_masks(struct func_instance *instance,
return &instance->frames[frame][relative_idx(instance, insn_idx)];
}
-static struct per_frame_masks *alloc_frame_masks(struct bpf_verifier_env *env,
- struct func_instance *instance,
+static struct per_frame_masks *alloc_frame_masks(struct func_instance *instance,
u32 frame, u32 insn_idx)
{
struct per_frame_masks *arr;
@@ -275,167 +175,29 @@ static struct per_frame_masks *alloc_frame_masks(struct bpf_verifier_env *env,
return get_frame_masks(instance, frame, insn_idx);
}
-void bpf_reset_live_stack_callchain(struct bpf_verifier_env *env)
-{
- env->liveness->cur_instance = NULL;
-}
-
-/* If @env->liveness->cur_instance is null, set it to instance corresponding to @env->cur_state. */
-static int ensure_cur_instance(struct bpf_verifier_env *env)
-{
- struct bpf_liveness *liveness = env->liveness;
- struct func_instance *instance;
-
- if (liveness->cur_instance)
- return 0;
-
- instance = lookup_instance(env, env->cur_state, env->cur_state->curframe);
- if (IS_ERR(instance))
- return PTR_ERR(instance);
-
- liveness->cur_instance = instance;
- return 0;
-}
-
/* Accumulate may_read masks for @frame at @insn_idx */
-static int mark_stack_read(struct bpf_verifier_env *env,
- struct func_instance *instance, u32 frame, u32 insn_idx, u64 mask)
+static int mark_stack_read(struct func_instance *instance, u32 frame, u32 insn_idx, spis_t mask)
{
struct per_frame_masks *masks;
- u64 new_may_read;
- masks = alloc_frame_masks(env, instance, frame, insn_idx);
+ masks = alloc_frame_masks(instance, frame, insn_idx);
if (IS_ERR(masks))
return PTR_ERR(masks);
- new_may_read = masks->may_read | mask;
- if (new_may_read != masks->may_read &&
- ((new_may_read | masks->live_before) != masks->live_before))
- instance->updated = true;
- masks->may_read |= mask;
- return 0;
-}
-
-int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frame, u32 insn_idx, u64 mask)
-{
- int err;
-
- err = ensure_cur_instance(env);
- err = err ?: mark_stack_read(env, env->liveness->cur_instance, frame, insn_idx, mask);
- return err;
-}
-
-static void reset_stack_write_marks(struct bpf_verifier_env *env,
- struct func_instance *instance, u32 insn_idx)
-{
- struct bpf_liveness *liveness = env->liveness;
- int i;
-
- liveness->write_insn_idx = insn_idx;
- for (i = 0; i <= instance->callchain.curframe; i++)
- liveness->write_masks_acc[i] = 0;
-}
-
-int bpf_reset_stack_write_marks(struct bpf_verifier_env *env, u32 insn_idx)
-{
- struct bpf_liveness *liveness = env->liveness;
- int err;
-
- err = ensure_cur_instance(env);
- if (err)
- return err;
-
- reset_stack_write_marks(env, liveness->cur_instance, insn_idx);
+ masks->may_read = spis_or(masks->may_read, mask);
return 0;
}
-void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frame, u64 mask)
+static int mark_stack_write(struct func_instance *instance, u32 frame, u32 insn_idx, spis_t mask)
{
- env->liveness->write_masks_acc[frame] |= mask;
-}
-
-static int commit_stack_write_marks(struct bpf_verifier_env *env,
- struct func_instance *instance)
-{
- struct bpf_liveness *liveness = env->liveness;
- u32 idx, frame, curframe, old_must_write;
struct per_frame_masks *masks;
- u64 mask;
- if (!instance)
- return 0;
-
- curframe = instance->callchain.curframe;
- idx = relative_idx(instance, liveness->write_insn_idx);
- for (frame = 0; frame <= curframe; frame++) {
- mask = liveness->write_masks_acc[frame];
- /* avoid allocating frames for zero masks */
- if (mask == 0 && !instance->must_write_set[idx])
- continue;
- masks = alloc_frame_masks(env, instance, frame, liveness->write_insn_idx);
- if (IS_ERR(masks))
- return PTR_ERR(masks);
- old_must_write = masks->must_write;
- /*
- * If instruction at this callchain is seen for a first time, set must_write equal
- * to @mask. Otherwise take intersection with the previous value.
- */
- if (instance->must_write_set[idx])
- mask &= old_must_write;
- if (old_must_write != mask) {
- masks->must_write = mask;
- instance->updated = true;
- }
- if (old_must_write & ~mask)
- instance->must_write_dropped = true;
- }
- instance->must_write_set[idx] = true;
- liveness->write_insn_idx = 0;
+ masks = alloc_frame_masks(instance, frame, insn_idx);
+ if (IS_ERR(masks))
+ return PTR_ERR(masks);
+ masks->must_write = spis_or(masks->must_write, mask);
return 0;
}
-/*
- * Merge stack writes marks in @env->liveness->write_masks_acc
- * with information already in @env->liveness->cur_instance.
- */
-int bpf_commit_stack_write_marks(struct bpf_verifier_env *env)
-{
- return commit_stack_write_marks(env, env->liveness->cur_instance);
-}
-
-static char *fmt_callchain(struct bpf_verifier_env *env, struct callchain *callchain)
-{
- char *buf_end = env->tmp_str_buf + sizeof(env->tmp_str_buf);
- char *buf = env->tmp_str_buf;
- int i;
-
- buf += snprintf(buf, buf_end - buf, "(");
- for (i = 0; i <= callchain->curframe; i++)
- buf += snprintf(buf, buf_end - buf, "%s%d", i ? "," : "", callchain->callsites[i]);
- snprintf(buf, buf_end - buf, ")");
- return env->tmp_str_buf;
-}
-
-static void log_mask_change(struct bpf_verifier_env *env, struct callchain *callchain,
- char *pfx, u32 frame, u32 insn_idx, u64 old, u64 new)
-{
- u64 changed_bits = old ^ new;
- u64 new_ones = new & changed_bits;
- u64 new_zeros = ~new & changed_bits;
-
- if (!changed_bits)
- return;
- bpf_log(&env->log, "%s frame %d insn %d ", fmt_callchain(env, callchain), frame, insn_idx);
- if (new_ones) {
- bpf_fmt_stack_mask(env->tmp_str_buf, sizeof(env->tmp_str_buf), new_ones);
- bpf_log(&env->log, "+%s %s ", pfx, env->tmp_str_buf);
- }
- if (new_zeros) {
- bpf_fmt_stack_mask(env->tmp_str_buf, sizeof(env->tmp_str_buf), new_zeros);
- bpf_log(&env->log, "-%s %s", pfx, env->tmp_str_buf);
- }
- bpf_log(&env->log, "\n");
-}
-
int bpf_jmp_offset(struct bpf_insn *insn)
{
u8 code = insn->code;
@@ -507,62 +269,11 @@ bpf_insn_successors(struct bpf_verifier_env *env, u32 idx)
__diag_pop();
-static struct func_instance *get_outer_instance(struct bpf_verifier_env *env,
- struct func_instance *instance)
-{
- struct callchain callchain = instance->callchain;
-
- /* Adjust @callchain to represent callchain one frame up */
- callchain.callsites[callchain.curframe] = 0;
- callchain.sp_starts[callchain.curframe] = 0;
- callchain.curframe--;
- callchain.callsites[callchain.curframe] = callchain.sp_starts[callchain.curframe];
- return __lookup_instance(env, &callchain);
-}
-
-static u32 callchain_subprog_start(struct callchain *callchain)
-{
- return callchain->sp_starts[callchain->curframe];
-}
-
-/*
- * Transfer @may_read and @must_write_acc marks from the first instruction of @instance,
- * to the call instruction in function instance calling @instance.
- */
-static int propagate_to_outer_instance(struct bpf_verifier_env *env,
- struct func_instance *instance)
-{
- struct callchain *callchain = &instance->callchain;
- u32 this_subprog_start, callsite, frame;
- struct func_instance *outer_instance;
- struct per_frame_masks *insn;
- int err;
-
- this_subprog_start = callchain_subprog_start(callchain);
- outer_instance = get_outer_instance(env, instance);
- if (IS_ERR(outer_instance))
- return PTR_ERR(outer_instance);
- callsite = callchain->callsites[callchain->curframe - 1];
-
- reset_stack_write_marks(env, outer_instance, callsite);
- for (frame = 0; frame < callchain->curframe; frame++) {
- insn = get_frame_masks(instance, frame, this_subprog_start);
- if (!insn)
- continue;
- bpf_mark_stack_write(env, frame, insn->must_write_acc);
- err = mark_stack_read(env, outer_instance, frame, callsite, insn->live_before);
- if (err)
- return err;
- }
- commit_stack_write_marks(env, outer_instance);
- return 0;
-}
static inline bool update_insn(struct bpf_verifier_env *env,
struct func_instance *instance, u32 frame, u32 insn_idx)
{
- struct bpf_insn_aux_data *aux = env->insn_aux_data;
- u64 new_before, new_after, must_write_acc;
+ spis_t new_before, new_after;
struct per_frame_masks *insn, *succ_insn;
struct bpf_iarray *succ;
u32 s;
@@ -574,77 +285,40 @@ static inline bool update_insn(struct bpf_verifier_env *env,
changed = false;
insn = get_frame_masks(instance, frame, insn_idx);
- new_before = 0;
- new_after = 0;
- /*
- * New "must_write_acc" is an intersection of all "must_write_acc"
- * of successors plus all "must_write" slots of instruction itself.
- */
- must_write_acc = U64_MAX;
+ new_before = SPIS_ZERO;
+ new_after = SPIS_ZERO;
for (s = 0; s < succ->cnt; ++s) {
succ_insn = get_frame_masks(instance, frame, succ->items[s]);
- new_after |= succ_insn->live_before;
- must_write_acc &= succ_insn->must_write_acc;
+ new_after = spis_or(new_after, succ_insn->live_before);
}
- must_write_acc |= insn->must_write;
/*
* New "live_before" is a union of all "live_before" of successors
* minus slots written by instruction plus slots read by instruction.
+ * new_before = (new_after & ~insn->must_write) | insn->may_read
*/
- new_before = (new_after & ~insn->must_write) | insn->may_read;
- changed |= new_before != insn->live_before;
- changed |= must_write_acc != insn->must_write_acc;
- if (unlikely(env->log.level & BPF_LOG_LEVEL2) &&
- (insn->may_read || insn->must_write ||
- insn_idx == callchain_subprog_start(&instance->callchain) ||
- aux[insn_idx].prune_point)) {
- log_mask_change(env, &instance->callchain, "live",
- frame, insn_idx, insn->live_before, new_before);
- log_mask_change(env, &instance->callchain, "written",
- frame, insn_idx, insn->must_write_acc, must_write_acc);
- }
+ new_before = spis_or(spis_and(new_after, spis_not(insn->must_write)),
+ insn->may_read);
+ changed |= !spis_equal(new_before, insn->live_before);
insn->live_before = new_before;
- insn->must_write_acc = must_write_acc;
return changed;
}
-/* Fixed-point computation of @live_before and @must_write_acc marks */
-static int update_instance(struct bpf_verifier_env *env, struct func_instance *instance)
+/* Fixed-point computation of @live_before marks */
+static void update_instance(struct bpf_verifier_env *env, struct func_instance *instance)
{
- u32 i, frame, po_start, po_end, cnt, this_subprog_start;
- struct callchain *callchain = &instance->callchain;
+ u32 i, frame, po_start, po_end;
int *insn_postorder = env->cfg.insn_postorder;
struct bpf_subprog_info *subprog;
- struct per_frame_masks *insn;
bool changed;
- int err;
-
- this_subprog_start = callchain_subprog_start(callchain);
- /*
- * If must_write marks were updated must_write_acc needs to be reset
- * (to account for the case when new must_write sets became smaller).
- */
- if (instance->must_write_dropped) {
- for (frame = 0; frame <= callchain->curframe; frame++) {
- if (!instance->frames[frame])
- continue;
-
- for (i = 0; i < instance->insn_cnt; i++) {
- insn = get_frame_masks(instance, frame, this_subprog_start + i);
- insn->must_write_acc = 0;
- }
- }
- }
- subprog = bpf_find_containing_subprog(env, this_subprog_start);
+ instance->must_write_initialized = true;
+ subprog = &env->subprog_info[instance->subprog];
po_start = subprog->postorder_start;
po_end = (subprog + 1)->postorder_start;
- cnt = 0;
/* repeat until fixed point is reached */
do {
- cnt++;
changed = false;
- for (frame = 0; frame <= instance->callchain.curframe; frame++) {
+ for (frame = 0; frame <= instance->depth; frame++) {
if (!instance->frames[frame])
continue;
@@ -652,57 +326,14 @@ static int update_instance(struct bpf_verifier_env *env, struct func_instance *i
changed |= update_insn(env, instance, frame, insn_postorder[i]);
}
} while (changed);
-
- if (env->log.level & BPF_LOG_LEVEL2)
- bpf_log(&env->log, "%s live stack update done in %d iterations\n",
- fmt_callchain(env, callchain), cnt);
-
- /* transfer marks accumulated for outer frames to outer func instance (caller) */
- if (callchain->curframe > 0) {
- err = propagate_to_outer_instance(env, instance);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-/*
- * Prepare all callchains within @env->cur_state for querying.
- * This function should be called after each verifier.c:pop_stack()
- * and whenever verifier.c:do_check_insn() processes subprogram exit.
- * This would guarantee that visited verifier states with zero branches
- * have their bpf_mark_stack_{read,write}() effects propagated in
- * @env->liveness.
- */
-int bpf_update_live_stack(struct bpf_verifier_env *env)
-{
- struct func_instance *instance;
- int err, frame;
-
- bpf_reset_live_stack_callchain(env);
- for (frame = env->cur_state->curframe; frame >= 0; --frame) {
- instance = lookup_instance(env, env->cur_state, frame);
- if (IS_ERR(instance))
- return PTR_ERR(instance);
-
- if (instance->updated) {
- err = update_instance(env, instance);
- if (err)
- return err;
- instance->updated = false;
- instance->must_write_dropped = false;
- }
- }
- return 0;
}
-static bool is_live_before(struct func_instance *instance, u32 insn_idx, u32 frameno, u32 spi)
+static bool is_live_before(struct func_instance *instance, u32 insn_idx, u32 frameno, u32 half_spi)
{
struct per_frame_masks *masks;
masks = get_frame_masks(instance, frameno, insn_idx);
- return masks && (masks->live_before & BIT(spi));
+ return masks && spis_test_bit(masks->live_before, half_spi);
}
int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
@@ -714,41 +345,1858 @@ int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_
memset(q, 0, sizeof(*q));
for (frame = 0; frame <= st->curframe; frame++) {
instance = lookup_instance(env, st, frame);
- if (IS_ERR(instance))
- return PTR_ERR(instance);
- q->instances[frame] = instance;
+ if (IS_ERR_OR_NULL(instance))
+ q->instances[frame] = NULL;
+ else
+ q->instances[frame] = instance;
+ if (frame < st->curframe)
+ q->callsites[frame] = st->frame[frame + 1]->callsite;
}
q->curframe = st->curframe;
q->insn_idx = st->insn_idx;
return 0;
}
-bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 spi)
+bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 half_spi)
{
/*
- * Slot is alive if it is read before q->st->insn_idx in current func instance,
+ * Slot is alive if it is read before q->insn_idx in current func instance,
* or if for some outer func instance:
* - alive before callsite if callsite calls callback, otherwise
* - alive after callsite
*/
struct live_stack_query *q = &env->liveness->live_stack_query;
struct func_instance *instance, *curframe_instance;
- u32 i, callsite;
- bool alive;
+ u32 i, callsite, rel;
+ int cur_delta, delta;
+ bool alive = false;
curframe_instance = q->instances[q->curframe];
- if (is_live_before(curframe_instance, q->insn_idx, frameno, spi))
+ if (!curframe_instance)
+ return true;
+ cur_delta = (int)curframe_instance->depth - (int)q->curframe;
+ rel = frameno + cur_delta;
+ if (rel <= curframe_instance->depth)
+ alive = is_live_before(curframe_instance, q->insn_idx, rel, half_spi);
+
+ if (alive)
return true;
for (i = frameno; i < q->curframe; i++) {
- callsite = curframe_instance->callchain.callsites[i];
instance = q->instances[i];
+ if (!instance)
+ return true;
+ /* Map actual frameno to frame index within this instance */
+ delta = (int)instance->depth - (int)i;
+ rel = frameno + delta;
+ if (rel > instance->depth)
+ return true;
+
+ /* Get callsite from verifier state, not from instance callchain */
+ callsite = q->callsites[i];
+
alive = bpf_calls_callback(env, callsite)
- ? is_live_before(instance, callsite, frameno, spi)
- : is_live_before(instance, callsite + 1, frameno, spi);
+ ? is_live_before(instance, callsite, rel, half_spi)
+ : is_live_before(instance, callsite + 1, rel, half_spi);
if (alive)
return true;
}
return false;
}
+
+static char *fmt_subprog(struct bpf_verifier_env *env, int subprog)
+{
+ const char *name = env->subprog_info[subprog].name;
+
+ snprintf(env->tmp_str_buf, sizeof(env->tmp_str_buf),
+ "subprog#%d%s%s", subprog, name ? " " : "", name ? name : "");
+ return env->tmp_str_buf;
+}
+
+static char *fmt_instance(struct bpf_verifier_env *env, struct func_instance *instance)
+{
+ snprintf(env->tmp_str_buf, sizeof(env->tmp_str_buf),
+ "(d%d,cs%d)", instance->depth, instance->callsite);
+ return env->tmp_str_buf;
+}
+
+static int spi_off(int spi)
+{
+ return -(spi + 1) * BPF_REG_SIZE;
+}
+
+/*
+ * When both halves of an 8-byte SPI are set, print as "-8","-16",...
+ * When only one half is set, print as "-4h","-8h",...
+ * Runs of 3+ consecutive fully-set SPIs are collapsed: "fp0-8..-24"
+ */
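+/*
+ * Illustration (hypothetical mask, assuming STACK_SLOT_SZ == 4): with
+ * both halves of spis 0..2 set and only the low half of spi 4 set,
+ * frame 1 would print as "fp1-8..-24 fp1-36h".
+ */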
+static char *fmt_spis_mask(struct bpf_verifier_env *env, int frame, bool first, spis_t spis)
+{
+ int buf_sz = sizeof(env->tmp_str_buf);
+ char *buf = env->tmp_str_buf;
+ int spi, n, run_start;
+
+ buf[0] = '\0';
+
+ for (spi = 0; spi < STACK_SLOTS / 2 && buf_sz > 0; spi++) {
+ bool lo = spis_test_bit(spis, spi * 2);
+ bool hi = spis_test_bit(spis, spi * 2 + 1);
+ const char *space = first ? "" : " ";
+
+ if (!lo && !hi)
+ continue;
+
+ if (!lo || !hi) {
+ /* half-spi */
+ n = scnprintf(buf, buf_sz, "%sfp%d%d%s",
+ space, frame, spi_off(spi) + (lo ? STACK_SLOT_SZ : 0), "h");
+ } else if (spi + 2 < STACK_SLOTS / 2 &&
+ spis_test_bit(spis, spi * 2 + 2) &&
+ spis_test_bit(spis, spi * 2 + 3) &&
+ spis_test_bit(spis, spi * 2 + 4) &&
+ spis_test_bit(spis, spi * 2 + 5)) {
+ /* 3+ consecutive full spis */
+ run_start = spi;
+ while (spi + 1 < STACK_SLOTS / 2 &&
+ spis_test_bit(spis, (spi + 1) * 2) &&
+ spis_test_bit(spis, (spi + 1) * 2 + 1))
+ spi++;
+ n = scnprintf(buf, buf_sz, "%sfp%d%d..%d",
+ space, frame, spi_off(run_start), spi_off(spi));
+ } else {
+ /* just a full spi */
+ n = scnprintf(buf, buf_sz, "%sfp%d%d", space, frame, spi_off(spi));
+ }
+ first = false;
+ buf += n;
+ buf_sz -= n;
+ }
+ return env->tmp_str_buf;
+}
+
+static void print_instance(struct bpf_verifier_env *env, struct func_instance *instance)
+{
+ int start = env->subprog_info[instance->subprog].start;
+ struct bpf_insn *insns = env->prog->insnsi;
+ struct per_frame_masks *masks;
+ int len = instance->insn_cnt;
+ int insn_idx, frame, i;
+ bool has_use, has_def;
+ u64 pos, insn_pos;
+
+ if (!(env->log.level & BPF_LOG_LEVEL2))
+ return;
+
+ verbose(env, "stack use/def %s ", fmt_subprog(env, instance->subprog));
+ verbose(env, "%s:\n", fmt_instance(env, instance));
+ for (i = 0; i < len; i++) {
+ insn_idx = start + i;
+ has_use = false;
+ has_def = false;
+ pos = env->log.end_pos;
+ verbose(env, "%3d: ", insn_idx);
+ bpf_verbose_insn(env, &insns[insn_idx]);
+ bpf_vlog_reset(&env->log, env->log.end_pos - 1); /* remove \n */
+ insn_pos = env->log.end_pos;
+ verbose(env, "%*c;", bpf_vlog_alignment(insn_pos - pos), ' ');
+ pos = env->log.end_pos;
+ verbose(env, " use: ");
+ for (frame = instance->depth; frame >= 0; --frame) {
+ masks = get_frame_masks(instance, frame, insn_idx);
+ if (!masks || spis_is_zero(masks->may_read))
+ continue;
+ verbose(env, "%s", fmt_spis_mask(env, frame, !has_use, masks->may_read));
+ has_use = true;
+ }
+ if (!has_use)
+ bpf_vlog_reset(&env->log, pos);
+ pos = env->log.end_pos;
+ verbose(env, " def: ");
+ for (frame = instance->depth; frame >= 0; --frame) {
+ masks = get_frame_masks(instance, frame, insn_idx);
+ if (!masks || spis_is_zero(masks->must_write))
+ continue;
+ verbose(env, "%s", fmt_spis_mask(env, frame, !has_def, masks->must_write));
+ has_def = true;
+ }
+ if (!has_def)
+ bpf_vlog_reset(&env->log, has_use ? pos : insn_pos);
+ verbose(env, "\n");
+ if (bpf_is_ldimm64(&insns[insn_idx]))
+ i++;
+ }
+}
+
+static int cmp_instances(const void *pa, const void *pb)
+{
+ struct func_instance *a = *(struct func_instance **)pa;
+ struct func_instance *b = *(struct func_instance **)pb;
+ int dcallsite = (int)a->callsite - b->callsite;
+ int ddepth = (int)a->depth - b->depth;
+
+ if (dcallsite)
+ return dcallsite;
+ if (ddepth)
+ return ddepth;
+ return 0;
+}
+
+/* print use/def slots for all instances ordered by callsite first, then by depth */
+static int print_instances(struct bpf_verifier_env *env)
+{
+ struct func_instance *instance, **sorted_instances;
+ struct bpf_liveness *liveness = env->liveness;
+ int i, bkt, cnt;
+
+ cnt = 0;
+ hash_for_each(liveness->func_instances, bkt, instance, hl_node)
+ cnt++;
+ sorted_instances = kvmalloc_objs(*sorted_instances, cnt, GFP_KERNEL_ACCOUNT);
+ if (!sorted_instances)
+ return -ENOMEM;
+ cnt = 0;
+ hash_for_each(liveness->func_instances, bkt, instance, hl_node)
+ sorted_instances[cnt++] = instance;
+ sort(sorted_instances, cnt, sizeof(*sorted_instances), cmp_instances, NULL);
+ for (i = 0; i < cnt; i++)
+ print_instance(env, sorted_instances[i]);
+ kvfree(sorted_instances);
+ return 0;
+}
+
+/*
+ * Per-register tracking state for compute_subprog_args().
+ * Tracks which frame's FP a value is derived from
+ * and the byte offset from that frame's FP.
+ *
+ * The .frame field forms a lattice with three levels of precision:
+ *
+ * precise {frame=N, off=V} -- known absolute frame index and byte offset
+ * |
+ * offset-imprecise {frame=N, off=OFF_IMPRECISE}
+ * | -- known frame identity, unknown offset
+ * fully-imprecise {frame=ARG_IMPRECISE, mask=bitmask}
+ * -- unknown frame identity; .mask is a
+ * bitmask of which frame indices might be
+ * involved
+ *
+ * At CFG merge points, arg_track_join() moves down the lattice:
+ * - same frame + same offset -> precise
+ * - same frame + different offset -> offset-imprecise
+ * - different frames -> fully-imprecise (bitmask OR)
+ *
+ * At memory access sites (LDX/STX/ST), offset-imprecise marks only
+ * the known frame's access mask as SPIS_ALL, while fully-imprecise
+ * iterates bits in the bitmask and routes each frame to its target.
+ */
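+/*
+ * Illustration (hypothetical states): joining {frame=1, off=-8} with
+ * {frame=1, off=-16} keeps the frame identity and both offsets;
+ * joining {frame=1, off=-8} with {frame=0, off=-8} degrades to
+ * {frame=ARG_IMPRECISE, mask=BIT(0)|BIT(1)}.
+ */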
+#define MAX_ARG_OFFSETS 4
+
+struct arg_track {
+ union {
+ s16 off[MAX_ARG_OFFSETS]; /* byte offsets; off_cnt says how many */
+ u16 mask; /* arg bitmask when arg == ARG_IMPRECISE */
+ };
+ s8 frame; /* absolute frame index, or enum arg_track_state */
+ s8 off_cnt; /* 0 = offset-imprecise, 1-4 = # of precise offsets */
+};
+
+enum arg_track_state {
+ ARG_NONE = -1, /* not derived from any argument */
+ ARG_UNVISITED = -2, /* not yet reached by dataflow */
+ ARG_IMPRECISE = -3, /* lost identity; .mask is arg bitmask */
+};
+
+#define OFF_IMPRECISE S16_MIN /* arg identity known but offset unknown */
+
+/* Track callee stack slots fp-8 through fp-512 (64 slots of 8 bytes each) */
+#define MAX_ARG_SPILL_SLOTS 64
+
+static bool arg_is_visited(const struct arg_track *at)
+{
+ return at->frame != ARG_UNVISITED;
+}
+
+static bool arg_is_fp(const struct arg_track *at)
+{
+ return at->frame >= 0 || at->frame == ARG_IMPRECISE;
+}
+
+/*
+ * Clear all tracked callee stack slots overlapping the byte range
+ * [off, off+sz-1] where off is a negative FP-relative offset.
+ */
+static void clear_overlapping_stack_slots(struct arg_track *at_stack, s16 off, u32 sz)
+{
+ struct arg_track none = { .frame = ARG_NONE };
+
+ if (off == OFF_IMPRECISE) {
+ for (int i = 0; i < MAX_ARG_SPILL_SLOTS; i++)
+ at_stack[i] = none;
+ return;
+ }
+ for (int i = 0; i < MAX_ARG_SPILL_SLOTS; i++) {
+ int slot_start = -((i + 1) * 8);
+ int slot_end = slot_start + 8;
+
+ if (slot_start < off + (int)sz && slot_end > off)
+ at_stack[i] = none;
+ }
+}
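+
+/*
+ * E.g. a 4-byte store at off == -10 touches bytes fp-10..fp-7 and so
+ * clears both slot 1 (fp-16..fp-9) and slot 0 (fp-8..fp-1).
+ */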
+
+static void verbose_arg_track(struct bpf_verifier_env *env, struct arg_track *at)
+{
+ int i;
+
+ switch (at->frame) {
+ case ARG_NONE: verbose(env, "_"); break;
+ case ARG_UNVISITED: verbose(env, "?"); break;
+ case ARG_IMPRECISE: verbose(env, "IMP%x", at->mask); break;
+ default:
+ /* frame >= 0: absolute frame index */
+ if (at->off_cnt == 0) {
+ verbose(env, "fp%d ?", at->frame);
+ } else {
+ for (i = 0; i < at->off_cnt; i++) {
+ if (i)
+ verbose(env, "|");
+ verbose(env, "fp%d%+d", at->frame, at->off[i]);
+ }
+ }
+ break;
+ }
+}
+
+static bool arg_track_eq(const struct arg_track *a, const struct arg_track *b)
+{
+ int i;
+
+ if (a->frame != b->frame)
+ return false;
+ if (a->frame == ARG_IMPRECISE)
+ return a->mask == b->mask;
+ if (a->frame < 0)
+ return true;
+ if (a->off_cnt != b->off_cnt)
+ return false;
+ for (i = 0; i < a->off_cnt; i++)
+ if (a->off[i] != b->off[i])
+ return false;
+ return true;
+}
+
+static struct arg_track arg_single(s8 arg, s16 off)
+{
+ struct arg_track at = {};
+
+ at.frame = arg;
+ at.off[0] = off;
+ at.off_cnt = 1;
+ return at;
+}
+
+/*
+ * Merge two sorted offset arrays, deduplicate.
+ * Returns off_cnt=0 if the result exceeds MAX_ARG_OFFSETS.
+ * Both args must have the same frame and off_cnt > 0.
+ */
+static struct arg_track arg_merge_offsets(struct arg_track a, struct arg_track b)
+{
+ struct arg_track result = { .frame = a.frame };
+ struct arg_track imp = { .frame = a.frame };
+ int i = 0, j = 0, k = 0;
+
+ while (i < a.off_cnt && j < b.off_cnt) {
+ s16 v;
+
+ if (a.off[i] <= b.off[j]) {
+ v = a.off[i++];
+ if (v == b.off[j])
+ j++;
+ } else {
+ v = b.off[j++];
+ }
+ if (k > 0 && result.off[k - 1] == v)
+ continue;
+ if (k >= MAX_ARG_OFFSETS)
+ return imp;
+ result.off[k++] = v;
+ }
+ while (i < a.off_cnt) {
+ if (k >= MAX_ARG_OFFSETS)
+ return imp;
+ result.off[k++] = a.off[i++];
+ }
+ while (j < b.off_cnt) {
+ if (k >= MAX_ARG_OFFSETS)
+ return imp;
+ result.off[k++] = b.off[j++];
+ }
+ result.off_cnt = k;
+ return result;
+}
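+
+/*
+ * E.g. merging offsets {-16,-8} with {-24,-16} yields {-24,-16,-8};
+ * a merge that would exceed MAX_ARG_OFFSETS entries collapses to
+ * off_cnt == 0 (offset-imprecise) instead.
+ */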
+
+/*
+ * Merge two arg_tracks into ARG_IMPRECISE, collecting the frame
+ * bits from both operands. Precise frame indices (frame >= 0)
+ * contribute a single bit; existing ARG_IMPRECISE values
+ * contribute their full bitmask.
+ */
+static struct arg_track arg_join_imprecise(struct arg_track a, struct arg_track b)
+{
+ u32 m = 0;
+
+ if (a.frame >= 0)
+ m |= BIT(a.frame);
+ else if (a.frame == ARG_IMPRECISE)
+ m |= a.mask;
+
+ if (b.frame >= 0)
+ m |= BIT(b.frame);
+ else if (b.frame == ARG_IMPRECISE)
+ m |= b.mask;
+
+ return (struct arg_track){ .mask = m, .frame = ARG_IMPRECISE };
+}
+
+/* Join two arg_track values at merge points */
+static struct arg_track __arg_track_join(struct arg_track a, struct arg_track b)
+{
+ if (!arg_is_visited(&b))
+ return a;
+ if (!arg_is_visited(&a))
+ return b;
+ if (a.frame == b.frame && a.frame >= 0) {
+ /* Both offset-imprecise: stay imprecise */
+ if (a.off_cnt == 0 || b.off_cnt == 0)
+ return (struct arg_track){ .frame = a.frame };
+ /* Merge offset sets; falls back to off_cnt=0 if >4 */
+ return arg_merge_offsets(a, b);
+ }
+
+ /*
+ * args are different, but one of them is known
+ * arg + none -> arg
+ * none + arg -> arg
+ *
+ * none + none -> none
+ */
+ if (a.frame == ARG_NONE && b.frame == ARG_NONE)
+ return a;
+ if (a.frame >= 0 && b.frame == ARG_NONE) {
+ /*
+ * When joining single fp-N add fake fp+0 to
+ * keep stack_use and prevent stack_def
+ */
+ if (a.off_cnt == 1)
+ return arg_merge_offsets(a, arg_single(a.frame, 0));
+ return a;
+ }
+ if (b.frame >= 0 && a.frame == ARG_NONE) {
+ if (b.off_cnt == 1)
+ return arg_merge_offsets(b, arg_single(b.frame, 0));
+ return b;
+ }
+
+ return arg_join_imprecise(a, b);
+}
+
+static bool arg_track_join(struct bpf_verifier_env *env, int idx, int target, int r,
+ struct arg_track *in, struct arg_track out)
+{
+ struct arg_track old = *in;
+ struct arg_track new_val = __arg_track_join(old, out);
+
+ if (arg_track_eq(&new_val, &old))
+ return false;
+
+ *in = new_val;
+ if (!(env->log.level & BPF_LOG_LEVEL2) || !arg_is_visited(&old))
+ return true;
+
+ verbose(env, "arg JOIN insn %d -> %d ", idx, target);
+ if (r >= 0)
+ verbose(env, "r%d: ", r);
+ else
+ verbose(env, "fp%+d: ", r * 8);
+ verbose_arg_track(env, &old);
+ verbose(env, " + ");
+ verbose_arg_track(env, &out);
+ verbose(env, " => ");
+ verbose_arg_track(env, &new_val);
+ verbose(env, "\n");
+ return true;
+}
+
+/*
+ * Compute the result when an ALU op destroys offset precision.
+ * If a single arg is identifiable, preserve it with OFF_IMPRECISE.
+ * If two different args are involved or one is already ARG_IMPRECISE,
+ * the result is fully ARG_IMPRECISE.
+ */
+static void arg_track_alu64(struct arg_track *dst, const struct arg_track *src)
+{
+ WARN_ON_ONCE(!arg_is_visited(dst));
+ WARN_ON_ONCE(!arg_is_visited(src));
+
+ if (dst->frame >= 0 && (src->frame == ARG_NONE || src->frame == dst->frame)) {
+ /*
+ * rX += rY where rY is not arg derived
+ * rX += rX
+ */
+ dst->off_cnt = 0;
+ return;
+ }
+ if (src->frame >= 0 && dst->frame == ARG_NONE) {
+ /*
+ * rX += rY where rX is not arg derived
+ * rY identity leaks into rX
+ */
+ dst->off_cnt = 0;
+ dst->frame = src->frame;
+ return;
+ }
+
+ if (dst->frame == ARG_NONE && src->frame == ARG_NONE)
+ return;
+
+ *dst = arg_join_imprecise(*dst, *src);
+}
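+
+/*
+ * E.g. (hypothetical registers): if r2 is {frame=1, off=-8} and r3 is
+ * ARG_NONE, "r2 += r3" keeps frame 1 but drops the offset (off_cnt = 0);
+ * "r2 += r4" with r4 in frame 0 degrades to ARG_IMPRECISE with
+ * mask BIT(0)|BIT(1).
+ */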
+
+static s16 arg_add(s16 off, s64 delta)
+{
+ s64 res;
+
+ if (off == OFF_IMPRECISE)
+ return OFF_IMPRECISE;
+ res = (s64)off + delta;
+ if (res < S16_MIN + 1 || res > S16_MAX)
+ return OFF_IMPRECISE;
+ return res;
+}
+
+static void arg_padd(struct arg_track *at, s64 delta)
+{
+ int i;
+
+ if (at->off_cnt == 0)
+ return;
+ for (i = 0; i < at->off_cnt; i++) {
+ s16 new_off = arg_add(at->off[i], delta);
+
+ if (new_off == OFF_IMPRECISE) {
+ at->off_cnt = 0;
+ return;
+ }
+ at->off[i] = new_off;
+ }
+}
+
+/*
+ * Convert a byte offset from FP to a callee stack slot index.
+ * Returns -1 if out of range or not 8-byte aligned.
+ * Slot 0 = fp-8, slot 1 = fp-16, ..., slot 63 = fp-512.
+ */
+static int fp_off_to_slot(s16 off)
+{
+ if (off == OFF_IMPRECISE)
+ return -1;
+ if (off >= 0 || off < -(int)(MAX_ARG_SPILL_SLOTS * 8))
+ return -1;
+ if (off % 8)
+ return -1;
+ return (-off) / 8 - 1;
+}
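+
+/*
+ * E.g. fp_off_to_slot(-8) == 0 and fp_off_to_slot(-512) == 63, while
+ * -4 (unaligned) and -520 (out of range) both return -1.
+ */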
+
+static struct arg_track fill_from_stack(struct bpf_insn *insn,
+ struct arg_track *at_out, int reg,
+ struct arg_track *at_stack_out,
+ int depth)
+{
+ struct arg_track imp = {
+ .mask = (1u << (depth + 1)) - 1,
+ .frame = ARG_IMPRECISE
+ };
+ struct arg_track result = { .frame = ARG_NONE };
+ int cnt, i;
+
+ if (reg == BPF_REG_FP) {
+ int slot = fp_off_to_slot(insn->off);
+
+ return slot >= 0 ? at_stack_out[slot] : imp;
+ }
+ cnt = at_out[reg].off_cnt;
+ if (cnt == 0)
+ return imp;
+
+ for (i = 0; i < cnt; i++) {
+ s16 fp_off = arg_add(at_out[reg].off[i], insn->off);
+ int slot = fp_off_to_slot(fp_off);
+
+ if (slot < 0)
+ return imp;
+ result = __arg_track_join(result, at_stack_out[slot]);
+ }
+ return result;
+}
+
+/*
+ * Spill @val to all possible stack slots indicated by the FP offsets in @reg.
+ * For an 8-byte store a single candidate slot gets @val exactly, while
+ * multiple candidate slots are joined with their existing values.
+ * A sub-8-byte store joins with ARG_NONE.
+ * When the exact offset is unknown, conservatively join @val into every
+ * slot in at_stack_out.
+ */
+static void spill_to_stack(struct bpf_insn *insn, struct arg_track *at_out,
+ int reg, struct arg_track *at_stack_out,
+ struct arg_track *val, u32 sz)
+{
+ struct arg_track none = { .frame = ARG_NONE };
+ struct arg_track new_val = sz == 8 ? *val : none;
+ int cnt, i;
+
+ if (reg == BPF_REG_FP) {
+ int slot = fp_off_to_slot(insn->off);
+
+ if (slot >= 0)
+ at_stack_out[slot] = new_val;
+ return;
+ }
+ cnt = at_out[reg].off_cnt;
+ if (cnt == 0) {
+ for (int slot = 0; slot < MAX_ARG_SPILL_SLOTS; slot++)
+ at_stack_out[slot] = __arg_track_join(at_stack_out[slot], new_val);
+ return;
+ }
+ for (i = 0; i < cnt; i++) {
+ s16 fp_off = arg_add(at_out[reg].off[i], insn->off);
+ int slot = fp_off_to_slot(fp_off);
+
+ if (slot < 0)
+ continue;
+ if (cnt == 1)
+ at_stack_out[slot] = new_val;
+ else
+ at_stack_out[slot] = __arg_track_join(at_stack_out[slot], new_val);
+ }
+}
+
+/*
+ * Clear stack slots overlapping all possible FP offsets in @reg.
+ */
+static void clear_stack_for_all_offs(struct bpf_insn *insn,
+ struct arg_track *at_out, int reg,
+ struct arg_track *at_stack_out, u32 sz)
+{
+ int cnt, i;
+
+ if (reg == BPF_REG_FP) {
+ clear_overlapping_stack_slots(at_stack_out, insn->off, sz);
+ return;
+ }
+ cnt = at_out[reg].off_cnt;
+ if (cnt == 0) {
+ clear_overlapping_stack_slots(at_stack_out, OFF_IMPRECISE, sz);
+ return;
+ }
+ for (i = 0; i < cnt; i++) {
+ s16 fp_off = arg_add(at_out[reg].off[i], insn->off);
+
+ clear_overlapping_stack_slots(at_stack_out, fp_off, sz);
+ }
+}
+
+static void arg_track_log(struct bpf_verifier_env *env, struct bpf_insn *insn, int idx,
+ struct arg_track *at_in, struct arg_track *at_stack_in,
+ struct arg_track *at_out, struct arg_track *at_stack_out)
+{
+ bool printed = false;
+ int i;
+
+ if (!(env->log.level & BPF_LOG_LEVEL2))
+ return;
+ for (i = 0; i < MAX_BPF_REG; i++) {
+ if (arg_track_eq(&at_out[i], &at_in[i]))
+ continue;
+ if (!printed) {
+ verbose(env, "%3d: ", idx);
+ bpf_verbose_insn(env, insn);
+ bpf_vlog_reset(&env->log, env->log.end_pos - 1);
+ printed = true;
+ }
+ verbose(env, "\tr%d: ", i); verbose_arg_track(env, &at_in[i]);
+ verbose(env, " -> "); verbose_arg_track(env, &at_out[i]);
+ }
+ for (i = 0; i < MAX_ARG_SPILL_SLOTS; i++) {
+ if (arg_track_eq(&at_stack_out[i], &at_stack_in[i]))
+ continue;
+ if (!printed) {
+ verbose(env, "%3d: ", idx);
+ bpf_verbose_insn(env, insn);
+ bpf_vlog_reset(&env->log, env->log.end_pos - 1);
+ printed = true;
+ }
+ verbose(env, "\tfp%+d: ", -(i + 1) * 8); verbose_arg_track(env, &at_stack_in[i]);
+ verbose(env, " -> "); verbose_arg_track(env, &at_stack_out[i]);
+ }
+ if (printed)
+ verbose(env, "\n");
+}
+
+/*
+ * Pure dataflow transfer function for arg_track state.
+ * Updates at_out[] based on how the instruction modifies registers.
+ * Tracks spill/fill, but not other memory accesses.
+ */
+static void arg_track_xfer(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ int insn_idx,
+ struct arg_track *at_out, struct arg_track *at_stack_out,
+ struct func_instance *instance,
+ u32 *callsites)
+{
+ int depth = instance->depth;
+ u8 class = BPF_CLASS(insn->code);
+ u8 code = BPF_OP(insn->code);
+ struct arg_track *dst = &at_out[insn->dst_reg];
+ struct arg_track *src = &at_out[insn->src_reg];
+ struct arg_track none = { .frame = ARG_NONE };
+ int r;
+
+ if (class == BPF_ALU64 && BPF_SRC(insn->code) == BPF_K) {
+ if (code == BPF_MOV) {
+ *dst = none;
+ } else if (dst->frame >= 0) {
+ if (code == BPF_ADD)
+ arg_padd(dst, insn->imm);
+ else if (code == BPF_SUB)
+ arg_padd(dst, -(s64)insn->imm);
+ else
+ /* Any other 64-bit alu on the pointer makes it imprecise */
+ dst->off_cnt = 0;
+ } /* else if dst->frame is imprecise it stays so */
+ } else if (class == BPF_ALU64 && BPF_SRC(insn->code) == BPF_X) {
+ if (code == BPF_MOV) {
+ if (insn->off == 0) {
+ *dst = *src;
+ } else {
+ /* addr_space_cast destroys a pointer */
+ *dst = none;
+ }
+ } else {
+ arg_track_alu64(dst, src);
+ }
+ } else if (class == BPF_ALU) {
+ /*
+		 * 32-bit ALU destroys the pointer.
+		 * If src was a pointer, it cannot leak into dst.
+ */
+ *dst = none;
+ } else if (class == BPF_JMP && code == BPF_CALL) {
+ /*
+		 * at_stack_out[slot] is not cleared by helper and subprog calls.
+		 * A later fill_from_stack() may therefore return a stale spill: an
+		 * FP-derived arg_track for the value that was originally spilled
+		 * there, even if the call overwrote the slot. The loaded register
+		 * then carries a phantom FP-derived identity, and every place it
+		 * is subsequently used (helper argument, another store, etc.)
+		 * sets stack liveness bits for accesses that never happen.
+		 * The net effect is over-reporting of stack liveness: the
+		 * verifier preserves more state than necessary across calls,
+		 * which is safe because it is conservative.
+		 *
+		 * Helpers can scratch stack slots, but they cannot forge a valid
+		 * pointer into one. Subprogs are allowed to write into parent
+		 * slots, but they cannot write _any_ FP-derived pointer into
+		 * them (neither their own nor the parent's FP).
+ */
+ for (r = BPF_REG_0; r <= BPF_REG_5; r++)
+ at_out[r] = none;
+ } else if (class == BPF_LDX) {
+ u32 sz = bpf_size_to_bytes(BPF_SIZE(insn->code));
+ bool src_is_local_fp = insn->src_reg == BPF_REG_FP || src->frame == depth ||
+ (src->frame == ARG_IMPRECISE && (src->mask & BIT(depth)));
+
+ /*
+ * Reload from callee stack: if src is current-frame FP-derived
+ * and the load is an 8-byte BPF_MEM, try to restore the spill
+ * identity. For imprecise sources fill_from_stack() returns
+ * ARG_IMPRECISE (off_cnt == 0).
+ */
+ if (src_is_local_fp && BPF_MODE(insn->code) == BPF_MEM && sz == 8) {
+ *dst = fill_from_stack(insn, at_out, insn->src_reg, at_stack_out, depth);
+ } else if (src->frame >= 0 && src->frame < depth &&
+ BPF_MODE(insn->code) == BPF_MEM && sz == 8) {
+ struct arg_track *parent_stack =
+ env->callsite_at_stack[callsites[src->frame]];
+
+ *dst = fill_from_stack(insn, at_out, insn->src_reg,
+ parent_stack, src->frame);
+ } else if (src->frame == ARG_IMPRECISE &&
+ !(src->mask & BIT(depth)) && src->mask &&
+ BPF_MODE(insn->code) == BPF_MEM && sz == 8) {
+ /*
+ * Imprecise src with only parent-frame bits:
+ * conservative fallback.
+ */
+ *dst = *src;
+ } else {
+ *dst = none;
+ }
+ } else if (class == BPF_LD && BPF_MODE(insn->code) == BPF_IMM) {
+ *dst = none;
+ } else if (class == BPF_STX) {
+ u32 sz = bpf_size_to_bytes(BPF_SIZE(insn->code));
+ bool dst_is_local_fp;
+
+ /* Track spills to current-frame FP-derived callee stack */
+ dst_is_local_fp = insn->dst_reg == BPF_REG_FP || dst->frame == depth;
+ if (dst_is_local_fp && BPF_MODE(insn->code) == BPF_MEM)
+ spill_to_stack(insn, at_out, insn->dst_reg,
+ at_stack_out, src, sz);
+
+ if (BPF_MODE(insn->code) == BPF_ATOMIC) {
+ if (dst_is_local_fp && insn->imm != BPF_LOAD_ACQ)
+ clear_stack_for_all_offs(insn, at_out, insn->dst_reg,
+ at_stack_out, sz);
+
+ if (insn->imm == BPF_CMPXCHG)
+ at_out[BPF_REG_0] = none;
+ else if (insn->imm == BPF_LOAD_ACQ)
+ *dst = none;
+ else if (insn->imm & BPF_FETCH)
+ *src = none;
+ }
+ } else if (class == BPF_ST && BPF_MODE(insn->code) == BPF_MEM) {
+ u32 sz = bpf_size_to_bytes(BPF_SIZE(insn->code));
+ bool dst_is_local_fp = insn->dst_reg == BPF_REG_FP || dst->frame == depth;
+
+ /* BPF_ST to FP-derived dst: clear overlapping stack slots */
+ if (dst_is_local_fp)
+ clear_stack_for_all_offs(insn, at_out, insn->dst_reg,
+ at_stack_out, sz);
+ }
+}
+
+/*
+ * Record access_bytes from helper/kfunc or load/store insn.
+ * access_bytes > 0: stack read
+ * access_bytes < 0: stack write
+ * access_bytes == S64_MIN: unknown size; conservatively mark every slot
+ *                          from fp_off up to fp+0 as read
+ * access_bytes == 0: no access
+ */
+static int record_stack_access_off(struct func_instance *instance, s64 fp_off,
+ s64 access_bytes, u32 frame, u32 insn_idx)
+{
+ s32 slot_hi, slot_lo;
+ spis_t mask;
+
+ if (fp_off >= 0)
+ /*
+ * out of bounds stack access doesn't contribute
+ * into actual stack liveness. It will be rejected
+ * by the main verifier pass later.
+ */
+ return 0;
+ if (access_bytes == S64_MIN) {
+ /* helper/kfunc read unknown amount of bytes from fp_off until fp+0 */
+ slot_hi = (-fp_off - 1) / STACK_SLOT_SZ;
+ mask = SPIS_ZERO;
+ spis_or_range(&mask, 0, slot_hi);
+ return mark_stack_read(instance, frame, insn_idx, mask);
+ }
+ if (access_bytes > 0) {
+ /* Mark any touched slot as use */
+ slot_hi = (-fp_off - 1) / STACK_SLOT_SZ;
+ slot_lo = max_t(s32, (-fp_off - access_bytes) / STACK_SLOT_SZ, 0);
+ mask = SPIS_ZERO;
+ spis_or_range(&mask, slot_lo, slot_hi);
+ return mark_stack_read(instance, frame, insn_idx, mask);
+ } else if (access_bytes < 0) {
+ /* Mark only fully covered slots as def */
+ access_bytes = -access_bytes;
+ slot_hi = (-fp_off) / STACK_SLOT_SZ - 1;
+ slot_lo = max_t(s32, (-fp_off - access_bytes + STACK_SLOT_SZ - 1) / STACK_SLOT_SZ, 0);
+ if (slot_lo <= slot_hi) {
+ mask = SPIS_ZERO;
+ spis_or_range(&mask, slot_lo, slot_hi);
+ return mark_stack_write(instance, frame, insn_idx, mask);
+ }
+ }
+ return 0;
+}
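+
+/*
+ * Worked example, assuming STACK_SLOT_SZ == 4: an 8-byte read at
+ * fp_off == -16 marks half-slots 2..3 (bytes fp-16..fp-9) as use;
+ * an 8-byte write at fp_off == -12 fully covers only half-slots 1..2,
+ * so just those are marked as def.
+ */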
+
+/*
+ * 'arg' is FP-derived argument to helper/kfunc or load/store that
+ * reads (positive) or writes (negative) 'access_bytes' into 'use' or 'def'.
+ */
+static int record_stack_access(struct func_instance *instance,
+ const struct arg_track *arg,
+ s64 access_bytes, u32 frame, u32 insn_idx)
+{
+ int i, err;
+
+ if (access_bytes == 0)
+ return 0;
+ if (arg->off_cnt == 0) {
+ if (access_bytes > 0 || access_bytes == S64_MIN)
+ return mark_stack_read(instance, frame, insn_idx, SPIS_ALL);
+ return 0;
+ }
+ if (access_bytes != S64_MIN && access_bytes < 0 && arg->off_cnt != 1)
+ /* multi-offset write cannot set stack_def */
+ return 0;
+
+ for (i = 0; i < arg->off_cnt; i++) {
+ err = record_stack_access_off(instance, arg->off[i], access_bytes, frame, insn_idx);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+/*
+ * When a pointer is ARG_IMPRECISE, conservatively mark every frame in
+ * the bitmask as fully used.
+ */
+static int record_imprecise(struct func_instance *instance, u32 mask, u32 insn_idx)
+{
+ int depth = instance->depth;
+ int f, err;
+
+ for (f = 0; mask; f++, mask >>= 1) {
+ if (!(mask & 1))
+ continue;
+ if (f <= depth) {
+ err = mark_stack_read(instance, f, insn_idx, SPIS_ALL);
+ if (err)
+ return err;
+ }
+ }
+ return 0;
+}
+
+/* Record load/store access for a given 'at' state of 'insn'. */
+static int record_load_store_access(struct bpf_verifier_env *env,
+ struct func_instance *instance,
+ struct arg_track *at, int insn_idx)
+{
+ struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
+ int depth = instance->depth;
+ s32 sz = bpf_size_to_bytes(BPF_SIZE(insn->code));
+ u8 class = BPF_CLASS(insn->code);
+ struct arg_track resolved, *ptr;
+ int oi;
+
+ switch (class) {
+ case BPF_LDX:
+ ptr = &at[insn->src_reg];
+ break;
+ case BPF_STX:
+ if (BPF_MODE(insn->code) == BPF_ATOMIC) {
+ if (insn->imm == BPF_STORE_REL)
+ sz = -sz;
+ if (insn->imm == BPF_LOAD_ACQ)
+ ptr = &at[insn->src_reg];
+ else
+ ptr = &at[insn->dst_reg];
+ } else {
+ ptr = &at[insn->dst_reg];
+ sz = -sz;
+ }
+ break;
+ case BPF_ST:
+ ptr = &at[insn->dst_reg];
+ sz = -sz;
+ break;
+ default:
+ return 0;
+ }
+
+ /* Resolve offsets: fold insn->off into arg_track */
+ if (ptr->off_cnt > 0) {
+ resolved.off_cnt = ptr->off_cnt;
+ resolved.frame = ptr->frame;
+ for (oi = 0; oi < ptr->off_cnt; oi++) {
+ resolved.off[oi] = arg_add(ptr->off[oi], insn->off);
+ if (resolved.off[oi] == OFF_IMPRECISE) {
+ resolved.off_cnt = 0;
+ break;
+ }
+ }
+ ptr = &resolved;
+ }
+
+ if (ptr->frame >= 0 && ptr->frame <= depth)
+ return record_stack_access(instance, ptr, sz, ptr->frame, insn_idx);
+ if (ptr->frame == ARG_IMPRECISE)
+ return record_imprecise(instance, ptr->mask, insn_idx);
+ /* ARG_NONE: not derived from any frame pointer, skip */
+ return 0;
+}
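+
+/*
+ * E.g. for "r0 = *(u32 *)(r6 + 4)" with r6 tracked as {frame=1, off=-16},
+ * the resolved pointer is fp1-12 and a 4-byte read is recorded for
+ * frame 1 (half-slot 2, assuming STACK_SLOT_SZ == 4).
+ */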
+
+/* Record stack access for a given 'at' state of helper/kfunc 'insn' */
+static int record_call_access(struct bpf_verifier_env *env,
+ struct func_instance *instance,
+ struct arg_track *at,
+ int insn_idx)
+{
+ struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
+ int depth = instance->depth;
+ struct bpf_call_summary cs;
+ int r, err = 0, num_params = 5;
+
+ if (bpf_pseudo_call(insn))
+ return 0;
+
+ if (bpf_get_call_summary(env, insn, &cs))
+ num_params = cs.num_params;
+
+ for (r = BPF_REG_1; r < BPF_REG_1 + num_params; r++) {
+ int frame = at[r].frame;
+ s64 bytes;
+
+ if (!arg_is_fp(&at[r]))
+ continue;
+
+ if (bpf_helper_call(insn)) {
+ bytes = bpf_helper_stack_access_bytes(env, insn, r - 1, insn_idx);
+ } else if (bpf_pseudo_kfunc_call(insn)) {
+ bytes = bpf_kfunc_stack_access_bytes(env, insn, r - 1, insn_idx);
+ } else {
+ for (int f = 0; f <= depth; f++) {
+ err = mark_stack_read(instance, f, insn_idx, SPIS_ALL);
+ if (err)
+ return err;
+ }
+ return 0;
+ }
+ if (bytes == 0)
+ continue;
+
+ if (frame >= 0 && frame <= depth)
+ err = record_stack_access(instance, &at[r], bytes, frame, insn_idx);
+ else if (frame == ARG_IMPRECISE)
+ err = record_imprecise(instance, at[r].mask, insn_idx);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+/*
+ * For a calls_callback helper, find the callback subprog and determine
+ * which caller register maps to which callback register for FP passthrough.
+ */
+static int find_callback_subprog(struct bpf_verifier_env *env,
+ struct bpf_insn *insn, int insn_idx,
+ int *caller_reg, int *callee_reg)
+{
+ struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+ int cb_reg = -1;
+
+ *caller_reg = -1;
+ *callee_reg = -1;
+
+ if (!bpf_helper_call(insn))
+ return -1;
+ switch (insn->imm) {
+ case BPF_FUNC_loop:
+ /* bpf_loop(nr, cb, ctx, flags): cb=R2, R3->cb R2 */
+ cb_reg = BPF_REG_2;
+ *caller_reg = BPF_REG_3;
+ *callee_reg = BPF_REG_2;
+ break;
+ case BPF_FUNC_for_each_map_elem:
+ /* for_each_map_elem(map, cb, ctx, flags): cb=R2, R3->cb R4 */
+ cb_reg = BPF_REG_2;
+ *caller_reg = BPF_REG_3;
+ *callee_reg = BPF_REG_4;
+ break;
+ case BPF_FUNC_find_vma:
+ /* find_vma(task, addr, cb, ctx, flags): cb=R3, R4->cb R3 */
+ cb_reg = BPF_REG_3;
+ *caller_reg = BPF_REG_4;
+ *callee_reg = BPF_REG_3;
+ break;
+ case BPF_FUNC_user_ringbuf_drain:
+ /* user_ringbuf_drain(map, cb, ctx, flags): cb=R2, R3->cb R2 */
+ cb_reg = BPF_REG_2;
+ *caller_reg = BPF_REG_3;
+ *callee_reg = BPF_REG_2;
+ break;
+ default:
+ return -1;
+ }
+
+ if (!(aux->const_reg_subprog_mask & BIT(cb_reg)))
+ return -2;
+
+ return aux->const_reg_vals[cb_reg];
+}
+
+/* Per-subprog intermediate state kept alive across analysis phases */
+struct subprog_at_info {
+ struct arg_track (*at_in)[MAX_BPF_REG];
+ int len;
+};
+
+static void print_subprog_arg_access(struct bpf_verifier_env *env,
+ int subprog,
+ struct subprog_at_info *info,
+ struct arg_track (*at_stack_in)[MAX_ARG_SPILL_SLOTS])
+{
+ struct bpf_insn *insns = env->prog->insnsi;
+ int start = env->subprog_info[subprog].start;
+ int len = info->len;
+ int i, r;
+
+ if (!(env->log.level & BPF_LOG_LEVEL2))
+ return;
+
+ verbose(env, "%s:\n", fmt_subprog(env, subprog));
+ for (i = 0; i < len; i++) {
+ int idx = start + i;
+ bool has_extra = false;
+ u8 cls = BPF_CLASS(insns[idx].code);
+ bool is_ldx_stx_call = cls == BPF_LDX || cls == BPF_STX ||
+ insns[idx].code == (BPF_JMP | BPF_CALL);
+
+ verbose(env, "%3d: ", idx);
+ bpf_verbose_insn(env, &insns[idx]);
+
+ /* Collect what needs printing */
+ if (is_ldx_stx_call &&
+ arg_is_visited(&info->at_in[i][0])) {
+ for (r = 0; r < MAX_BPF_REG - 1; r++)
+ if (arg_is_fp(&info->at_in[i][r]))
+ has_extra = true;
+ }
+ if (is_ldx_stx_call) {
+ for (r = 0; r < MAX_ARG_SPILL_SLOTS; r++)
+ if (arg_is_fp(&at_stack_in[i][r]))
+ has_extra = true;
+ }
+
+ if (!has_extra) {
+ if (bpf_is_ldimm64(&insns[idx]))
+ i++;
+ continue;
+ }
+
+ bpf_vlog_reset(&env->log, env->log.end_pos - 1);
+ verbose(env, " //");
+
+ if (is_ldx_stx_call && info->at_in &&
+ arg_is_visited(&info->at_in[i][0])) {
+ for (r = 0; r < MAX_BPF_REG - 1; r++) {
+ if (!arg_is_fp(&info->at_in[i][r]))
+ continue;
+ verbose(env, " r%d=", r);
+ verbose_arg_track(env, &info->at_in[i][r]);
+ }
+ }
+
+ if (is_ldx_stx_call) {
+ for (r = 0; r < MAX_ARG_SPILL_SLOTS; r++) {
+ if (!arg_is_fp(&at_stack_in[i][r]))
+ continue;
+ verbose(env, " fp%+d=", -(r + 1) * 8);
+ verbose_arg_track(env, &at_stack_in[i][r]);
+ }
+ }
+
+ verbose(env, "\n");
+ if (bpf_is_ldimm64(&insns[idx]))
+ i++;
+ }
+}
+
+/*
+ * Compute arg tracking dataflow for a single subprog.
+ * Runs forward fixed-point with arg_track_xfer(), then records
+ * memory accesses in a single linear pass over converged state.
+ *
+ * @callee_entry: pre-populated entry state for R1-R5
+ * NULL for main (subprog 0).
+ * @info: stores at_in, len for debug printing.
+ */
+static int compute_subprog_args(struct bpf_verifier_env *env,
+ struct subprog_at_info *info,
+ struct arg_track *callee_entry,
+ struct func_instance *instance,
+ u32 *callsites)
+{
+ int subprog = instance->subprog;
+ struct bpf_insn *insns = env->prog->insnsi;
+ int depth = instance->depth;
+ int start = env->subprog_info[subprog].start;
+ int po_start = env->subprog_info[subprog].postorder_start;
+ int end = env->subprog_info[subprog + 1].start;
+ int po_end = env->subprog_info[subprog + 1].postorder_start;
+ int len = end - start;
+ struct arg_track (*at_in)[MAX_BPF_REG] = NULL;
+ struct arg_track at_out[MAX_BPF_REG];
+ struct arg_track (*at_stack_in)[MAX_ARG_SPILL_SLOTS] = NULL;
+ struct arg_track *at_stack_out = NULL;
+ struct arg_track unvisited = { .frame = ARG_UNVISITED };
+ struct arg_track none = { .frame = ARG_NONE };
+ bool changed;
+ int i, p, r, err = -ENOMEM;
+
+ at_in = kvmalloc_objs(*at_in, len, GFP_KERNEL_ACCOUNT);
+ if (!at_in)
+ goto err_free;
+
+ at_stack_in = kvmalloc_objs(*at_stack_in, len, GFP_KERNEL_ACCOUNT);
+ if (!at_stack_in)
+ goto err_free;
+
+ at_stack_out = kvmalloc_objs(*at_stack_out, MAX_ARG_SPILL_SLOTS, GFP_KERNEL_ACCOUNT);
+ if (!at_stack_out)
+ goto err_free;
+
+ for (i = 0; i < len; i++) {
+ for (r = 0; r < MAX_BPF_REG; r++)
+ at_in[i][r] = unvisited;
+ for (r = 0; r < MAX_ARG_SPILL_SLOTS; r++)
+ at_stack_in[i][r] = unvisited;
+ }
+
+ for (r = 0; r < MAX_BPF_REG; r++)
+ at_in[0][r] = none;
+
+ /* Entry: R10 is always precisely the current frame's FP */
+ at_in[0][BPF_REG_FP] = arg_single(depth, 0);
+
+ /* R1-R5: from caller or ARG_NONE for main */
+ if (callee_entry) {
+ for (r = BPF_REG_1; r <= BPF_REG_5; r++)
+ at_in[0][r] = callee_entry[r];
+ }
+
+ /* Entry: all stack slots are ARG_NONE */
+ for (r = 0; r < MAX_ARG_SPILL_SLOTS; r++)
+ at_stack_in[0][r] = none;
+
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "subprog#%d: analyzing (depth %d)...\n", subprog, depth);
+
+ /* Forward fixed-point iteration in reverse post order */
+redo:
+ changed = false;
+ for (p = po_end - 1; p >= po_start; p--) {
+ int idx = env->cfg.insn_postorder[p];
+ int i = idx - start;
+ struct bpf_insn *insn = &insns[idx];
+ struct bpf_iarray *succ;
+
+ if (!arg_is_visited(&at_in[i][0]) && !arg_is_visited(&at_in[i][1]))
+ continue;
+
+ memcpy(at_out, at_in[i], sizeof(at_out));
+ memcpy(at_stack_out, at_stack_in[i], MAX_ARG_SPILL_SLOTS * sizeof(*at_stack_out));
+
+ arg_track_xfer(env, insn, idx, at_out, at_stack_out, instance, callsites);
+ arg_track_log(env, insn, idx, at_in[i], at_stack_in[i], at_out, at_stack_out);
+
+ /* Propagate to successors within this subprogram */
+ succ = bpf_insn_successors(env, idx);
+ for (int s = 0; s < succ->cnt; s++) {
+ int target = succ->items[s];
+ int ti;
+
+ /* Filter: stay within the subprogram's range */
+ if (target < start || target >= end)
+ continue;
+ ti = target - start;
+
+ for (r = 0; r < MAX_BPF_REG; r++)
+ changed |= arg_track_join(env, idx, target, r,
+ &at_in[ti][r], at_out[r]);
+
+ for (r = 0; r < MAX_ARG_SPILL_SLOTS; r++)
+ changed |= arg_track_join(env, idx, target, -r - 1,
+ &at_stack_in[ti][r], at_stack_out[r]);
+ }
+ }
+ if (changed)
+ goto redo;
+
+ /* Record memory accesses using converged at_in (RPO skips dead code) */
+ for (p = po_end - 1; p >= po_start; p--) {
+ int idx = env->cfg.insn_postorder[p];
+ int i = idx - start;
+ struct bpf_insn *insn = &insns[idx];
+
+ err = record_load_store_access(env, instance, at_in[i], idx);
+ if (err)
+ goto err_free;
+
+ if (insn->code == (BPF_JMP | BPF_CALL)) {
+ err = record_call_access(env, instance, at_in[i], idx);
+ if (err)
+ goto err_free;
+ }
+
+ if (bpf_pseudo_call(insn) || bpf_calls_callback(env, idx)) {
+ kvfree(env->callsite_at_stack[idx]);
+ env->callsite_at_stack[idx] =
+ kvmalloc_objs(*env->callsite_at_stack[idx],
+ MAX_ARG_SPILL_SLOTS, GFP_KERNEL_ACCOUNT);
+ if (!env->callsite_at_stack[idx]) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+ memcpy(env->callsite_at_stack[idx],
+ at_stack_in[i], sizeof(struct arg_track) * MAX_ARG_SPILL_SLOTS);
+ }
+ }
+
+ info->at_in = at_in;
+ at_in = NULL;
+ info->len = len;
+ print_subprog_arg_access(env, subprog, info, at_stack_in);
+ err = 0;
+
+err_free:
+ kvfree(at_stack_out);
+ kvfree(at_stack_in);
+ kvfree(at_in);
+ return err;
+}
+
+/* Return true if any of R1-R5 may be derived from a frame pointer. */
+static bool has_fp_args(struct arg_track *args)
+{
+ for (int r = BPF_REG_1; r <= BPF_REG_5; r++)
+ if (args[r].frame != ARG_NONE)
+ return true;
+ return false;
+}
+
+/*
+ * Merge a freshly analyzed instance into the original.
+ * may_read: union (any pass might read the slot).
+ * must_write: intersection (only slots written on ALL passes are guaranteed).
+ * live_before is recomputed by a subsequent update_instance() on @dst.
+ */
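+/*
+ * E.g. (hypothetical passes): if an earlier pass set must_write for
+ * spi 0 at some insn but a later pass never reached that frame, the
+ * merged must_write drops to zero there, while may_read keeps the
+ * union of both passes.
+ */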
+static void merge_instances(struct func_instance *dst, struct func_instance *src)
+{
+ int f, i;
+
+ for (f = 0; f <= dst->depth; f++) {
+ if (!src->frames[f]) {
+			/* This pass didn't touch frame f: must_write intersects with empty. */
+ if (dst->frames[f])
+ for (i = 0; i < dst->insn_cnt; i++)
+ dst->frames[f][i].must_write = SPIS_ZERO;
+ continue;
+ }
+ if (!dst->frames[f]) {
+			/* Previous pass didn't touch frame f: take src, zero must_write. */
+ dst->frames[f] = src->frames[f];
+ src->frames[f] = NULL;
+ for (i = 0; i < dst->insn_cnt; i++)
+ dst->frames[f][i].must_write = SPIS_ZERO;
+ continue;
+ }
+ for (i = 0; i < dst->insn_cnt; i++) {
+ dst->frames[f][i].may_read =
+ spis_or(dst->frames[f][i].may_read,
+ src->frames[f][i].may_read);
+ dst->frames[f][i].must_write =
+ spis_and(dst->frames[f][i].must_write,
+ src->frames[f][i].must_write);
+ }
+ }
+}
+
+static struct func_instance *fresh_instance(struct func_instance *src)
+{
+ struct func_instance *f;
+
+ f = kvzalloc_obj(*f, GFP_KERNEL_ACCOUNT);
+ if (!f)
+ return ERR_PTR(-ENOMEM);
+ f->callsite = src->callsite;
+ f->depth = src->depth;
+ f->subprog = src->subprog;
+ f->subprog_start = src->subprog_start;
+ f->insn_cnt = src->insn_cnt;
+ return f;
+}
+
+static void free_instance(struct func_instance *instance)
+{
+ int i;
+
+ for (i = 0; i <= instance->depth; i++)
+ kvfree(instance->frames[i]);
+ kvfree(instance);
+}
+
+/*
+ * Recursively analyze a subprog with specific 'entry_args'.
+ * Each callee is analyzed with the exact args from its call site.
+ *
+ * Args are recomputed for each call because the dataflow result at_in[]
+ * depends on the entry args and frame depth. Consider A->C->D and B->C->D:
+ * the callsites in A and B pass different args into C, so C is recomputed
+ * for each; then within C the same callsite passes different args into D.
+ */
+static int analyze_subprog(struct bpf_verifier_env *env,
+ struct arg_track *entry_args,
+ struct subprog_at_info *info,
+ struct func_instance *instance,
+ u32 *callsites)
+{
+ int subprog = instance->subprog;
+ int depth = instance->depth;
+ struct bpf_insn *insns = env->prog->insnsi;
+ int start = env->subprog_info[subprog].start;
+ int po_start = env->subprog_info[subprog].postorder_start;
+ int po_end = env->subprog_info[subprog + 1].postorder_start;
+ struct func_instance *prev_instance = NULL;
+ int j, err;
+
+ if (++env->liveness->subprog_calls > 10000) {
+ verbose(env, "liveness analysis exceeded complexity limit (%d calls)\n",
+ env->liveness->subprog_calls);
+ return -E2BIG;
+ }
+
+ if (need_resched())
+ cond_resched();
+
+ /*
+ * When an instance is reused (must_write_initialized == true),
+ * record into a fresh instance and merge afterward. This avoids
+ * stale must_write marks for instructions not reached in this pass.
+ */
+ if (instance->must_write_initialized) {
+ struct func_instance *fresh = fresh_instance(instance);
+
+ if (IS_ERR(fresh))
+ return PTR_ERR(fresh);
+ prev_instance = instance;
+ instance = fresh;
+ }
+
+ /* Free prior analysis if this subprog was already visited */
+ kvfree(info[subprog].at_in);
+ info[subprog].at_in = NULL;
+
+ err = compute_subprog_args(env, &info[subprog], entry_args, instance, callsites);
+ if (err)
+ goto out_free;
+
+ /* For each reachable call site in the subprog, recurse into callees */
+ for (int p = po_start; p < po_end; p++) {
+ int idx = env->cfg.insn_postorder[p];
+ struct arg_track callee_args[BPF_REG_5 + 1];
+ struct arg_track none = { .frame = ARG_NONE };
+ struct bpf_insn *insn = &insns[idx];
+ struct func_instance *callee_instance;
+ int callee, target;
+ int caller_reg, cb_callee_reg;
+
+ j = idx - start; /* relative index within this subprog */
+
+ if (bpf_pseudo_call(insn)) {
+ target = idx + insn->imm + 1;
+ callee = bpf_find_subprog(env, target);
+ if (callee < 0)
+ continue;
+
+ /* Build entry args: R1-R5 from at_in at call site */
+ for (int r = BPF_REG_1; r <= BPF_REG_5; r++)
+ callee_args[r] = info[subprog].at_in[j][r];
+ } else if (bpf_calls_callback(env, idx)) {
+ callee = find_callback_subprog(env, insn, idx, &caller_reg, &cb_callee_reg);
+ if (callee == -2) {
+ /*
+				 * The same bpf_loop() callsite may call two different
+				 * callbacks and pass a stack pointer to them, so
+				 * conservatively mark all frames as read.
+ */
+ if (info[subprog].at_in[j][caller_reg].frame == ARG_NONE)
+ continue;
+ for (int f = 0; f <= depth; f++) {
+ err = mark_stack_read(instance, f, idx, SPIS_ALL);
+ if (err)
+ goto out_free;
+ }
+ continue;
+ }
+ if (callee < 0)
+ continue;
+
+ for (int r = BPF_REG_1; r <= BPF_REG_5; r++)
+ callee_args[r] = none;
+ callee_args[cb_callee_reg] = info[subprog].at_in[j][caller_reg];
+ } else {
+ continue;
+ }
+
+ if (!has_fp_args(callee_args))
+ continue;
+
+ if (depth == MAX_CALL_FRAMES - 1) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ callee_instance = call_instance(env, instance, idx, callee);
+ if (IS_ERR(callee_instance)) {
+ err = PTR_ERR(callee_instance);
+ goto out_free;
+ }
+ callsites[depth] = idx;
+ err = analyze_subprog(env, callee_args, info, callee_instance, callsites);
+ if (err)
+ goto out_free;
+
+ /* Pull callee's entry liveness back to caller's callsite */
+ {
+ u32 callee_start = callee_instance->subprog_start;
+ struct per_frame_masks *entry;
+
+ for (int f = 0; f < callee_instance->depth; f++) {
+ entry = get_frame_masks(callee_instance, f, callee_start);
+ if (!entry)
+ continue;
+ err = mark_stack_read(instance, f, idx, entry->live_before);
+ if (err)
+ goto out_free;
+ }
+ }
+ }
+
+ if (prev_instance) {
+ merge_instances(prev_instance, instance);
+ free_instance(instance);
+ instance = prev_instance;
+ }
+ update_instance(env, instance);
+ return 0;
+
+out_free:
+ if (prev_instance)
+ free_instance(instance);
+ return err;
+}
+
+int bpf_compute_subprog_arg_access(struct bpf_verifier_env *env)
+{
+ u32 callsites[MAX_CALL_FRAMES] = {};
+ int insn_cnt = env->prog->len;
+ struct func_instance *instance;
+ struct subprog_at_info *info;
+ int k, err = 0;
+
+ info = kvzalloc_objs(*info, env->subprog_cnt, GFP_KERNEL_ACCOUNT);
+ if (!info)
+ return -ENOMEM;
+
+ env->callsite_at_stack = kvzalloc_objs(*env->callsite_at_stack, insn_cnt,
+ GFP_KERNEL_ACCOUNT);
+ if (!env->callsite_at_stack) {
+ kvfree(info);
+ return -ENOMEM;
+ }
+
+ instance = call_instance(env, NULL, 0, 0);
+ if (IS_ERR(instance)) {
+ err = PTR_ERR(instance);
+ goto out;
+ }
+ err = analyze_subprog(env, NULL, info, instance, callsites);
+ if (err)
+ goto out;
+
+ /*
+ * Subprogs and callbacks that don't receive FP-derived arguments
+ * cannot access ancestor stack frames, so they were skipped during
+ * the recursive walk above. Async callbacks (timer, workqueue) are
+ * also not reachable from the main program's call graph. Analyze
+ * all unvisited subprogs as independent roots at depth 0.
+ *
+ * Use reverse topological order (callers before callees) so that
+ * each subprog is analyzed before its callees, allowing the
+ * recursive walk inside analyze_subprog() to naturally
+ * reach nested callees that also lack FP-derived args.
+ */
+ for (k = env->subprog_cnt - 1; k >= 0; k--) {
+ int sub = env->subprog_topo_order[k];
+
+ if (info[sub].at_in && !bpf_subprog_is_global(env, sub))
+ continue;
+ instance = call_instance(env, NULL, 0, sub);
+ if (IS_ERR(instance)) {
+ err = PTR_ERR(instance);
+ goto out;
+ }
+ err = analyze_subprog(env, NULL, info, instance, callsites);
+ if (err)
+ goto out;
+ }
+
+ if (env->log.level & BPF_LOG_LEVEL2)
+ err = print_instances(env);
+
+out:
+ for (k = 0; k < insn_cnt; k++)
+ kvfree(env->callsite_at_stack[k]);
+ kvfree(env->callsite_at_stack);
+ env->callsite_at_stack = NULL;
+ for (k = 0; k < env->subprog_cnt; k++)
+ kvfree(info[k].at_in);
+ kvfree(info);
+ return err;
+}
+
+/* Each field is a register bitmask */
+struct insn_live_regs {
+ u16 use; /* registers read by instruction */
+ u16 def; /* registers written by instruction */
+ u16 in; /* registers that may be alive before instruction */
+ u16 out; /* registers that may be alive after instruction */
+};
+
+/* Bitmask with 1s for all caller saved registers */
+#define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
+
+/* Compute info->{use,def} fields for the instruction */
+static void compute_insn_live_regs(struct bpf_verifier_env *env,
+ struct bpf_insn *insn,
+ struct insn_live_regs *info)
+{
+ struct bpf_call_summary cs;
+ u8 class = BPF_CLASS(insn->code);
+ u8 code = BPF_OP(insn->code);
+ u8 mode = BPF_MODE(insn->code);
+ u16 src = BIT(insn->src_reg);
+ u16 dst = BIT(insn->dst_reg);
+ u16 r0 = BIT(0);
+ u16 def = 0;
+ u16 use = 0xffff;
+
+ switch (class) {
+ case BPF_LD:
+ switch (mode) {
+ case BPF_IMM:
+ if (BPF_SIZE(insn->code) == BPF_DW) {
+ def = dst;
+ use = 0;
+ }
+ break;
+ case BPF_LD | BPF_ABS:
+ case BPF_LD | BPF_IND:
+ /* stick with defaults */
+ break;
+ }
+ break;
+ case BPF_LDX:
+ switch (mode) {
+ case BPF_MEM:
+ case BPF_MEMSX:
+ def = dst;
+ use = src;
+ break;
+ }
+ break;
+ case BPF_ST:
+ switch (mode) {
+ case BPF_MEM:
+ def = 0;
+ use = dst;
+ break;
+ }
+ break;
+ case BPF_STX:
+ switch (mode) {
+ case BPF_MEM:
+ def = 0;
+ use = dst | src;
+ break;
+ case BPF_ATOMIC:
+ switch (insn->imm) {
+ case BPF_CMPXCHG:
+ use = r0 | dst | src;
+ def = r0;
+ break;
+ case BPF_LOAD_ACQ:
+ def = dst;
+ use = src;
+ break;
+ case BPF_STORE_REL:
+ def = 0;
+ use = dst | src;
+ break;
+ default:
+ use = dst | src;
+ if (insn->imm & BPF_FETCH)
+ def = src;
+ else
+ def = 0;
+ }
+ break;
+ }
+ break;
+ case BPF_ALU:
+ case BPF_ALU64:
+ switch (code) {
+ case BPF_END:
+ use = dst;
+ def = dst;
+ break;
+ case BPF_MOV:
+ def = dst;
+ if (BPF_SRC(insn->code) == BPF_K)
+ use = 0;
+ else
+ use = src;
+ break;
+ default:
+ def = dst;
+ if (BPF_SRC(insn->code) == BPF_K)
+ use = dst;
+ else
+ use = dst | src;
+ }
+ break;
+ case BPF_JMP:
+ case BPF_JMP32:
+ switch (code) {
+ case BPF_JA:
+ def = 0;
+ if (BPF_SRC(insn->code) == BPF_X)
+ use = dst;
+ else
+ use = 0;
+ break;
+ case BPF_JCOND:
+ def = 0;
+ use = 0;
+ break;
+ case BPF_EXIT:
+ def = 0;
+ use = r0;
+ break;
+ case BPF_CALL:
+ def = ALL_CALLER_SAVED_REGS;
+ use = def & ~BIT(BPF_REG_0);
+ if (bpf_get_call_summary(env, insn, &cs))
+ use = GENMASK(cs.num_params, 1);
+ break;
+ default:
+ def = 0;
+ if (BPF_SRC(insn->code) == BPF_K)
+ use = dst;
+ else
+ use = dst | src;
+ }
+ break;
+ }
+
+ info->def = def;
+ info->use = use;
+}
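+
+/*
+ * E.g. "r0 = *(u64 *)(r1 + 8)" (BPF_LDX|BPF_MEM) yields use = {r1},
+ * def = {r0}; BPF_CMPXCHG reads r0, dst and src and defines r0.
+ */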
+
+/* Compute may-live registers after each instruction in the program.
+ * The register is live after the instruction I if it is read by some
+ * instruction S following I during program execution and is not
+ * overwritten between I and S.
+ *
+ * Store result in env->insn_aux_data[i].live_regs_before.
+ */
+int bpf_compute_live_registers(struct bpf_verifier_env *env)
+{
+ struct bpf_insn_aux_data *insn_aux = env->insn_aux_data;
+ struct bpf_insn *insns = env->prog->insnsi;
+ struct insn_live_regs *state;
+ int insn_cnt = env->prog->len;
+ int err = 0, i, j;
+ bool changed;
+
+ /* Use the following algorithm:
+ * - define the following:
+ * - I.use : a set of all registers read by instruction I;
+ * - I.def : a set of all registers written by instruction I;
+ * - I.in : a set of all registers that may be alive before I execution;
+ * - I.out : a set of all registers that may be alive after I execution;
+ * - insn_successors(I): a set of instructions S that might immediately
+ * follow I for some program execution;
+ * - associate separate empty sets 'I.in' and 'I.out' with each instruction;
+ * - visit each instruction in a postorder and update
+ * state[i].in, state[i].out as follows:
+ *
+ * state[i].out = U [state[s].in for S in insn_successors(i)]
+ * state[i].in = (state[i].out / state[i].def) U state[i].use
+ *
+ * (where U stands for set union, / stands for set difference)
+	 * - repeat the computation while the {in,out} fields change for
+	 *   any instruction.
+ */
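+	/*
+	 * E.g. for "r1 = r2; r0 = r1; exit" the fixed point yields the
+	 * in-sets {r2}, {r1}, {r0} for the three instructions.
+	 */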
+ state = kvzalloc_objs(*state, insn_cnt, GFP_KERNEL_ACCOUNT);
+ if (!state) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < insn_cnt; ++i)
+ compute_insn_live_regs(env, &insns[i], &state[i]);
+
+ /* Forward pass: resolve stack access through FP-derived pointers */
+ err = bpf_compute_subprog_arg_access(env);
+ if (err)
+ goto out;
+
+ changed = true;
+ while (changed) {
+ changed = false;
+ for (i = 0; i < env->cfg.cur_postorder; ++i) {
+ int insn_idx = env->cfg.insn_postorder[i];
+ struct insn_live_regs *live = &state[insn_idx];
+ struct bpf_iarray *succ;
+ u16 new_out = 0;
+ u16 new_in = 0;
+
+ succ = bpf_insn_successors(env, insn_idx);
+ for (int s = 0; s < succ->cnt; ++s)
+ new_out |= state[succ->items[s]].in;
+ new_in = (new_out & ~live->def) | live->use;
+ if (new_out != live->out || new_in != live->in) {
+ live->in = new_in;
+ live->out = new_out;
+ changed = true;
+ }
+ }
+ }
+
+ for (i = 0; i < insn_cnt; ++i)
+ insn_aux[i].live_regs_before = state[i].in;
+
+ if (env->log.level & BPF_LOG_LEVEL2) {
+ verbose(env, "Live regs before insn:\n");
+ for (i = 0; i < insn_cnt; ++i) {
+ if (env->insn_aux_data[i].scc)
+ verbose(env, "%3d ", env->insn_aux_data[i].scc);
+ else
+ verbose(env, " ");
+ verbose(env, "%3d: ", i);
+ for (j = BPF_REG_0; j < BPF_REG_10; ++j)
+ if (insn_aux[i].live_regs_before & BIT(j))
+ verbose(env, "%d", j);
+ else
+ verbose(env, ".");
+ verbose(env, " ");
+ bpf_verbose_insn(env, &insns[i]);
+ if (bpf_is_ldimm64(&insns[i]))
+ i++;
+ }
+ }
+
+out:
+ kvfree(state);
+ return err;
+}
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 8fca0c64f7b1..23267213a17f 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -270,7 +270,7 @@ static int cgroup_storage_get_next_key(struct bpf_map *_map, void *key,
goto enoent;
storage = list_next_entry(storage, list_map);
- if (!storage)
+ if (list_entry_is_head(storage, &map->list, list_map))
goto enoent;
} else {
storage = list_first_entry(&map->list,
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index a0c3b35de2ce..011e4ec25acd 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -329,47 +329,6 @@ __printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
}
EXPORT_SYMBOL_GPL(bpf_log);
-static const struct bpf_line_info *
-find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
-{
- const struct bpf_line_info *linfo;
- const struct bpf_prog *prog;
- u32 nr_linfo;
- int l, r, m;
-
- prog = env->prog;
- nr_linfo = prog->aux->nr_linfo;
-
- if (!nr_linfo || insn_off >= prog->len)
- return NULL;
-
- linfo = prog->aux->linfo;
- /* Loop invariant: linfo[l].insn_off <= insns_off.
- * linfo[0].insn_off == 0 which always satisfies above condition.
- * Binary search is searching for rightmost linfo entry that satisfies
- * the above invariant, giving us the desired record that covers given
- * instruction offset.
- */
- l = 0;
- r = nr_linfo - 1;
- while (l < r) {
- /* (r - l + 1) / 2 means we break a tie to the right, so if:
- * l=1, r=2, linfo[l].insn_off <= insn_off, linfo[r].insn_off > insn_off,
- * then m=2, we see that linfo[m].insn_off > insn_off, and so
- * r becomes 1 and we exit the loop with correct l==1.
- * If the tie was broken to the left, m=1 would end us up in
- * an endless loop where l and m stay at 1 and r stays at 2.
- */
- m = l + (r - l + 1) / 2;
- if (linfo[m].insn_off <= insn_off)
- l = m;
- else
- r = m - 1;
- }
-
- return &linfo[l];
-}
-
static const char *ltrim(const char *s)
{
while (isspace(*s))
@@ -390,7 +349,7 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env,
return;
prev_linfo = env->prev_linfo;
- linfo = find_linfo(env, insn_off);
+ linfo = bpf_find_linfo(env->prog, insn_off);
if (!linfo || linfo == prev_linfo)
return;
@@ -542,7 +501,8 @@ static char slot_type_char[] = {
[STACK_ZERO] = '0',
[STACK_DYNPTR] = 'd',
[STACK_ITER] = 'i',
- [STACK_IRQ_FLAG] = 'f'
+ [STACK_IRQ_FLAG] = 'f',
+ [STACK_POISON] = 'p',
};
#define UNUM_MAX_DECIMAL U16_MAX
@@ -581,6 +541,8 @@ int tnum_strn(char *str, size_t size, struct tnum a)
if (a.mask == 0) {
if (is_unum_decimal(a.value))
return snprintf(str, size, "%llu", a.value);
+ if (is_snum_decimal(a.value))
+ return snprintf(str, size, "%lld", a.value);
else
return snprintf(str, size, "%#llx", a.value);
}
@@ -692,7 +654,7 @@ static void print_reg_state(struct bpf_verifier_env *env,
if (state->frameno != reg->frameno)
verbose(env, "[%d]", reg->frameno);
if (tnum_is_const(reg->var_off)) {
- verbose_snum(env, reg->var_off.value + reg->off);
+ verbose_snum(env, reg->var_off.value + reg->delta);
return;
}
}
@@ -702,7 +664,7 @@ static void print_reg_state(struct bpf_verifier_env *env,
if (reg->id)
verbose_a("id=%d", reg->id & ~BPF_ADD_CONST);
if (reg->id & BPF_ADD_CONST)
- verbose(env, "%+d", reg->off);
+ verbose(env, "%+d", reg->delta);
if (reg->ref_obj_id)
verbose_a("ref_obj_id=%d", reg->ref_obj_id);
if (type_is_non_owning_ref(reg->type))
@@ -714,9 +676,9 @@ static void print_reg_state(struct bpf_verifier_env *env,
reg->map_ptr->key_size,
reg->map_ptr->value_size);
}
- if (t != SCALAR_VALUE && reg->off) {
+ if (t != SCALAR_VALUE && reg->delta) {
verbose_a("off=");
- verbose_snum(env, reg->off);
+ verbose_snum(env, reg->delta);
}
if (type_is_pkt_pointer(t)) {
verbose_a("r=");
@@ -777,7 +739,7 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
for (j = 0; j < BPF_REG_SIZE; j++) {
slot_type = state->stack[i].slot_type[j];
- if (slot_type != STACK_INVALID)
+ if (slot_type != STACK_INVALID && slot_type != STACK_POISON)
valid = true;
types_buf[j] = slot_type_char[slot_type];
}
@@ -845,7 +807,7 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
mark_verifier_state_clean(env);
}
-static inline u32 vlog_alignment(u32 pos)
+u32 bpf_vlog_alignment(u32 pos)
{
return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
BPF_LOG_MIN_ALIGNMENT) - pos - 1;
@@ -857,7 +819,7 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_st
if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) {
/* remove new line character */
bpf_vlog_reset(&env->log, env->prev_log_pos - 1);
- verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_pos), ' ');
+ verbose(env, "%*c;", bpf_vlog_alignment(env->prev_insn_print_pos), ' ');
} else {
verbose(env, "%d:", env->insn_idx);
}
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 682a9f34214b..e9662db7198f 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -284,17 +284,6 @@ static void __free_rcu(struct rcu_head *head)
atomic_set(&c->call_rcu_ttrace_in_progress, 0);
}
-static void __free_rcu_tasks_trace(struct rcu_head *head)
-{
- /* If RCU Tasks Trace grace period implies RCU grace period,
- * there is no need to invoke call_rcu().
- */
- if (rcu_trace_implies_rcu_gp())
- __free_rcu(head);
- else
- call_rcu(head, __free_rcu);
-}
-
static void enque_to_free(struct bpf_mem_cache *c, void *obj)
{
struct llist_node *llnode = obj;
@@ -326,12 +315,12 @@ static void do_call_rcu_ttrace(struct bpf_mem_cache *c)
return;
}
- /* Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
- * If RCU Tasks Trace grace period implies RCU grace period, free
- * these elements directly, else use call_rcu() to wait for normal
- * progs to finish and finally do free_one() on each element.
+ /*
+ * Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
+ * RCU Tasks Trace grace period implies RCU grace period, so pass
+ * __free_rcu directly as the callback.
*/
- call_rcu_tasks_trace(&c->rcu_ttrace, __free_rcu_tasks_trace);
+ call_rcu_tasks_trace(&c->rcu_ttrace, __free_rcu);
}
static void free_bulk(struct bpf_mem_cache *c)
@@ -696,20 +685,18 @@ static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma)
static void free_mem_alloc(struct bpf_mem_alloc *ma)
{
- /* waiting_for_gp[_ttrace] lists were drained, but RCU callbacks
+ /*
+ * waiting_for_gp[_ttrace] lists were drained, but RCU callbacks
* might still execute. Wait for them.
*
* rcu_barrier_tasks_trace() doesn't imply synchronize_rcu_tasks_trace(),
* but rcu_barrier_tasks_trace() and rcu_barrier() below are only used
- * to wait for the pending __free_rcu_tasks_trace() and __free_rcu(),
- * so if call_rcu(head, __free_rcu) is skipped due to
- * rcu_trace_implies_rcu_gp(), it will be OK to skip rcu_barrier() by
- * using rcu_trace_implies_rcu_gp() as well.
+	 * to wait for the pending __free_by_rcu() and __free_rcu() callbacks.
+	 * RCU Tasks Trace grace period implies RCU grace period, so __free_rcu()
+	 * needs no extra call_rcu() (and thus no extra rcu_barrier() here).
*/
rcu_barrier(); /* wait for __free_by_rcu */
rcu_barrier_tasks_trace(); /* wait for __free_rcu */
- if (!rcu_trace_implies_rcu_gp())
- rcu_barrier();
free_mem_alloc_no_barrier(ma);
}
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 0ad97d643bf4..0d6f5569588c 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -435,9 +435,8 @@ static struct ns_common *bpf_prog_offload_info_fill_ns(void *private_data)
if (aux->offload) {
args->info->ifindex = aux->offload->netdev->ifindex;
- net = dev_net(aux->offload->netdev);
- get_net(net);
- ns = &net->ns;
+ net = maybe_get_net(dev_net(aux->offload->netdev));
+ ns = net ? &net->ns : NULL;
} else {
args->info->ifindex = 0;
ns = NULL;
@@ -647,9 +646,8 @@ static struct ns_common *bpf_map_offload_info_fill_ns(void *private_data)
if (args->offmap->netdev) {
args->info->ifindex = args->offmap->netdev->ifindex;
- net = dev_net(args->offmap->netdev);
- get_net(net);
- ns = &net->ns;
+ net = maybe_get_net(dev_net(args->offmap->netdev));
+ ns = net ? &net->ns : NULL;
} else {
args->info->ifindex = 0;
ns = NULL;
diff --git a/kernel/bpf/states.c b/kernel/bpf/states.c
new file mode 100644
index 000000000000..8478d2c6ed5b
--- /dev/null
+++ b/kernel/bpf/states.c
@@ -0,0 +1,1563 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/filter.h>
+
+#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
+
+#define BPF_COMPLEXITY_LIMIT_STATES 64
+
+static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx)
+{
+ return bpf_is_may_goto_insn(&env->prog->insnsi[insn_idx]);
+}
+
+static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
+{
+ return env->insn_aux_data[insn_idx].is_iter_next;
+}
+
+static void update_peak_states(struct bpf_verifier_env *env)
+{
+ u32 cur_states;
+
+ cur_states = env->explored_states_size + env->free_list_size + env->num_backedges;
+ env->peak_states = max(env->peak_states, cur_states);
+}
+
+/* struct bpf_verifier_state->parent refers to states
+ * that are in either of env->{explored_states,free_list}.
+ * In both cases the state is contained in struct bpf_verifier_state_list.
+ */
+static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st)
+{
+ if (st->parent)
+ return container_of(st->parent, struct bpf_verifier_state_list, state);
+ return NULL;
+}
+
+static bool incomplete_read_marks(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st);
+
+/* A state can be freed if it is no longer referenced:
+ * - it is in env->free_list;
+ * - it has no child states.
+ */
+static void maybe_free_verifier_state(struct bpf_verifier_env *env,
+ struct bpf_verifier_state_list *sl)
+{
+	if (!sl->in_free_list ||
+	    sl->state.branches != 0 ||
+	    incomplete_read_marks(env, &sl->state))
+ return;
+ list_del(&sl->node);
+ bpf_free_verifier_state(&sl->state, false);
+ kfree(sl);
+ env->free_list_size--;
+}
+
+/* For state @st look for the topmost frame with frame_insn_idx() in some SCC;
+ * if such a frame exists, form a corresponding @callchain as an array of
+ * call sites leading to this frame plus the SCC id.
+ * E.g.:
+ *
+ * void foo() { A: loop {... SCC#1 ...}; }
+ * void bar() { B: loop { C: foo(); ... SCC#2 ... }
+ * D: loop { E: foo(); ... SCC#3 ... } }
+ * void main() { F: bar(); }
+ *
+ * @callchain at (A) would be either (F,SCC#2) or (F,SCC#3) depending
+ * on @st frame call sites being (F,C,A) or (F,E,A).
+ */
+static bool compute_scc_callchain(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st,
+ struct bpf_scc_callchain *callchain)
+{
+ u32 i, scc, insn_idx;
+
+ memset(callchain, 0, sizeof(*callchain));
+ for (i = 0; i <= st->curframe; i++) {
+ insn_idx = bpf_frame_insn_idx(st, i);
+ scc = env->insn_aux_data[insn_idx].scc;
+ if (scc) {
+ callchain->scc = scc;
+ break;
+ } else if (i < st->curframe) {
+ callchain->callsites[i] = insn_idx;
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+
+/* Check if bpf_scc_visit instance for @callchain exists. */
+static struct bpf_scc_visit *scc_visit_lookup(struct bpf_verifier_env *env,
+ struct bpf_scc_callchain *callchain)
+{
+	struct bpf_scc_info *info = env->scc_info[callchain->scc];
+	struct bpf_scc_visit *visits;
+	u32 i;
+
+	if (!info)
+		return NULL;
+	visits = info->visits;
+ for (i = 0; i < info->num_visits; i++)
+ if (memcmp(callchain, &visits[i].callchain, sizeof(*callchain)) == 0)
+ return &visits[i];
+ return NULL;
+}
+
+/* Allocate a new bpf_scc_visit instance corresponding to @callchain.
+ * Allocated instances are alive for a duration of the do_check_common()
+ * call and are freed by free_states().
+ */
+static struct bpf_scc_visit *scc_visit_alloc(struct bpf_verifier_env *env,
+ struct bpf_scc_callchain *callchain)
+{
+ struct bpf_scc_visit *visit;
+ struct bpf_scc_info *info;
+ u32 scc, num_visits;
+ u64 new_sz;
+
+ scc = callchain->scc;
+ info = env->scc_info[scc];
+ num_visits = info ? info->num_visits : 0;
+ new_sz = sizeof(*info) + sizeof(struct bpf_scc_visit) * (num_visits + 1);
+ info = kvrealloc(env->scc_info[scc], new_sz, GFP_KERNEL_ACCOUNT);
+ if (!info)
+ return NULL;
+ env->scc_info[scc] = info;
+ info->num_visits = num_visits + 1;
+ visit = &info->visits[num_visits];
+ memset(visit, 0, sizeof(*visit));
+ memcpy(&visit->callchain, callchain, sizeof(*callchain));
+ return visit;
+}
+
+/* Form a string '(callsite#1,callsite#2,...,scc)' in env->tmp_str_buf */
+static char *format_callchain(struct bpf_verifier_env *env, struct bpf_scc_callchain *callchain)
+{
+ char *buf = env->tmp_str_buf;
+ int i, delta = 0;
+
+ delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "(");
+ for (i = 0; i < ARRAY_SIZE(callchain->callsites); i++) {
+ if (!callchain->callsites[i])
+ break;
+ delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u,",
+ callchain->callsites[i]);
+ }
+ delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u)", callchain->scc);
+ return env->tmp_str_buf;
+}
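+
+/* Illustration (assumed values): for callchain->callsites = {12, 40, 0, ...}
+ * and callchain->scc = 3, format_callchain() yields "(12,40,3)" in
+ * env->tmp_str_buf; a zero callsite terminates the printed list.
+ */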
+
+/* If a callchain for @st exists (@st is in some SCC), ensure that
+ * a bpf_scc_visit instance for this callchain exists.
+ * If the instance has no entry state yet, record @st as its entry state.
+ */
+static int maybe_enter_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+ struct bpf_scc_callchain *callchain = &env->callchain_buf;
+ struct bpf_scc_visit *visit;
+
+ if (!compute_scc_callchain(env, st, callchain))
+ return 0;
+ visit = scc_visit_lookup(env, callchain);
+ visit = visit ?: scc_visit_alloc(env, callchain);
+ if (!visit)
+ return -ENOMEM;
+ if (!visit->entry_state) {
+ visit->entry_state = st;
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "SCC enter %s\n", format_callchain(env, callchain));
+ }
+ return 0;
+}
+
+static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit);
+
+/* If a callchain for @st exists (@st is in some SCC), empty the corresponding bpf_scc_visit:
+ * - set visit->entry_state to NULL;
+ * - flush accumulated backedges.
+ */
+static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+ struct bpf_scc_callchain *callchain = &env->callchain_buf;
+ struct bpf_scc_visit *visit;
+
+ if (!compute_scc_callchain(env, st, callchain))
+ return 0;
+ visit = scc_visit_lookup(env, callchain);
+ if (!visit) {
+ /*
+ * If path traversal stops inside an SCC, corresponding bpf_scc_visit
+ * must exist for non-speculative paths. For non-speculative paths
+ * traversal stops when:
+ * a. Verification error is found, maybe_exit_scc() is not called.
+ * b. Top level BPF_EXIT is reached. Top level BPF_EXIT is not a member
+ * of any SCC.
+ * c. A checkpoint is reached and matched. Checkpoints are created by
+ * is_state_visited(), which calls maybe_enter_scc(), which allocates
+ * bpf_scc_visit instances for checkpoints within SCCs.
+ * (c) is the only case that can reach this point.
+ */
+ if (!st->speculative) {
+ verifier_bug(env, "scc exit: no visit info for call chain %s",
+ format_callchain(env, callchain));
+ return -EFAULT;
+ }
+ return 0;
+ }
+ if (visit->entry_state != st)
+ return 0;
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "SCC exit %s\n", format_callchain(env, callchain));
+ visit->entry_state = NULL;
+ env->num_backedges -= visit->num_backedges;
+ visit->num_backedges = 0;
+ update_peak_states(env);
+ return propagate_backedges(env, visit);
+}
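+
+/* Sketch of the intended enter/exit pairing (see callers for the exact flow):
+ * maybe_enter_scc() records the first checkpoint state inside an SCC as
+ * visit->entry_state; once that state's branch count drops to zero in
+ * bpf_update_branch_counts(), maybe_exit_scc() clears the entry state and
+ * flushes the accumulated backedges via propagate_backedges().
+ */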
+
+/* Look up the bpf_scc_visit instance corresponding to @st callchain
+ * and add @backedge to visit->backedges. @st callchain must exist.
+ */
+static int add_scc_backedge(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st,
+ struct bpf_scc_backedge *backedge)
+{
+ struct bpf_scc_callchain *callchain = &env->callchain_buf;
+ struct bpf_scc_visit *visit;
+
+ if (!compute_scc_callchain(env, st, callchain)) {
+ verifier_bug(env, "add backedge: no SCC in verification path, insn_idx %d",
+ st->insn_idx);
+ return -EFAULT;
+ }
+ visit = scc_visit_lookup(env, callchain);
+ if (!visit) {
+ verifier_bug(env, "add backedge: no visit info for call chain %s",
+ format_callchain(env, callchain));
+ return -EFAULT;
+ }
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "SCC backedge %s\n", format_callchain(env, callchain));
+ backedge->next = visit->backedges;
+ visit->backedges = backedge;
+ visit->num_backedges++;
+ env->num_backedges++;
+ update_peak_states(env);
+ return 0;
+}
+
+/* bpf_reg_state->live marks for registers in a state @st are incomplete
+ * if state @st is in some SCC and not all execution paths starting at this
+ * SCC are fully explored.
+ */
+static bool incomplete_read_marks(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st)
+{
+ struct bpf_scc_callchain *callchain = &env->callchain_buf;
+ struct bpf_scc_visit *visit;
+
+ if (!compute_scc_callchain(env, st, callchain))
+ return false;
+ visit = scc_visit_lookup(env, callchain);
+ if (!visit)
+ return false;
+ return !!visit->backedges;
+}
+
+int bpf_update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+ struct bpf_verifier_state_list *sl = NULL, *parent_sl;
+ struct bpf_verifier_state *parent;
+ int err;
+
+ while (st) {
+ u32 br = --st->branches;
+
+ /* verifier_bug_if(br > 1, ...) technically makes sense here,
+ * but see comment in push_stack(), hence:
+ */
+ verifier_bug_if((int)br < 0, env, "%s:branches_to_explore=%d", __func__, br);
+ if (br)
+ break;
+ err = maybe_exit_scc(env, st);
+ if (err)
+ return err;
+ parent = st->parent;
+ parent_sl = state_parent_as_list(st);
+ if (sl)
+ maybe_free_verifier_state(env, sl);
+ st = parent;
+ sl = parent_sl;
+ }
+ return 0;
+}
+
+/* check %cur's range satisfies %old's */
+static bool range_within(const struct bpf_reg_state *old,
+ const struct bpf_reg_state *cur)
+{
+ return old->umin_value <= cur->umin_value &&
+ old->umax_value >= cur->umax_value &&
+ old->smin_value <= cur->smin_value &&
+ old->smax_value >= cur->smax_value &&
+ old->u32_min_value <= cur->u32_min_value &&
+ old->u32_max_value >= cur->u32_max_value &&
+ old->s32_min_value <= cur->s32_min_value &&
+ old->s32_max_value >= cur->s32_max_value;
+}
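+
+/* Worked example (u64 bounds only, for illustration): old umin=2, umax=10
+ * accepts cur umin=4, umax=8, since [4, 8] lies within the verified [2, 10];
+ * cur umax=12 would fail because it admits values the old state was never
+ * verified against. The same containment must hold for the signed and
+ * 32-bit bounds.
+ */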
+
+/* If in the old state two registers had the same id, then they need to have
+ * the same id in the new state as well. But that id could be different from
+ * the old state, so we need to track the mapping from old to new ids.
+ * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
+ * regs with old id 5 must also have new id 9 for the new state to be safe. But
+ * regs with a different old id could still have new id 9, we don't care about
+ * that.
+ * So we look through our idmap to see if this old id has been seen before. If
+ * so, we require the new id to match; otherwise, we add the id pair to the map.
+ */
+static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
+{
+ struct bpf_id_pair *map = idmap->map;
+ unsigned int i;
+
+ /* either both IDs should be set or both should be zero */
+ if (!!old_id != !!cur_id)
+ return false;
+
+ if (old_id == 0) /* cur_id == 0 as well */
+ return true;
+
+ for (i = 0; i < idmap->cnt; i++) {
+ if (map[i].old == old_id)
+ return map[i].cur == cur_id;
+ if (map[i].cur == cur_id)
+ return false;
+ }
+
+ /* Reached the end of known mappings; haven't seen this id before */
+ if (idmap->cnt < BPF_ID_MAP_SIZE) {
+ map[idmap->cnt].old = old_id;
+ map[idmap->cnt].cur = cur_id;
+ idmap->cnt++;
+ return true;
+ }
+
+ /* We ran out of idmap slots, which should be impossible */
+ WARN_ON_ONCE(1);
+ return false;
+}
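+
+/* Worked example (illustrative ids, starting from an empty idmap):
+ *
+ *	check_ids(5, 9, idmap);   // true, records mapping 5 -> 9
+ *	check_ids(5, 9, idmap);   // true, matches the recorded mapping
+ *	check_ids(5, 7, idmap);   // false, 5 is already mapped to 9
+ *	check_ids(6, 9, idmap);   // false, 9 is already claimed by old id 5
+ *
+ * Both directions of the consistency requirement are enforced.
+ */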
+
+/*
+ * Compare scalar register IDs for state equivalence.
+ *
+ * When old_id == 0, the old register is independent - not linked to any
+ * other register. Any linking in the current state only adds constraints,
+ * making it more restrictive. Since the old state didn't rely on any ID
+ * relationships for this register, it's always safe to accept cur regardless
+ * of its ID. Hence, return true immediately.
+ *
+ * When old_id != 0 but cur_id == 0, we need to ensure that different
+ * independent registers in cur don't incorrectly satisfy the ID matching
+ * requirements of linked registers in old.
+ *
+ * Example: if old has r6.id=X and r7.id=X (linked), but cur has r6.id=0
+ * and r7.id=0 (both independent), without temp IDs both would map old_id=X
+ * to cur_id=0 and pass. With temp IDs: r6 maps X->temp1, r7 tries to map
+ * X->temp2, but X is already mapped to temp1, so the check fails correctly.
+ *
+ * When old_id has BPF_ADD_CONST set, the compound id (base | flag) and the
+ * base id (flag stripped) must both map consistently. Example: old has
+ * r2.id=A, r3.id=A|flag (r3 = r2 + delta), cur has r2.id=B, r3.id=C|flag
+ * (r3 derived from unrelated r4). Without the base check, idmap gets two
+ * independent entries A->B and A|flag->C|flag, missing that A->C conflicts
+ * with A->B. The base ID cross-check catches this.
+ */
+static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
+{
+ if (!old_id)
+ return true;
+
+ cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
+
+ if (!check_ids(old_id, cur_id, idmap))
+ return false;
+ if (old_id & BPF_ADD_CONST) {
+ old_id &= ~BPF_ADD_CONST;
+ cur_id &= ~BPF_ADD_CONST;
+ if (!check_ids(old_id, cur_id, idmap))
+ return false;
+ }
+ return true;
+}
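+
+/* Illustrative trace (assumed ids): for old r6.id = 5|BPF_ADD_CONST and
+ * cur r6.id = 9|BPF_ADD_CONST, the compound pair 5|flag -> 9|flag is
+ * recorded first, then the flag-stripped pair 5 -> 9, so any later
+ * register linked to old id 5 must map to cur id 9 to be accepted.
+ */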
+
+static void __clean_func_state(struct bpf_verifier_env *env,
+ struct bpf_func_state *st,
+ u16 live_regs, int frame)
+{
+ int i, j;
+
+ for (i = 0; i < BPF_REG_FP; i++) {
+ /* liveness must not touch this register anymore */
+ if (!(live_regs & BIT(i)))
+ /* since the register is unused, clear its state
+ * to make further comparison simpler
+ */
+ bpf_mark_reg_not_init(env, &st->regs[i]);
+ }
+
+ /*
+ * Clean dead 4-byte halves within each SPI independently.
+ * half_spi 2*i → lower half: slot_type[0..3] (closer to FP)
+ * half_spi 2*i+1 → upper half: slot_type[4..7] (farther from FP)
+ */
+ for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
+ bool lo_live = bpf_stack_slot_alive(env, frame, i * 2);
+ bool hi_live = bpf_stack_slot_alive(env, frame, i * 2 + 1);
+
+ if (!hi_live || !lo_live) {
+ int start = !lo_live ? 0 : BPF_REG_SIZE / 2;
+ int end = !hi_live ? BPF_REG_SIZE : BPF_REG_SIZE / 2;
+ u8 stype = st->stack[i].slot_type[7];
+
+ /*
+ * Don't clear special slots.
+ * destroy_if_dynptr_stack_slot() needs STACK_DYNPTR to
+ * detect overwrites and invalidate associated data slices.
+ * is_iter_reg_valid_uninit() and is_irq_flag_reg_valid_uninit()
+ * check for their respective slot types to detect double-create.
+ */
+ if (stype == STACK_DYNPTR || stype == STACK_ITER ||
+ stype == STACK_IRQ_FLAG)
+ continue;
+
+ /*
+ * Only destroy spilled_ptr when hi half is dead.
+ * If hi half is still live with STACK_SPILL, the
+ * spilled_ptr metadata is needed for correct state
+ * comparison in stacksafe().
+			 * is_spilled_reg() uses slot_type[7], but
+			 * is_spilled_scalar_after() checks either slot_type[0] or [4].
+ */
+ if (!hi_live) {
+ struct bpf_reg_state *spill = &st->stack[i].spilled_ptr;
+
+ if (lo_live && stype == STACK_SPILL) {
+ u8 val = STACK_MISC;
+
+ /*
+					 * An 8-byte spill of scalar 0 whose high half is
+					 * dead should become STACK_ZERO in the low 4 bytes.
+ */
+ if (bpf_register_is_null(spill))
+ val = STACK_ZERO;
+ for (j = 0; j < 4; j++) {
+ u8 *t = &st->stack[i].slot_type[j];
+
+ if (*t == STACK_SPILL)
+ *t = val;
+ }
+ }
+ bpf_mark_reg_not_init(env, spill);
+ }
+ for (j = start; j < end; j++)
+ st->stack[i].slot_type[j] = STACK_POISON;
+ }
+ }
+}
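+
+/* Illustrative effect on one stack slot (assumed state): a 64-bit spill of
+ * a known-zero scalar whose low half_spi is live and high half_spi is dead
+ * ends up as
+ *
+ *	slot_type[0..3]: STACK_SPILL -> STACK_ZERO   (live low half)
+ *	slot_type[4..7]: STACK_SPILL -> STACK_POISON (dead high half)
+ *
+ * with spilled_ptr reset via bpf_mark_reg_not_init().
+ */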
+
+static int clean_verifier_state(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st)
+{
+ int i, err;
+
+ err = bpf_live_stack_query_init(env, st);
+ if (err)
+ return err;
+ for (i = 0; i <= st->curframe; i++) {
+ u32 ip = bpf_frame_insn_idx(st, i);
+ u16 live_regs = env->insn_aux_data[ip].live_regs_before;
+
+ __clean_func_state(env, st->frame[i], live_regs, i);
+ }
+ return 0;
+}
+
+static bool regs_exact(const struct bpf_reg_state *rold,
+ const struct bpf_reg_state *rcur,
+ struct bpf_idmap *idmap)
+{
+ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+ check_ids(rold->id, rcur->id, idmap) &&
+ check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
+}
+
+enum exact_level {
+ NOT_EXACT,
+ EXACT,
+ RANGE_WITHIN
+};
+
+/* Returns true if (rold safe implies rcur safe) */
+static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
+ struct bpf_reg_state *rcur, struct bpf_idmap *idmap,
+ enum exact_level exact)
+{
+ if (exact == EXACT)
+ return regs_exact(rold, rcur, idmap);
+
+ if (rold->type == NOT_INIT)
+ /* explored state can't have used this */
+ return true;
+
+ /* Enforce that register types have to match exactly, including their
+ * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
+ * rule.
+ *
+ * One can make a point that using a pointer register as unbounded
+ * SCALAR would be technically acceptable, but this could lead to
+ * pointer leaks because scalars are allowed to leak while pointers
+ * are not. We could make this safe in special cases if root is
+ * calling us, but it's probably not worth the hassle.
+ *
+ * Also, register types that are *not* MAYBE_NULL could technically be
+ * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
+ * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
+ * to the same map).
+ * However, if the old MAYBE_NULL register then got NULL checked,
+ * doing so could have affected others with the same id, and we can't
+ * check for that because we lost the id when we converted to
+ * a non-MAYBE_NULL variant.
+ * So, as a general rule we don't allow mixing MAYBE_NULL and
+ * non-MAYBE_NULL registers as well.
+ */
+ if (rold->type != rcur->type)
+ return false;
+
+ switch (base_type(rold->type)) {
+ case SCALAR_VALUE:
+ if (env->explore_alu_limits) {
+ /* explore_alu_limits disables tnum_in() and range_within()
+ * logic and requires everything to be strict
+ */
+ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+ check_scalar_ids(rold->id, rcur->id, idmap);
+ }
+ if (!rold->precise && exact == NOT_EXACT)
+ return true;
+ /*
+ * Linked register tracking uses rold->id to detect relationships.
+ * When rold->id == 0, the register is independent and any linking
+ * in rcur only adds constraints. When rold->id != 0, we must verify
+ * id mapping and (for BPF_ADD_CONST) offset consistency.
+ *
+ * +------------------+-----------+------------------+---------------+
+ * | | rold->id | rold + ADD_CONST | rold->id == 0 |
+ * |------------------+-----------+------------------+---------------|
+ * | rcur->id | range,ids | false | range |
+ * | rcur + ADD_CONST | false | range,ids,off | range |
+ * | rcur->id == 0 | range,ids | false | range |
+ * +------------------+-----------+------------------+---------------+
+ *
+ * Why check_ids() for scalar registers?
+ *
+ * Consider the following BPF code:
+ * 1: r6 = ... unbound scalar, ID=a ...
+ * 2: r7 = ... unbound scalar, ID=b ...
+ * 3: if (r6 > r7) goto +1
+ * 4: r6 = r7
+ * 5: if (r6 > X) goto ...
+ * 6: ... memory operation using r7 ...
+ *
+ * First verification path is [1-6]:
+ * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
+ * - at (5) r6 would be marked <= X, sync_linked_regs() would also mark
+ * r7 <= X, because r6 and r7 share same id.
+ * Next verification path is [1-4, 6].
+ *
+ * Instruction (6) would be reached in two states:
+ * I. r6{.id=b}, r7{.id=b} via path 1-6;
+ * II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
+ *
+ * Use check_ids() to distinguish these states.
+ * ---
+ * Also verify that new value satisfies old value range knowledge.
+ */
+
+ /*
+ * ADD_CONST flags must match exactly: BPF_ADD_CONST32 and
+ * BPF_ADD_CONST64 have different linking semantics in
+ * sync_linked_regs() (alu32 zero-extends, alu64 does not),
+ * so pruning across different flag types is unsafe.
+ */
+ if (rold->id &&
+ (rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST))
+ return false;
+
+ /* Both have offset linkage: offsets must match */
+ if ((rold->id & BPF_ADD_CONST) && rold->delta != rcur->delta)
+ return false;
+
+ if (!check_scalar_ids(rold->id, rcur->id, idmap))
+ return false;
+
+ return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
+ case PTR_TO_MAP_KEY:
+ case PTR_TO_MAP_VALUE:
+ case PTR_TO_MEM:
+ case PTR_TO_BUF:
+ case PTR_TO_TP_BUFFER:
+ /* If the new min/max/var_off satisfy the old ones and
+ * everything else matches, we are OK.
+ */
+ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
+ range_within(rold, rcur) &&
+ tnum_in(rold->var_off, rcur->var_off) &&
+ check_ids(rold->id, rcur->id, idmap) &&
+ check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
+ case PTR_TO_PACKET_META:
+ case PTR_TO_PACKET:
+ /* We must have at least as much range as the old ptr
+ * did, so that any accesses which were safe before are
+ * still safe. This is true even if old range < old off,
+ * since someone could have accessed through (ptr - k), or
+ * even done ptr -= k in a register, to get a safe access.
+ */
+ if (rold->range < 0 || rcur->range < 0) {
+ /* special case for [BEYOND|AT]_PKT_END */
+ if (rold->range != rcur->range)
+ return false;
+ } else if (rold->range > rcur->range) {
+ return false;
+ }
+ /* id relations must be preserved */
+ if (!check_ids(rold->id, rcur->id, idmap))
+ return false;
+ /* new val must satisfy old val knowledge */
+ return range_within(rold, rcur) &&
+ tnum_in(rold->var_off, rcur->var_off);
+ case PTR_TO_STACK:
+ /* two stack pointers are equal only if they're pointing to
+ * the same stack frame, since fp-8 in foo != fp-8 in bar
+ */
+ return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
+ case PTR_TO_ARENA:
+ return true;
+ case PTR_TO_INSN:
+ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
+ range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
+ default:
+ return regs_exact(rold, rcur, idmap);
+ }
+}
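+
+/* Pruning intuition (illustrative ranges, assuming ids and var_off also
+ * match): a precise old scalar known to be in [0, 10] accepts a current
+ * scalar in [3, 5], since every value the current state can take was
+ * already verified; a current range of [3, 15] is rejected because 15 was
+ * never explored under the old state.
+ */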
+
+static struct bpf_reg_state unbound_reg;
+
+static __init int unbound_reg_init(void)
+{
+ bpf_mark_reg_unknown_imprecise(&unbound_reg);
+ return 0;
+}
+late_initcall(unbound_reg_init);
+
+static bool is_spilled_scalar_after(const struct bpf_stack_state *stack, int im)
+{
+ return stack->slot_type[im] == STACK_SPILL &&
+ stack->spilled_ptr.type == SCALAR_VALUE;
+}
+
+static bool is_stack_misc_after(struct bpf_verifier_env *env,
+ struct bpf_stack_state *stack, int im)
+{
+ u32 i;
+
+ for (i = im; i < ARRAY_SIZE(stack->slot_type); ++i) {
+ if ((stack->slot_type[i] == STACK_MISC) ||
+ ((stack->slot_type[i] == STACK_INVALID || stack->slot_type[i] == STACK_POISON) &&
+ env->allow_uninit_stack))
+ continue;
+ return false;
+ }
+
+ return true;
+}
+
+static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env,
+ struct bpf_stack_state *stack, int im)
+{
+ if (is_spilled_scalar_after(stack, im))
+ return &stack->spilled_ptr;
+
+ if (is_stack_misc_after(env, stack, im))
+ return &unbound_reg;
+
+ return NULL;
+}
+
+static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
+ struct bpf_func_state *cur, struct bpf_idmap *idmap,
+ enum exact_level exact)
+{
+ int i, spi;
+
+ /* walk slots of the explored stack and ignore any additional
+ * slots in the current stack, since explored(safe) state
+ * didn't use them
+ */
+ for (i = 0; i < old->allocated_stack; i++) {
+ struct bpf_reg_state *old_reg, *cur_reg;
+ int im = i % BPF_REG_SIZE;
+
+ spi = i / BPF_REG_SIZE;
+
+ if (exact == EXACT) {
+			u8 old_type = old->stack[spi].slot_type[im];
+			u8 cur_type = i < cur->allocated_stack ?
+				cur->stack[spi].slot_type[im] : STACK_INVALID;
+
+ /* STACK_INVALID and STACK_POISON are equivalent for pruning */
+ if (old_type == STACK_POISON)
+ old_type = STACK_INVALID;
+ if (cur_type == STACK_POISON)
+ cur_type = STACK_INVALID;
+ if (i >= cur->allocated_stack || old_type != cur_type)
+ return false;
+ }
+
+		if (old->stack[spi].slot_type[im] == STACK_INVALID ||
+		    old->stack[spi].slot_type[im] == STACK_POISON)
+ continue;
+
+ if (env->allow_uninit_stack &&
+		    old->stack[spi].slot_type[im] == STACK_MISC)
+ continue;
+
+ /* explored stack has more populated slots than current stack
+ * and these slots were used
+ */
+ if (i >= cur->allocated_stack)
+ return false;
+
+ /*
+ * 64 and 32-bit scalar spills vs MISC/INVALID slots and vice versa.
+		 * A load from a MISC/INVALID slot produces an unbound scalar.
+		 * Construct a fake register for such stack slots and call
+		 * regsafe() to ensure scalar ids are compared.
+ */
+ if (im == 0 || im == 4) {
+ old_reg = scalar_reg_for_stack(env, &old->stack[spi], im);
+ cur_reg = scalar_reg_for_stack(env, &cur->stack[spi], im);
+ if (old_reg && cur_reg) {
+ if (!regsafe(env, old_reg, cur_reg, idmap, exact))
+ return false;
+ i += (im == 0 ? BPF_REG_SIZE - 1 : 3);
+ continue;
+ }
+ }
+
+ /* if old state was safe with misc data in the stack
+ * it will be safe with zero-initialized stack.
+ * The opposite is not true
+ */
+		if (old->stack[spi].slot_type[im] == STACK_MISC &&
+		    cur->stack[spi].slot_type[im] == STACK_ZERO)
+			continue;
+		if (old->stack[spi].slot_type[im] != cur->stack[spi].slot_type[im])
+			/* Ex: old explored (safe) state has STACK_SPILL in
+			 * this stack slot, but current has STACK_MISC ->
+			 * these verifier states are not equivalent;
+			 * return false to continue verification of this path
+			 */
+			return false;
+		if (im != BPF_REG_SIZE - 1)
+			continue;
+		/* Both old and cur have the same slot_type */
+ switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
+ case STACK_SPILL:
+ /* when explored and current stack slot are both storing
+ * spilled registers, check that stored pointers types
+ * are the same as well.
+ * Ex: explored safe path could have stored
+ * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
+ * but current path has stored:
+ * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
+ * such verifier states are not equivalent.
+ * return false to continue verification of this path
+ */
+ if (!regsafe(env, &old->stack[spi].spilled_ptr,
+ &cur->stack[spi].spilled_ptr, idmap, exact))
+ return false;
+ break;
+ case STACK_DYNPTR:
+ old_reg = &old->stack[spi].spilled_ptr;
+ cur_reg = &cur->stack[spi].spilled_ptr;
+ if (old_reg->dynptr.type != cur_reg->dynptr.type ||
+ old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
+ !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
+ return false;
+ break;
+ case STACK_ITER:
+ old_reg = &old->stack[spi].spilled_ptr;
+ cur_reg = &cur->stack[spi].spilled_ptr;
+ /* iter.depth is not compared between states as it
+ * doesn't matter for correctness and would otherwise
+ * prevent convergence; we maintain it only to prevent
+ * infinite loop check triggering, see
+ * iter_active_depths_differ()
+ */
+ if (old_reg->iter.btf != cur_reg->iter.btf ||
+ old_reg->iter.btf_id != cur_reg->iter.btf_id ||
+ old_reg->iter.state != cur_reg->iter.state ||
+ /* ignore {old_reg,cur_reg}->iter.depth, see above */
+ !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
+ return false;
+ break;
+ case STACK_IRQ_FLAG:
+ old_reg = &old->stack[spi].spilled_ptr;
+ cur_reg = &cur->stack[spi].spilled_ptr;
+ if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap) ||
+ old_reg->irq.kfunc_class != cur_reg->irq.kfunc_class)
+ return false;
+ break;
+ case STACK_MISC:
+ case STACK_ZERO:
+ case STACK_INVALID:
+ case STACK_POISON:
+ continue;
+ /* Ensure that new unhandled slot types return false by default */
+ default:
+ return false;
+ }
+ }
+ return true;
+}
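+
+/* Example of the asymmetry above (illustrative slots): old STACK_MISC vs
+ * cur STACK_ZERO is accepted, since code verified against arbitrary bytes
+ * is also safe with zeroes; old STACK_ZERO vs cur STACK_MISC is rejected,
+ * since old paths may have relied on reading zero.
+ */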
+
+static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur,
+ struct bpf_idmap *idmap)
+{
+ int i;
+
+ if (old->acquired_refs != cur->acquired_refs)
+ return false;
+
+ if (old->active_locks != cur->active_locks)
+ return false;
+
+ if (old->active_preempt_locks != cur->active_preempt_locks)
+ return false;
+
+ if (old->active_rcu_locks != cur->active_rcu_locks)
+ return false;
+
+ if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap))
+ return false;
+
+ if (!check_ids(old->active_lock_id, cur->active_lock_id, idmap) ||
+ old->active_lock_ptr != cur->active_lock_ptr)
+ return false;
+
+ for (i = 0; i < old->acquired_refs; i++) {
+ if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) ||
+ old->refs[i].type != cur->refs[i].type)
+ return false;
+ switch (old->refs[i].type) {
+ case REF_TYPE_PTR:
+ case REF_TYPE_IRQ:
+ break;
+ case REF_TYPE_LOCK:
+ case REF_TYPE_RES_LOCK:
+ case REF_TYPE_RES_LOCK_IRQ:
+ if (old->refs[i].ptr != cur->refs[i].ptr)
+ return false;
+ break;
+ default:
+ WARN_ONCE(1, "Unhandled enum type for reference state: %d\n", old->refs[i].type);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/* compare two verifier states
+ *
+ * all states stored in state_list are known to be valid, since
+ * verifier reached 'bpf_exit' instruction through them
+ *
+ * this function is called when the verifier explores different branches of
+ * execution popped from the state stack. If it sees an old state that has
+ * a more strict register state and a more strict stack state, then this
+ * execution branch doesn't need to be explored further, since the verifier
+ * already concluded that the more strict state leads to a valid finish.
+ *
+ * Therefore two states are equivalent if register state is more conservative
+ * and explored stack state is more conservative than the current one.
+ * Example:
+ * explored current
+ * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
+ * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
+ *
+ * In other words if current stack state (one being explored) has more
+ * valid slots than old one that already passed validation, it means
+ * the verifier can stop exploring and conclude that current state is valid too
+ *
+ * Similarly with registers. If the explored state has a register type of
+ * NOT_INIT while the register in the current state has a meaningful type,
+ * it means that the current state will reach the 'bpf_exit' instruction safely
+ */
+static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
+ struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact)
+{
+ u16 live_regs = env->insn_aux_data[insn_idx].live_regs_before;
+ u16 i;
+
+ if (old->callback_depth > cur->callback_depth)
+ return false;
+
+ for (i = 0; i < MAX_BPF_REG; i++)
+ if (((1 << i) & live_regs) &&
+ !regsafe(env, &old->regs[i], &cur->regs[i],
+ &env->idmap_scratch, exact))
+ return false;
+
+ if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
+ return false;
+
+ return true;
+}
+
+static void reset_idmap_scratch(struct bpf_verifier_env *env)
+{
+ struct bpf_idmap *idmap = &env->idmap_scratch;
+
+ idmap->tmp_id_gen = env->id_gen;
+ idmap->cnt = 0;
+}
+
+static bool states_equal(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *old,
+ struct bpf_verifier_state *cur,
+ enum exact_level exact)
+{
+ u32 insn_idx;
+ int i;
+
+ if (old->curframe != cur->curframe)
+ return false;
+
+ reset_idmap_scratch(env);
+
+ /* Verification state from speculative execution simulation
+ * must never prune a non-speculative execution one.
+ */
+ if (old->speculative && !cur->speculative)
+ return false;
+
+ if (old->in_sleepable != cur->in_sleepable)
+ return false;
+
+ if (!refsafe(old, cur, &env->idmap_scratch))
+ return false;
+
+ /* for states to be equal callsites have to be the same
+ * and all frame states need to be equivalent
+ */
+ for (i = 0; i <= old->curframe; i++) {
+ insn_idx = bpf_frame_insn_idx(old, i);
+ if (old->frame[i]->callsite != cur->frame[i]->callsite)
+ return false;
+ if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact))
+ return false;
+ }
+ return true;
+}
+
+/* find precise scalars in the previous equivalent state and
+ * propagate them into the current state
+ */
+static int propagate_precision(struct bpf_verifier_env *env,
+ const struct bpf_verifier_state *old,
+ struct bpf_verifier_state *cur,
+ bool *changed)
+{
+ struct bpf_reg_state *state_reg;
+ struct bpf_func_state *state;
+ int i, err = 0, fr;
+ bool first;
+
+ for (fr = old->curframe; fr >= 0; fr--) {
+ state = old->frame[fr];
+ state_reg = state->regs;
+ first = true;
+ for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
+ if (state_reg->type != SCALAR_VALUE ||
+ !state_reg->precise)
+ continue;
+ if (env->log.level & BPF_LOG_LEVEL2) {
+ if (first)
+ verbose(env, "frame %d: propagating r%d", fr, i);
+ else
+ verbose(env, ",r%d", i);
+ }
+ bpf_bt_set_frame_reg(&env->bt, fr, i);
+ first = false;
+ }
+
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ if (!bpf_is_spilled_reg(&state->stack[i]))
+ continue;
+ state_reg = &state->stack[i].spilled_ptr;
+ if (state_reg->type != SCALAR_VALUE ||
+ !state_reg->precise)
+ continue;
+ if (env->log.level & BPF_LOG_LEVEL2) {
+ if (first)
+ verbose(env, "frame %d: propagating fp%d",
+ fr, (-i - 1) * BPF_REG_SIZE);
+ else
+ verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
+ }
+ bpf_bt_set_frame_slot(&env->bt, fr, i);
+ first = false;
+ }
+ if (!first && (env->log.level & BPF_LOG_LEVEL2))
+ verbose(env, "\n");
+ }
+
+ err = bpf_mark_chain_precision(env, cur, -1, changed);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+#define MAX_BACKEDGE_ITERS 64
+
+/* Propagate read and precision marks from visit->backedges[*].state->equal_state
+ * to the corresponding parent states of visit->backedges[*].state until a fixed point is reached,
+ * then free visit->backedges.
+ * After execution of this function incomplete_read_marks() will return false
+ * for all states corresponding to @visit->callchain.
+ */
+static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit)
+{
+ struct bpf_scc_backedge *backedge;
+ struct bpf_verifier_state *st;
+ bool changed;
+ int i, err;
+
+ i = 0;
+ do {
+ if (i++ > MAX_BACKEDGE_ITERS) {
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "%s: too many iterations\n", __func__);
+ for (backedge = visit->backedges; backedge; backedge = backedge->next)
+ bpf_mark_all_scalars_precise(env, &backedge->state);
+ break;
+ }
+ changed = false;
+ for (backedge = visit->backedges; backedge; backedge = backedge->next) {
+ st = &backedge->state;
+ err = propagate_precision(env, st->equal_state, st, &changed);
+ if (err)
+ return err;
+ }
+ } while (changed);
+
+ bpf_free_backedges(visit);
+ return 0;
+}
+
+static bool states_maybe_looping(struct bpf_verifier_state *old,
+ struct bpf_verifier_state *cur)
+{
+ struct bpf_func_state *fold, *fcur;
+ int i, fr = cur->curframe;
+
+ if (old->curframe != fr)
+ return false;
+
+ fold = old->frame[fr];
+ fcur = cur->frame[fr];
+ for (i = 0; i < MAX_BPF_REG; i++)
+ if (memcmp(&fold->regs[i], &fcur->regs[i],
+ offsetof(struct bpf_reg_state, frameno)))
+ return false;
+ return true;
+}
+
+/* is_state_visited() handles iter_next() (see process_iter_next_call() for
+ * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
+ * states to match, which otherwise would look like an infinite loop. So while
+ * iter_next() calls are taken care of, we still need to be careful and
+ * prevent erroneous and too eager declaration of "infinite loop", when
+ * iterators are involved.
+ *
+ * Here's a situation in pseudo-BPF assembly form:
+ *
+ * 0: again: ; set up iter_next() call args
+ * 1: r1 = &it ; <CHECKPOINT HERE>
+ * 2: call bpf_iter_num_next ; this is iter_next() call
+ * 3: if r0 == 0 goto done
+ * 4: ... something useful here ...
+ * 5: goto again ; another iteration
+ * 6: done:
+ * 7: r1 = &it
+ * 8: call bpf_iter_num_destroy ; clean up iter state
+ * 9: exit
+ *
+ * This is a typical loop. Let's assume that we have a prune point at 1:,
+ * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
+ * again`, assuming other heuristics don't get in a way).
+ *
+ * When we first time come to 1:, let's say we have some state X. We proceed
+ * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
+ * Now we come back to validate that forked ACTIVE state. We proceed through
+ * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
+ * are converging. But the problem is that we don't know that yet, as this
+ * convergence has to happen at iter_next() call site only. So if nothing is
+ * done, at 1: verifier will use bounded loop logic and declare infinite
+ * looping (and would be *technically* correct, if not for iterator's
+ * "eventual sticky NULL" contract, see process_iter_next_call()). But we
+ * don't want that. So what we do in process_iter_next_call() when we go on
+ * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's
+ * a different iteration. So when we suspect an infinite loop, we additionally
+ * check if any of the *ACTIVE* iterator states depths differ. If yes, we
+ * pretend we are not looping and wait for next iter_next() call.
+ *
+ * This only applies to ACTIVE state. In DRAINED state we don't expect to
+ * loop, because that would actually mean infinite loop, as DRAINED state is
+ * "sticky", and so we'll keep returning into the same instruction with the
+ * same state (at least in one of possible code paths).
+ *
+ * This approach allows keeping the infinite loop heuristic even in the
+ * face of an active iterator. E.g., the C snippet below is detected as
+ * infinitely looping:
+ *
+ * struct bpf_iter_num it;
+ * int *p, x;
+ *
+ * bpf_iter_num_new(&it, 0, 10);
+ * while ((p = bpf_iter_num_next(&it))) {
+ * x = *p;
+ * while (x--) {} // <<-- infinite loop here
+ * }
+ *
+ */
+static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
+{
+ struct bpf_reg_state *slot, *cur_slot;
+ struct bpf_func_state *state;
+ int i, fr;
+
+ for (fr = old->curframe; fr >= 0; fr--) {
+ state = old->frame[fr];
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ if (state->stack[i].slot_type[0] != STACK_ITER)
+ continue;
+
+ slot = &state->stack[i].spilled_ptr;
+ if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
+ continue;
+
+ cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
+ if (cur_slot->iter.depth != slot->iter.depth)
+ return true;
+ }
+ }
+ return false;
+}
+
+static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+ struct bpf_func_state *func;
+ struct bpf_reg_state *reg;
+ int i, j;
+
+ for (i = 0; i <= st->curframe; i++) {
+ func = st->frame[i];
+ for (j = 0; j < BPF_REG_FP; j++) {
+ reg = &func->regs[j];
+ if (reg->type != SCALAR_VALUE)
+ continue;
+ reg->precise = false;
+ }
+ for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
+ if (!bpf_is_spilled_reg(&func->stack[j]))
+ continue;
+ reg = &func->stack[j].spilled_ptr;
+ if (reg->type != SCALAR_VALUE)
+ continue;
+ reg->precise = false;
+ }
+ }
+}
+
+int bpf_is_state_visited(struct bpf_verifier_env *env, int insn_idx)
+{
+ struct bpf_verifier_state_list *new_sl;
+ struct bpf_verifier_state_list *sl;
+ struct bpf_verifier_state *cur = env->cur_state, *new;
+ bool force_new_state, add_new_state, loop;
+ int n, err, states_cnt = 0;
+ struct list_head *pos, *tmp, *head;
+
+ force_new_state = env->test_state_freq || bpf_is_force_checkpoint(env, insn_idx) ||
+ /* Avoid accumulating infinitely long jmp history */
+ cur->jmp_history_cnt > 40;
+
+	/* bpf progs typically have a pruning point every 4 instructions
+	 * http://vger.kernel.org/bpfconf2019.html#session-1
+	 * Do not add a new state for future pruning if the verifier hasn't seen
+	 * at least 2 jumps and at least 8 instructions.
+	 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
+	 * In tests that amounts to up to a 50% reduction in total verifier
+	 * memory consumption and a 20% verifier time speedup.
+ */
+ add_new_state = force_new_state;
+ if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
+ env->insn_processed - env->prev_insn_processed >= 8)
+ add_new_state = true;
+
+ /* keep cleaning the current state as registers/stack become dead */
+ err = clean_verifier_state(env, cur);
+ if (err)
+ return err;
+
+ loop = false;
+ head = bpf_explored_state(env, insn_idx);
+ list_for_each_safe(pos, tmp, head) {
+ sl = container_of(pos, struct bpf_verifier_state_list, node);
+ states_cnt++;
+ if (sl->state.insn_idx != insn_idx)
+ continue;
+
+ if (sl->state.branches) {
+ struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
+
+ if (frame->in_async_callback_fn &&
+ frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
+ /* Different async_entry_cnt means that the verifier is
+ * processing another entry into async callback.
+ * Seeing the same state is not an indication of infinite
+ * loop or infinite recursion.
+ * But finding the same state doesn't mean that it's safe
+ * to stop processing the current state. The previous state
+ * hasn't yet reached bpf_exit, since state.branches > 0.
+				 * Checking in_async_callback_fn alone is not enough either,
+				 * since the verifier still needs to catch infinite loops
+				 * inside async callbacks.
+ */
+ goto skip_inf_loop_check;
+ }
+ /* BPF open-coded iterators loop detection is special.
+ * states_maybe_looping() logic is too simplistic in detecting
+ * states that *might* be equivalent, because it doesn't know
+ * about ID remapping, so don't even perform it.
+ * See process_iter_next_call() and iter_active_depths_differ()
+ * for overview of the logic. When current and one of parent
+ * states are detected as equivalent, it's a good thing: we prove
+ * convergence and can stop simulating further iterations.
+ * It's safe to assume that iterator loop will finish, taking into
+ * account iter_next() contract of eventually returning
+ * sticky NULL result.
+ *
+			 * Note that states have to be compared exactly in this case because
+ * read and precision marks might not be finalized inside the loop.
+ * E.g. as in the program below:
+ *
+ * 1. r7 = -16
+ * 2. r6 = bpf_get_prandom_u32()
+ * 3. while (bpf_iter_num_next(&fp[-8])) {
+ * 4. if (r6 != 42) {
+ * 5. r7 = -32
+ * 6. r6 = bpf_get_prandom_u32()
+ * 7. continue
+ * 8. }
+ * 9. r0 = r10
+ * 10. r0 += r7
+ * 11. r8 = *(u64 *)(r0 + 0)
+ * 12. r6 = bpf_get_prandom_u32()
+ * 13. }
+ *
+ * Here verifier would first visit path 1-3, create a checkpoint at 3
+ * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does
+ * not have read or precision mark for r7 yet, thus inexact states
+ * comparison would discard current state with r7=-32
+ * => unsafe memory access at 11 would not be caught.
+ */
+ if (is_iter_next_insn(env, insn_idx)) {
+ if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
+ struct bpf_func_state *cur_frame;
+ struct bpf_reg_state *iter_state, *iter_reg;
+ int spi;
+
+ cur_frame = cur->frame[cur->curframe];
+ /* btf_check_iter_kfuncs() enforces that
+ * iter state pointer is always the first arg
+ */
+ iter_reg = &cur_frame->regs[BPF_REG_1];
+ /* current state is valid due to states_equal(),
+ * so we can assume valid iter and reg state,
+ * no need for extra (re-)validations
+ */
+ spi = bpf_get_spi(iter_reg->var_off.value);
+ iter_state = &bpf_func(env, iter_reg)->stack[spi].spilled_ptr;
+ if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
+ loop = true;
+ goto hit;
+ }
+ }
+ goto skip_inf_loop_check;
+ }
+ if (is_may_goto_insn_at(env, insn_idx)) {
+ if (sl->state.may_goto_depth != cur->may_goto_depth &&
+ states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
+ loop = true;
+ goto hit;
+ }
+ }
+ if (bpf_calls_callback(env, insn_idx)) {
+ if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
+ loop = true;
+ goto hit;
+ }
+ goto skip_inf_loop_check;
+ }
+ /* attempt to detect infinite loop to avoid unnecessary doomed work */
+ if (states_maybe_looping(&sl->state, cur) &&
+ states_equal(env, &sl->state, cur, EXACT) &&
+ !iter_active_depths_differ(&sl->state, cur) &&
+ sl->state.may_goto_depth == cur->may_goto_depth &&
+ sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
+ verbose_linfo(env, insn_idx, "; ");
+ verbose(env, "infinite loop detected at insn %d\n", insn_idx);
+ verbose(env, "cur state:");
+ print_verifier_state(env, cur, cur->curframe, true);
+ verbose(env, "old state:");
+ print_verifier_state(env, &sl->state, cur->curframe, true);
+ return -EINVAL;
+ }
+ /* if the verifier is processing a loop, avoid adding new state
+ * too often, since different loop iterations have distinct
+ * states and may not help future pruning.
+ * This threshold shouldn't be too low to make sure that
+ * a loop with large bound will be rejected quickly.
+ * The most abusive loop will be:
+ * r1 += 1
+ * if r1 < 1000000 goto pc-2
+			 * 1M insn_processed limit / 100 == 10k peak states.
+ * This threshold shouldn't be too high either, since states
+ * at the end of the loop are likely to be useful in pruning.
+ */
+skip_inf_loop_check:
+ if (!force_new_state &&
+ env->jmps_processed - env->prev_jmps_processed < 20 &&
+ env->insn_processed - env->prev_insn_processed < 100)
+ add_new_state = false;
+ goto miss;
+ }
+ /* See comments for mark_all_regs_read_and_precise() */
+ loop = incomplete_read_marks(env, &sl->state);
+ if (states_equal(env, &sl->state, cur, loop ? RANGE_WITHIN : NOT_EXACT)) {
+hit:
+ sl->hit_cnt++;
+
+ /* if previous state reached the exit with precision and
+ * current state is equivalent to it (except precision marks)
+ * the precision needs to be propagated back in
+ * the current state.
+ */
+ err = 0;
+ if (bpf_is_jmp_point(env, env->insn_idx))
+ err = bpf_push_jmp_history(env, cur, 0, 0);
+ err = err ? : propagate_precision(env, &sl->state, cur, NULL);
+ if (err)
+ return err;
+		/* When processing iterator-based loops, the propagate_liveness and
+		 * propagate_precision calls above are not sufficient to transfer all
+		 * relevant read and precision marks. E.g. consider the following case:
+ *
+ * .-> A --. Assume the states are visited in the order A, B, C.
+ * | | | Assume that state B reaches a state equivalent to state A.
+ * | v v At this point, state C is not processed yet, so state A
+ * '-- B C has not received any read or precision marks from C.
+ * Thus, marks propagated from A to B are incomplete.
+ *
+ * The verifier mitigates this by performing the following steps:
+ *
+ * - Prior to the main verification pass, strongly connected components
+ * (SCCs) are computed over the program's control flow graph,
+ * intraprocedurally.
+ *
+ * - During the main verification pass, `maybe_enter_scc()` checks
+ * whether the current verifier state is entering an SCC. If so, an
+ * instance of a `bpf_scc_visit` object is created, and the state
+ * entering the SCC is recorded as the entry state.
+ *
+ * - This instance is associated not with the SCC itself, but with a
+ * `bpf_scc_callchain`: a tuple consisting of the call sites leading to
+ * the SCC and the SCC id. See `compute_scc_callchain()`.
+ *
+ * - When a verification path encounters a `states_equal(...,
+ * RANGE_WITHIN)` condition, there exists a call chain describing the
+ * current state and a corresponding `bpf_scc_visit` instance. A copy
+ * of the current state is created and added to
+ * `bpf_scc_visit->backedges`.
+ *
+ * - When a verification path terminates, `maybe_exit_scc()` is called
+ * from `bpf_update_branch_counts()`. For states with `branches == 0`, it
+ * checks whether the state is the entry state of any `bpf_scc_visit`
+ * instance. If it is, this indicates that all paths originating from
+ * this SCC visit have been explored. `propagate_backedges()` is then
+ * called, which propagates read and precision marks through the
+ * backedges until a fixed point is reached.
+ * (In the earlier example, this would propagate marks from A to B,
+ * from C to A, and then again from A to B.)
+ *
+ * A note on callchains
+ * --------------------
+ *
+ * Consider the following example:
+ *
+ * void foo() { loop { ... SCC#1 ... } }
+ * void main() {
+ * A: foo();
+ * B: ...
+ * C: foo();
+ * }
+ *
+ * Here, there are two distinct callchains leading to SCC#1:
+ * - (A, SCC#1)
+ * - (C, SCC#1)
+ *
+ * Each callchain identifies a separate `bpf_scc_visit` instance that
+ * accumulates backedge states. The `propagate_{liveness,precision}()`
+ * functions traverse the parent state of each backedge state, which
+ * means these parent states must remain valid (i.e., not freed) while
+ * the corresponding `bpf_scc_visit` instance exists.
+ *
+ * Associating `bpf_scc_visit` instances directly with SCCs instead of
+ * callchains would break this invariant:
+ * - States explored during `C: foo()` would contribute backedges to
+ * SCC#1, but SCC#1 would only be exited once the exploration of
+ * `A: foo()` completes.
+ * - By that time, the states explored between `A: foo()` and `C: foo()`
+ * (i.e., `B: ...`) may have already been freed, causing the parent
+ * links for states from `C: foo()` to become invalid.
+ */
+ if (loop) {
+ struct bpf_scc_backedge *backedge;
+
+ backedge = kzalloc_obj(*backedge,
+ GFP_KERNEL_ACCOUNT);
+ if (!backedge)
+ return -ENOMEM;
+ err = bpf_copy_verifier_state(&backedge->state, cur);
+ backedge->state.equal_state = &sl->state;
+ backedge->state.insn_idx = insn_idx;
+ err = err ?: add_scc_backedge(env, &sl->state, backedge);
+ if (err) {
+ bpf_free_verifier_state(&backedge->state, false);
+ kfree(backedge);
+ return err;
+ }
+ }
+ return 1;
+ }
+miss:
+		/* When a new state is not going to be added, do not increase the
+		 * miss count. Otherwise several loop iterations will remove the state
+ * recorded earlier. The goal of these heuristics is to have
+ * states from some iterations of the loop (some in the beginning
+ * and some at the end) to help pruning.
+ */
+ if (add_new_state)
+ sl->miss_cnt++;
+		/* heuristic to determine whether this state is beneficial
+		 * to keep checking from the state equivalence point of view.
+		 * Higher numbers increase max_states_per_insn and verification time,
+		 * but do not meaningfully decrease insn_processed.
+		 * 'n' controls how many times a state may miss before eviction.
+ * Use bigger 'n' for checkpoints because evicting checkpoint states
+ * too early would hinder iterator convergence.
+ */
+ n = bpf_is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
+ if (sl->miss_cnt > sl->hit_cnt * n + n) {
+ /* the state is unlikely to be useful. Remove it to
+ * speed up verification
+ */
+ sl->in_free_list = true;
+ list_del(&sl->node);
+ list_add(&sl->node, &env->free_list);
+ env->free_list_size++;
+ env->explored_states_size--;
+ maybe_free_verifier_state(env, sl);
+ }
+ }
+
+ if (env->max_states_per_insn < states_cnt)
+ env->max_states_per_insn = states_cnt;
+
+ if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
+ return 0;
+
+ if (!add_new_state)
+ return 0;
+
+ /* There were no equivalent states, remember the current one.
+ * Technically the current state is not proven to be safe yet,
+	 * but it will either reach the outermost bpf_exit (which means it's safe)
+ * or it will be rejected. When there are no loops the verifier won't be
+ * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
+ * again on the way to bpf_exit.
+ * When looping the sl->state.branches will be > 0 and this state
+ * will not be considered for equivalence until branches == 0.
+ */
+ new_sl = kzalloc_obj(struct bpf_verifier_state_list, GFP_KERNEL_ACCOUNT);
+ if (!new_sl)
+ return -ENOMEM;
+ env->total_states++;
+ env->explored_states_size++;
+ update_peak_states(env);
+ env->prev_jmps_processed = env->jmps_processed;
+ env->prev_insn_processed = env->insn_processed;
+
+ /* forget precise markings we inherited, see __mark_chain_precision */
+ if (env->bpf_capable)
+ mark_all_scalars_imprecise(env, cur);
+
+ bpf_clear_singular_ids(env, cur);
+
+ /* add new state to the head of linked list */
+ new = &new_sl->state;
+ err = bpf_copy_verifier_state(new, cur);
+ if (err) {
+ bpf_free_verifier_state(new, false);
+ kfree(new_sl);
+ return err;
+ }
+ new->insn_idx = insn_idx;
+ verifier_bug_if(new->branches != 1, env,
+ "%s:branches_to_explore=%d insn %d",
+ __func__, new->branches, insn_idx);
+ err = maybe_enter_scc(env, new);
+ if (err) {
+ bpf_free_verifier_state(new, false);
+ kfree(new_sl);
+ return err;
+ }
+
+ cur->parent = new;
+ cur->first_insn_idx = insn_idx;
+ cur->dfs_depth = new->dfs_depth + 1;
+ bpf_clear_jmp_history(cur);
+ list_add(&new_sl->node, head);
+ return 0;
+}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 700938782bed..b73b25c63073 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -941,14 +941,6 @@ static void bpf_map_free_rcu_gp(struct rcu_head *rcu)
bpf_map_free_in_work(container_of(rcu, struct bpf_map, rcu));
}
-static void bpf_map_free_mult_rcu_gp(struct rcu_head *rcu)
-{
- if (rcu_trace_implies_rcu_gp())
- bpf_map_free_rcu_gp(rcu);
- else
- call_rcu(rcu, bpf_map_free_rcu_gp);
-}
-
/* decrement map refcnt and schedule it for freeing via workqueue
* (underlying map implementation ops->map_free() might sleep)
*/
@@ -959,8 +951,9 @@ void bpf_map_put(struct bpf_map *map)
bpf_map_free_id(map);
WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt));
+ /* RCU Tasks Trace grace period implies RCU grace period. */
if (READ_ONCE(map->free_after_mult_rcu_gp))
- call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp);
+ call_rcu_tasks_trace(&map->rcu, bpf_map_free_rcu_gp);
else if (READ_ONCE(map->free_after_rcu_gp))
call_rcu(&map->rcu, bpf_map_free_rcu_gp);
else
@@ -2832,7 +2825,7 @@ static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr
sig = kvmemdup_bpfptr(usig, attr->signature_size);
if (IS_ERR(sig)) {
bpf_key_put(key);
- return -ENOMEM;
+ return PTR_ERR(sig);
}
bpf_dynptr_init(&sig_ptr, sig, BPF_DYNPTR_TYPE_LOCAL, 0,
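
The one-line fix above stops collapsing every kvmemdup_bpfptr() failure into -ENOMEM: the helper can also fail with -EFAULT, and PTR_ERR() now propagates whichever error was encoded. A simplified sketch of the ERR_PTR idiom involved (illustrative, not the actual helper):

/* A memdup-style helper encodes the failure reason in the pointer:
 * allocation failure and copy failure are distinct errors, so callers
 * must return PTR_ERR(p) rather than a hard-coded errno.
 */
static void *memdup_user_sketch(const void __user *src, size_t len)
{
	void *p = kvmalloc(len, GFP_KERNEL);

	if (!p)
		return ERR_PTR(-ENOMEM);
	if (copy_from_user(p, src, len)) {
		kvfree(p);
		return ERR_PTR(-EFAULT);	/* lost by the old 'return -ENOMEM' */
	}
	return p;
}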
@@ -3273,14 +3266,6 @@ static bool bpf_link_is_tracepoint(struct bpf_link *link)
(link->type == BPF_LINK_TYPE_TRACING && link->attach_type == BPF_TRACE_RAW_TP);
}
-static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
-{
- if (rcu_trace_implies_rcu_gp())
- bpf_link_defer_dealloc_rcu_gp(rcu);
- else
- call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp);
-}
-
/* bpf_link_free is guaranteed to be called from process context */
static void bpf_link_free(struct bpf_link *link)
{
@@ -3306,7 +3291,8 @@ static void bpf_link_free(struct bpf_link *link)
* faultable case, since it exclusively uses RCU Tasks Trace.
*/
if (link->sleepable || (link->prog && link->prog->sleepable))
- call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
+ /* RCU Tasks Trace grace period implies RCU grace period. */
+ call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
/* We need to do a SRCU grace period wait for non-faultable tracepoint BPF links. */
else if (bpf_link_is_tracepoint(link))
call_tracepoint_unregister_atomic(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
@@ -3754,6 +3740,23 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
tr = prog->aux->dst_trampoline;
tgt_prog = prog->aux->dst_prog;
}
+ /*
+ * Prevent modifying struct pt_regs via a kprobe_write_ctx=true
+ * freplace prog. Without this check, such an freplace prog could
+ * attach to a kprobe_write_ctx=false kprobe prog and then modify
+ * the registers of the kprobe prog's target kernel function.
+ *
+ * This also blocks the uprobe+freplace combination: from tgt_prog
+ * alone it is impossible to tell whether it is used as an uprobe or
+ * a kprobe; that is only known at attach time, from the target perf
+ * event flags in __perf_event_set_bpf_prog().
+ */
+ if (prog->type == BPF_PROG_TYPE_EXT &&
+ prog->aux->kprobe_write_ctx != tgt_prog->aux->kprobe_write_ctx) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
err = bpf_link_prime(&link->link.link, &link_primer);
if (err)
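
A hypothetical userspace flow exercising the new check (the libbpf calls are real; object, program, and function names are invented). Attaching a BPF_PROG_TYPE_EXT program whose kprobe_write_ctx capability differs from its target's now fails:

#include <bpf/libbpf.h>
#include <errno.h>

/* 'ext_prog' is an freplace (BPF_PROG_TYPE_EXT) program; target_fd refers
 * to a loaded kprobe program. If exactly one of the two was verified with
 * kprobe_write_ctx set, the attach is rejected with -EINVAL.
 */
static int try_freplace_attach(struct bpf_object *obj, int target_fd)
{
	struct bpf_program *ext;
	struct bpf_link *link;

	ext = bpf_object__find_program_by_name(obj, "ext_prog");
	if (!ext)
		return -ENOENT;
	link = bpf_program__attach_freplace(ext, target_fd, "subprog");
	if (!link)
		return -errno;	/* -EINVAL on kprobe_write_ctx mismatch */
	bpf_link__destroy(link);
	return 0;
}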
@@ -6369,8 +6372,7 @@ static bool syscall_prog_is_valid_access(int off, int size,
{
if (off < 0 || off >= U16_MAX)
return false;
- if (off % size != 0)
- return false;
+ /* No alignment requirements for syscall ctx accesses. */
return true;
}
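
With the modulo check gone, a BPF_PROG_TYPE_SYSCALL program may index its context at a bounded variable (and possibly unaligned) offset. An illustrative BPF-side sketch (the ctx layout is user-defined for syscall programs; names are invented):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct args {
	__u8 buf[64];
};

SEC("syscall")
int read_word(struct args *ctx)
{
	__u32 i = bpf_get_prandom_u32() & 31;	/* variable, bounded offset */
	__u32 v;

	/* Variable-offset ctx access; the old off % size rule would have
	 * required the 4-byte read to land on a 4-byte boundary.
	 */
	__builtin_memcpy(&v, &ctx->buf[i], sizeof(v));
	return v;
}

char _license[] SEC("license") = "GPL";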
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 98d9b4c0daff..e791ae065c39 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -9,6 +9,8 @@
#include <linux/bpf_mem_alloc.h>
#include <linux/btf_ids.h>
#include <linux/mm_types.h>
+#include <linux/mmap_lock.h>
+#include <linux/sched/mm.h>
#include "mmap_unlock_work.h"
static const char * const iter_task_type_names[] = {
@@ -794,11 +796,20 @@ const struct bpf_func_proto bpf_find_vma_proto = {
.arg5_type = ARG_ANYTHING,
};
+static inline void bpf_iter_mmput_async(struct mm_struct *mm)
+{
+#ifdef CONFIG_MMU
+ mmput_async(mm);
+#else
+ mmput(mm);
+#endif
+}
+
struct bpf_iter_task_vma_kern_data {
struct task_struct *task;
struct mm_struct *mm;
- struct mmap_unlock_irq_work *work;
- struct vma_iterator vmi;
+ struct vm_area_struct snapshot;
+ u64 next_addr;
};
struct bpf_iter_task_vma {
@@ -819,12 +830,28 @@ __bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
struct task_struct *task, u64 addr)
{
struct bpf_iter_task_vma_kern *kit = (void *)it;
- bool irq_work_busy = false;
int err;
BUILD_BUG_ON(sizeof(struct bpf_iter_task_vma_kern) != sizeof(struct bpf_iter_task_vma));
BUILD_BUG_ON(__alignof__(struct bpf_iter_task_vma_kern) != __alignof__(struct bpf_iter_task_vma));
+ if (!IS_ENABLED(CONFIG_PER_VMA_LOCK)) {
+ kit->data = NULL;
+ return -EOPNOTSUPP;
+ }
+
+ /*
+ * Reject irqs-disabled contexts including NMI. Operations used
+ * by _next() and _destroy() (vma_end_read, fput, bpf_iter_mmput_async)
+ * can take spinlocks with IRQs disabled (pi_lock, pool->lock).
+ * Running from NMI or from a tracepoint that fires with those
+ * locks held could deadlock.
+ */
+ if (irqs_disabled()) {
+ kit->data = NULL;
+ return -EBUSY;
+ }
+
/* is_iter_reg_valid_uninit guarantees that kit hasn't been initialized
* before, so non-NULL kit->data doesn't point to previously
* bpf_mem_alloc'd bpf_iter_task_vma_kern_data
@@ -834,38 +861,131 @@ __bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
return -ENOMEM;
kit->data->task = get_task_struct(task);
+ /*
+ * Safely read task->mm and acquire an mm reference.
+ *
+ * Cannot use get_task_mm() because its task_lock() spins
+ * unconditionally and would deadlock if the target task already
+ * holds alloc_lock on this CPU (e.g. a softirq BPF program
+ * iterating a task that was interrupted while holding its
+ * alloc_lock).
+ */
+ if (!spin_trylock(&task->alloc_lock)) {
+ err = -EBUSY;
+ goto err_cleanup_iter;
+ }
kit->data->mm = task->mm;
+ if (kit->data->mm && !(task->flags & PF_KTHREAD))
+ mmget(kit->data->mm);
+ else
+ kit->data->mm = NULL;
+ spin_unlock(&task->alloc_lock);
if (!kit->data->mm) {
err = -ENOENT;
goto err_cleanup_iter;
}
- /* kit->data->work == NULL is valid after bpf_mmap_unlock_get_irq_work */
- irq_work_busy = bpf_mmap_unlock_get_irq_work(&kit->data->work);
- if (irq_work_busy || !mmap_read_trylock(kit->data->mm)) {
- err = -EBUSY;
- goto err_cleanup_iter;
- }
-
- vma_iter_init(&kit->data->vmi, kit->data->mm, addr);
+ kit->data->snapshot.vm_file = NULL;
+ kit->data->next_addr = addr;
return 0;
err_cleanup_iter:
- if (kit->data->task)
- put_task_struct(kit->data->task);
+ put_task_struct(kit->data->task);
bpf_mem_free(&bpf_global_ma, kit->data);
/* NULL kit->data signals failed bpf_iter_task_vma initialization */
kit->data = NULL;
return err;
}
+/*
+ * Find and lock the next VMA at or after data->next_addr.
+ *
+ * lock_vma_under_rcu() is a point lookup (mas_walk): it finds the VMA
+ * containing a given address but cannot iterate. An RCU-protected
+ * maple tree walk with vma_next() (mas_find) is needed first to locate
+ * the next VMA's vm_start across any gap.
+ *
+ * Between the RCU walk and the lock, the VMA may be removed, shrunk,
+ * or write-locked. On failure, advance past it using vm_end from the
+ * RCU walk. SLAB_TYPESAFE_BY_RCU can make vm_end stale, so fall back
+ * to PAGE_SIZE advancement to guarantee forward progress.
+ */
+static struct vm_area_struct *
+bpf_iter_task_vma_find_next(struct bpf_iter_task_vma_kern_data *data)
+{
+ struct vm_area_struct *vma;
+ struct vma_iterator vmi;
+ unsigned long start, end;
+
+retry:
+ rcu_read_lock();
+ vma_iter_init(&vmi, data->mm, data->next_addr);
+ vma = vma_next(&vmi);
+ if (!vma) {
+ rcu_read_unlock();
+ return NULL;
+ }
+ start = vma->vm_start;
+ end = vma->vm_end;
+ rcu_read_unlock();
+
+ vma = lock_vma_under_rcu(data->mm, start);
+ if (!vma) {
+ if (end <= data->next_addr)
+ data->next_addr += PAGE_SIZE;
+ else
+ data->next_addr = end;
+ goto retry;
+ }
+
+ if (unlikely(vma->vm_end <= data->next_addr)) {
+ data->next_addr += PAGE_SIZE;
+ vma_end_read(vma);
+ goto retry;
+ }
+
+ return vma;
+}
+
+static void bpf_iter_task_vma_snapshot_reset(struct vm_area_struct *snap)
+{
+ if (snap->vm_file) {
+ fput(snap->vm_file);
+ snap->vm_file = NULL;
+ }
+}
+
__bpf_kfunc struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it)
{
struct bpf_iter_task_vma_kern *kit = (void *)it;
+ struct vm_area_struct *snap, *vma;
if (!kit->data) /* bpf_iter_task_vma_new failed */
return NULL;
- return vma_next(&kit->data->vmi);
+
+ snap = &kit->data->snapshot;
+
+ bpf_iter_task_vma_snapshot_reset(snap);
+
+ vma = bpf_iter_task_vma_find_next(kit->data);
+ if (!vma)
+ return NULL;
+
+ memcpy(snap, vma, sizeof(*snap));
+
+ /*
+ * The verifier only trusts vm_mm and vm_file (see
+ * BTF_TYPE_SAFE_TRUSTED_OR_NULL in verifier.c). Take a reference
+ * on vm_file; vm_mm is already correct because lock_vma_under_rcu()
+ * verifies vma->vm_mm == mm. All other pointers are untrusted by
+ * the verifier and left as-is.
+ */
+ if (snap->vm_file)
+ get_file(snap->vm_file);
+
+ kit->data->next_addr = vma->vm_end;
+ vma_end_read(vma);
+ return snap;
}
__bpf_kfunc void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it)
@@ -873,8 +993,9 @@ __bpf_kfunc void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it)
struct bpf_iter_task_vma_kern *kit = (void *)it;
if (kit->data) {
- bpf_mmap_unlock_mm(kit->data->work, kit->data->mm);
+ bpf_iter_task_vma_snapshot_reset(&kit->data->snapshot);
put_task_struct(kit->data->task);
+ bpf_iter_mmput_async(kit->data->mm);
bpf_mem_free(&bpf_global_ma, kit->data);
}
}
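
For context, a BPF program consumes this open-coded iterator roughly as below (sketch modeled on the selftests; kfunc prototypes normally come from vmlinux.h). With the rework above, each _next() call returns a per-VMA-locked snapshot instead of holding mmap_lock across the whole walk:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
				 struct task_struct *task, __u64 addr) __ksym;
extern struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym;
extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym;

SEC("raw_tp/sched_switch")
int count_vmas(void *ctx)
{
	struct task_struct *task = bpf_get_current_task_btf();
	struct bpf_iter_task_vma vma_it;
	struct vm_area_struct *vma;
	long nr = 0;

	/* _new() may now fail with -EOPNOTSUPP (no CONFIG_PER_VMA_LOCK)
	 * or -EBUSY (IRQs disabled); _destroy() is still required either way.
	 */
	if (!bpf_iter_task_vma_new(&vma_it, task, 0)) {
		while ((vma = bpf_iter_task_vma_next(&vma_it)))
			nr++;
	}
	bpf_iter_task_vma_destroy(&vma_it);
	return 0;
}

char _license[] SEC("license") = "GPL";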
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
index 4abc359b3db0..ec9c310cf5d7 100644
--- a/kernel/bpf/tnum.c
+++ b/kernel/bpf/tnum.c
@@ -286,8 +286,7 @@ struct tnum tnum_bswap64(struct tnum a)
*/
u64 tnum_step(struct tnum t, u64 z)
{
- u64 tmax, j, p, q, r, s, v, u, w, res;
- u8 k;
+ u64 tmax, d, carry_mask, filled, inc;
tmax = t.value | t.mask;
@@ -299,29 +298,22 @@ u64 tnum_step(struct tnum t, u64 z)
if (z < t.value)
return t.value;
- /* keep t's known bits, and match all unknown bits to z */
- j = t.value | (z & t.mask);
-
- if (j > z) {
- p = ~z & t.value & ~t.mask;
- k = fls64(p); /* k is the most-significant 0-to-1 flip */
- q = U64_MAX << k;
- r = q & z; /* positions > k matched to z */
- s = ~q & t.value; /* positions <= k matched to t.value */
- v = r | s;
- res = v;
- } else {
- p = z & ~t.value & ~t.mask;
- k = fls64(p); /* k is the most-significant 1-to-0 flip */
- q = U64_MAX << k;
- r = q & t.mask & z; /* unknown positions > k, matched to z */
- s = q & ~t.mask; /* known positions > k, set to 1 */
- v = r | s;
- /* add 1 to unknown positions > k to make value greater than z */
- u = v + (1ULL << k);
- /* extract bits in unknown positions > k from u, rest from t.value */
- w = (u & t.mask) | t.value;
- res = w;
- }
- return res;
+ /*
+ * Let r be the result tnum member and d = z - t.value.
+ * Every tnum member is t.value | s for some submask s of t.mask,
+ * and since t.value & t.mask == 0, t.value | s == t.value + s.
+ * So r > z becomes s > d.
+ *
+ * Find the smallest submask s of t.mask greater than d by
+ * "incrementing d within the mask": fill every non-mask
+ * position with 1 (`filled`) so +1 ripples through the gaps,
+ * then keep only mask bits. `carry_mask` additionally fills
+ * the positions up to and including the highest non-mask 1
+ * in d, forcing the carry to propagate past that bit.
+ */
+ d = z - t.value;
+ carry_mask = (1ULL << fls64(d & ~t.mask)) - 1;
+ filled = d | carry_mask | ~t.mask;
+ inc = (filled + 1) & t.mask;
+ return t.value | inc;
}
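
A worked instance of the new computation (standalone sketch; kernel fls64() modeled with a compiler builtin). For t.value = 0b0100 and t.mask = 0b1010 the tnum's members are {4, 6, 12, 14}; for z = 7 the result must be 12:

#include <assert.h>
#include <stdint.h>

static uint64_t fls64_model(uint64_t x)
{
	return x ? 64 - __builtin_clzll(x) : 0;	/* most-significant set bit, 1-based */
}

int main(void)
{
	uint64_t value = 0x4, mask = 0xa, z = 7;
	uint64_t d = z - value;					/* 3 */
	uint64_t carry_mask = (1ULL << fls64_model(d & ~mask)) - 1;	/* bit 0 of d is non-mask */
	uint64_t filled = d | carry_mask | ~mask;		/* ...11110111 */
	uint64_t inc = (filled + 1) & mask;			/* 0b1000 */

	assert((value | inc) == 12);	/* smallest member > 7 */
	return 0;
}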
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e3814152b52f..9e4980128151 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -195,9 +195,6 @@ struct bpf_verifier_stack_elem {
#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
#define BPF_COMPLEXITY_LIMIT_STATES 64
-#define BPF_MAP_KEY_POISON (1ULL << 63)
-#define BPF_MAP_KEY_SEEN (1ULL << 62)
-
#define BPF_GLOBAL_PERCPU_MA_MAX_SIZE 512
#define BPF_PRIV_STACK_MIN_SIZE 64
@@ -210,16 +207,10 @@ static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
static int ref_set_non_owning(struct bpf_verifier_env *env,
struct bpf_reg_state *reg);
static bool is_trusted_reg(const struct bpf_reg_state *reg);
-
-static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
-{
- return aux->map_ptr_state.poison;
-}
-
-static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
-{
- return aux->map_ptr_state.unpriv;
-}
+static inline bool in_sleepable_context(struct bpf_verifier_env *env);
+static const char *non_sleepable_context_description(struct bpf_verifier_env *env);
+static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg);
+static void scalar_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg);
static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
struct bpf_map *map,
@@ -231,21 +222,6 @@ static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
aux->map_ptr_state.map_ptr = map;
}
-static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
-{
- return aux->map_key_state & BPF_MAP_KEY_POISON;
-}
-
-static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
-{
- return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
-}
-
-static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
-{
- return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
-}
-
static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
{
bool poisoned = bpf_map_key_poisoned(aux);
@@ -254,29 +230,6 @@ static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
(poisoned ? BPF_MAP_KEY_POISON : 0ULL);
}
-static bool bpf_helper_call(const struct bpf_insn *insn)
-{
- return insn->code == (BPF_JMP | BPF_CALL) &&
- insn->src_reg == 0;
-}
-
-static bool bpf_pseudo_call(const struct bpf_insn *insn)
-{
- return insn->code == (BPF_JMP | BPF_CALL) &&
- insn->src_reg == BPF_PSEUDO_CALL;
-}
-
-static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
-{
- return insn->code == (BPF_JMP | BPF_CALL) &&
- insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
-}
-
-struct bpf_map_desc {
- struct bpf_map *ptr;
- int uid;
-};
-
struct bpf_call_arg_meta {
struct bpf_map_desc map;
bool raw_mode;
@@ -306,59 +259,6 @@ struct bpf_kfunc_meta {
s32 id;
};
-struct bpf_kfunc_call_arg_meta {
- /* In parameters */
- struct btf *btf;
- u32 func_id;
- u32 kfunc_flags;
- const struct btf_type *func_proto;
- const char *func_name;
- /* Out parameters */
- u32 ref_obj_id;
- u8 release_regno;
- bool r0_rdonly;
- u32 ret_btf_id;
- u64 r0_size;
- u32 subprogno;
- struct {
- u64 value;
- bool found;
- } arg_constant;
-
- /* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
- * generally to pass info about user-defined local kptr types to later
- * verification logic
- * bpf_obj_drop/bpf_percpu_obj_drop
- * Record the local kptr type to be drop'd
- * bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
- * Record the local kptr type to be refcount_incr'd and use
- * arg_owning_ref to determine whether refcount_acquire should be
- * fallible
- */
- struct btf *arg_btf;
- u32 arg_btf_id;
- bool arg_owning_ref;
- bool arg_prog;
-
- struct {
- struct btf_field *field;
- } arg_list_head;
- struct {
- struct btf_field *field;
- } arg_rbtree_root;
- struct {
- enum bpf_dynptr_type type;
- u32 id;
- u32 ref_obj_id;
- } initialized_dynptr;
- struct {
- u8 spi;
- u8 frameno;
- } iter;
- struct bpf_map_desc map;
- u64 mem_size;
-};
-
struct btf *btf_vmlinux;
static const char *btf_type_name(const struct btf *btf, u32 id)
@@ -437,13 +337,36 @@ static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
return rec;
}
-static bool subprog_is_global(const struct bpf_verifier_env *env, int subprog)
+bool bpf_subprog_is_global(const struct bpf_verifier_env *env, int subprog)
{
struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
}
+static bool subprog_returns_void(struct bpf_verifier_env *env, int subprog)
+{
+ const struct btf_type *type, *func, *func_proto;
+ const struct btf *btf = env->prog->aux->btf;
+ u32 btf_id;
+
+ btf_id = env->prog->aux->func_info[subprog].type_id;
+
+ func = btf_type_by_id(btf, btf_id);
+ if (verifier_bug_if(!func, env, "btf_id %u not found", btf_id))
+ return false;
+
+ func_proto = btf_type_by_id(btf, func->type);
+ if (!func_proto)
+ return false;
+
+ type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
+ if (!type)
+ return false;
+
+ return btf_type_is_void(type);
+}
+
static const char *subprog_name(const struct bpf_verifier_env *env, int subprog)
{
struct bpf_func_info *info;
@@ -455,7 +378,7 @@ static const char *subprog_name(const struct bpf_verifier_env *env, int subprog)
return btf_type_name(env->prog->aux->btf, info->type_id);
}
-static void mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog)
+void bpf_mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog)
{
struct bpf_subprog_info *info = subprog_info(env, subprog);
@@ -543,13 +466,13 @@ static bool is_callback_calling_function(enum bpf_func_id func_id)
is_async_callback_calling_function(func_id);
}
-static bool is_sync_callback_calling_insn(struct bpf_insn *insn)
+bool bpf_is_sync_callback_calling_insn(struct bpf_insn *insn)
{
return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
(bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
}
-static bool is_async_callback_calling_insn(struct bpf_insn *insn)
+bool bpf_is_async_callback_calling_insn(struct bpf_insn *insn)
{
return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) ||
(bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm));
@@ -570,24 +493,11 @@ static bool is_async_cb_sleepable(struct bpf_verifier_env *env, struct bpf_insn
return false;
}
-static bool is_may_goto_insn(struct bpf_insn *insn)
+bool bpf_is_may_goto_insn(struct bpf_insn *insn)
{
return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
}
-static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx)
-{
- return is_may_goto_insn(&env->prog->insnsi[insn_idx]);
-}
-
-static bool is_storage_get_function(enum bpf_func_id func_id)
-{
- return func_id == BPF_FUNC_sk_storage_get ||
- func_id == BPF_FUNC_inode_storage_get ||
- func_id == BPF_FUNC_task_storage_get ||
- func_id == BPF_FUNC_cgrp_storage_get;
-}
-
static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
const struct bpf_map *map)
{
@@ -603,39 +513,6 @@ static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
return ref_obj_uses > 1;
}
-static bool is_cmpxchg_insn(const struct bpf_insn *insn)
-{
- return BPF_CLASS(insn->code) == BPF_STX &&
- BPF_MODE(insn->code) == BPF_ATOMIC &&
- insn->imm == BPF_CMPXCHG;
-}
-
-static bool is_atomic_load_insn(const struct bpf_insn *insn)
-{
- return BPF_CLASS(insn->code) == BPF_STX &&
- BPF_MODE(insn->code) == BPF_ATOMIC &&
- insn->imm == BPF_LOAD_ACQ;
-}
-
-static bool is_atomic_fetch_insn(const struct bpf_insn *insn)
-{
- return BPF_CLASS(insn->code) == BPF_STX &&
- BPF_MODE(insn->code) == BPF_ATOMIC &&
- (insn->imm & BPF_FETCH);
-}
-
-static int __get_spi(s32 off)
-{
- return (-off - 1) / BPF_REG_SIZE;
-}
-
-static struct bpf_func_state *func(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg)
-{
- struct bpf_verifier_state *cur = env->cur_state;
-
- return cur->frame[reg->frameno];
-}
static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
{
@@ -661,19 +538,19 @@ static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_s
return -EINVAL;
}
- off = reg->off + reg->var_off.value;
+ off = reg->var_off.value;
if (off % BPF_REG_SIZE) {
verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
return -EINVAL;
}
- spi = __get_spi(off);
+ spi = bpf_get_spi(off);
if (spi + 1 < nr_slots) {
verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
return -EINVAL;
}
- if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots))
+ if (!is_spi_bounds_valid(bpf_func(env, reg), spi, nr_slots))
return -ERANGE;
return spi;
}
@@ -742,8 +619,6 @@ static void __mark_dynptr_reg(struct bpf_reg_state *reg,
enum bpf_dynptr_type type,
bool first_slot, int dynptr_id);
-static void __mark_reg_not_init(const struct bpf_verifier_env *env,
- struct bpf_reg_state *reg);
static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
struct bpf_reg_state *sreg1,
@@ -769,7 +644,7 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
{
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
enum bpf_dynptr_type type;
int spi, i, err;
@@ -821,8 +696,6 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
}
- bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi));
-
return 0;
}
@@ -835,15 +708,13 @@ static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_stat
state->stack[spi - 1].slot_type[i] = STACK_INVALID;
}
- __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
- __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
-
- bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi));
+ bpf_mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
+ bpf_mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
}
static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
int spi, ref_obj_id, i;
/*
@@ -902,7 +773,7 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env,
static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
if (!env->allow_ptr_leaks)
- __mark_reg_not_init(env, reg);
+ bpf_mark_reg_not_init(env, reg);
else
__mark_reg_unknown(env, reg);
}
@@ -927,8 +798,27 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
spi = spi + 1;
if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
- verbose(env, "cannot overwrite referenced dynptr\n");
- return -EINVAL;
+ int ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id;
+ int ref_cnt = 0;
+
+ /*
+ * A referenced dynptr can be overwritten only if there is at
+ * least one other dynptr sharing the same ref_obj_id,
+ * ensuring the reference can still be properly released.
+ */
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ if (state->stack[i].slot_type[0] != STACK_DYNPTR)
+ continue;
+ if (!state->stack[i].spilled_ptr.dynptr.first_slot)
+ continue;
+ if (state->stack[i].spilled_ptr.ref_obj_id == ref_obj_id)
+ ref_cnt++;
+ }
+
+ if (ref_cnt <= 1) {
+ verbose(env, "cannot overwrite referenced dynptr\n");
+ return -EINVAL;
+ }
}
mark_stack_slot_scratched(env, spi);
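
An illustrative BPF snippet that the relaxed rule accepts (sketch; map, section, and program names are arbitrary). After bpf_dynptr_clone(), two stack dynptrs share one ref_obj_id, so clobbering one of them no longer strands the reference:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 4096);
} rb SEC(".maps");

extern int bpf_dynptr_clone(const struct bpf_dynptr *p,
			    struct bpf_dynptr *clone) __ksym;

SEC("tc")
int clone_then_overwrite(struct __sk_buff *skb)
{
	struct bpf_dynptr a, b;

	bpf_ringbuf_reserve_dynptr(&rb, 8, 0, &a);
	bpf_dynptr_clone(&a, &b);

	/* Scribbling over 'a' destroys a referenced dynptr slot. This used
	 * to be rejected with "cannot overwrite referenced dynptr"; it is
	 * accepted now because 'b' shares the ref_obj_id, so the ringbuf
	 * reference can still be released below.
	 */
	__builtin_memset(&a, 0, sizeof(a));

	bpf_ringbuf_discard_dynptr(&b, 0);
	return 0;
}

char _license[] SEC("license") = "GPL";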
@@ -953,10 +843,8 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
/* Do not release reference state, we are destroying dynptr on stack,
* not using some helper to release it. Just reset register.
*/
- __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
- __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
-
- bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi));
+ bpf_mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
+ bpf_mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
return 0;
}
@@ -991,7 +879,7 @@ static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_
static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
int i, spi;
/* This already represents first slot of initialized bpf_dynptr.
@@ -1021,7 +909,7 @@ static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_re
static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
enum bpf_arg_type arg_type)
{
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
enum bpf_dynptr_type dynptr_type;
int spi;
@@ -1051,7 +939,7 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env,
struct bpf_reg_state *reg, int insn_idx,
struct btf *btf, u32 btf_id, int nr_slots)
{
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
int spi, i, j, id;
spi = iter_get_spi(env, reg, nr_slots);
@@ -1083,7 +971,6 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env,
for (j = 0; j < BPF_REG_SIZE; j++)
slot->slot_type[j] = STACK_ITER;
- bpf_mark_stack_write(env, state->frameno, BIT(spi - i));
mark_stack_slot_scratched(env, spi - i);
}
@@ -1093,7 +980,7 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env,
static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
struct bpf_reg_state *reg, int nr_slots)
{
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
int spi, i, j;
spi = iter_get_spi(env, reg, nr_slots);
@@ -1107,12 +994,11 @@ static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
if (i == 0)
WARN_ON_ONCE(release_reference(env, st->ref_obj_id));
- __mark_reg_not_init(env, st);
+ bpf_mark_reg_not_init(env, st);
for (j = 0; j < BPF_REG_SIZE; j++)
slot->slot_type[j] = STACK_INVALID;
- bpf_mark_stack_write(env, state->frameno, BIT(spi - i));
mark_stack_slot_scratched(env, spi - i);
}
@@ -1122,7 +1008,7 @@ static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
struct bpf_reg_state *reg, int nr_slots)
{
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
int spi, i, j;
/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
@@ -1149,7 +1035,7 @@ static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
struct btf *btf, u32 btf_id, int nr_slots)
{
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
int spi, i, j;
spi = iter_get_spi(env, reg, nr_slots);
@@ -1186,7 +1072,7 @@ static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
struct bpf_reg_state *reg, int insn_idx,
int kfunc_class)
{
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
struct bpf_stack_state *slot;
struct bpf_reg_state *st;
int spi, i, id;
@@ -1202,7 +1088,6 @@ static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
slot = &state->stack[spi];
st = &slot->spilled_ptr;
- bpf_mark_stack_write(env, reg->frameno, BIT(spi));
__mark_reg_known_zero(st);
st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
st->ref_obj_id = id;
@@ -1218,7 +1103,7 @@ static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
int kfunc_class)
{
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
struct bpf_stack_state *slot;
struct bpf_reg_state *st;
int spi, i, err;
@@ -1256,9 +1141,7 @@ static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_r
return err;
}
- __mark_reg_not_init(env, st);
-
- bpf_mark_stack_write(env, reg->frameno, BIT(spi));
+ bpf_mark_reg_not_init(env, st);
for (i = 0; i < BPF_REG_SIZE; i++)
slot->slot_type[i] = STACK_INVALID;
@@ -1269,7 +1152,7 @@ static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_r
static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
struct bpf_stack_state *slot;
int spi, i;
@@ -1293,7 +1176,7 @@ static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bp
static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
struct bpf_stack_state *slot;
struct bpf_reg_state *st;
int spi, i;
@@ -1331,6 +1214,7 @@ static bool is_stack_slot_special(const struct bpf_stack_state *stack)
case STACK_IRQ_FLAG:
return true;
case STACK_INVALID:
+ case STACK_POISON:
case STACK_MISC:
case STACK_ZERO:
return false;
@@ -1343,26 +1227,12 @@ static bool is_stack_slot_special(const struct bpf_stack_state *stack)
/* The reg state of a pointer or a bounded scalar was saved when
* it was spilled to the stack.
*/
-static bool is_spilled_reg(const struct bpf_stack_state *stack)
-{
- return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
-}
-
-static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
-{
- return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL &&
- stack->spilled_ptr.type == SCALAR_VALUE;
-}
-
-static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
-{
- return stack->slot_type[0] == STACK_SPILL &&
- stack->spilled_ptr.type == SCALAR_VALUE;
-}
-/* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which
- * case they are equivalent, or it's STACK_ZERO, in which case we preserve
- * more precise STACK_ZERO.
+/*
+ * Mark stack slot as STACK_MISC, unless it is already:
+ * - STACK_INVALID, in which case they are equivalent.
+ * - STACK_ZERO, in which case we preserve more precise STACK_ZERO.
+ * - STACK_POISON, which truly forbids access to the slot.
* Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged
* mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is
* unnecessary as both are considered equivalent when loading data and pruning,
@@ -1373,14 +1243,14 @@ static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
{
if (*stype == STACK_ZERO)
return;
- if (*stype == STACK_INVALID)
+ if (*stype == STACK_INVALID || *stype == STACK_POISON)
return;
*stype = STACK_MISC;
}
static void scrub_spilled_slot(u8 *stype)
{
- if (*stype != STACK_INVALID)
+ if (*stype != STACK_INVALID && *stype != STACK_POISON)
*stype = STACK_MISC;
}
@@ -1669,14 +1539,6 @@ static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *st
return NULL;
}
-static void update_peak_states(struct bpf_verifier_env *env)
-{
- u32 cur_states;
-
- cur_states = env->explored_states_size + env->free_list_size + env->num_backedges;
- env->peak_states = max(env->peak_states, cur_states);
-}
-
static void free_func_state(struct bpf_func_state *state)
{
if (!state)
@@ -1685,15 +1547,15 @@ static void free_func_state(struct bpf_func_state *state)
kfree(state);
}
-static void clear_jmp_history(struct bpf_verifier_state *state)
+void bpf_clear_jmp_history(struct bpf_verifier_state *state)
{
kfree(state->jmp_history);
state->jmp_history = NULL;
state->jmp_history_cnt = 0;
}
-static void free_verifier_state(struct bpf_verifier_state *state,
- bool free_self)
+void bpf_free_verifier_state(struct bpf_verifier_state *state,
+ bool free_self)
{
int i;
@@ -1702,42 +1564,11 @@ static void free_verifier_state(struct bpf_verifier_state *state,
state->frame[i] = NULL;
}
kfree(state->refs);
- clear_jmp_history(state);
+ bpf_clear_jmp_history(state);
if (free_self)
kfree(state);
}
-/* struct bpf_verifier_state->parent refers to states
- * that are in either of env->{expored_states,free_list}.
- * In both cases the state is contained in struct bpf_verifier_state_list.
- */
-static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st)
-{
- if (st->parent)
- return container_of(st->parent, struct bpf_verifier_state_list, state);
- return NULL;
-}
-
-static bool incomplete_read_marks(struct bpf_verifier_env *env,
- struct bpf_verifier_state *st);
-
-/* A state can be freed if it is no longer referenced:
- * - is in the env->free_list;
- * - has no children states;
- */
-static void maybe_free_verifier_state(struct bpf_verifier_env *env,
- struct bpf_verifier_state_list *sl)
-{
- if (!sl->in_free_list
- || sl->state.branches != 0
- || incomplete_read_marks(env, &sl->state))
- return;
- list_del(&sl->node);
- free_verifier_state(&sl->state, false);
- kfree(sl);
- env->free_list_size--;
-}
-
/* copy verifier state from src to dst growing dst stack space
* when necessary to accommodate larger src stack
*/
@@ -1748,8 +1579,8 @@ static int copy_func_state(struct bpf_func_state *dst,
return copy_stack_state(dst, src);
}
-static int copy_verifier_state(struct bpf_verifier_state *dst_state,
- const struct bpf_verifier_state *src)
+int bpf_copy_verifier_state(struct bpf_verifier_state *dst_state,
+ const struct bpf_verifier_state *src)
{
struct bpf_func_state *dst;
int i, err;
@@ -1773,7 +1604,6 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
return err;
dst_state->speculative = src->speculative;
dst_state->in_sleepable = src->in_sleepable;
- dst_state->cleaned = src->cleaned;
dst_state->curframe = src->curframe;
dst_state->branches = src->branches;
dst_state->parent = src->parent;
@@ -1803,7 +1633,7 @@ static u32 state_htab_size(struct bpf_verifier_env *env)
return env->prog->len;
}
-static struct list_head *explored_state(struct bpf_verifier_env *env, int idx)
+struct list_head *bpf_explored_state(struct bpf_verifier_env *env, int idx)
{
struct bpf_verifier_state *cur = env->cur_state;
struct bpf_func_state *state = cur->frame[cur->curframe];
@@ -1825,266 +1655,19 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
return true;
}
-/* Return IP for a given frame in a call stack */
-static u32 frame_insn_idx(struct bpf_verifier_state *st, u32 frame)
-{
- return frame == st->curframe
- ? st->insn_idx
- : st->frame[frame + 1]->callsite;
-}
-
-/* For state @st look for a topmost frame with frame_insn_idx() in some SCC,
- * if such frame exists form a corresponding @callchain as an array of
- * call sites leading to this frame and SCC id.
- * E.g.:
- *
- * void foo() { A: loop {... SCC#1 ...}; }
- * void bar() { B: loop { C: foo(); ... SCC#2 ... }
- * D: loop { E: foo(); ... SCC#3 ... } }
- * void main() { F: bar(); }
- *
- * @callchain at (A) would be either (F,SCC#2) or (F,SCC#3) depending
- * on @st frame call sites being (F,C,A) or (F,E,A).
- */
-static bool compute_scc_callchain(struct bpf_verifier_env *env,
- struct bpf_verifier_state *st,
- struct bpf_scc_callchain *callchain)
-{
- u32 i, scc, insn_idx;
-
- memset(callchain, 0, sizeof(*callchain));
- for (i = 0; i <= st->curframe; i++) {
- insn_idx = frame_insn_idx(st, i);
- scc = env->insn_aux_data[insn_idx].scc;
- if (scc) {
- callchain->scc = scc;
- break;
- } else if (i < st->curframe) {
- callchain->callsites[i] = insn_idx;
- } else {
- return false;
- }
- }
- return true;
-}
-
-/* Check if bpf_scc_visit instance for @callchain exists. */
-static struct bpf_scc_visit *scc_visit_lookup(struct bpf_verifier_env *env,
- struct bpf_scc_callchain *callchain)
-{
- struct bpf_scc_info *info = env->scc_info[callchain->scc];
- struct bpf_scc_visit *visits = info->visits;
- u32 i;
- if (!info)
- return NULL;
- for (i = 0; i < info->num_visits; i++)
- if (memcmp(callchain, &visits[i].callchain, sizeof(*callchain)) == 0)
- return &visits[i];
- return NULL;
-}
-
-/* Allocate a new bpf_scc_visit instance corresponding to @callchain.
- * Allocated instances are alive for a duration of the do_check_common()
- * call and are freed by free_states().
- */
-static struct bpf_scc_visit *scc_visit_alloc(struct bpf_verifier_env *env,
- struct bpf_scc_callchain *callchain)
-{
- struct bpf_scc_visit *visit;
- struct bpf_scc_info *info;
- u32 scc, num_visits;
- u64 new_sz;
-
- scc = callchain->scc;
- info = env->scc_info[scc];
- num_visits = info ? info->num_visits : 0;
- new_sz = sizeof(*info) + sizeof(struct bpf_scc_visit) * (num_visits + 1);
- info = kvrealloc(env->scc_info[scc], new_sz, GFP_KERNEL_ACCOUNT);
- if (!info)
- return NULL;
- env->scc_info[scc] = info;
- info->num_visits = num_visits + 1;
- visit = &info->visits[num_visits];
- memset(visit, 0, sizeof(*visit));
- memcpy(&visit->callchain, callchain, sizeof(*callchain));
- return visit;
-}
-
-/* Form a string '(callsite#1,callsite#2,...,scc)' in env->tmp_str_buf */
-static char *format_callchain(struct bpf_verifier_env *env, struct bpf_scc_callchain *callchain)
-{
- char *buf = env->tmp_str_buf;
- int i, delta = 0;
-
- delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "(");
- for (i = 0; i < ARRAY_SIZE(callchain->callsites); i++) {
- if (!callchain->callsites[i])
- break;
- delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u,",
- callchain->callsites[i]);
- }
- delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u)", callchain->scc);
- return env->tmp_str_buf;
-}
-
-/* If callchain for @st exists (@st is in some SCC), ensure that
- * bpf_scc_visit instance for this callchain exists.
- * If instance does not exist or is empty, assign visit->entry_state to @st.
- */
-static int maybe_enter_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
-{
- struct bpf_scc_callchain *callchain = &env->callchain_buf;
- struct bpf_scc_visit *visit;
-
- if (!compute_scc_callchain(env, st, callchain))
- return 0;
- visit = scc_visit_lookup(env, callchain);
- visit = visit ?: scc_visit_alloc(env, callchain);
- if (!visit)
- return -ENOMEM;
- if (!visit->entry_state) {
- visit->entry_state = st;
- if (env->log.level & BPF_LOG_LEVEL2)
- verbose(env, "SCC enter %s\n", format_callchain(env, callchain));
- }
- return 0;
-}
-
-static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit);
-
-/* If callchain for @st exists (@st is in some SCC), make it empty:
- * - set visit->entry_state to NULL;
- * - flush accumulated backedges.
- */
-static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
-{
- struct bpf_scc_callchain *callchain = &env->callchain_buf;
- struct bpf_scc_visit *visit;
-
- if (!compute_scc_callchain(env, st, callchain))
- return 0;
- visit = scc_visit_lookup(env, callchain);
- if (!visit) {
- /*
- * If path traversal stops inside an SCC, corresponding bpf_scc_visit
- * must exist for non-speculative paths. For non-speculative paths
- * traversal stops when:
- * a. Verification error is found, maybe_exit_scc() is not called.
- * b. Top level BPF_EXIT is reached. Top level BPF_EXIT is not a member
- * of any SCC.
- * c. A checkpoint is reached and matched. Checkpoints are created by
- * is_state_visited(), which calls maybe_enter_scc(), which allocates
- * bpf_scc_visit instances for checkpoints within SCCs.
- * (c) is the only case that can reach this point.
- */
- if (!st->speculative) {
- verifier_bug(env, "scc exit: no visit info for call chain %s",
- format_callchain(env, callchain));
- return -EFAULT;
- }
- return 0;
- }
- if (visit->entry_state != st)
- return 0;
- if (env->log.level & BPF_LOG_LEVEL2)
- verbose(env, "SCC exit %s\n", format_callchain(env, callchain));
- visit->entry_state = NULL;
- env->num_backedges -= visit->num_backedges;
- visit->num_backedges = 0;
- update_peak_states(env);
- return propagate_backedges(env, visit);
-}
-
-/* Lookup an bpf_scc_visit instance corresponding to @st callchain
- * and add @backedge to visit->backedges. @st callchain must exist.
- */
-static int add_scc_backedge(struct bpf_verifier_env *env,
- struct bpf_verifier_state *st,
- struct bpf_scc_backedge *backedge)
-{
- struct bpf_scc_callchain *callchain = &env->callchain_buf;
- struct bpf_scc_visit *visit;
-
- if (!compute_scc_callchain(env, st, callchain)) {
- verifier_bug(env, "add backedge: no SCC in verification path, insn_idx %d",
- st->insn_idx);
- return -EFAULT;
- }
- visit = scc_visit_lookup(env, callchain);
- if (!visit) {
- verifier_bug(env, "add backedge: no visit info for call chain %s",
- format_callchain(env, callchain));
- return -EFAULT;
- }
- if (env->log.level & BPF_LOG_LEVEL2)
- verbose(env, "SCC backedge %s\n", format_callchain(env, callchain));
- backedge->next = visit->backedges;
- visit->backedges = backedge;
- visit->num_backedges++;
- env->num_backedges++;
- update_peak_states(env);
- return 0;
-}
-
-/* bpf_reg_state->live marks for registers in a state @st are incomplete,
- * if state @st is in some SCC and not all execution paths starting at this
- * SCC are fully explored.
- */
-static bool incomplete_read_marks(struct bpf_verifier_env *env,
- struct bpf_verifier_state *st)
-{
- struct bpf_scc_callchain *callchain = &env->callchain_buf;
- struct bpf_scc_visit *visit;
-
- if (!compute_scc_callchain(env, st, callchain))
- return false;
- visit = scc_visit_lookup(env, callchain);
- if (!visit)
- return false;
- return !!visit->backedges;
-}
-
-static void free_backedges(struct bpf_scc_visit *visit)
+void bpf_free_backedges(struct bpf_scc_visit *visit)
{
struct bpf_scc_backedge *backedge, *next;
for (backedge = visit->backedges; backedge; backedge = next) {
- free_verifier_state(&backedge->state, false);
+ bpf_free_verifier_state(&backedge->state, false);
next = backedge->next;
kfree(backedge);
}
visit->backedges = NULL;
}
-static int update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
-{
- struct bpf_verifier_state_list *sl = NULL, *parent_sl;
- struct bpf_verifier_state *parent;
- int err;
-
- while (st) {
- u32 br = --st->branches;
-
- /* verifier_bug_if(br > 1, ...) technically makes sense here,
- * but see comment in push_stack(), hence:
- */
- verifier_bug_if((int)br < 0, env, "%s:branches_to_explore=%d", __func__, br);
- if (br)
- break;
- err = maybe_exit_scc(env, st);
- if (err)
- return err;
- parent = st->parent;
- parent_sl = state_parent_as_list(st);
- if (sl)
- maybe_free_verifier_state(env, sl);
- st = parent;
- sl = parent_sl;
- }
- return 0;
-}
-
static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
int *insn_idx, bool pop_log)
{
@@ -2096,7 +1679,7 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
return -ENOENT;
if (cur) {
- err = copy_verifier_state(cur, &head->st);
+ err = bpf_copy_verifier_state(cur, &head->st);
if (err)
return err;
}
@@ -2107,7 +1690,7 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
if (prev_insn_idx)
*prev_insn_idx = head->prev_insn_idx;
elem = head->next;
- free_verifier_state(&head->st, false);
+ bpf_free_verifier_state(&head->st, false);
kfree(head);
env->head = elem;
env->stack_size--;
@@ -2144,7 +1727,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
elem->log_pos = env->log.end_pos;
env->head = elem;
env->stack_size++;
- err = copy_verifier_state(&elem->st, cur);
+ err = bpf_copy_verifier_state(&elem->st, cur);
if (err)
return ERR_PTR(-ENOMEM);
elem->st.speculative |= speculative;
@@ -2168,7 +1751,6 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
return &elem->st;
}
-#define CALLER_SAVED_REGS 6
static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
@@ -2231,13 +1813,6 @@ static void __mark_reg_const_zero(const struct bpf_verifier_env *env, struct bpf
static void mark_reg_known_zero(struct bpf_verifier_env *env,
struct bpf_reg_state *regs, u32 regno)
{
- if (WARN_ON(regno >= MAX_BPF_REG)) {
- verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
- /* Something bad happened, let's kill all regs */
- for (regno = 0; regno < MAX_BPF_REG; regno++)
- __mark_reg_not_init(env, regs + regno);
- return;
- }
__mark_reg_known_zero(regs + regno);
}
@@ -2288,11 +1863,10 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
struct btf_field_graph_root *ds_head)
{
- __mark_reg_known_zero(&regs[regno]);
+ __mark_reg_known(&regs[regno], ds_head->node_offset);
regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
regs[regno].btf = ds_head->btf;
regs[regno].btf_id = ds_head->value_btf_id;
- regs[regno].off = ds_head->node_offset;
}
static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
@@ -2323,7 +1897,6 @@ static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
*/
return reg->type == which &&
reg->id == 0 &&
- reg->off == 0 &&
tnum_equals_const(reg->var_off, 0);
}
@@ -2434,7 +2007,7 @@ static void __update_reg_bounds(struct bpf_reg_state *reg)
}
/* Uses signed min/max values to inform unsigned, and vice-versa */
-static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
+static void deduce_bounds_32_from_64(struct bpf_reg_state *reg)
{
/* If upper 32 bits of u64/s64 range don't change, we can use lower 32
* bits to improve our u32/s32 boundaries.
@@ -2504,6 +2077,10 @@ static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
}
+}
+
+static void deduce_bounds_32_from_32(struct bpf_reg_state *reg)
+{
/* if u32 range forms a valid s32 range (due to matching sign bit),
* try to learn from that
*/
@@ -2545,7 +2122,7 @@ static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
}
}
-static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
+static void deduce_bounds_64_from_64(struct bpf_reg_state *reg)
{
/* If u64 range forms a valid s64 range (due to matching sign bit),
* try to learn from that. Let's do a bit of ASCII art to see when
@@ -2680,7 +2257,7 @@ static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
}
}
-static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg)
+static void deduce_bounds_64_from_32(struct bpf_reg_state *reg)
{
/* Try to tighten 64-bit bounds from 32-bit knowledge, using 32-bit
* values on both sides of 64-bit range in hope to have tighter range.
@@ -2749,9 +2326,10 @@ static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg)
static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
- __reg32_deduce_bounds(reg);
- __reg64_deduce_bounds(reg);
- __reg_deduce_mixed_bounds(reg);
+ deduce_bounds_64_from_64(reg);
+ deduce_bounds_32_from_64(reg);
+ deduce_bounds_32_from_32(reg);
+ deduce_bounds_64_from_32(reg);
}
/* Attempts to improve var_off based on unsigned min/max information */
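
A worked instance of the 32-from-64 direction above (standalone sketch): when the upper 32 bits of a u64 range are constant, the lower halves bound the u32 subregister directly.

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t umin = 0x100000005ULL, umax = 0x1000000ffULL;
	uint32_t u32_min = 0, u32_max = UINT32_MAX;

	if ((umin >> 32) == (umax >> 32)) {	/* upper halves don't change */
		u32_min = (uint32_t)umin > u32_min ? (uint32_t)umin : u32_min;
		u32_max = (uint32_t)umax < u32_max ? (uint32_t)umax : u32_max;
	}
	assert(u32_min == 0x05 && u32_max == 0xff);
	return 0;
}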
@@ -2767,14 +2345,18 @@ static void __reg_bound_offset(struct bpf_reg_state *reg)
reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
}
+static bool range_bounds_violation(struct bpf_reg_state *reg);
+
static void reg_bounds_sync(struct bpf_reg_state *reg)
{
+ /* If the input reg_state is invalid, we can exit early */
+ if (range_bounds_violation(reg))
+ return;
/* We might have learned new bounds from the var_off. */
__update_reg_bounds(reg);
/* We might have learned something about the sign bit. */
__reg_deduce_bounds(reg);
__reg_deduce_bounds(reg);
- __reg_deduce_bounds(reg);
/* We might have learned some bits from the bounds. */
__reg_bound_offset(reg);
/* Intersecting with the old var_off might have improved our bounds
@@ -2784,39 +2366,55 @@ static void reg_bounds_sync(struct bpf_reg_state *reg)
__update_reg_bounds(reg);
}
+static bool range_bounds_violation(struct bpf_reg_state *reg)
+{
+ return (reg->umin_value > reg->umax_value || reg->smin_value > reg->smax_value ||
+ reg->u32_min_value > reg->u32_max_value ||
+ reg->s32_min_value > reg->s32_max_value);
+}
+
+static bool const_tnum_range_mismatch(struct bpf_reg_state *reg)
+{
+ u64 uval = reg->var_off.value;
+ s64 sval = (s64)uval;
+
+ if (!tnum_is_const(reg->var_off))
+ return false;
+
+ return reg->umin_value != uval || reg->umax_value != uval ||
+ reg->smin_value != sval || reg->smax_value != sval;
+}
+
+static bool const_tnum_range_mismatch_32(struct bpf_reg_state *reg)
+{
+ u32 uval32 = tnum_subreg(reg->var_off).value;
+ s32 sval32 = (s32)uval32;
+
+ if (!tnum_subreg_is_const(reg->var_off))
+ return false;
+
+ return reg->u32_min_value != uval32 || reg->u32_max_value != uval32 ||
+ reg->s32_min_value != sval32 || reg->s32_max_value != sval32;
+}
+
static int reg_bounds_sanity_check(struct bpf_verifier_env *env,
struct bpf_reg_state *reg, const char *ctx)
{
const char *msg;
- if (reg->umin_value > reg->umax_value ||
- reg->smin_value > reg->smax_value ||
- reg->u32_min_value > reg->u32_max_value ||
- reg->s32_min_value > reg->s32_max_value) {
- msg = "range bounds violation";
- goto out;
+ if (range_bounds_violation(reg)) {
+ msg = "range bounds violation";
+ goto out;
}
- if (tnum_is_const(reg->var_off)) {
- u64 uval = reg->var_off.value;
- s64 sval = (s64)uval;
-
- if (reg->umin_value != uval || reg->umax_value != uval ||
- reg->smin_value != sval || reg->smax_value != sval) {
- msg = "const tnum out of sync with range bounds";
- goto out;
- }
+ if (const_tnum_range_mismatch(reg)) {
+ msg = "const tnum out of sync with range bounds";
+ goto out;
}
- if (tnum_subreg_is_const(reg->var_off)) {
- u32 uval32 = tnum_subreg(reg->var_off).value;
- s32 sval32 = (s32)uval32;
-
- if (reg->u32_min_value != uval32 || reg->u32_max_value != uval32 ||
- reg->s32_min_value != sval32 || reg->s32_max_value != sval32) {
- msg = "const subreg tnum out of sync with range bounds";
- goto out;
- }
+ if (const_tnum_range_mismatch_32(reg)) {
+ msg = "const subreg tnum out of sync with range bounds";
+ goto out;
}
return 0;
@@ -2859,7 +2457,7 @@ static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
}
/* Mark a register as having a completely unknown (scalar) value. */
-static void __mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
+void bpf_mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
{
/*
* Clear type, off, and union(map_ptr, range) and
@@ -2881,20 +2479,13 @@ static void __mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
static void __mark_reg_unknown(const struct bpf_verifier_env *env,
struct bpf_reg_state *reg)
{
- __mark_reg_unknown_imprecise(reg);
+ bpf_mark_reg_unknown_imprecise(reg);
reg->precise = !env->bpf_capable;
}
static void mark_reg_unknown(struct bpf_verifier_env *env,
struct bpf_reg_state *regs, u32 regno)
{
- if (WARN_ON(regno >= MAX_BPF_REG)) {
- verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
- /* Something bad happened, let's kill all regs except FP */
- for (regno = 0; regno < BPF_REG_FP; regno++)
- __mark_reg_not_init(env, regs + regno);
- return;
- }
__mark_reg_unknown(env, regs + regno);
}
@@ -2917,26 +2508,13 @@ static int __mark_reg_s32_range(struct bpf_verifier_env *env,
return reg_bounds_sanity_check(env, reg, "s32_range");
}
-static void __mark_reg_not_init(const struct bpf_verifier_env *env,
- struct bpf_reg_state *reg)
+void bpf_mark_reg_not_init(const struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg)
{
__mark_reg_unknown(env, reg);
reg->type = NOT_INIT;
}
-static void mark_reg_not_init(struct bpf_verifier_env *env,
- struct bpf_reg_state *regs, u32 regno)
-{
- if (WARN_ON(regno >= MAX_BPF_REG)) {
- verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
- /* Something bad happened, let's kill all regs except FP */
- for (regno = 0; regno < BPF_REG_FP; regno++)
- __mark_reg_not_init(env, regs + regno);
- return;
- }
- __mark_reg_not_init(env, regs + regno);
-}
-
static int mark_btf_ld_reg(struct bpf_verifier_env *env,
struct bpf_reg_state *regs, u32 regno,
enum bpf_reg_type reg_type,
@@ -2974,7 +2552,7 @@ static void init_reg_state(struct bpf_verifier_env *env,
int i;
for (i = 0; i < MAX_BPF_REG; i++) {
- mark_reg_not_init(env, regs, i);
+ bpf_mark_reg_not_init(env, &regs[i]);
regs[i].subreg_def = DEF_NOT_SUBREG;
}
@@ -2986,10 +2564,13 @@ static void init_reg_state(struct bpf_verifier_env *env,
static struct bpf_retval_range retval_range(s32 minval, s32 maxval)
{
- return (struct bpf_retval_range){ minval, maxval };
+ /*
+ * return_32bit is set to false by default and set explicitly
+ * by the caller when necessary.
+ */
+ return (struct bpf_retval_range){ minval, maxval, false };
}
-#define BPF_MAIN_FUNC (-1)
static void init_func_state(struct bpf_verifier_env *env,
struct bpf_func_state *state,
int callsite, int frameno, int subprogno)
@@ -3026,7 +2607,7 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
env->stack_size);
return ERR_PTR(-E2BIG);
}
- /* Unlike push_stack() do not copy_verifier_state().
+ /* Unlike push_stack() do not bpf_copy_verifier_state().
* The caller state doesn't matter.
* This is async callback. It starts in a fresh stack.
* Initialize it similar to do_check_common().
@@ -3045,12 +2626,6 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
}
-enum reg_arg_type {
- SRC_OP, /* register is used as source operand */
- DST_OP, /* register is used as destination operand */
- DST_OP_NO_MARK /* same as above, check only, don't mark */
-};
-
static int cmp_subprogs(const void *a, const void *b)
{
return ((struct bpf_subprog_info *)a)->start -
@@ -3079,7 +2654,7 @@ struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *en
}
/* Find subprogram that starts exactly at 'off' */
-static int find_subprog(struct bpf_verifier_env *env, int off)
+int bpf_find_subprog(struct bpf_verifier_env *env, int off)
{
struct bpf_subprog_info *p;
@@ -3098,7 +2673,7 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
verbose(env, "call to invalid destination\n");
return -EINVAL;
}
- ret = find_subprog(env, off);
+ ret = bpf_find_subprog(env, off);
if (ret >= 0)
return ret;
if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
@@ -3174,41 +2749,19 @@ static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
return ret;
}
-#define MAX_KFUNC_DESCS 256
#define MAX_KFUNC_BTFS 256
-struct bpf_kfunc_desc {
- struct btf_func_model func_model;
- u32 func_id;
- s32 imm;
- u16 offset;
- unsigned long addr;
-};
-
struct bpf_kfunc_btf {
struct btf *btf;
struct module *module;
u16 offset;
};
-struct bpf_kfunc_desc_tab {
- /* Sorted by func_id (BTF ID) and offset (fd_array offset) during
- * verification. JITs do lookups by bpf_insn, where func_id may not be
- * available, therefore at the end of verification do_misc_fixups()
- * sorts this by imm and offset.
- */
- struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
- u32 nr_descs;
-};
-
struct bpf_kfunc_btf_tab {
struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
u32 nr_descs;
};
-static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc,
- int insn_idx);
-
static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
{
const struct bpf_kfunc_desc *d0 = a;
@@ -3436,7 +2989,7 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env,
return 0;
}
-static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
+int bpf_add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, u16 offset)
{
struct bpf_kfunc_btf_tab *btf_tab;
struct btf_func_model func_model;
@@ -3531,95 +3084,11 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
return 0;
}
-static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
-{
- const struct bpf_kfunc_desc *d0 = a;
- const struct bpf_kfunc_desc *d1 = b;
-
- if (d0->imm != d1->imm)
- return d0->imm < d1->imm ? -1 : 1;
- if (d0->offset != d1->offset)
- return d0->offset < d1->offset ? -1 : 1;
- return 0;
-}
-
-static int set_kfunc_desc_imm(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc)
-{
- unsigned long call_imm;
-
- if (bpf_jit_supports_far_kfunc_call()) {
- call_imm = desc->func_id;
- } else {
- call_imm = BPF_CALL_IMM(desc->addr);
- /* Check whether the relative offset overflows desc->imm */
- if ((unsigned long)(s32)call_imm != call_imm) {
- verbose(env, "address of kernel func_id %u is out of range\n",
- desc->func_id);
- return -EINVAL;
- }
- }
- desc->imm = call_imm;
- return 0;
-}
-
-static int sort_kfunc_descs_by_imm_off(struct bpf_verifier_env *env)
-{
- struct bpf_kfunc_desc_tab *tab;
- int i, err;
-
- tab = env->prog->aux->kfunc_tab;
- if (!tab)
- return 0;
-
- for (i = 0; i < tab->nr_descs; i++) {
- err = set_kfunc_desc_imm(env, &tab->descs[i]);
- if (err)
- return err;
- }
-
- sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
- kfunc_desc_cmp_by_imm_off, NULL);
- return 0;
-}
-
bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
{
return !!prog->aux->kfunc_tab;
}
-const struct btf_func_model *
-bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
- const struct bpf_insn *insn)
-{
- const struct bpf_kfunc_desc desc = {
- .imm = insn->imm,
- .offset = insn->off,
- };
- const struct bpf_kfunc_desc *res;
- struct bpf_kfunc_desc_tab *tab;
-
- tab = prog->aux->kfunc_tab;
- res = bsearch(&desc, tab->descs, tab->nr_descs,
- sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
-
- return res ? &res->func_model : NULL;
-}
-
-static int add_kfunc_in_insns(struct bpf_verifier_env *env,
- struct bpf_insn *insn, int cnt)
-{
- int i, ret;
-
- for (i = 0; i < cnt; i++, insn++) {
- if (bpf_pseudo_kfunc_call(insn)) {
- ret = add_kfunc_call(env, insn->imm, insn->off);
- if (ret < 0)
- return ret;
- }
- }
- return 0;
-}
-
static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
{
struct bpf_subprog_info *subprog = env->subprog_info;
@@ -3644,7 +3113,7 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
ret = add_subprog(env, i + insn->imm + 1);
else
- ret = add_kfunc_call(env, insn->imm, insn->off);
+ ret = bpf_add_kfunc_call(env, insn->imm, insn->off);
if (ret < 0)
return ret;
@@ -3666,7 +3135,7 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
if (env->subprog_info[i].start != ex_cb_insn)
continue;
env->exception_callback_subprog = i;
- mark_subprog_exc_cb(env, i);
+ bpf_mark_subprog_exc_cb(env, i);
break;
}
}
@@ -3739,17 +3208,101 @@ next:
return 0;
}
+/*
+ * Sort subprogs in topological order so that leaf subprogs come first and
+ * their callers come later. This is a DFS post-order traversal of the call
+ * graph. Scan only reachable instructions (those in the computed postorder) of
+ * the current subprog to discover callees (direct subprogs and sync
+ * callbacks).
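+ * E.g. (illustrative): if main() calls f() and g(), and f() also calls
+ * g(), the DFS finishes g() first, then f(), then main(), so the
+ * resulting order is {g, f, main}.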
+ */
+static int sort_subprogs_topo(struct bpf_verifier_env *env)
+{
+ struct bpf_subprog_info *si = env->subprog_info;
+ int *insn_postorder = env->cfg.insn_postorder;
+ struct bpf_insn *insn = env->prog->insnsi;
+ int cnt = env->subprog_cnt;
+ int *dfs_stack = NULL;
+ int top = 0, order = 0;
+ int i, ret = 0;
+ u8 *color = NULL;
+
+ color = kvzalloc_objs(*color, cnt, GFP_KERNEL_ACCOUNT);
+ dfs_stack = kvmalloc_objs(*dfs_stack, cnt, GFP_KERNEL_ACCOUNT);
+ if (!color || !dfs_stack) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * DFS post-order traversal.
+ * Color values: 0 = unvisited, 1 = on stack, 2 = done.
+ */
+ for (i = 0; i < cnt; i++) {
+ if (color[i])
+ continue;
+ color[i] = 1;
+ dfs_stack[top++] = i;
+
+ while (top > 0) {
+ int cur = dfs_stack[top - 1];
+ int po_start = si[cur].postorder_start;
+ int po_end = si[cur + 1].postorder_start;
+ bool pushed = false;
+ int j;
+
+ for (j = po_start; j < po_end; j++) {
+ int idx = insn_postorder[j];
+ int callee;
+
+ if (!bpf_pseudo_call(&insn[idx]) && !bpf_pseudo_func(&insn[idx]))
+ continue;
+ callee = bpf_find_subprog(env, idx + insn[idx].imm + 1);
+ if (callee < 0) {
+ ret = -EFAULT;
+ goto out;
+ }
+ if (color[callee] == 2)
+ continue;
+ if (color[callee] == 1) {
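+ /*
+ * A callee still on the DFS stack means a call-graph cycle.
+ * Referencing a callback via ld_imm64 (bpf_pseudo_func) is
+ * tolerated here; an actual recursive call is rejected.
+ */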
+ if (bpf_pseudo_func(&insn[idx]))
+ continue;
+ verbose(env, "recursive call from %s() to %s()\n",
+ subprog_name(env, cur),
+ subprog_name(env, callee));
+ ret = -EINVAL;
+ goto out;
+ }
+ color[callee] = 1;
+ dfs_stack[top++] = callee;
+ pushed = true;
+ break;
+ }
+
+ if (!pushed) {
+ color[cur] = 2;
+ env->subprog_topo_order[order++] = cur;
+ top--;
+ }
+ }
+ }
+
+ if (env->log.level & BPF_LOG_LEVEL2)
+ for (i = 0; i < cnt; i++)
+ verbose(env, "topo_order[%d] = %s\n",
+ i, subprog_name(env, env->subprog_topo_order[i]));
+out:
+ kvfree(dfs_stack);
+ kvfree(color);
+ return ret;
+}
+
static int mark_stack_slot_obj_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
int spi, int nr_slots)
{
- int err, i;
+ int i;
- for (i = 0; i < nr_slots; i++) {
- err = bpf_mark_stack_read(env, reg->frameno, env->insn_idx, BIT(spi - i));
- if (err)
- return err;
+ for (i = 0; i < nr_slots; i++)
mark_stack_slot_scratched(env, spi - i);
- }
return 0;
}
@@ -3793,8 +3346,8 @@ static int mark_irq_flag_read(struct bpf_verifier_env *env, struct bpf_reg_state
* code only. It returns TRUE if the source or destination register operates
* on 64-bit, otherwise FALSE.
*/
-static bool is_reg64(struct bpf_insn *insn,
- u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
+bool bpf_is_reg64(struct bpf_insn *insn,
+ u32 regno, struct bpf_reg_state *reg, enum bpf_reg_arg_type t)
{
u8 code, class, op;
@@ -3879,41 +3432,6 @@ static bool is_reg64(struct bpf_insn *insn,
return true;
}
-/* Return the regno defined by the insn, or -1. */
-static int insn_def_regno(const struct bpf_insn *insn)
-{
- switch (BPF_CLASS(insn->code)) {
- case BPF_JMP:
- case BPF_JMP32:
- case BPF_ST:
- return -1;
- case BPF_STX:
- if (BPF_MODE(insn->code) == BPF_ATOMIC ||
- BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
- if (insn->imm == BPF_CMPXCHG)
- return BPF_REG_0;
- else if (insn->imm == BPF_LOAD_ACQ)
- return insn->dst_reg;
- else if (insn->imm & BPF_FETCH)
- return insn->src_reg;
- }
- return -1;
- default:
- return insn->dst_reg;
- }
-}
-
-/* Return TRUE if INSN has defined any 32-bit value explicitly. */
-static bool insn_has_def32(struct bpf_insn *insn)
-{
- int dst_reg = insn_def_regno(insn);
-
- if (dst_reg == -1)
- return false;
-
- return !is_reg64(insn, dst_reg, NULL, DST_OP);
-}
-
static void mark_insn_zext(struct bpf_verifier_env *env,
struct bpf_reg_state *reg)
{
@@ -3928,21 +3446,16 @@ static void mark_insn_zext(struct bpf_verifier_env *env,
}
static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
- enum reg_arg_type t)
+ enum bpf_reg_arg_type t)
{
struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
struct bpf_reg_state *reg;
bool rw64;
- if (regno >= MAX_BPF_REG) {
- verbose(env, "R%d is invalid\n", regno);
- return -EINVAL;
- }
-
mark_reg_scratched(env, regno);
reg = &regs[regno];
- rw64 = is_reg64(insn, regno, reg, t);
+ rw64 = bpf_is_reg64(insn, regno, reg, t);
if (t == SRC_OP) {
/* check whether register used as source operand can be read */
if (reg->type == NOT_INIT) {
@@ -3971,7 +3484,7 @@ static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *r
}
static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
- enum reg_arg_type t)
+ enum bpf_reg_arg_type t)
{
struct bpf_verifier_state *vstate = env->cur_state;
struct bpf_func_state *state = vstate->frame[vstate->curframe];
@@ -3984,26 +3497,6 @@ static int insn_stack_access_flags(int frameno, int spi)
return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno;
}
-static int insn_stack_access_spi(int insn_flags)
-{
- return (insn_flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK;
-}
-
-static int insn_stack_access_frameno(int insn_flags)
-{
- return insn_flags & INSN_F_FRAMENO_MASK;
-}
-
-static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
-{
- env->insn_aux_data[idx].jmp_point = true;
-}
-
-static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
-{
- return env->insn_aux_data[insn_idx].jmp_point;
-}
-
#define LR_FRAMENO_BITS 3
#define LR_SPI_BITS 6
#define LR_ENTRY_BITS (LR_SPI_BITS + LR_FRAMENO_BITS + 1)
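/* Each entry thus packs into 1 + 6 + 3 = 10 bits: an is_reg flag, an spi/regno, and a frameno. */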
@@ -4082,91 +3575,6 @@ static void linked_regs_unpack(u64 val, struct linked_regs *s)
}
}
-/* for any branch, call, exit record the history of jmps in the given state */
-static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
- int insn_flags, u64 linked_regs)
-{
- u32 cnt = cur->jmp_history_cnt;
- struct bpf_jmp_history_entry *p;
- size_t alloc_size;
-
- /* combine instruction flags if we already recorded this instruction */
- if (env->cur_hist_ent) {
- /* atomic instructions push insn_flags twice, for READ and
- * WRITE sides, but they should agree on stack slot
- */
- verifier_bug_if((env->cur_hist_ent->flags & insn_flags) &&
- (env->cur_hist_ent->flags & insn_flags) != insn_flags,
- env, "insn history: insn_idx %d cur flags %x new flags %x",
- env->insn_idx, env->cur_hist_ent->flags, insn_flags);
- env->cur_hist_ent->flags |= insn_flags;
- verifier_bug_if(env->cur_hist_ent->linked_regs != 0, env,
- "insn history: insn_idx %d linked_regs: %#llx",
- env->insn_idx, env->cur_hist_ent->linked_regs);
- env->cur_hist_ent->linked_regs = linked_regs;
- return 0;
- }
-
- cnt++;
- alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
- p = krealloc(cur->jmp_history, alloc_size, GFP_KERNEL_ACCOUNT);
- if (!p)
- return -ENOMEM;
- cur->jmp_history = p;
-
- p = &cur->jmp_history[cnt - 1];
- p->idx = env->insn_idx;
- p->prev_idx = env->prev_insn_idx;
- p->flags = insn_flags;
- p->linked_regs = linked_regs;
- cur->jmp_history_cnt = cnt;
- env->cur_hist_ent = p;
-
- return 0;
-}
-
-static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st,
- u32 hist_end, int insn_idx)
-{
- if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx)
- return &st->jmp_history[hist_end - 1];
- return NULL;
-}
-
-/* Backtrack one insn at a time. If idx is not at the top of recorded
- * history then previous instruction came from straight line execution.
- * Return -ENOENT if we exhausted all instructions within given state.
- *
- * It's legal to have a loop with the same starting and ending
- * insn index within the same state, e.g.: 3->4->5->3, so just because current
- * instruction index is the same as state's first_idx doesn't mean we are
- * done. If there is still some jump history left, we should keep going. We
- * need to take into account that we might have a jump history between given
- * state's parent and itself, due to checkpointing. In this case, we'll have
- * history entry recording a jump from last instruction of parent state and
- * first instruction of given state.
- */
-static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
- u32 *history)
-{
- u32 cnt = *history;
-
- if (i == st->first_insn_idx) {
- if (cnt == 0)
- return -ENOENT;
- if (cnt == 1 && st->jmp_history[0].idx == i)
- return -ENOENT;
- }
-
- if (cnt && st->jmp_history[cnt - 1].idx == i) {
- i = st->jmp_history[cnt - 1].prev_idx;
- (*history)--;
- } else {
- i--;
- }
- return i;
-}
-
static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
{
const struct btf_type *func;
@@ -4183,7 +3591,7 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
return btf_name_by_offset(desc_btf, func->name_off);
}
-static void verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn)
+void bpf_verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
const struct bpf_insn_cbs cbs = {
.cb_call = disasm_kfunc_name,
@@ -4194,158 +3602,10 @@ static void verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn)
print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
}
-static inline void bt_init(struct backtrack_state *bt, u32 frame)
-{
- bt->frame = frame;
-}
-
-static inline void bt_reset(struct backtrack_state *bt)
-{
- struct bpf_verifier_env *env = bt->env;
-
- memset(bt, 0, sizeof(*bt));
- bt->env = env;
-}
-
-static inline u32 bt_empty(struct backtrack_state *bt)
-{
- u64 mask = 0;
- int i;
-
- for (i = 0; i <= bt->frame; i++)
- mask |= bt->reg_masks[i] | bt->stack_masks[i];
-
- return mask == 0;
-}
-
-static inline int bt_subprog_enter(struct backtrack_state *bt)
-{
- if (bt->frame == MAX_CALL_FRAMES - 1) {
- verifier_bug(bt->env, "subprog enter from frame %d", bt->frame);
- return -EFAULT;
- }
- bt->frame++;
- return 0;
-}
-
-static inline int bt_subprog_exit(struct backtrack_state *bt)
-{
- if (bt->frame == 0) {
- verifier_bug(bt->env, "subprog exit from frame 0");
- return -EFAULT;
- }
- bt->frame--;
- return 0;
-}
-
-static inline void bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
-{
- bt->reg_masks[frame] |= 1 << reg;
-}
-
-static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
-{
- bt->reg_masks[frame] &= ~(1 << reg);
-}
-
-static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
-{
- bt_set_frame_reg(bt, bt->frame, reg);
-}
-
-static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
-{
- bt_clear_frame_reg(bt, bt->frame, reg);
-}
-
-static inline void bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
-{
- bt->stack_masks[frame] |= 1ull << slot;
-}
-
-static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
-{
- bt->stack_masks[frame] &= ~(1ull << slot);
-}
-
-static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
-{
- return bt->reg_masks[frame];
-}
-
-static inline u32 bt_reg_mask(struct backtrack_state *bt)
-{
- return bt->reg_masks[bt->frame];
-}
-
-static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame)
-{
- return bt->stack_masks[frame];
-}
-
-static inline u64 bt_stack_mask(struct backtrack_state *bt)
-{
- return bt->stack_masks[bt->frame];
-}
-
-static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
-{
- return bt->reg_masks[bt->frame] & (1 << reg);
-}
-
-static inline bool bt_is_frame_reg_set(struct backtrack_state *bt, u32 frame, u32 reg)
-{
- return bt->reg_masks[frame] & (1 << reg);
-}
-
-static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot)
-{
- return bt->stack_masks[frame] & (1ull << slot);
-}
-
-/* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
-static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
-{
- DECLARE_BITMAP(mask, 64);
- bool first = true;
- int i, n;
-
- buf[0] = '\0';
-
- bitmap_from_u64(mask, reg_mask);
- for_each_set_bit(i, mask, 32) {
- n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i);
- first = false;
- buf += n;
- buf_sz -= n;
- if (buf_sz < 0)
- break;
- }
-}
-/* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
-void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
-{
- DECLARE_BITMAP(mask, 64);
- bool first = true;
- int i, n;
-
- buf[0] = '\0';
-
- bitmap_from_u64(mask, stack_mask);
- for_each_set_bit(i, mask, 64) {
- n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8);
- first = false;
- buf += n;
- buf_sz -= n;
- if (buf_sz < 0)
- break;
- }
-}
-
/* If any register R in hist->linked_regs is marked as precise in bt,
* do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs.
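* E.g. after "r2 = r1" both registers share an id and are recorded in
* linked_regs; once one of them has to be precise, the other must be
* marked precise as well so their tracked ranges stay in sync.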
*/
-static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist)
+void bpf_bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist)
{
struct linked_regs linked_regs;
bool some_precise = false;
@@ -4372,727 +3632,15 @@ static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_histo
struct linked_reg *e = &linked_regs.entries[i];
if (e->is_reg)
- bt_set_frame_reg(bt, e->frameno, e->regno);
+ bpf_bt_set_frame_reg(bt, e->frameno, e->regno);
else
- bt_set_frame_slot(bt, e->frameno, e->spi);
- }
-}
-
-/* For a given verifier state, backtrack_insn() is called from the last insn to
- * the first insn. Its purpose is to compute a bitmask of registers and
- * stack slots that need precision in the parent verifier state.
- *
- * @idx is an index of the instruction we are currently processing;
- * @subseq_idx is an index of the subsequent instruction that:
- * - *would be* executed next, if jump history is viewed in forward order;
- * - *was* processed previously during backtracking.
- */
-static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
- struct bpf_jmp_history_entry *hist, struct backtrack_state *bt)
-{
- struct bpf_insn *insn = env->prog->insnsi + idx;
- u8 class = BPF_CLASS(insn->code);
- u8 opcode = BPF_OP(insn->code);
- u8 mode = BPF_MODE(insn->code);
- u32 dreg = insn->dst_reg;
- u32 sreg = insn->src_reg;
- u32 spi, i, fr;
-
- if (insn->code == 0)
- return 0;
- if (env->log.level & BPF_LOG_LEVEL2) {
- fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt));
- verbose(env, "mark_precise: frame%d: regs=%s ",
- bt->frame, env->tmp_str_buf);
- bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
- verbose(env, "stack=%s before ", env->tmp_str_buf);
- verbose(env, "%d: ", idx);
- verbose_insn(env, insn);
- }
-
- /* If there is a history record that some registers gained range at this insn,
- * propagate precision marks to those registers, so that bt_is_reg_set()
- * accounts for these registers.
- */
- bt_sync_linked_regs(bt, hist);
-
- if (class == BPF_ALU || class == BPF_ALU64) {
- if (!bt_is_reg_set(bt, dreg))
- return 0;
- if (opcode == BPF_END || opcode == BPF_NEG) {
- /* sreg is reserved and unused
- * dreg still need precision before this insn
- */
- return 0;
- } else if (opcode == BPF_MOV) {
- if (BPF_SRC(insn->code) == BPF_X) {
- /* dreg = sreg or dreg = (s8, s16, s32)sreg
- * dreg needs precision after this insn
- * sreg needs precision before this insn
- */
- bt_clear_reg(bt, dreg);
- if (sreg != BPF_REG_FP)
- bt_set_reg(bt, sreg);
- } else {
- /* dreg = K
- * dreg needs precision after this insn.
- * Corresponding register is already marked
- * as precise=true in this verifier state.
- * No further markings in parent are necessary
- */
- bt_clear_reg(bt, dreg);
- }
- } else {
- if (BPF_SRC(insn->code) == BPF_X) {
- /* dreg += sreg
- * both dreg and sreg need precision
- * before this insn
- */
- if (sreg != BPF_REG_FP)
- bt_set_reg(bt, sreg);
- } /* else dreg += K
- * dreg still needs precision before this insn
- */
- }
- } else if (class == BPF_LDX ||
- is_atomic_load_insn(insn) ||
- is_atomic_fetch_insn(insn)) {
- u32 load_reg = dreg;
-
- /*
- * Atomic fetch operation writes the old value into
- * a register (sreg or r0) and if it was tracked for
- * precision, propagate to the stack slot like we do
- * in regular ldx.
- */
- if (is_atomic_fetch_insn(insn))
- load_reg = insn->imm == BPF_CMPXCHG ?
- BPF_REG_0 : sreg;
-
- if (!bt_is_reg_set(bt, load_reg))
- return 0;
- bt_clear_reg(bt, load_reg);
-
- /* scalars can only be spilled into stack w/o losing precision.
- * Load from any other memory can be zero extended.
- * The desire to keep that precision is already indicated
- * by 'precise' mark in corresponding register of this state.
- * No further tracking necessary.
- */
- if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
- return 0;
- /* dreg = *(u64 *)[fp - off] was a fill from the stack.
- * that [fp - off] slot contains scalar that needs to be
- * tracked with precision
- */
- spi = insn_stack_access_spi(hist->flags);
- fr = insn_stack_access_frameno(hist->flags);
- bt_set_frame_slot(bt, fr, spi);
- } else if (class == BPF_STX || class == BPF_ST) {
- if (bt_is_reg_set(bt, dreg))
- /* stx & st shouldn't be using _scalar_ dst_reg
- * to access memory. It means backtracking
- * encountered a case of pointer subtraction.
- */
- return -ENOTSUPP;
- /* scalars can only be spilled into stack */
- if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
- return 0;
- spi = insn_stack_access_spi(hist->flags);
- fr = insn_stack_access_frameno(hist->flags);
- if (!bt_is_frame_slot_set(bt, fr, spi))
- return 0;
- bt_clear_frame_slot(bt, fr, spi);
- if (class == BPF_STX)
- bt_set_reg(bt, sreg);
- } else if (class == BPF_JMP || class == BPF_JMP32) {
- if (bpf_pseudo_call(insn)) {
- int subprog_insn_idx, subprog;
-
- subprog_insn_idx = idx + insn->imm + 1;
- subprog = find_subprog(env, subprog_insn_idx);
- if (subprog < 0)
- return -EFAULT;
-
- if (subprog_is_global(env, subprog)) {
- /* check that jump history doesn't have any
- * extra instructions from subprog; the next
- * instruction after call to global subprog
- * should be literally next instruction in
- * caller program
- */
- verifier_bug_if(idx + 1 != subseq_idx, env,
- "extra insn from subprog");
- /* r1-r5 are invalidated after subprog call,
- * so for global func call it shouldn't be set
- * anymore
- */
- if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
- verifier_bug(env, "global subprog unexpected regs %x",
- bt_reg_mask(bt));
- return -EFAULT;
- }
- /* global subprog always sets R0 */
- bt_clear_reg(bt, BPF_REG_0);
- return 0;
- } else {
- /* static subprog call instruction, which
- * means that we are exiting current subprog,
- * so only r1-r5 could be still requested as
- * precise, r0 and r6-r10 or any stack slot in
- * the current frame should be zero by now
- */
- if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
- verifier_bug(env, "static subprog unexpected regs %x",
- bt_reg_mask(bt));
- return -EFAULT;
- }
- /* we are now tracking register spills correctly,
- * so any instance of leftover slots is a bug
- */
- if (bt_stack_mask(bt) != 0) {
- verifier_bug(env,
- "static subprog leftover stack slots %llx",
- bt_stack_mask(bt));
- return -EFAULT;
- }
- /* propagate r1-r5 to the caller */
- for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
- if (bt_is_reg_set(bt, i)) {
- bt_clear_reg(bt, i);
- bt_set_frame_reg(bt, bt->frame - 1, i);
- }
- }
- if (bt_subprog_exit(bt))
- return -EFAULT;
- return 0;
- }
- } else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
- /* exit from callback subprog to callback-calling helper or
- * kfunc call. Use idx/subseq_idx check to discern it from
- * straight line code backtracking.
- * Unlike the subprog call handling above, we shouldn't
- * propagate precision of r1-r5 (if any requested), as they are
- * not actually arguments passed directly to callback subprogs
- */
- if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
- verifier_bug(env, "callback unexpected regs %x",
- bt_reg_mask(bt));
- return -EFAULT;
- }
- if (bt_stack_mask(bt) != 0) {
- verifier_bug(env, "callback leftover stack slots %llx",
- bt_stack_mask(bt));
- return -EFAULT;
- }
- /* clear r1-r5 in callback subprog's mask */
- for (i = BPF_REG_1; i <= BPF_REG_5; i++)
- bt_clear_reg(bt, i);
- if (bt_subprog_exit(bt))
- return -EFAULT;
- return 0;
- } else if (opcode == BPF_CALL) {
- /* kfunc with imm==0 is invalid and fixup_kfunc_call will
- * catch this error later. Make backtracking conservative
- * with ENOTSUPP.
- */
- if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
- return -ENOTSUPP;
- /* regular helper call sets R0 */
- bt_clear_reg(bt, BPF_REG_0);
- if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
- /* if backtracking was looking for registers R1-R5
- * they should have been found already.
- */
- verifier_bug(env, "backtracking call unexpected regs %x",
- bt_reg_mask(bt));
- return -EFAULT;
- }
- if (insn->src_reg == BPF_REG_0 && insn->imm == BPF_FUNC_tail_call
- && subseq_idx - idx != 1) {
- if (bt_subprog_enter(bt))
- return -EFAULT;
- }
- } else if (opcode == BPF_EXIT) {
- bool r0_precise;
-
- /* Backtracking to a nested function call, 'idx' is a part of
- * the inner frame 'subseq_idx' is a part of the outer frame.
- * In case of a regular function call, instructions giving
- * precision to registers R1-R5 should have been found already.
- * In case of a callback, it is ok to have R1-R5 marked for
- * backtracking, as these registers are set by the function
- * invoking callback.
- */
- if (subseq_idx >= 0 && bpf_calls_callback(env, subseq_idx))
- for (i = BPF_REG_1; i <= BPF_REG_5; i++)
- bt_clear_reg(bt, i);
- if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
- verifier_bug(env, "backtracking exit unexpected regs %x",
- bt_reg_mask(bt));
- return -EFAULT;
- }
-
- /* BPF_EXIT in subprog or callback always returns
- * right after the call instruction, so by checking
- * whether the instruction at subseq_idx-1 is subprog
- * call or not we can distinguish actual exit from
- * *subprog* from exit from *callback*. In the former
- * case, we need to propagate r0 precision, if
- * necessary. In the latter case, we never do that.
- */
- r0_precise = subseq_idx - 1 >= 0 &&
- bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) &&
- bt_is_reg_set(bt, BPF_REG_0);
-
- bt_clear_reg(bt, BPF_REG_0);
- if (bt_subprog_enter(bt))
- return -EFAULT;
-
- if (r0_precise)
- bt_set_reg(bt, BPF_REG_0);
- /* r6-r9 and stack slots will stay set in caller frame
- * bitmasks until we return back from callee(s)
- */
- return 0;
- } else if (BPF_SRC(insn->code) == BPF_X) {
- if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg))
- return 0;
- /* dreg <cond> sreg
- * Both dreg and sreg need precision before
- * this insn. If only sreg was marked precise
- * before it would be equally necessary to
- * propagate it to dreg.
- */
- if (!hist || !(hist->flags & INSN_F_SRC_REG_STACK))
- bt_set_reg(bt, sreg);
- if (!hist || !(hist->flags & INSN_F_DST_REG_STACK))
- bt_set_reg(bt, dreg);
- } else if (BPF_SRC(insn->code) == BPF_K) {
- /* dreg <cond> K
- * Only dreg still needs precision before
- * this insn, so for the K-based conditional
- * there is nothing new to be marked.
- */
- }
- } else if (class == BPF_LD) {
- if (!bt_is_reg_set(bt, dreg))
- return 0;
- bt_clear_reg(bt, dreg);
- /* It's ld_imm64 or ld_abs or ld_ind.
- * For ld_imm64 no further tracking of precision
- * into parent is necessary
- */
- if (mode == BPF_IND || mode == BPF_ABS)
- /* to be analyzed */
- return -ENOTSUPP;
- }
- /* Propagate precision marks to linked registers, to account for
- * registers marked as precise in this function.
- */
- bt_sync_linked_regs(bt, hist);
- return 0;
-}
-
-/* the scalar precision tracking algorithm:
- * . at the start all registers have precise=false.
- * . scalar ranges are tracked as normal through alu and jmp insns.
- * . once precise value of the scalar register is used in:
- * . ptr + scalar alu
- * . if (scalar cond K|scalar)
- * . helper_call(.., scalar, ...) where ARG_CONST is expected
- * backtrack through the verifier states and mark all registers and
- * stack slots with spilled constants that these scalar registers
- * should be precise.
- * . during state pruning two registers (or spilled stack slots)
- * are equivalent if both are not precise.
- *
- * Note the verifier cannot simply walk register parentage chain,
- * since many different registers and stack slots could have been
- * used to compute single precise scalar.
- *
- * The approach of starting with precise=true for all registers and then
- * backtrack to mark a register as not precise when the verifier detects
- * that program doesn't care about specific value (e.g., when helper
- * takes register as ARG_ANYTHING parameter) is not safe.
- *
- * It's ok to walk single parentage chain of the verifier states.
- * It's possible that this backtracking will go all the way till 1st insn.
- * All other branches will be explored for needing precision later.
- *
- * The backtracking needs to deal with cases like:
- * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
- * r9 -= r8
- * r5 = r9
- * if r5 > 0x79f goto pc+7
- * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
- * r5 += 1
- * ...
- * call bpf_perf_event_output#25
- * where .arg5_type = ARG_CONST_SIZE_OR_ZERO
- *
- * and this case:
- * r6 = 1
- * call foo // uses callee's r6 inside to compute r0
- * r0 += r6
- * if r0 == 0 goto
- *
- * to track above reg_mask/stack_mask needs to be independent for each frame.
- *
- * Also if parent's curframe > frame where backtracking started,
- * the verifier needs to mark registers in both frames, otherwise callees
- * may incorrectly prune callers. This is similar to
- * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
- *
- * For now backtracking falls back into conservative marking.
- */
-static void mark_all_scalars_precise(struct bpf_verifier_env *env,
- struct bpf_verifier_state *st)
-{
- struct bpf_func_state *func;
- struct bpf_reg_state *reg;
- int i, j;
-
- if (env->log.level & BPF_LOG_LEVEL2) {
- verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n",
- st->curframe);
- }
-
- /* big hammer: mark all scalars precise in this path.
- * pop_stack may still get !precise scalars.
- * We also skip current state and go straight to first parent state,
- * because precision markings in current non-checkpointed state are
- * not needed. See why in the comment in __mark_chain_precision below.
- */
- for (st = st->parent; st; st = st->parent) {
- for (i = 0; i <= st->curframe; i++) {
- func = st->frame[i];
- for (j = 0; j < BPF_REG_FP; j++) {
- reg = &func->regs[j];
- if (reg->type != SCALAR_VALUE || reg->precise)
- continue;
- reg->precise = true;
- if (env->log.level & BPF_LOG_LEVEL2) {
- verbose(env, "force_precise: frame%d: forcing r%d to be precise\n",
- i, j);
- }
- }
- for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
- if (!is_spilled_reg(&func->stack[j]))
- continue;
- reg = &func->stack[j].spilled_ptr;
- if (reg->type != SCALAR_VALUE || reg->precise)
- continue;
- reg->precise = true;
- if (env->log.level & BPF_LOG_LEVEL2) {
- verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n",
- i, -(j + 1) * 8);
- }
- }
- }
- }
-}
-
-static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
-{
- struct bpf_func_state *func;
- struct bpf_reg_state *reg;
- int i, j;
-
- for (i = 0; i <= st->curframe; i++) {
- func = st->frame[i];
- for (j = 0; j < BPF_REG_FP; j++) {
- reg = &func->regs[j];
- if (reg->type != SCALAR_VALUE)
- continue;
- reg->precise = false;
- }
- for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
- if (!is_spilled_reg(&func->stack[j]))
- continue;
- reg = &func->stack[j].spilled_ptr;
- if (reg->type != SCALAR_VALUE)
- continue;
- reg->precise = false;
- }
- }
-}
-
-/*
- * __mark_chain_precision() backtracks BPF program instruction sequence and
- * chain of verifier states making sure that register *regno* (if regno >= 0)
- * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
- * SCALARS, as well as any other registers and slots that contribute to
- * a tracked state of given registers/stack slots, depending on specific BPF
- * assembly instructions (see backtrack_insn() for exact instruction handling
- * logic). This backtracking relies on recorded jmp_history and is able to
- * traverse entire chain of parent states. This process ends only when all the
- * necessary registers/slots and their transitive dependencies are marked as
- * precise.
- *
- * One important and subtle aspect is that precise marks *do not matter* in
- * the currently verified state (current state). It is important to understand
- * why this is the case.
- *
- * First, note that current state is the state that is not yet "checkpointed",
- * i.e., it is not yet put into env->explored_states, and it has no children
- * states as well. It's ephemeral, and can end up either a) being discarded if
- * compatible explored state is found at some point or BPF_EXIT instruction is
- * reached or b) checkpointed and put into env->explored_states, branching out
- * into one or more children states.
- *
- * In the former case, precise markings in current state are completely
- * ignored by state comparison code (see regsafe() for details). Only
- * checkpointed ("old") state precise markings are important, and if old
- * state's register/slot is precise, regsafe() assumes current state's
- * register/slot as precise and checks value ranges exactly and precisely. If
- * states turn out to be compatible, current state's necessary precise
- * markings and any required parent states' precise markings are enforced
- * after the fact with propagate_precision() logic. But it's
- * important to realize that in this case, even after marking current state
- * registers/slots as precise, we immediately discard current state. So what
- * actually matters is any of the precise markings propagated into current
- * state's parent states, which are always checkpointed (due to b) case above).
- * As such, for scenario a) it doesn't matter if current state has precise
- * markings set or not.
- *
- * Now, for the scenario b), checkpointing and forking into child(ren)
- * state(s). Note that before current state gets to checkpointing step, any
- * processed instruction always assumes precise SCALAR register/slot
- * knowledge: if precise value or range is useful to prune jump branch, BPF
- * verifier takes this opportunity enthusiastically. Similarly, when
- * register's value is used to calculate offset or memory address, exact
- * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
- * what we mentioned above about state comparison ignoring precise markings
- * during state comparison, BPF verifier ignores and also assumes precise
- * markings *at will* during instruction verification process. But as verifier
- * assumes precision, it also propagates any precision dependencies across
- * parent states, which are not yet finalized, so can be further restricted
- * based on new knowledge gained from restrictions enforced by their children
- * states. This is so that once those parent states are finalized, i.e., when
- * they have no more active children state, state comparison logic in
- * is_state_visited() would enforce strict and precise SCALAR ranges, if
- * required for correctness.
- *
- * To build a bit more intuition, note also that once a state is checkpointed,
- * the path we took to get to that state is not important. This is crucial
- * property for state pruning. When state is checkpointed and finalized at
- * some instruction index, it can be correctly and safely used to "short
- * circuit" any *compatible* state that reaches exactly the same instruction
- * index. I.e., if we jumped to that instruction from a completely different
- * code path than original finalized state was derived from, it doesn't
- * matter, current state can be discarded because from that instruction
- * forward having a compatible state will ensure we will safely reach the
- * exit. States describe preconditions for further exploration, but completely
- * forget the history of how we got here.
- *
- * This also means that even if we needed precise SCALAR range to get to
- * finalized state, but from that point forward *that same* SCALAR register is
- * never used in a precise context (i.e., its precise value is not needed for
- * correctness), it's correct and safe to mark such register as "imprecise"
- * (i.e., precise marking set to false). This is what we rely on when we do
- * not set precise marking in current state. If no child state requires
- * precision for any given SCALAR register, it's safe to dictate that it can
- * be imprecise. If any child state does require this register to be precise,
- * we'll mark it precise later retroactively during precise markings
- * propagation from child state to parent states.
- *
- * Skipping precise marking setting in current state is a mild version of
- * relying on the above observation. But we can utilize this property even
- * more aggressively by proactively forgetting any precise marking in the
- * current state (which we inherited from the parent state), right before we
- * checkpoint it and branch off into new child state. This is done by
- * mark_all_scalars_imprecise() to hopefully get more permissive and generic
- * finalized states which help in short circuiting more future states.
- */
-static int __mark_chain_precision(struct bpf_verifier_env *env,
- struct bpf_verifier_state *starting_state,
- int regno,
- bool *changed)
-{
- struct bpf_verifier_state *st = starting_state;
- struct backtrack_state *bt = &env->bt;
- int first_idx = st->first_insn_idx;
- int last_idx = starting_state->insn_idx;
- int subseq_idx = -1;
- struct bpf_func_state *func;
- bool tmp, skip_first = true;
- struct bpf_reg_state *reg;
- int i, fr, err;
-
- if (!env->bpf_capable)
- return 0;
-
- changed = changed ?: &tmp;
- /* set frame number from which we are starting to backtrack */
- bt_init(bt, starting_state->curframe);
-
- /* Do sanity checks against current state of register and/or stack
- * slot, but don't set precise flag in current state, as precision
- * tracking in the current state is unnecessary.
- */
- func = st->frame[bt->frame];
- if (regno >= 0) {
- reg = &func->regs[regno];
- if (reg->type != SCALAR_VALUE) {
- verifier_bug(env, "backtracking misuse");
- return -EFAULT;
- }
- bt_set_reg(bt, regno);
- }
-
- if (bt_empty(bt))
- return 0;
-
- for (;;) {
- DECLARE_BITMAP(mask, 64);
- u32 history = st->jmp_history_cnt;
- struct bpf_jmp_history_entry *hist;
-
- if (env->log.level & BPF_LOG_LEVEL2) {
- verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
- bt->frame, last_idx, first_idx, subseq_idx);
- }
-
- if (last_idx < 0) {
- /* we are at the entry into subprog, which
- * is expected for global funcs, but only if
- * requested precise registers are R1-R5
- * (which are global func's input arguments)
- */
- if (st->curframe == 0 &&
- st->frame[0]->subprogno > 0 &&
- st->frame[0]->callsite == BPF_MAIN_FUNC &&
- bt_stack_mask(bt) == 0 &&
- (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) {
- bitmap_from_u64(mask, bt_reg_mask(bt));
- for_each_set_bit(i, mask, 32) {
- reg = &st->frame[0]->regs[i];
- bt_clear_reg(bt, i);
- if (reg->type == SCALAR_VALUE) {
- reg->precise = true;
- *changed = true;
- }
- }
- return 0;
- }
-
- verifier_bug(env, "backtracking func entry subprog %d reg_mask %x stack_mask %llx",
- st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
- return -EFAULT;
- }
-
- for (i = last_idx;;) {
- if (skip_first) {
- err = 0;
- skip_first = false;
- } else {
- hist = get_jmp_hist_entry(st, history, i);
- err = backtrack_insn(env, i, subseq_idx, hist, bt);
- }
- if (err == -ENOTSUPP) {
- mark_all_scalars_precise(env, starting_state);
- bt_reset(bt);
- return 0;
- } else if (err) {
- return err;
- }
- if (bt_empty(bt))
- /* Found assignment(s) into tracked register in this state.
- * Since this state is already marked, just return.
- * Nothing to be tracked further in the parent state.
- */
- return 0;
- subseq_idx = i;
- i = get_prev_insn_idx(st, i, &history);
- if (i == -ENOENT)
- break;
- if (i >= env->prog->len) {
- /* This can happen if backtracking reached insn 0
- * and there are still reg_mask or stack_mask
- * to backtrack.
- * It means the backtracking missed the spot where
- * particular register was initialized with a constant.
- */
- verifier_bug(env, "backtracking idx %d", i);
- return -EFAULT;
- }
- }
- st = st->parent;
- if (!st)
- break;
-
- for (fr = bt->frame; fr >= 0; fr--) {
- func = st->frame[fr];
- bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
- for_each_set_bit(i, mask, 32) {
- reg = &func->regs[i];
- if (reg->type != SCALAR_VALUE) {
- bt_clear_frame_reg(bt, fr, i);
- continue;
- }
- if (reg->precise) {
- bt_clear_frame_reg(bt, fr, i);
- } else {
- reg->precise = true;
- *changed = true;
- }
- }
-
- bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
- for_each_set_bit(i, mask, 64) {
- if (verifier_bug_if(i >= func->allocated_stack / BPF_REG_SIZE,
- env, "stack slot %d, total slots %d",
- i, func->allocated_stack / BPF_REG_SIZE))
- return -EFAULT;
-
- if (!is_spilled_scalar_reg(&func->stack[i])) {
- bt_clear_frame_slot(bt, fr, i);
- continue;
- }
- reg = &func->stack[i].spilled_ptr;
- if (reg->precise) {
- bt_clear_frame_slot(bt, fr, i);
- } else {
- reg->precise = true;
- *changed = true;
- }
- }
- if (env->log.level & BPF_LOG_LEVEL2) {
- fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
- bt_frame_reg_mask(bt, fr));
- verbose(env, "mark_precise: frame%d: parent state regs=%s ",
- fr, env->tmp_str_buf);
- bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
- bt_frame_stack_mask(bt, fr));
- verbose(env, "stack=%s: ", env->tmp_str_buf);
- print_verifier_state(env, st, fr, true);
- }
- }
-
- if (bt_empty(bt))
- return 0;
-
- subseq_idx = first_idx;
- last_idx = st->last_insn_idx;
- first_idx = st->first_insn_idx;
+ bpf_bt_set_frame_slot(bt, e->frameno, e->spi);
}
-
- /* if we still have requested precise regs or slots, we missed
- * something (e.g., stack access through non-r10 register), so
- * fall back to marking all precise
- */
- if (!bt_empty(bt)) {
- mark_all_scalars_precise(env, starting_state);
- bt_reset(bt);
- }
-
- return 0;
}
int mark_chain_precision(struct bpf_verifier_env *env, int regno)
{
- return __mark_chain_precision(env, env->cur_state, regno, NULL);
+ return bpf_mark_chain_precision(env, env->cur_state, regno, NULL);
}
/* mark_chain_precision_batch() assumes that env->bt is set in the caller to
@@ -5101,7 +3649,7 @@ int mark_chain_precision(struct bpf_verifier_env *env, int regno)
static int mark_chain_precision_batch(struct bpf_verifier_env *env,
struct bpf_verifier_state *starting_state)
{
- return __mark_chain_precision(env, starting_state, -1, NULL);
+ return bpf_mark_chain_precision(env, starting_state, -1, NULL);
}
static bool is_spillable_regtype(enum bpf_reg_type type)
@@ -5131,11 +3679,6 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
}
}
-/* Does this register contain a constant zero? */
-static bool register_is_null(struct bpf_reg_state *reg)
-{
- return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
-}
/* check if register is a constant scalar value */
static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32)
@@ -5159,27 +3702,30 @@ static bool __is_pointer_value(bool allow_ptr_leaks,
return reg->type != SCALAR_VALUE;
}
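+/*
+ * Drop a scalar's identity: clear the id consulted by sync_linked_regs()
+ * and the constant delta tracked for BPF_ADD_CONST linked registers.
+ */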
+static void clear_scalar_id(struct bpf_reg_state *reg)
+{
+ reg->id = 0;
+ reg->delta = 0;
+}
+
static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
struct bpf_reg_state *src_reg)
{
if (src_reg->type != SCALAR_VALUE)
return;
-
- if (src_reg->id & BPF_ADD_CONST) {
- /*
- * The verifier is processing rX = rY insn and
- * rY->id has special linked register already.
- * Cleared it, since multiple rX += const are not supported.
- */
- src_reg->id = 0;
- src_reg->off = 0;
- }
-
+ /*
+ * The verifier is processing rX = rY insn and
+ * rY->id has special linked register already.
+ * Clear it, since multiple rX += const are not supported.
+ */
+ if (src_reg->id & BPF_ADD_CONST)
+ clear_scalar_id(src_reg);
+ /*
+ * Ensure that src_reg has a valid ID that will be copied to
+ * dst_reg and then will be used by sync_linked_regs() to
+ * propagate min/max range.
+ */
if (!src_reg->id && !tnum_is_const(src_reg->var_off))
- /* Ensure that src_reg has a valid ID that will be copied to
- * dst_reg and then will be used by sync_linked_regs() to
- * propagate min/max range.
- */
src_reg->id = ++env->id_gen;
}
@@ -5241,6 +3787,18 @@ static void check_fastcall_stack_contract(struct bpf_verifier_env *env,
}
}
+static void scrub_special_slot(struct bpf_func_state *state, int spi)
+{
+ int i;
+
+ /* regular write of data into stack destroys any spilled ptr */
+ state->stack[spi].spilled_ptr.type = NOT_INIT;
+ /* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
+ if (is_stack_slot_special(&state->stack[spi]))
+ for (i = 0; i < BPF_REG_SIZE; i++)
+ scrub_spilled_slot(&state->stack[spi].slot_type[i]);
+}
+
/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
@@ -5260,8 +3818,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
* so it's aligned access and [off, off + size) are within stack limits
*/
if (!env->allow_ptr_leaks &&
- is_spilled_reg(&state->stack[spi]) &&
- !is_spilled_scalar_reg(&state->stack[spi]) &&
+ bpf_is_spilled_reg(&state->stack[spi]) &&
+ !bpf_is_spilled_scalar_reg(&state->stack[spi]) &&
size != BPF_REG_SIZE) {
verbose(env, "attempt to corrupt spilled pointer on stack\n");
return -EACCES;
@@ -5290,18 +3848,6 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
if (err)
return err;
- if (!(off % BPF_REG_SIZE) && size == BPF_REG_SIZE) {
- /* only mark the slot as written if all 8 bytes were written
- * otherwise read propagation may incorrectly stop too soon
- * when stack slots are partially written.
- * This heuristic means that read propagation will be
- * conservative, since it will add reg_live_read marks
- * to stack slots all the way to first state when programs
- * writes+reads less than 8 bytes
- */
- bpf_mark_stack_write(env, state->frameno, BIT(spi));
- }
-
check_fastcall_stack_contract(env, state, insn_idx, off);
mark_stack_slot_scratched(env, spi);
if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
@@ -5338,15 +3884,10 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
} else {
u8 type = STACK_MISC;
- /* regular write of data into stack destroys any spilled ptr */
- state->stack[spi].spilled_ptr.type = NOT_INIT;
- /* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
- if (is_stack_slot_special(&state->stack[spi]))
- for (i = 0; i < BPF_REG_SIZE; i++)
- scrub_spilled_slot(&state->stack[spi].slot_type[i]);
+ scrub_special_slot(state, spi);
/* when we zero initialize stack slots mark them as such */
- if ((reg && register_is_null(reg)) ||
+ if ((reg && bpf_register_is_null(reg)) ||
(!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
/* STACK_ZERO case happened because register spill
* wasn't properly aligned at the stack slot boundary,
@@ -5367,7 +3908,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
}
if (insn_flags)
- return push_jmp_history(env, env->cur_state, insn_flags, 0);
+ return bpf_push_jmp_history(env, env->cur_state, insn_flags, 0);
return 0;
}
@@ -5377,7 +3918,6 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
* tracks the effects of the write, considering that each stack slot in the
* dynamic range is potentially written to.
*
- * 'off' includes 'regno->off'.
* 'value_regno' can be -1, meaning that an unknown value is being written to
* the stack.
*
@@ -5413,14 +3953,14 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
max_off = ptr_reg->smax_value + off + size;
if (value_regno >= 0)
value_reg = &cur->regs[value_regno];
- if ((value_reg && register_is_null(value_reg)) ||
+ if ((value_reg && bpf_register_is_null(value_reg)) ||
(!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
writing_zero = true;
for (i = min_off; i < max_off; i++) {
int spi;
- spi = __get_spi(i);
+ spi = bpf_get_spi(i);
err = destroy_if_dynptr_stack_slot(env, state, spi);
if (err)
return err;
@@ -5458,7 +3998,7 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
* maintain the spill type.
*/
if (writing_zero && *stype == STACK_SPILL &&
- is_spilled_scalar_reg(&state->stack[spi])) {
+ bpf_is_spilled_scalar_reg(&state->stack[spi])) {
struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr;
if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) {
@@ -5467,8 +4007,13 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
}
}
- /* Erase all other spilled pointers. */
- state->stack[spi].spilled_ptr.type = NOT_INIT;
+ /*
+ * Scrub slots when a variable-offset stack write goes over spilled pointers.
+ * Otherwise bpf_is_spilled_reg() may return true while spilled_ptr.type is
+ * NOT_INIT, and a valid program is rejected by check_stack_read_fixed_off()
+ * with the obscure "invalid size of register fill" message.
+ */
+ scrub_special_slot(state, spi);
/* Update the slot type. */
new_type = STACK_MISC;
@@ -5483,8 +4028,10 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
* For privileged programs, we will accept such reads to slots
* that may or may not be written because, if we were to reject
* them, the error would be too confusing.
+ * Conservatively, treat STACK_POISON in a similar way.
*/
- if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
+ if ((*stype == STACK_INVALID || *stype == STACK_POISON) &&
+ !env->allow_uninit_stack) {
verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
insn_idx, i);
return -EINVAL;
@@ -5559,18 +4106,14 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
struct bpf_reg_state *reg;
u8 *stype, type;
int insn_flags = insn_stack_access_flags(reg_state->frameno, spi);
- int err;
stype = reg_state->stack[spi].slot_type;
reg = &reg_state->stack[spi].spilled_ptr;
mark_stack_slot_scratched(env, spi);
check_fastcall_stack_contract(env, state, env->insn_idx, off);
- err = bpf_mark_stack_read(env, reg_state->frameno, env->insn_idx, BIT(spi));
- if (err)
- return err;
- if (is_spilled_reg(&reg_state->stack[spi])) {
+ if (bpf_is_spilled_reg(&reg_state->stack[spi])) {
u8 spill_size = 1;
for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
@@ -5606,7 +4149,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
* coerce_reg_to_size will adjust the boundaries.
*/
if (get_reg_width(reg) > size * BITS_PER_BYTE)
- state->regs[dst_regno].id = 0;
+ clear_scalar_id(&state->regs[dst_regno]);
} else {
int spill_cnt = 0, zero_cnt = 0;
@@ -5624,8 +4167,13 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
}
if (type == STACK_INVALID && env->allow_uninit_stack)
continue;
- verbose(env, "invalid read from stack off %d+%d size %d\n",
- off, i, size);
+ if (type == STACK_POISON) {
+ verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n",
+ off, i, size);
+ } else {
+ verbose(env, "invalid read from stack off %d+%d size %d\n",
+ off, i, size);
+ }
return -EACCES;
}
@@ -5674,8 +4222,13 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
continue;
if (type == STACK_INVALID && env->allow_uninit_stack)
continue;
- verbose(env, "invalid read from stack off %d+%d size %d\n",
- off, i, size);
+ if (type == STACK_POISON) {
+ verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n",
+ off, i, size);
+ } else {
+ verbose(env, "invalid read from stack off %d+%d size %d\n",
+ off, i, size);
+ }
return -EACCES;
}
if (dst_regno >= 0)
@@ -5683,7 +4236,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
insn_flags = 0; /* we are not restoring spilled register */
}
if (insn_flags)
- return push_jmp_history(env, env->cur_state, insn_flags, 0);
+ return bpf_push_jmp_history(env, env->cur_state, insn_flags, 0);
return 0;
}
@@ -5721,7 +4274,7 @@ static int check_stack_read_var_off(struct bpf_verifier_env *env,
{
/* The state of the source register. */
struct bpf_reg_state *reg = reg_state(env, ptr_regno);
- struct bpf_func_state *ptr_state = func(env, reg);
+ struct bpf_func_state *ptr_state = bpf_func(env, reg);
int err;
int min_off, max_off;
@@ -5753,7 +4306,7 @@ static int check_stack_read(struct bpf_verifier_env *env,
int dst_regno)
{
struct bpf_reg_state *reg = reg_state(env, ptr_regno);
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
int err;
/* Some accesses are only permitted with a static offset. */
bool var_off = !tnum_is_const(reg->var_off);
@@ -5799,7 +4352,6 @@ static int check_stack_read(struct bpf_verifier_env *env,
* check_stack_write_var_off.
*
* 'ptr_regno' is the register used as a pointer into the stack.
- * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
* 'value_regno' is the register whose value we're writing to the stack. It can
* be -1, meaning that we're not writing from a register.
*
@@ -5810,7 +4362,7 @@ static int check_stack_write(struct bpf_verifier_env *env,
int value_regno, int insn_idx)
{
struct bpf_reg_state *reg = reg_state(env, ptr_regno);
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
int err;
if (tnum_is_const(reg->var_off)) {
@@ -5836,14 +4388,14 @@ static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
u32 cap = bpf_map_flags_to_cap(map);
if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
- verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
- map->value_size, off, size);
+ verbose(env, "write into map forbidden, value_size=%d off=%lld size=%d\n",
+ map->value_size, reg->smin_value + off, size);
return -EACCES;
}
if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
- verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
- map->value_size, off, size);
+ verbose(env, "read from map forbidden, value_size=%d off=%lld size=%d\n",
+ map->value_size, reg->smin_value + off, size);
return -EACCES;
}
@@ -5877,6 +4429,10 @@ static int __check_mem_access(struct bpf_verifier_env *env, int regno,
verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
off, size, regno, reg->id, off, mem_size);
break;
+ case PTR_TO_CTX:
+ verbose(env, "invalid access to context, ctx_size=%d off=%d size=%d\n",
+ mem_size, off, size);
+ break;
case PTR_TO_MEM:
default:
verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
@@ -5950,24 +4506,24 @@ static int __check_ptr_off_reg(struct bpf_verifier_env *env,
* is only allowed in its original, unmodified form.
*/
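+ /* Reject variable offsets first, so the checks below can rely on
+ * var_off being a constant.
+ */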
- if (reg->off < 0) {
- verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
- reg_type_str(env, reg->type), regno, reg->off);
+ if (!tnum_is_const(reg->var_off)) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env, "variable %s access var_off=%s disallowed\n",
+ reg_type_str(env, reg->type), tn_buf);
return -EACCES;
}
- if (!fixed_off_ok && reg->off) {
- verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
- reg_type_str(env, reg->type), regno, reg->off);
+ if (reg->smin_value < 0) {
+ verbose(env, "negative offset %s ptr R%d off=%lld disallowed\n",
+ reg_type_str(env, reg->type), regno, reg->var_off.value);
return -EACCES;
}
- if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
- char tn_buf[48];
-
- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "variable %s access var_off=%s disallowed\n",
- reg_type_str(env, reg->type), tn_buf);
+ if (!fixed_off_ok && reg->var_off.value != 0) {
+ verbose(env, "dereference of modified %s ptr R%d off=%lld disallowed\n",
+ reg_type_str(env, reg->type), regno, reg->var_off.value);
return -EACCES;
}
@@ -6009,14 +4565,14 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
/* For ref_ptr case, release function check should ensure we get one
* referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
* normal store of unreferenced kptr, we must ensure var_off is zero.
- * Since ref_ptr cannot be accessed directly by BPF insns, checks for
- * reg->off and reg->ref_obj_id are not needed here.
+ * Since ref_ptr cannot be accessed directly by BPF insns, check for
+ * reg->ref_obj_id is not needed here.
*/
if (__check_ptr_off_reg(env, reg, regno, true))
return -EACCES;
/* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and
- * we also need to take into account the reg->off.
+ * we also need to take into account the reg->var_off.
*
* We want to support cases like:
*
@@ -6027,19 +4583,19 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
*
* struct foo *v;
* v = func(); // PTR_TO_BTF_ID
- * val->foo = v; // reg->off is zero, btf and btf_id match type
- * val->bar = &v->br; // reg->off is still zero, but we need to retry with
+ * val->foo = v; // reg->var_off is zero, btf and btf_id match type
+ * val->bar = &v->br; // reg->var_off is still zero, but we need to retry with
* // first member type of struct after comparison fails
- * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
+ * val->baz = &v->bz; // reg->var_off is non-zero, so struct needs to be walked
* // to match type
*
- * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
+ * In the kptr_ref case, check_func_arg_reg_off already ensures reg->var_off
* is zero. We must also ensure that btf_struct_ids_match does not walk
* the struct to match type against first member of struct, i.e. reject
* second case from above. Hence, when type is BPF_KPTR_REF, we set
* strict mode to true for type match.
*/
- if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
+ if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->var_off.value,
kptr_field->kptr.btf, kptr_field->kptr.btf_id,
kptr_field->type != BPF_KPTR_UNREF))
goto bad_type;
@@ -6203,7 +4759,7 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
return ret;
} else if (class == BPF_STX) {
val_reg = reg_state(env, value_regno);
- if (!register_is_null(val_reg) &&
+ if (!bpf_register_is_null(val_reg) &&
map_kptr_match_type(env, kptr_field, val_reg, value_regno))
return -EACCES;
} else if (class == BPF_ST) {
@@ -6298,11 +4854,9 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
return 0;
}
-#define MAX_PACKET_OFF 0xffff
-
static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
- const struct bpf_call_arg_meta *meta,
- enum bpf_access_type t)
+ const struct bpf_call_arg_meta *meta,
+ enum bpf_access_type t)
{
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
@@ -6348,27 +4902,14 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
struct bpf_reg_state *reg = reg_state(env, regno);
int err;
- /* We may have added a variable offset to the packet pointer; but any
- * reg->range we have comes after that. We are only checking the fixed
- * offset.
- */
-
- /* We don't allow negative numbers, because we aren't tracking enough
- * detail to prove they're safe.
- */
- if (reg->smin_value < 0) {
- verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
- regno);
- return -EACCES;
+ if (reg->range < 0) {
+ verbose(env, "R%d offset is outside of the packet\n", regno);
+ return -EINVAL;
}
- err = reg->range < 0 ? -EINVAL :
- __check_mem_access(env, regno, off, size, reg->range,
- zero_size_allowed);
- if (err) {
- verbose(env, "R%d offset is outside of the packet\n", regno);
+ err = check_mem_region_access(env, regno, off, size, reg->range, zero_size_allowed);
+ if (err)
return err;
- }
/* __check_mem_access has made sure "off + size - 1" is within u16.
* reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
@@ -6380,12 +4921,17 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
max_t(u32, env->prog->aux->max_pkt_offset,
off + reg->umax_value + size - 1);
- return err;
+ return 0;
+}
+
+static bool is_var_ctx_off_allowed(struct bpf_prog *prog)
+{
+ return resolve_prog_type(prog) == BPF_PROG_TYPE_SYSCALL;
}
/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
-static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
- enum bpf_access_type t, struct bpf_insn_access_aux *info)
+static int __check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
+ enum bpf_access_type t, struct bpf_insn_access_aux *info)
{
if (env->ops->is_valid_access &&
env->ops->is_valid_access(off, size, t, env->prog, info)) {
@@ -6416,6 +4962,34 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
return -EACCES;
}
+static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
+ int off, int access_size, enum bpf_access_type t,
+ struct bpf_insn_access_aux *info)
+{
+ /*
+ * Program types that don't rewrite ctx accesses can safely
+ * dereference ctx pointers with fixed offsets.
+ */
+ bool var_off_ok = is_var_ctx_off_allowed(env->prog);
+ bool fixed_off_ok = !env->ops->convert_ctx_access;
+ struct bpf_reg_state *regs = cur_regs(env);
+ struct bpf_reg_state *reg = regs + regno;
+ int err;
+
+ if (var_off_ok)
+ err = check_mem_region_access(env, regno, off, access_size, U16_MAX, false);
+ else
+ err = __check_ptr_off_reg(env, reg, regno, fixed_off_ok);
+ if (err)
+ return err;
+ off += reg->umax_value;
+
+ err = __check_ctx_access(env, insn_idx, off, access_size, t, info);
+ if (err)
+ verbose_linfo(env, insn_idx, "; ");
+ return err;
+}
+
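/* Illustrative sketch (not part of the patch): with variable ctx offsets
 * permitted for BPF_PROG_TYPE_SYSCALL, a program along these lines should
 * now verify, since the mask keeps the offset bounded well below U16_MAX.
 * The struct and section name here are hypothetical.
 *
 *	struct args { __u8 buf[256]; __u32 idx; };
 *
 *	SEC("syscall")
 *	int pick_byte(struct args *ctx)
 *	{
 *		__u32 i = ctx->idx & 0xff;	// bounded scalar offset
 *		return ctx->buf[i];		// variable offset into ctx
 *	}
 */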
static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
int size)
{
@@ -6597,14 +5171,14 @@ static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
*/
ip_align = 2;
- reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
+ reg_off = tnum_add(reg->var_off, tnum_const(ip_align + off));
if (!tnum_is_aligned(reg_off, size)) {
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose(env,
- "misaligned packet access off %d+%s+%d+%d size %d\n",
- ip_align, tn_buf, reg->off, off, size);
+ "misaligned packet access off %d+%s+%d size %d\n",
+ ip_align, tn_buf, off, size);
return -EACCES;
}
@@ -6622,13 +5196,13 @@ static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
if (!strict || size == 1)
return 0;
- reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
+ reg_off = tnum_add(reg->var_off, tnum_const(off));
if (!tnum_is_aligned(reg_off, size)) {
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
- pointer_desc, tn_buf, reg->off, off, size);
+ verbose(env, "misaligned %saccess off %s+%d size %d\n",
+ pointer_desc, tn_buf, off, size);
return -EACCES;
}
@@ -6731,22 +5305,30 @@ static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
return round_up(max_t(u32, stack_depth, 1), 32);
}
+/* temporary state used for call frame depth calculation */
+struct bpf_subprog_call_depth_info {
+ int ret_insn; /* insn in the caller to return to */
+ int caller; /* caller subprogram idx */
+ int frame; /* # of consecutive static call stack frames on top of stack */
+};
+
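/* Minimal sketch (not part of the patch) of how the caller links above
 * replace the old ret_insn[]/ret_prog[] arrays: the length of the current
 * call chain is recovered by walking the links until the root subprog,
 * whose caller is -1, mirroring the 'total' loop further below.
 */
static int call_chain_len(const struct bpf_subprog_call_depth_info *dinfo, int idx)
{
	int n = 0;

	for (; idx >= 0; idx = dinfo[idx].caller)
		n++;	/* one frame per subprog on the chain */
	return n;
}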
/* starting from main bpf function walk all instructions of the function
* and recursively walk all callees that given function can call.
* Ignore jump and exit insns.
- * Since recursion is prevented by check_cfg() this algorithm
- * only needs a local stack of MAX_CALL_FRAMES to remember callsites
*/
static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
+ struct bpf_subprog_call_depth_info *dinfo,
bool priv_stack_supported)
{
struct bpf_subprog_info *subprog = env->subprog_info;
struct bpf_insn *insn = env->prog->insnsi;
int depth = 0, frame = 0, i, subprog_end, subprog_depth;
bool tail_call_reachable = false;
- int ret_insn[MAX_CALL_FRAMES];
- int ret_prog[MAX_CALL_FRAMES];
- int j;
+ int total;
+ int tmp;
+
+ /* no caller idx */
+ dinfo[idx].caller = -1;
i = subprog[idx].start;
if (!priv_stack_supported)
@@ -6798,8 +5380,12 @@ process_func:
} else {
depth += subprog_depth;
if (depth > MAX_BPF_STACK) {
+ total = 0;
+ for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller)
+ total++;
+
verbose(env, "combined stack size of %d calls is %d. Too large\n",
- frame + 1, depth);
+ total, depth);
return -EACCES;
}
}
@@ -6813,10 +5399,8 @@ continue_func:
if (!is_bpf_throw_kfunc(insn + i))
continue;
- if (subprog[idx].is_cb)
- err = true;
- for (int c = 0; c < frame && !err; c++) {
- if (subprog[ret_prog[c]].is_cb) {
+ for (tmp = idx; tmp >= 0 && !err; tmp = dinfo[tmp].caller) {
+ if (subprog[tmp].is_cb) {
err = true;
break;
}
@@ -6832,12 +5416,10 @@ continue_func:
if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
continue;
/* remember insn and function to return to */
- ret_insn[frame] = i + 1;
- ret_prog[frame] = idx;
/* find the callee */
next_insn = i + insn[i].imm + 1;
- sidx = find_subprog(env, next_insn);
+ sidx = bpf_find_subprog(env, next_insn);
if (verifier_bug_if(sidx < 0, env, "callee not found at insn %d", next_insn))
return -EFAULT;
if (subprog[sidx].is_async_cb) {
@@ -6853,7 +5435,16 @@ continue_func:
return -EINVAL;
}
}
+
+ /* save caller state so it can be restored after we return from the callee */
+ dinfo[idx].frame = frame;
+ dinfo[idx].ret_insn = i + 1;
+
+ /* push caller idx into callee's dinfo */
+ dinfo[sidx].caller = idx;
+
i = next_insn;
+
idx = sidx;
if (!priv_stack_supported)
subprog[idx].priv_stack_mode = NO_PRIV_STACK;
@@ -6861,7 +5452,7 @@ continue_func:
if (subprog[idx].has_tail_call)
tail_call_reachable = true;
- frame++;
+ frame = bpf_subprog_is_global(env, idx) ? 0 : frame + 1;
if (frame >= MAX_CALL_FRAMES) {
verbose(env, "the call stack of %d frames is too deep !\n",
frame);
@@ -6875,12 +5466,12 @@ continue_func:
* tail call counter throughout bpf2bpf calls combined with tailcalls
*/
if (tail_call_reachable)
- for (j = 0; j < frame; j++) {
- if (subprog[ret_prog[j]].is_exception_cb) {
+ for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller) {
+ if (subprog[tmp].is_exception_cb) {
verbose(env, "cannot tail call within exception cb\n");
return -EINVAL;
}
- subprog[ret_prog[j]].tail_call_reachable = true;
+ subprog[tmp].tail_call_reachable = true;
}
if (subprog[0].tail_call_reachable)
env->prog->aux->tail_call_reachable = true;
@@ -6888,23 +5479,33 @@ continue_func:
/* end of for() loop means the last insn of the 'subprog'
* was reached. Doesn't matter whether it was JA or EXIT
*/
- if (frame == 0)
+ if (frame == 0 && dinfo[idx].caller < 0)
return 0;
if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE)
depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
- frame--;
- i = ret_insn[frame];
- idx = ret_prog[frame];
+
+ /* pop caller idx from callee */
+ idx = dinfo[idx].caller;
+
+ /* restore the caller's saved frame depth and return insn */
+ frame = dinfo[idx].frame;
+ i = dinfo[idx].ret_insn;
+
goto continue_func;
}
static int check_max_stack_depth(struct bpf_verifier_env *env)
{
enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN;
+ struct bpf_subprog_call_depth_info *dinfo;
struct bpf_subprog_info *si = env->subprog_info;
bool priv_stack_supported;
int ret;
+ dinfo = kvcalloc(env->subprog_cnt, sizeof(*dinfo), GFP_KERNEL_ACCOUNT);
+ if (!dinfo)
+ return -ENOMEM;
+
for (int i = 0; i < env->subprog_cnt; i++) {
if (si[i].has_tail_call) {
priv_stack_mode = NO_PRIV_STACK;
@@ -6926,9 +5527,12 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
for (int i = env->subprog_cnt - 1; i >= 0; i--) {
if (!i || si[i].is_async_cb) {
priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE;
- ret = check_max_stack_depth_subprog(env, i, priv_stack_supported);
- if (ret < 0)
+ ret = check_max_stack_depth_subprog(env, i, dinfo,
+ priv_stack_supported);
+ if (ret < 0) {
+ kvfree(dinfo);
return ret;
+ }
}
}
@@ -6939,21 +5543,10 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
}
}
- return 0;
-}
+ kvfree(dinfo);
-#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-static int get_callee_stack_depth(struct bpf_verifier_env *env,
- const struct bpf_insn *insn, int idx)
-{
- int start = idx + insn->imm + 1, subprog;
-
- subprog = find_subprog(env, start);
- if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start))
- return -EFAULT;
- return env->subprog_info[subprog].stack_depth;
+ return 0;
}
-#endif
static int __check_buffer_access(struct bpf_verifier_env *env,
const char *buf_info,
@@ -6966,7 +5559,7 @@ static int __check_buffer_access(struct bpf_verifier_env *env,
regno, buf_info, off, size);
return -EACCES;
}
- if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
+ if (!tnum_is_const(reg->var_off)) {
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
@@ -6989,8 +5582,8 @@ static int check_tp_buffer_access(struct bpf_verifier_env *env,
if (err)
return err;
- if (off + size > env->prog->aux->max_tp_access)
- env->prog->aux->max_tp_access = off + size;
+ env->prog->aux->max_tp_access = max(reg->var_off.value + off + size,
+ env->prog->aux->max_tp_access);
return 0;
}
@@ -7008,8 +5601,7 @@ static int check_buffer_access(struct bpf_verifier_env *env,
if (err)
return err;
- if (off + size > *max_access)
- *max_access = off + size;
+ *max_access = max(reg->var_off.value + off + size, *max_access);
return 0;
}
@@ -7196,7 +5788,7 @@ out:
set_sext32_default_val(reg, size);
}
-static bool bpf_map_is_rdonly(const struct bpf_map *map)
+bool bpf_map_is_rdonly(const struct bpf_map *map)
{
/* A map is considered read-only if the following condition are true:
*
@@ -7216,8 +5808,8 @@ static bool bpf_map_is_rdonly(const struct bpf_map *map)
!bpf_map_write_active(map);
}
-static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
- bool is_ldsx)
+int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
+ bool is_ldsx)
{
void *ptr;
u64 addr;
@@ -7402,13 +5994,8 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
tname);
return -EINVAL;
}
- if (off < 0) {
- verbose(env,
- "R%d is ptr_%s invalid negative access: off=%d\n",
- regno, tname, off);
- return -EACCES;
- }
- if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
+
+ if (!tnum_is_const(reg->var_off)) {
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
@@ -7418,6 +6005,15 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
return -EACCES;
}
+ off += reg->var_off.value;
+
+ if (off < 0) {
+ verbose(env,
+ "R%d is ptr_%s invalid negative access: off=%d\n",
+ regno, tname, off);
+ return -EACCES;
+ }
+
if (reg->type & MEM_USER) {
verbose(env,
"R%d is ptr_%s access user memory: off=%d\n",
@@ -7629,7 +6225,7 @@ static int check_stack_access_within_bounds(
enum bpf_access_type type)
{
struct bpf_reg_state *reg = reg_state(env, regno);
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
s64 min_off, max_off;
int err;
char *err_extra;
@@ -7664,8 +6260,8 @@ static int check_stack_access_within_bounds(
if (err) {
if (tnum_is_const(reg->var_off)) {
- verbose(env, "invalid%s stack R%d off=%d size=%d\n",
- err_extra, regno, off, access_size);
+ verbose(env, "invalid%s stack R%d off=%lld size=%d\n",
+ err_extra, regno, min_off, access_size);
} else {
char tn_buf[48];
@@ -7693,6 +6289,23 @@ static bool get_func_retval_range(struct bpf_prog *prog,
return false;
}
+static void add_scalar_to_reg(struct bpf_reg_state *dst_reg, s64 val)
+{
+ struct bpf_reg_state fake_reg;
+
+ if (!val)
+ return;
+
+ fake_reg.type = SCALAR_VALUE;
+ __mark_reg_known(&fake_reg, val);
+
+ scalar32_min_max_add(dst_reg, &fake_reg);
+ scalar_min_max_add(dst_reg, &fake_reg);
+ dst_reg->var_off = tnum_add(dst_reg->var_off, fake_reg.var_off);
+
+ reg_bounds_sync(dst_reg);
+}
+
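/* Usage sketch (not part of the patch): add_scalar_to_reg() refines the
 * tracked bounds exactly as if the program had executed 'dst += val':
 *
 *	struct bpf_reg_state r = {};
 *
 *	r.type = SCALAR_VALUE;
 *	__mark_reg_known(&r, 40);	// r is known to be the constant 40
 *	add_scalar_to_reg(&r, 2);	// r now tracks the constant 42
 */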
/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
* if t==read, value_regno is a register which will receive the value from memory
@@ -7711,14 +6324,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (size < 0)
return size;
- /* alignment checks will add in reg->off themselves */
err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
if (err)
return err;
- /* for access checks, reg->off is just part of off */
- off += reg->off;
-
if (reg->type == PTR_TO_MAP_KEY) {
if (t == BPF_WRITE) {
verbose(env, "write to change key R%d not allowed\n", regno);
@@ -7778,6 +6387,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
return -EACCES;
}
copy_register_state(&regs[value_regno], reg);
+ add_scalar_to_reg(&regs[value_regno], off);
regs[value_regno].type = PTR_TO_INSN;
} else {
mark_reg_unknown(env, regs, value_regno);
@@ -7815,12 +6425,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_CTX) {
- struct bpf_retval_range range;
struct bpf_insn_access_aux info = {
.reg_type = SCALAR_VALUE,
.is_ldsx = is_ldsx,
.log = &env->log,
};
+ struct bpf_retval_range range;
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
@@ -7828,13 +6438,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
return -EACCES;
}
- err = check_ptr_off_reg(env, reg, regno);
- if (err < 0)
- return err;
-
- err = check_ctx_access(env, insn_idx, off, size, t, &info);
- if (err)
- verbose_linfo(env, insn_idx, "; ");
+ err = check_ctx_access(env, insn_idx, regno, off, size, t, &info);
if (!err && t == BPF_READ && value_regno >= 0) {
/* ctx access returns either a scalar, or a
* PTR_TO_PACKET[_META,_END]. In the latter
@@ -8198,8 +6802,6 @@ static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn)
* on the access type and privileges, that all elements of the stack are
* initialized.
*
- * 'off' includes 'regno->off', but not its dynamic part (if any).
- *
* All registers that have been spilled on the stack in the slots within the
* read offsets are marked as read.
*/
@@ -8209,21 +6811,27 @@ static int check_stack_range_initialized(
enum bpf_access_type type, struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *reg = reg_state(env, regno);
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
int err, min_off, max_off, i, j, slot, spi;
/* Some accesses can write anything into the stack, others are
* read-only.
*/
- bool clobber = false;
+ bool clobber = type == BPF_WRITE;
+ /*
+ * Negative access_size signals a global subprog/kfunc arg check, where
+ * STACK_POISON slots are acceptable: static stack liveness may have
+ * determined that the subprog doesn't read them, but BTF-based global
+ * subprog validation isn't accurate enough.
+ */
+ bool allow_poison = access_size < 0 || clobber;
+
+ access_size = abs(access_size);
if (access_size == 0 && !zero_size_allowed) {
verbose(env, "invalid zero-sized read\n");
return -EACCES;
}
- if (type == BPF_WRITE)
- clobber = true;
-
err = check_stack_access_within_bounds(env, regno, off, access_size, type);
if (err)
return err;
@@ -8275,7 +6883,7 @@ static int check_stack_range_initialized(
for (i = min_off; i < max_off + access_size; i++) {
int stack_off = -i - 1;
- spi = __get_spi(i);
+ spi = bpf_get_spi(i);
/* raw_mode may write past allocated_stack */
if (state->allocated_stack <= stack_off)
continue;
@@ -8311,7 +6919,7 @@ static int check_stack_range_initialized(
goto mark;
}
- if (is_spilled_reg(&state->stack[spi]) &&
+ if (bpf_is_spilled_reg(&state->stack[spi]) &&
(state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
env->allow_ptr_leaks)) {
if (clobber) {
@@ -8322,7 +6930,12 @@ static int check_stack_range_initialized(
goto mark;
}
- if (tnum_is_const(reg->var_off)) {
+ if (*stype == STACK_POISON) {
+ if (allow_poison)
+ goto mark;
+ verbose(env, "reading from stack R%d off %d+%d size %d, slot poisoned by dead code elimination\n",
+ regno, min_off, i - min_off, access_size);
+ } else if (tnum_is_const(reg->var_off)) {
verbose(env, "invalid read from stack R%d off %d+%d size %d\n",
regno, min_off, i - min_off, access_size);
} else {
@@ -8334,17 +6947,7 @@ static int check_stack_range_initialized(
}
return -EACCES;
mark:
- /* reading any byte out of 8-byte 'spill_slot' will cause
- * the whole slot to be marked as 'read'
- */
- err = bpf_mark_stack_read(env, reg->frameno, env->insn_idx, BIT(spi));
- if (err)
- return err;
- /* We do not call bpf_mark_stack_write(), as we can not
- * be sure that whether stack slot is written to or not. Hence,
- * we must still conservatively propagate reads upwards even if
- * helper may write to the entire memory range.
- */
+ ;
}
return 0;
}
@@ -8360,7 +6963,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
switch (base_type(reg->type)) {
case PTR_TO_PACKET:
case PTR_TO_PACKET_META:
- return check_packet_access(env, regno, reg->off, access_size,
+ return check_packet_access(env, regno, 0, access_size,
zero_size_allowed);
case PTR_TO_MAP_KEY:
if (access_type == BPF_WRITE) {
@@ -8368,12 +6971,12 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
reg_type_str(env, reg->type));
return -EACCES;
}
- return check_mem_region_access(env, regno, reg->off, access_size,
+ return check_mem_region_access(env, regno, 0, access_size,
reg->map_ptr->key_size, false);
case PTR_TO_MAP_VALUE:
- if (check_map_access_type(env, regno, reg->off, access_size, access_type))
+ if (check_map_access_type(env, regno, 0, access_size, access_type))
return -EACCES;
- return check_map_access(env, regno, reg->off, access_size,
+ return check_map_access(env, regno, 0, access_size,
zero_size_allowed, ACCESS_HELPER);
case PTR_TO_MEM:
if (type_is_rdonly_mem(reg->type)) {
@@ -8383,7 +6986,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
return -EACCES;
}
}
- return check_mem_region_access(env, regno, reg->off,
+ return check_mem_region_access(env, regno, 0,
access_size, reg->mem_size,
zero_size_allowed);
case PTR_TO_BUF:
@@ -8398,39 +7001,33 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
} else {
max_access = &env->prog->aux->max_rdwr_access;
}
- return check_buffer_access(env, reg, regno, reg->off,
+ return check_buffer_access(env, reg, regno, 0,
access_size, zero_size_allowed,
max_access);
case PTR_TO_STACK:
return check_stack_range_initialized(
env,
- regno, reg->off, access_size,
+ regno, 0, access_size,
zero_size_allowed, access_type, meta);
case PTR_TO_BTF_ID:
- return check_ptr_to_btf_access(env, regs, regno, reg->off,
+ return check_ptr_to_btf_access(env, regs, regno, 0,
access_size, BPF_READ, -1);
case PTR_TO_CTX:
- /* in case the function doesn't know how to access the context,
- * (because we are in a program of type SYSCALL for example), we
- * can not statically check its size.
- * Dynamically check it now.
- */
- if (!env->ops->convert_ctx_access) {
- int offset = access_size - 1;
-
- /* Allow zero-byte read from PTR_TO_CTX */
- if (access_size == 0)
- return zero_size_allowed ? 0 : -EACCES;
-
- return check_mem_access(env, env->insn_idx, regno, offset, BPF_B,
- access_type, -1, false, false);
+ /* Reading or writing the context via helper calls is only permitted for syscall programs. */
+ if (is_var_ctx_off_allowed(env->prog)) {
+ int err = check_mem_region_access(env, regno, 0, access_size, U16_MAX,
+ zero_size_allowed);
+ if (err)
+ return err;
+ if (env->prog->aux->max_ctx_offset < reg->umax_value + access_size)
+ env->prog->aux->max_ctx_offset = reg->umax_value + access_size;
+ return 0;
}
-
fallthrough;
default: /* scalar_value or invalid ptr */
/* Allow zero-byte read from NULL, regardless of pointer type */
if (zero_size_allowed && access_size == 0 &&
- register_is_null(reg))
+ bpf_register_is_null(reg))
return 0;
verbose(env, "R%d type=%s ", regno,
@@ -8503,7 +7100,7 @@ static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg
struct bpf_reg_state saved_reg;
int err;
- if (register_is_null(reg))
+ if (bpf_register_is_null(reg))
return 0;
/* Assuming that the register contains a value check if the memory
@@ -8515,8 +7112,10 @@ static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg
mark_ptr_not_null_reg(reg);
}
- err = check_helper_mem_access(env, regno, mem_size, BPF_READ, true, NULL);
- err = err ?: check_helper_mem_access(env, regno, mem_size, BPF_WRITE, true, NULL);
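+ /* negative size tells check_stack_range_initialized() to tolerate STACK_POISON slots */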
+ int size = base_type(reg->type) == PTR_TO_STACK ? -(int)mem_size : mem_size;
+
+ err = check_helper_mem_access(env, regno, size, BPF_READ, true, NULL);
+ err = err ?: check_helper_mem_access(env, regno, size, BPF_WRITE, true, NULL);
if (may_be_null)
*reg = saved_reg;
@@ -8619,9 +7218,9 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, int flags)
return -EINVAL;
}
spin_lock_off = is_res_lock ? rec->res_spin_lock_off : rec->spin_lock_off;
- if (spin_lock_off != val + reg->off) {
+ if (spin_lock_off != val) {
verbose(env, "off %lld doesn't point to 'struct %s_lock' that is at %d\n",
- val + reg->off, lock_str, spin_lock_off);
+ val, lock_str, spin_lock_off);
return -EINVAL;
}
if (is_lock) {
@@ -8736,9 +7335,9 @@ static int check_map_field_pointer(struct bpf_verifier_env *env, u32 regno,
verifier_bug(env, "unsupported BTF field type: %s\n", struct_name);
return -EINVAL;
}
- if (field_off != val + reg->off) {
+ if (field_off != val) {
verbose(env, "off %lld doesn't point to 'struct %s' that is at %d\n",
- val + reg->off, struct_name, field_off);
+ val, struct_name, field_off);
return -EINVAL;
}
if (map_desc->ptr) {
@@ -8806,7 +7405,7 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
return -EINVAL;
}
- kptr_off = reg->off + reg->var_off.value;
+ kptr_off = reg->var_off.value;
kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR);
if (!kptr_field) {
verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
@@ -8927,7 +7526,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn
static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi)
{
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
return state->stack[spi].spilled_ptr.ref_obj_id;
}
@@ -8942,10 +7541,6 @@ static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
return meta->kfunc_flags & KF_ITER_NEW;
}
-static bool is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta)
-{
- return meta->kfunc_flags & KF_ITER_NEXT;
-}
static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
{
@@ -9063,7 +7658,7 @@ static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
struct list_head *pos, *head;
/* Explored states are pushed in stack order, most recent states come first */
- head = explored_state(env, insn_idx);
+ head = bpf_explored_state(env, insn_idx);
list_for_each(pos, head) {
sl = container_of(pos, struct bpf_verifier_state_list, node);
/* If st->branches != 0 state is a part of current DFS verification path,
@@ -9078,11 +7673,6 @@ static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
return NULL;
}
-static void reset_idmap_scratch(struct bpf_verifier_env *env);
-static bool regs_exact(const struct bpf_reg_state *rold,
- const struct bpf_reg_state *rcur,
- struct bpf_idmap *idmap);
-
/*
* Check if scalar registers are exact for the purpose of not widening.
* More lenient than regs_exact()
@@ -9124,8 +7714,8 @@ static int widen_imprecise_scalars(struct bpf_verifier_env *env,
num_slots = min(fold->allocated_stack / BPF_REG_SIZE,
fcur->allocated_stack / BPF_REG_SIZE);
for (i = 0; i < num_slots; i++) {
- if (!is_spilled_reg(&fold->stack[i]) ||
- !is_spilled_reg(&fcur->stack[i]))
+ if (!bpf_is_spilled_reg(&fold->stack[i]) ||
+ !bpf_is_spilled_reg(&fcur->stack[i]))
continue;
maybe_widen_reg(env,
@@ -9369,6 +7959,7 @@ static const struct bpf_reg_types mem_types = {
PTR_TO_MEM | MEM_RINGBUF,
PTR_TO_BUF,
PTR_TO_BTF_ID | PTR_TRUSTED,
+ PTR_TO_CTX,
},
};
@@ -9405,7 +7996,9 @@ static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE }
static const struct bpf_reg_types kptr_xchg_dest_types = {
.types = {
PTR_TO_MAP_VALUE,
- PTR_TO_BTF_ID | MEM_ALLOC
+ PTR_TO_BTF_ID | MEM_ALLOC,
+ PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF,
+ PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU,
}
};
static const struct bpf_reg_types dynptr_types = {
@@ -9449,7 +8042,7 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
struct bpf_reg_state *reg = reg_state(env, regno);
enum bpf_reg_type expected, type = reg->type;
const struct bpf_reg_types *compatible;
- int i, j;
+ int i, j, err;
compatible = compatible_reg_types[base_type(arg_type)];
if (!compatible) {
@@ -9552,8 +8145,12 @@ found:
return -EACCES;
}
- if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
- btf_vmlinux, *arg_btf_id,
+ err = __check_ptr_off_reg(env, reg, regno, true);
+ if (err)
+ return err;
+
+ if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id,
+ reg->var_off.value, btf_vmlinux, *arg_btf_id,
strict_type_match)) {
verbose(env, "R%d is of type %s but %s is expected\n",
regno, btf_type_name(reg->btf, reg->btf_id),
@@ -9565,6 +8162,8 @@ found:
}
case PTR_TO_BTF_ID | MEM_ALLOC:
case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
+ case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
+ case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
meta->func_id != BPF_FUNC_kptr_xchg) {
verifier_bug(env, "unimplemented handling of MEM_ALLOC");
@@ -9631,12 +8230,11 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env,
* because fixed_off_ok is false, but checking here allows us
* to give the user a better error message.
*/
- if (reg->off) {
+ if (!tnum_is_const(reg->var_off) || reg->var_off.value != 0) {
verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
regno);
return -EINVAL;
}
- return __check_ptr_off_reg(env, reg, regno, false);
}
switch (type) {
@@ -9671,6 +8269,16 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env,
* still need to do checks instead of returning.
*/
return __check_ptr_off_reg(env, reg, regno, true);
+ case PTR_TO_CTX:
+ /*
+ * Allow fixed and variable offsets for syscall context, but
+ * only when the argument is passed as memory, not ctx,
+ * otherwise we may get a modified ctx in tail-called programs and
+ * global subprogs (that may act as extension prog hooks).
+ */
+ if (arg_type != ARG_PTR_TO_CTX && is_var_ctx_off_allowed(env->prog))
+ return 0;
+ fallthrough;
default:
return __check_ptr_off_reg(env, reg, regno, false);
}
@@ -9700,7 +8308,7 @@ static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
int spi;
if (reg->type == CONST_PTR_TO_DYNPTR)
@@ -9713,7 +8321,7 @@ static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
int spi;
if (reg->type == CONST_PTR_TO_DYNPTR)
@@ -9727,13 +8335,13 @@ static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state
static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
struct bpf_reg_state *reg)
{
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
int spi;
if (reg->type == CONST_PTR_TO_DYNPTR)
return reg->dynptr.type;
- spi = __get_spi(reg->off);
+ spi = bpf_get_spi(reg->var_off.value);
if (spi < 0) {
verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
return BPF_DYNPTR_TYPE_INVALID;
@@ -9774,13 +8382,13 @@ static int check_reg_const_str(struct bpf_verifier_env *env,
return -EACCES;
}
- err = check_map_access(env, regno, reg->off,
- map->value_size - reg->off, false,
+ err = check_map_access(env, regno, 0,
+ map->value_size - reg->var_off.value, false,
ACCESS_HELPER);
if (err)
return err;
- map_off = reg->off + reg->var_off.value;
+ map_off = reg->var_off.value;
err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
if (err) {
verbose(env, "direct value access on string failed\n");
@@ -9801,7 +8409,7 @@ static int get_constant_map_key(struct bpf_verifier_env *env,
u32 key_size,
s64 *value)
{
- struct bpf_func_state *state = func(env, key);
+ struct bpf_func_state *state = bpf_func(env, key);
struct bpf_reg_state *reg;
int slot, spi, off;
int spill_size = 0;
@@ -9817,7 +8425,7 @@ static int get_constant_map_key(struct bpf_verifier_env *env,
if (!tnum_is_const(key->var_off))
return -EOPNOTSUPP;
- stack_off = key->off + key->var_off.value;
+ stack_off = key->var_off.value;
slot = -stack_off - 1;
spi = slot / BPF_REG_SIZE;
off = slot % BPF_REG_SIZE;
@@ -9832,7 +8440,7 @@ static int get_constant_map_key(struct bpf_verifier_env *env,
}
/* Check that stack contains a scalar spill of expected size */
- if (!is_spilled_scalar_reg(&state->stack[spi]))
+ if (!bpf_is_spilled_scalar_reg(&state->stack[spi]))
return -EOPNOTSUPP;
for (i = off; i >= 0 && stype[i] == STACK_SPILL; i--)
spill_size++;
@@ -9847,7 +8455,7 @@ static int get_constant_map_key(struct bpf_verifier_env *env,
/* We are relying on a constant value. So mark as precise
* to prevent pruning on it.
*/
- bt_set_frame_slot(&env->bt, key->frameno, spi);
+ bpf_bt_set_frame_slot(&env->bt, key->frameno, spi);
err = mark_chain_precision_batch(env, env->cur_state);
if (err < 0)
return err;
@@ -9899,7 +8507,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return err;
}
- if (register_is_null(reg) && type_may_be_null(arg_type))
+ if (bpf_register_is_null(reg) && type_may_be_null(arg_type))
/* A NULL register has a SCALAR_VALUE type, so skip
* type checking.
*/
@@ -9921,7 +8529,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
skip_type_check:
if (arg_type_is_release(arg_type)) {
if (arg_type_is_dynptr(arg_type)) {
- struct bpf_func_state *state = func(env, reg);
+ struct bpf_func_state *state = bpf_func(env, reg);
int spi;
/* Only dynptr created on stack can be released, thus
@@ -9939,7 +8547,7 @@ skip_type_check:
verbose(env, "cannot release unowned const bpf_dynptr\n");
return -EINVAL;
}
- } else if (!reg->ref_obj_id && !register_is_null(reg)) {
+ } else if (!reg->ref_obj_id && !bpf_register_is_null(reg)) {
verbose(env, "R%d must be referenced when passed to release function\n",
regno);
return -EINVAL;
@@ -10018,7 +8626,7 @@ skip_type_check:
}
break;
case ARG_PTR_TO_MAP_VALUE:
- if (type_may_be_null(arg_type) && register_is_null(reg))
+ if (type_may_be_null(arg_type) && bpf_register_is_null(reg))
return 0;
/* bpf_map_xxx(..., map_ptr, ..., value) call:
@@ -10166,7 +8774,7 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
return false;
}
-static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
+bool bpf_allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
{
return env->prog->jit_requested &&
bpf_jit_supports_subprog_tailcalls();
@@ -10311,7 +8919,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_FUNC_tail_call:
if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
goto error;
- if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
+ if (env->subprog_cnt > 1 && !bpf_allow_tail_call_in_subprogs(env)) {
verbose(env, "mixing of tail_calls and bpf-to-bpf calls is not supported\n");
return -EINVAL;
}
@@ -10623,7 +9231,7 @@ static void clear_caller_saved_regs(struct bpf_verifier_env *env,
/* after the call registers r0 - r5 were scratched */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
- mark_reg_not_init(env, regs, caller_saved[i]);
+ bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
__check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
}
}
@@ -10718,7 +9326,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
* invalid memory access.
*/
} else if (arg->arg_type == ARG_PTR_TO_CTX) {
- ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE);
+ ret = check_func_arg_reg_off(env, reg, regno, ARG_PTR_TO_CTX);
if (ret < 0)
return ret;
/* If function expects ctx type in BTF check that caller
@@ -10762,7 +9370,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
struct bpf_call_arg_meta meta;
int err;
- if (register_is_null(reg) && type_may_be_null(arg->arg_type))
+ if (bpf_register_is_null(reg) && type_may_be_null(arg->arg_type))
continue;
memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */
@@ -10844,7 +9452,7 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins
return -EFAULT;
}
- if (is_async_callback_calling_insn(insn)) {
+ if (bpf_is_async_callback_calling_insn(insn)) {
struct bpf_verifier_state *async_cb;
/* there is no real recursion here. timer and workqueue callbacks are async */
@@ -10891,7 +9499,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int err, subprog, target_insn;
target_insn = *insn_idx + insn->imm + 1;
- subprog = find_subprog(env, target_insn);
+ subprog = bpf_find_subprog(env, target_insn);
if (verifier_bug_if(subprog < 0, env, "target of func call at insn %d is not a program",
target_insn))
return -EFAULT;
@@ -10900,7 +9508,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
err = btf_check_subprog_call(env, subprog, caller->regs);
if (err == -EFAULT)
return err;
- if (subprog_is_global(env, subprog)) {
+ if (bpf_subprog_is_global(env, subprog)) {
const char *sub_name = subprog_name(env, subprog);
if (env->cur_state->active_locks) {
@@ -10909,12 +9517,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -EINVAL;
}
- if (env->subprog_info[subprog].might_sleep &&
- (env->cur_state->active_rcu_locks || env->cur_state->active_preempt_locks ||
- env->cur_state->active_irq_id || !in_sleepable(env))) {
- verbose(env, "global functions that may sleep are not allowed in non-sleepable context,\n"
- "i.e., in a RCU/IRQ/preempt-disabled section, or in\n"
- "a non-sleepable BPF program context\n");
+ if (env->subprog_info[subprog].might_sleep && !in_sleepable_context(env)) {
+ verbose(env, "sleepable global function %s() called in %s\n",
+ sub_name, non_sleepable_context_description(env));
return -EINVAL;
}
@@ -10933,9 +9538,11 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
subprog_aux(env, subprog)->called = true;
clear_caller_saved_regs(env, caller->regs);
- /* All global functions return a 64-bit SCALAR_VALUE */
- mark_reg_unknown(env, caller->regs, BPF_REG_0);
- caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
+ /* All non-void global functions return a 64-bit SCALAR_VALUE. */
+ if (!subprog_returns_void(env, subprog)) {
+ mark_reg_unknown(env, caller->regs, BPF_REG_0);
+ caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
+ }
/* continue with next insn after call */
return 0;
@@ -10953,8 +9560,6 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* and go analyze first insn of the callee */
*insn_idx = env->subprog_info[subprog].start - 1;
- bpf_reset_live_stack_callchain(env);
-
if (env->log.level & BPF_LOG_LEVEL) {
verbose(env, "caller:\n");
print_verifier_state(env, state, caller->frameno, true);
@@ -10988,7 +9593,7 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
/* unused */
- __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
return 0;
}
@@ -11045,9 +9650,9 @@ static int set_loop_callback_state(struct bpf_verifier_env *env,
callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
/* unused */
- __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
- __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
- __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
callee->in_callback_fn = true;
callee->callback_ret_range = retval_range(0, 1);
@@ -11077,8 +9682,8 @@ static int set_timer_callback_state(struct bpf_verifier_env *env,
callee->regs[BPF_REG_3].map_ptr = map_ptr;
/* unused */
- __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
- __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
callee->in_async_callback_fn = true;
callee->callback_ret_range = retval_range(0, 0);
return 0;
@@ -11105,8 +9710,8 @@ static int set_find_vma_callback_state(struct bpf_verifier_env *env,
callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
/* unused */
- __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
- __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
callee->in_callback_fn = true;
callee->callback_ret_range = retval_range(0, 1);
return 0;
@@ -11121,14 +9726,14 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
* callback_ctx, u64 flags);
* callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
*/
- __mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
/* unused */
- __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
- __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
- __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
callee->in_callback_fn = true;
callee->callback_ret_range = retval_range(0, 1);
@@ -11149,7 +9754,8 @@ static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
*/
struct btf_field *field;
- field = reg_find_field_offset(&caller->regs[BPF_REG_1], caller->regs[BPF_REG_1].off,
+ field = reg_find_field_offset(&caller->regs[BPF_REG_1],
+ caller->regs[BPF_REG_1].var_off.value,
BPF_RB_ROOT);
if (!field || !field->graph_root.value_btf_id)
return -EFAULT;
@@ -11159,9 +9765,9 @@ static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
- __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
- __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
- __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
callee->in_callback_fn = true;
callee->callback_ret_range = retval_range(0, 1);
return 0;
@@ -11190,8 +9796,8 @@ static int set_task_work_schedule_callback_state(struct bpf_verifier_env *env,
callee->regs[BPF_REG_3].map_ptr = map_ptr;
/* unused */
- __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
- __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+ bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
callee->in_async_callback_fn = true;
callee->callback_ret_range = retval_range(S32_MIN, S32_MAX);
return 0;
@@ -11222,10 +9828,9 @@ static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
return is_rbtree_lock_required_kfunc(kfunc_btf_id);
}
-static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg,
- bool return_32bit)
+static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg)
{
- if (return_32bit)
+ if (range.return_32bit)
return range.minval <= reg->s32_min_value && reg->s32_max_value <= range.maxval;
else
return range.minval <= reg->smin_value && reg->smax_value <= range.maxval;
@@ -11239,10 +9844,6 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
bool in_callback_fn;
int err;
- err = bpf_update_live_stack(env);
- if (err)
- return err;
-
callee = state->frame[state->curframe];
r0 = &callee->regs[BPF_REG_0];
if (r0->type == PTR_TO_STACK) {
@@ -11269,7 +9870,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
return err;
/* enforce R0 return value range, and bpf_callback_t returns 64bit */
- if (!retval_range_within(callee->callback_ret_range, r0, false)) {
+ if (!retval_range_within(callee->callback_ret_range, r0)) {
verbose_invalid_scalar(env, r0, callee->callback_ret_range,
"At callback return", "R0");
return -EINVAL;
@@ -11525,7 +10126,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
* and map_direct_value_addr is set.
*/
- fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
+ fmt_map_off = fmt_reg->var_off.value;
err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
fmt_map_off);
if (err) {
@@ -11551,7 +10152,7 @@ static int check_get_func_ip(struct bpf_verifier_env *env)
if (type == BPF_PROG_TYPE_TRACING) {
if (!bpf_prog_has_trampoline(env->prog)) {
- verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
+ verbose(env, "func %s#%d supported only for fentry/fexit/fsession/fmod_ret programs\n",
func_id_name(func_id), func_id);
return -ENOTSUPP;
}
@@ -11573,7 +10174,7 @@ static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env)
static bool loop_flag_is_zero(struct bpf_verifier_env *env)
{
struct bpf_reg_state *reg = reg_state(env, BPF_REG_4);
- bool reg_is_null = register_is_null(reg);
+ bool reg_is_null = bpf_register_is_null(reg);
if (reg_is_null)
mark_chain_precision(env, BPF_REG_4);
@@ -11614,8 +10215,8 @@ static bool can_elide_value_nullness(enum bpf_map_type type)
}
}
-static int get_helper_proto(struct bpf_verifier_env *env, int func_id,
- const struct bpf_func_proto **ptr)
+int bpf_get_helper_proto(struct bpf_verifier_env *env, int func_id,
+ const struct bpf_func_proto **ptr)
{
if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID)
return -ERANGE;
@@ -11637,6 +10238,19 @@ static inline bool in_sleepable_context(struct bpf_verifier_env *env)
in_sleepable(env);
}
+static const char *non_sleepable_context_description(struct bpf_verifier_env *env)
+{
+ if (env->cur_state->active_rcu_locks)
+ return "rcu_read_lock region";
+ if (env->cur_state->active_preempt_locks)
+ return "non-preemptible region";
+ if (env->cur_state->active_irq_id)
+ return "IRQ-disabled region";
+ if (env->cur_state->active_locks)
+ return "lock region";
+ return "non-sleepable prog";
+}
+
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
@@ -11653,7 +10267,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
/* find function prototype */
func_id = insn->imm;
- err = get_helper_proto(env, insn->imm, &fn);
+ err = bpf_get_helper_proto(env, insn->imm, &fn);
if (err == -ERANGE) {
verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id);
return -EINVAL;
@@ -11676,11 +10290,6 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return -EINVAL;
}
- if (!in_sleepable(env) && fn->might_sleep) {
- verbose(env, "helper call might sleep in a non-sleepable prog\n");
- return -EINVAL;
- }
-
/* With LD_ABS/IND some JITs save/restore skb from r1. */
changes_data = bpf_helper_changes_pkt_data(func_id);
if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
@@ -11697,28 +10306,10 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return err;
}
- if (env->cur_state->active_rcu_locks) {
- if (fn->might_sleep) {
- verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
- func_id_name(func_id), func_id);
- return -EINVAL;
- }
- }
-
- if (env->cur_state->active_preempt_locks) {
- if (fn->might_sleep) {
- verbose(env, "sleepable helper %s#%d in non-preemptible region\n",
- func_id_name(func_id), func_id);
- return -EINVAL;
- }
- }
-
- if (env->cur_state->active_irq_id) {
- if (fn->might_sleep) {
- verbose(env, "sleepable helper %s#%d in IRQ-disabled region\n",
- func_id_name(func_id), func_id);
- return -EINVAL;
- }
+ if (fn->might_sleep && !in_sleepable_context(env)) {
+ verbose(env, "sleepable helper %s#%d in %s\n", func_id_name(func_id), func_id,
+ non_sleepable_context_description(env));
+ return -EINVAL;
}
/* Track non-sleepable context for helpers. */
@@ -11779,7 +10370,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
}
} else if (meta.ref_obj_id) {
err = release_reference(env, meta.ref_obj_id);
- } else if (register_is_null(&regs[meta.release_regno])) {
+ } else if (bpf_register_is_null(&regs[meta.release_regno])) {
/* meta.ref_obj_id can only be 0 if register that is meant to be
* released is NULL, which must be > R0.
*/
@@ -11802,7 +10393,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
/* check that flags argument in get_local_storage(map, flags) is 0,
* this is required because get_local_storage() can't return an error.
*/
- if (!register_is_null(&regs[BPF_REG_2])) {
+ if (!bpf_register_is_null(&regs[BPF_REG_2])) {
verbose(env, "get_local_storage() doesn't support non-zero flags\n");
return -EINVAL;
}
@@ -11945,7 +10536,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
/* reset caller saved regs */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
- mark_reg_not_init(env, regs, caller_saved[i]);
+ bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
}
@@ -12207,10 +10798,6 @@ static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
return meta->kfunc_flags & KF_RELEASE;
}
-static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
-{
- return meta->kfunc_flags & KF_SLEEPABLE;
-}
static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
{
@@ -12431,6 +11018,28 @@ static bool is_kfunc_arg_prog_aux(const struct btf *btf, const struct btf_param
return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_PROG_AUX_ID);
}
+/*
+ * A kfunc with KF_IMPLICIT_ARGS has two prototypes in BTF:
+ * - the _impl prototype with full arg list (meta->func_proto)
+ * - the BPF API prototype w/o implicit args (func->type in BTF)
+ * To determine whether an argument is implicit, we compare its position
+ * against the number of arguments in the prototype w/o implicit args.
+ */
+static bool is_kfunc_arg_implicit(const struct bpf_kfunc_call_arg_meta *meta, u32 arg_idx)
+{
+ const struct btf_type *func, *func_proto;
+ u32 argn;
+
+ if (!(meta->kfunc_flags & KF_IMPLICIT_ARGS))
+ return false;
+
+ func = btf_type_by_id(meta->btf, meta->func_id);
+ func_proto = btf_type_by_id(meta->btf, func->type);
+ argn = btf_type_vlen(func_proto);
+
+ return argn <= arg_idx;
+}
+
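/* Worked example (sketch, prototypes inferred from this patch): a
 * KF_IMPLICIT_ARGS kfunc such as bpf_obj_new would carry two prototypes:
 *
 *	void *bpf_obj_new_impl(u64 local_type_id, void *meta);	// full proto
 *	void *bpf_obj_new(u64 local_type_id);			// BPF API proto
 *
 * btf_type_vlen() of the API prototype is 1, so arg_idx 1 ('meta')
 * satisfies argn <= arg_idx and is treated as implicit.
 */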
/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
const struct btf *btf,
@@ -12497,10 +11106,15 @@ enum kfunc_ptr_arg_type {
enum special_kfunc_type {
KF_bpf_obj_new_impl,
+ KF_bpf_obj_new,
KF_bpf_obj_drop_impl,
+ KF_bpf_obj_drop,
KF_bpf_refcount_acquire_impl,
+ KF_bpf_refcount_acquire,
KF_bpf_list_push_front_impl,
+ KF_bpf_list_push_front,
KF_bpf_list_push_back_impl,
+ KF_bpf_list_push_back,
KF_bpf_list_pop_front,
KF_bpf_list_pop_back,
KF_bpf_list_front,
@@ -12511,6 +11125,7 @@ enum special_kfunc_type {
KF_bpf_rcu_read_unlock,
KF_bpf_rbtree_remove,
KF_bpf_rbtree_add_impl,
+ KF_bpf_rbtree_add,
KF_bpf_rbtree_first,
KF_bpf_rbtree_root,
KF_bpf_rbtree_left,
@@ -12523,7 +11138,9 @@ enum special_kfunc_type {
KF_bpf_dynptr_slice_rdwr,
KF_bpf_dynptr_clone,
KF_bpf_percpu_obj_new_impl,
+ KF_bpf_percpu_obj_new,
KF_bpf_percpu_obj_drop_impl,
+ KF_bpf_percpu_obj_drop,
KF_bpf_throw,
KF_bpf_wq_set_callback,
KF_bpf_preempt_disable,
@@ -12557,10 +11174,15 @@ enum special_kfunc_type {
BTF_ID_LIST(special_kfunc_list)
BTF_ID(func, bpf_obj_new_impl)
+BTF_ID(func, bpf_obj_new)
BTF_ID(func, bpf_obj_drop_impl)
+BTF_ID(func, bpf_obj_drop)
BTF_ID(func, bpf_refcount_acquire_impl)
+BTF_ID(func, bpf_refcount_acquire)
BTF_ID(func, bpf_list_push_front_impl)
+BTF_ID(func, bpf_list_push_front)
BTF_ID(func, bpf_list_push_back_impl)
+BTF_ID(func, bpf_list_push_back)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_list_front)
@@ -12571,6 +11193,7 @@ BTF_ID(func, bpf_rcu_read_lock)
BTF_ID(func, bpf_rcu_read_unlock)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add_impl)
+BTF_ID(func, bpf_rbtree_add)
BTF_ID(func, bpf_rbtree_first)
BTF_ID(func, bpf_rbtree_root)
BTF_ID(func, bpf_rbtree_left)
@@ -12590,7 +11213,9 @@ BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_ID(func, bpf_dynptr_clone)
BTF_ID(func, bpf_percpu_obj_new_impl)
+BTF_ID(func, bpf_percpu_obj_new)
BTF_ID(func, bpf_percpu_obj_drop_impl)
+BTF_ID(func, bpf_percpu_obj_drop)
BTF_ID(func, bpf_throw)
BTF_ID(func, bpf_wq_set_callback)
BTF_ID(func, bpf_preempt_disable)
@@ -12634,6 +11259,50 @@ BTF_ID(func, bpf_session_is_return)
BTF_ID(func, bpf_stream_vprintk)
BTF_ID(func, bpf_stream_print_stack)
+static bool is_bpf_obj_new_kfunc(u32 func_id)
+{
+ return func_id == special_kfunc_list[KF_bpf_obj_new] ||
+ func_id == special_kfunc_list[KF_bpf_obj_new_impl];
+}
+
+static bool is_bpf_percpu_obj_new_kfunc(u32 func_id)
+{
+ return func_id == special_kfunc_list[KF_bpf_percpu_obj_new] ||
+ func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl];
+}
+
+static bool is_bpf_obj_drop_kfunc(u32 func_id)
+{
+ return func_id == special_kfunc_list[KF_bpf_obj_drop] ||
+ func_id == special_kfunc_list[KF_bpf_obj_drop_impl];
+}
+
+static bool is_bpf_percpu_obj_drop_kfunc(u32 func_id)
+{
+ return func_id == special_kfunc_list[KF_bpf_percpu_obj_drop] ||
+ func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl];
+}
+
+static bool is_bpf_refcount_acquire_kfunc(u32 func_id)
+{
+ return func_id == special_kfunc_list[KF_bpf_refcount_acquire] ||
+ func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
+}
+
+static bool is_bpf_list_push_kfunc(u32 func_id)
+{
+ return func_id == special_kfunc_list[KF_bpf_list_push_front] ||
+ func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
+ func_id == special_kfunc_list[KF_bpf_list_push_back] ||
+ func_id == special_kfunc_list[KF_bpf_list_push_back_impl];
+}
+
+static bool is_bpf_rbtree_add_kfunc(u32 func_id)
+{
+ return func_id == special_kfunc_list[KF_bpf_rbtree_add] ||
+ func_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
+}
+
static bool is_task_work_add_kfunc(u32 func_id)
{
return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] ||
@@ -12642,10 +11311,8 @@ static bool is_task_work_add_kfunc(u32 func_id)
static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
{
- if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
- meta->arg_owning_ref) {
+ if (is_bpf_refcount_acquire_kfunc(meta->func_id) && meta->arg_owning_ref)
return false;
- }
return meta->kfunc_flags & KF_RET_NULL;
}
@@ -12670,7 +11337,7 @@ static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
}
-static bool is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
+bool bpf_is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
{
return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data];
}
@@ -12705,7 +11372,7 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
return KF_ARG_PTR_TO_CTX;
- if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg) &&
+ if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && bpf_register_is_null(reg) &&
!arg_mem_size)
return KF_ARG_PTR_TO_NULL;
@@ -12831,13 +11498,12 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
strict_type_match = true;
- WARN_ON_ONCE(is_kfunc_release(meta) &&
- (reg->off || !tnum_is_const(reg->var_off) ||
- reg->var_off.value));
+ WARN_ON_ONCE(is_kfunc_release(meta) && !tnum_is_const(reg->var_off));
reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
- struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match);
+ struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->var_off.value,
+ meta->btf, ref_id, strict_type_match);
/* If kfunc is accepting a projection type (ie. __sk_buff), it cannot
* actually use it -- it must cast to the underlying type. So we allow
* caller to pass in the underlying type.
@@ -13034,8 +11700,7 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_
static bool is_bpf_list_api_kfunc(u32 btf_id)
{
- return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
- btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
+ return is_bpf_list_push_kfunc(btf_id) ||
btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
btf_id == special_kfunc_list[KF_bpf_list_front] ||
@@ -13044,7 +11709,7 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
{
- return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
+ return is_bpf_rbtree_add_kfunc(btf_id) ||
btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
@@ -13061,8 +11726,9 @@ static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
static bool is_bpf_graph_api_kfunc(u32 btf_id)
{
- return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id) ||
- btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
+ return is_bpf_list_api_kfunc(btf_id) ||
+ is_bpf_rbtree_api_kfunc(btf_id) ||
+ is_bpf_refcount_acquire_kfunc(btf_id);
}
static bool is_bpf_res_spin_lock_kfunc(u32 btf_id)
@@ -13095,7 +11761,7 @@ static bool kfunc_spin_allowed(u32 btf_id)
static bool is_sync_callback_calling_kfunc(u32 btf_id)
{
- return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
+ return is_bpf_rbtree_add_kfunc(btf_id);
}
static bool is_async_callback_calling_kfunc(u32 btf_id)
@@ -13159,12 +11825,11 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
switch (node_field_type) {
case BPF_LIST_NODE:
- ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
- kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]);
+ ret = is_bpf_list_push_kfunc(kfunc_btf_id);
break;
case BPF_RB_NODE:
- ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
- kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
+ ret = (is_bpf_rbtree_add_kfunc(kfunc_btf_id) ||
+ kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
break;
@@ -13209,7 +11874,7 @@ __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
}
rec = reg_btf_record(reg);
- head_off = reg->off + reg->var_off.value;
+ head_off = reg->var_off.value;
field = btf_record_find(rec, head_off, head_field_type);
if (!field) {
verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
@@ -13276,7 +11941,7 @@ __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
return -EINVAL;
}
- node_off = reg->off + reg->var_off.value;
+ node_off = reg->var_off.value;
field = reg_find_field_offset(reg, node_off, node_field_type);
if (!field) {
verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
@@ -13381,11 +12046,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
bool is_ret_buf_sz = false;
int kf_arg_type;
- t = btf_type_skip_modifiers(btf, args[i].type, NULL);
-
- if (is_kfunc_arg_ignore(btf, &args[i]))
- continue;
-
if (is_kfunc_arg_prog_aux(btf, &args[i])) {
/* Reject repeated use bpf_prog_aux */
if (meta->arg_prog) {
@@ -13397,6 +12057,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
continue;
}
+ if (is_kfunc_arg_ignore(btf, &args[i]) || is_kfunc_arg_implicit(meta, i))
+ continue;
+
+ t = btf_type_skip_modifiers(btf, args[i].type, NULL);
+
if (btf_type_is_scalar(t)) {
if (reg->type != SCALAR_VALUE) {
verbose(env, "R%d is not a scalar\n", regno);
@@ -13448,7 +12113,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return -EINVAL;
}
- if ((register_is_null(reg) || type_may_be_null(reg->type)) &&
+ if ((bpf_register_is_null(reg) || type_may_be_null(reg->type)) &&
!is_kfunc_arg_nullable(meta->btf, &args[i])) {
verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
return -EACCES;
@@ -13525,7 +12190,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
}
}
fallthrough;
- case KF_ARG_PTR_TO_CTX:
case KF_ARG_PTR_TO_DYNPTR:
case KF_ARG_PTR_TO_ITER:
case KF_ARG_PTR_TO_LIST_HEAD:
@@ -13543,6 +12207,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_IRQ_FLAG:
case KF_ARG_PTR_TO_RES_SPIN_LOCK:
break;
+ case KF_ARG_PTR_TO_CTX:
+ arg_type = ARG_PTR_TO_CTX;
+ break;
default:
verifier_bug(env, "unknown kfunc arg type %d", kf_arg_type);
return -EFAULT;
@@ -13571,13 +12238,13 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
break;
case KF_ARG_PTR_TO_ALLOC_BTF_ID:
if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
- if (meta->func_id != special_kfunc_list[KF_bpf_obj_drop_impl]) {
- verbose(env, "arg#%d expected for bpf_obj_drop_impl()\n", i);
+ if (!is_bpf_obj_drop_kfunc(meta->func_id)) {
+ verbose(env, "arg#%d expected for bpf_obj_drop()\n", i);
return -EINVAL;
}
} else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
- if (meta->func_id != special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
- verbose(env, "arg#%d expected for bpf_percpu_obj_drop_impl()\n", i);
+ if (!is_bpf_percpu_obj_drop_kfunc(meta->func_id)) {
+ verbose(env, "arg#%d expected for bpf_percpu_obj_drop()\n", i);
return -EINVAL;
}
} else {
@@ -13703,7 +12370,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return ret;
break;
case KF_ARG_PTR_TO_RB_NODE:
- if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+ if (is_bpf_rbtree_add_kfunc(meta->func_id)) {
if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
verbose(env, "arg#%d expected pointer to allocated object\n", i);
return -EINVAL;
@@ -13766,7 +12433,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
struct bpf_reg_state *size_reg = &regs[regno + 1];
const struct btf_param *size_arg = &args[i + 1];
- if (!register_is_null(buff_reg) || !is_kfunc_arg_nullable(meta->btf, buff_arg)) {
+ if (!bpf_register_is_null(buff_reg) || !is_kfunc_arg_nullable(meta->btf, buff_arg)) {
ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
if (ret < 0) {
verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
@@ -13899,10 +12566,10 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return 0;
}
-static int fetch_kfunc_arg_meta(struct bpf_verifier_env *env,
- s32 func_id,
- s16 offset,
- struct bpf_kfunc_call_arg_meta *meta)
+int bpf_fetch_kfunc_arg_meta(struct bpf_verifier_env *env,
+ s32 func_id,
+ s16 offset,
+ struct bpf_kfunc_call_arg_meta *meta)
{
struct bpf_kfunc_meta kfunc;
int err;
@@ -13925,6 +12592,194 @@ static int fetch_kfunc_arg_meta(struct bpf_verifier_env *env,
return 0;
}
+/*
+ * Determine how many bytes a helper accesses through a stack pointer at
+ * argument position @arg (0-based, corresponding to R1-R5).
+ *
+ * Returns:
+ * > 0 known read access size in bytes
+ * 0 doesn't read anything directly
+ * S64_MIN unknown
+ * < 0 known write access of (-return) bytes
+ */
+s64 bpf_helper_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ int arg, int insn_idx)
+{
+ struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+ const struct bpf_func_proto *fn;
+ enum bpf_arg_type at;
+ s64 size;
+
+ if (bpf_get_helper_proto(env, insn->imm, &fn) < 0)
+ return S64_MIN;
+
+ at = fn->arg_type[arg];
+
+ switch (base_type(at)) {
+ case ARG_PTR_TO_MAP_KEY:
+ case ARG_PTR_TO_MAP_VALUE: {
+ bool is_key = base_type(at) == ARG_PTR_TO_MAP_KEY;
+ u64 val;
+ int i, map_reg;
+
+ for (i = 0; i < arg; i++) {
+ if (base_type(fn->arg_type[i]) == ARG_CONST_MAP_PTR)
+ break;
+ }
+ if (i >= arg)
+ goto scan_all_maps;
+
+ map_reg = BPF_REG_1 + i;
+
+ if (!(aux->const_reg_map_mask & BIT(map_reg)))
+ goto scan_all_maps;
+
+ i = aux->const_reg_vals[map_reg];
+ if (i < env->used_map_cnt) {
+ size = is_key ? env->used_maps[i]->key_size
+ : env->used_maps[i]->value_size;
+ goto out;
+ }
+scan_all_maps:
+ /*
+ * Map pointer is not known at this call site (e.g. different
+ * maps on merged paths). Conservatively return the largest
+ * key_size or value_size across all maps used by the program.
+ */
+ val = 0;
+ for (i = 0; i < env->used_map_cnt; i++) {
+ struct bpf_map *map = env->used_maps[i];
+ u32 sz = is_key ? map->key_size : map->value_size;
+
+ if (sz > val)
+ val = sz;
+ if (map->inner_map_meta) {
+ sz = is_key ? map->inner_map_meta->key_size
+ : map->inner_map_meta->value_size;
+ if (sz > val)
+ val = sz;
+ }
+ }
+ if (!val)
+ return S64_MIN;
+ size = val;
+ goto out;
+ }
+ case ARG_PTR_TO_MEM:
+ if (at & MEM_FIXED_SIZE) {
+ size = fn->arg_size[arg];
+ goto out;
+ }
+ if (arg + 1 < ARRAY_SIZE(fn->arg_type) &&
+ arg_type_is_mem_size(fn->arg_type[arg + 1])) {
+ int size_reg = BPF_REG_1 + arg + 1;
+
+ if (aux->const_reg_mask & BIT(size_reg)) {
+ size = (s64)aux->const_reg_vals[size_reg];
+ goto out;
+ }
+ /*
+			 * Size is not a known constant at this call site (e.g.
+			 * it is const on each path but differs across merged
+			 * paths). MAX_BPF_STACK is a safe upper bound for reads.
+ */
+ if (at & MEM_UNINIT)
+ return 0;
+ return MAX_BPF_STACK;
+ }
+ return S64_MIN;
+ case ARG_PTR_TO_DYNPTR:
+ size = BPF_DYNPTR_SIZE;
+ break;
+ case ARG_PTR_TO_STACK:
+ /*
+ * Only used by bpf_calls_callback() helpers. The helper itself
+ * doesn't access stack. The callback subprog does and it's
+ * analyzed separately.
+ */
+ return 0;
+ default:
+ return S64_MIN;
+ }
+out:
+ /*
+ * MEM_UNINIT args are write-only: the helper initializes the
+ * buffer without reading it.
+ */
+ if (at & MEM_UNINIT)
+ return -size;
+ return size;
+}
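+
+/*
+ * Illustrative example, assuming the usual helper protos: for
+ * bpf_map_lookup_elem(map, key) with a known map whose key_size is 8,
+ * arg 1 is ARG_PTR_TO_MAP_KEY and the result is 8 (an 8-byte read).
+ * For bpf_probe_read_kernel(dst, 16, unsafe_ptr), arg 0 is
+ * ARG_PTR_TO_MEM | MEM_UNINIT with a constant size in R2, so the
+ * result is -16 (a 16-byte write that doesn't read the buffer).
+ */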
+
+/*
+ * Determine how many bytes a kfunc accesses through a stack pointer at
+ * argument position @arg (0-based, corresponding to R1-R5).
+ *
+ * Returns:
+ * > 0 known read access size in bytes
+ * 0 doesn't access memory through that argument (ex: not a pointer)
+ * S64_MIN unknown
+ * < 0 known write access of (-return) bytes
+ */
+s64 bpf_kfunc_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ int arg, int insn_idx)
+{
+ struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+ struct bpf_kfunc_call_arg_meta meta;
+ const struct btf_param *args;
+ const struct btf_type *t, *ref_t;
+ const struct btf *btf;
+ u32 nargs, type_size;
+ s64 size;
+
+ if (bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta) < 0)
+ return S64_MIN;
+
+ btf = meta.btf;
+ args = btf_params(meta.func_proto);
+ nargs = btf_type_vlen(meta.func_proto);
+ if (arg >= nargs)
+ return 0;
+
+ t = btf_type_skip_modifiers(btf, args[arg].type, NULL);
+ if (!btf_type_is_ptr(t))
+ return 0;
+
+ /* dynptr: fixed 16-byte on-stack representation */
+ if (is_kfunc_arg_dynptr(btf, &args[arg])) {
+ size = BPF_DYNPTR_SIZE;
+ goto out;
+ }
+
+ /* ptr + __sz/__szk pair: size is in the next register */
+ if (arg + 1 < nargs &&
+ (btf_param_match_suffix(btf, &args[arg + 1], "__sz") ||
+ btf_param_match_suffix(btf, &args[arg + 1], "__szk"))) {
+ int size_reg = BPF_REG_1 + arg + 1;
+
+ if (aux->const_reg_mask & BIT(size_reg)) {
+ size = (s64)aux->const_reg_vals[size_reg];
+ goto out;
+ }
+ return MAX_BPF_STACK;
+ }
+
+ /* fixed-size pointed-to type: resolve via BTF */
+ ref_t = btf_type_skip_modifiers(btf, t->type, NULL);
+ if (!IS_ERR(btf_resolve_size(btf, ref_t, &type_size))) {
+ size = type_size;
+ goto out;
+ }
+
+ return S64_MIN;
+out:
+ /* KF_ITER_NEW kfuncs initialize the iterator state at arg 0 */
+ if (arg == 0 && meta.kfunc_flags & KF_ITER_NEW)
+ return -size;
+ if (is_kfunc_arg_uninit(btf, &args[arg]))
+ return -size;
+ return size;
+}
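+
+/*
+ * Illustrative example: for a KF_ITER_NEW kfunc such as
+ * bpf_iter_num_new(), arg 0 is the iterator pointer, so the result is
+ * -sizeof(struct bpf_iter_num), i.e. a write initializing the state.
+ * For a buffer paired with a __sz argument, the size is taken from the
+ * next register when it is a known constant at this call site.
+ */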
+
/* check special kfuncs and return:
* 1 - not fall-through to 'else' branch, continue verification
* 0 - fall-through to 'else' branch
@@ -13940,13 +12795,12 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca
if (meta->btf != btf_vmlinux)
return 0;
- if (meta->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
- meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+ if (is_bpf_obj_new_kfunc(meta->func_id) || is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
struct btf_struct_meta *struct_meta;
struct btf *ret_btf;
u32 ret_btf_id;
- if (meta->func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
+ if (is_bpf_obj_new_kfunc(meta->func_id) && !bpf_global_ma_set)
return -ENOMEM;
if (((u64)(u32)meta->arg_constant.value) != meta->arg_constant.value) {
@@ -13969,7 +12823,7 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca
return -EINVAL;
}
- if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+ if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n",
ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
@@ -13999,7 +12853,7 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca
}
struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
- if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+ if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
return -EINVAL;
@@ -14015,12 +12869,12 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca
regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
regs[BPF_REG_0].btf = ret_btf;
regs[BPF_REG_0].btf_id = ret_btf_id;
- if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
+ if (is_bpf_percpu_obj_new_kfunc(meta->func_id))
regs[BPF_REG_0].type |= MEM_PERCPU;
insn_aux->obj_new_size = ret_t->size;
insn_aux->kptr_struct_meta = struct_meta;
- } else if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
+ } else if (is_bpf_refcount_acquire_kfunc(meta->func_id)) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
regs[BPF_REG_0].btf = meta->arg_btf;
@@ -14106,6 +12960,8 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca
}
static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
+static int process_bpf_exit_full(struct bpf_verifier_env *env,
+ bool *do_print_state, bool exception_exit);
static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
@@ -14125,7 +12981,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (!insn->imm)
return 0;
- err = fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
+ err = bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
if (err == -EACCES && meta.func_name)
verbose(env, "calling kernel function %s is not allowed\n", meta.func_name);
if (err)
@@ -14134,7 +12990,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
func_name = meta.func_name;
insn_aux = &env->insn_aux_data[insn_idx];
- insn_aux->is_iter_next = is_iter_next_kfunc(&meta);
+ insn_aux->is_iter_next = bpf_is_iter_next_kfunc(&meta);
if (!insn->off &&
(insn->imm == special_kfunc_list[KF_bpf_res_spin_lock] ||
@@ -14152,7 +13008,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* Clear r0-r5 registers in forked state */
for (i = 0; i < CALLER_SAVED_REGS; i++)
- mark_reg_not_init(env, regs, caller_saved[i]);
+ bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
mark_reg_unknown(env, regs, BPF_REG_0);
err = __mark_reg_s32_range(env, regs, BPF_REG_0, -MAX_ERRNO, -1);
@@ -14171,7 +13027,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -EACCES;
}
- sleepable = is_kfunc_sleepable(&meta);
+ sleepable = bpf_is_kfunc_sleepable(&meta);
if (sleepable && !in_sleepable(env)) {
verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
return -EACCES;
@@ -14186,7 +13042,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (err < 0)
return err;
- if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+ if (is_bpf_rbtree_add_kfunc(meta.func_id)) {
err = push_callback_call(env, insn, insn_idx, meta.subprogno,
set_rbtree_add_callback_state);
if (err) {
@@ -14246,34 +13102,24 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
}));
}
- } else if (sleepable && env->cur_state->active_rcu_locks) {
- verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
- return -EACCES;
- }
-
- if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
- verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
- return -EACCES;
- }
-
- if (env->cur_state->active_preempt_locks) {
- if (preempt_disable) {
- env->cur_state->active_preempt_locks++;
- } else if (preempt_enable) {
- env->cur_state->active_preempt_locks--;
- } else if (sleepable) {
- verbose(env, "kernel func %s is sleepable within non-preemptible region\n", func_name);
- return -EACCES;
- }
} else if (preempt_disable) {
env->cur_state->active_preempt_locks++;
} else if (preempt_enable) {
- verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name);
- return -EINVAL;
+ if (env->cur_state->active_preempt_locks == 0) {
+ verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name);
+ return -EINVAL;
+ }
+ env->cur_state->active_preempt_locks--;
}
- if (env->cur_state->active_irq_id && sleepable) {
- verbose(env, "kernel func %s is sleepable within IRQ-disabled region\n", func_name);
+ if (sleepable && !in_sleepable_context(env)) {
+ verbose(env, "kernel func %s is sleepable within %s\n",
+ func_name, non_sleepable_context_description(env));
+ return -EACCES;
+ }
+
+ if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
+ verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
return -EACCES;
}
@@ -14300,11 +13146,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return err;
}
- if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
- meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
- meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+ if (is_bpf_list_push_kfunc(meta.func_id) || is_bpf_rbtree_add_kfunc(meta.func_id)) {
release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
- insn_aux->insert_off = regs[BPF_REG_2].off;
+ insn_aux->insert_off = regs[BPF_REG_2].var_off.value;
insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
err = ref_convert_owning_non_owning(env, release_ref_obj_id);
if (err) {
@@ -14342,7 +13186,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
for (i = 0; i < CALLER_SAVED_REGS; i++) {
u32 regno = caller_saved[i];
- mark_reg_not_init(env, regs, regno);
+ bpf_mark_reg_not_init(env, &regs[regno]);
regs[regno].subreg_def = DEF_NOT_SUBREG;
}
@@ -14350,11 +13194,10 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);
if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
- /* Only exception is bpf_obj_new_impl */
if (meta.btf != btf_vmlinux ||
- (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
- meta.func_id != special_kfunc_list[KF_bpf_percpu_obj_new_impl] &&
- meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
+ (!is_bpf_obj_new_kfunc(meta.func_id) &&
+ !is_bpf_percpu_obj_new_kfunc(meta.func_id) &&
+ !is_bpf_refcount_acquire_kfunc(meta.func_id))) {
verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
return -EINVAL;
}
@@ -14414,7 +13257,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache])
type |= PTR_UNTRUSTED;
else if (is_kfunc_rcu_protected(&meta) ||
- (is_iter_next_kfunc(&meta) &&
+ (bpf_is_iter_next_kfunc(&meta) &&
(get_iter_from_state(env->cur_state, &meta)
->type & MEM_RCU))) {
/*
@@ -14465,8 +13308,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
regs[BPF_REG_0].id = ++env->id_gen;
} else if (btf_type_is_void(t)) {
if (meta.btf == btf_vmlinux) {
- if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
- meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
+ if (is_bpf_obj_drop_kfunc(meta.func_id) ||
+ is_bpf_percpu_obj_drop_kfunc(meta.func_id)) {
insn_aux->kptr_struct_meta =
btf_find_struct_meta(meta.arg_btf,
meta.arg_btf_id);
@@ -14474,7 +13317,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
}
- if (is_kfunc_pkt_changing(&meta))
+ if (bpf_is_kfunc_pkt_changing(&meta))
clear_all_pkt_pointers(env);
nargs = btf_type_vlen(meta.func_proto);
@@ -14486,11 +13329,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (btf_type_is_ptr(t))
mark_btf_func_reg_size(env, regno, sizeof(void *));
else
- /* scalar. ensured by btf_check_kfunc_arg_match() */
+ /* scalar. ensured by check_kfunc_args() */
mark_btf_func_reg_size(env, regno, t->size);
}
- if (is_iter_next_kfunc(&meta)) {
+ if (bpf_is_iter_next_kfunc(&meta)) {
err = process_iter_next_call(env, insn_idx, &meta);
if (err)
return err;
@@ -14499,12 +13342,15 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie])
env->prog->call_session_cookie = true;
+ if (is_bpf_throw_kfunc(insn))
+ return process_bpf_exit_full(env, NULL, true);
+
return 0;
}
-static bool check_reg_sane_offset(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg,
- enum bpf_reg_type type)
+static bool check_reg_sane_offset_scalar(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg,
+ enum bpf_reg_type type)
{
bool known = tnum_is_const(reg->var_off);
s64 val = reg->var_off.value;
@@ -14516,12 +13362,6 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env,
return false;
}
- if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
- verbose(env, "%s pointer offset %d is not allowed\n",
- reg_type_str(env, type), reg->off);
- return false;
- }
-
if (smin == S64_MIN) {
verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
reg_type_str(env, type));
@@ -14537,6 +13377,29 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env,
return true;
}
+static bool check_reg_sane_offset_ptr(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg,
+ enum bpf_reg_type type)
+{
+ bool known = tnum_is_const(reg->var_off);
+ s64 val = reg->var_off.value;
+ s64 smin = reg->smin_value;
+
+ if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
+ verbose(env, "%s pointer offset %lld is not allowed\n",
+ reg_type_str(env, type), val);
+ return false;
+ }
+
+ if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
+ verbose(env, "%s pointer offset %lld is not allowed\n",
+ reg_type_str(env, type), smin);
+ return false;
+ }
+
+ return true;
+}
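+
+/*
+ * Illustrative example: after "r2 = r10; r2 += K" with a constant K at
+ * or beyond BPF_MAX_VAR_OFF, the resulting var_off falls outside the
+ * allowed window and the ALU op is rejected here rather than at a
+ * later dereference.
+ */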
+
enum {
REASON_BOUNDS = -1,
REASON_TYPE = -2,
@@ -14558,13 +13421,11 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
* currently prohibited for unprivileged.
*/
max = MAX_BPF_STACK + mask_to_left;
- ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
+ ptr_limit = -ptr_reg->var_off.value;
break;
case PTR_TO_MAP_VALUE:
max = ptr_reg->map_ptr->value_size;
- ptr_limit = (mask_to_left ?
- ptr_reg->smin_value :
- ptr_reg->umax_value) + ptr_reg->off;
+ ptr_limit = mask_to_left ? ptr_reg->smin_value : ptr_reg->umax_value;
break;
default:
return REASON_TYPE;
@@ -14795,9 +13656,6 @@ static int sanitize_err(struct bpf_verifier_env *env,
* Variable offset is prohibited for unprivileged mode for simplicity since it
* requires corresponding support in Spectre masking for stack ALU. See also
* retrieve_ptr_limit().
- *
- *
- * 'off' includes 'reg->off'.
*/
static int check_stack_access_for_ptr_arithmetic(
struct bpf_verifier_env *env,
@@ -14838,11 +13696,11 @@ static int sanitize_check_bounds(struct bpf_verifier_env *env,
switch (dst_reg->type) {
case PTR_TO_STACK:
if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
- dst_reg->off + dst_reg->var_off.value))
+ dst_reg->var_off.value))
return -EACCES;
break;
case PTR_TO_MAP_VALUE:
- if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
+ if (check_map_access(env, dst, 0, 1, false, ACCESS_HELPER)) {
verbose(env, "R%d pointer arithmetic of map value goes out of range, "
"prohibited for !root\n", dst);
return -EACCES;
@@ -14950,8 +13808,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst_reg->type = ptr_reg->type;
dst_reg->id = ptr_reg->id;
- if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
- !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
+ if (!check_reg_sane_offset_scalar(env, off_reg, ptr_reg->type) ||
+ !check_reg_sane_offset_ptr(env, ptr_reg, ptr_reg->type))
return -EINVAL;
/* pointer types do not carry 32-bit bounds at the moment. */
@@ -14966,23 +13824,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
switch (opcode) {
case BPF_ADD:
- /* We can take a fixed offset as long as it doesn't overflow
- * the s32 'off' field
- */
- if (known && (ptr_reg->off + smin_val ==
- (s64)(s32)(ptr_reg->off + smin_val))) {
- /* pointer += K. Accumulate it into fixed offset */
- dst_reg->smin_value = smin_ptr;
- dst_reg->smax_value = smax_ptr;
- dst_reg->umin_value = umin_ptr;
- dst_reg->umax_value = umax_ptr;
- dst_reg->var_off = ptr_reg->var_off;
- dst_reg->off = ptr_reg->off + smin_val;
- dst_reg->raw = ptr_reg->raw;
- break;
- }
- /* A new variable offset is created. Note that off_reg->off
- * == 0, since it's a scalar.
+ /*
* dst_reg gets the pointer type and since some positive
* integer value was added to the pointer, give it a new 'id'
* if it's a PTR_TO_PACKET.
@@ -15001,12 +13843,18 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst_reg->umax_value = U64_MAX;
}
dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
- dst_reg->off = ptr_reg->off;
dst_reg->raw = ptr_reg->raw;
if (reg_is_pkt_pointer(ptr_reg)) {
- dst_reg->id = ++env->id_gen;
- /* something was added to pkt_ptr, set range to zero */
- memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
+ if (!known)
+ dst_reg->id = ++env->id_gen;
+ /*
+ * Clear range for unknown addends since we can't know
+ * where the pkt pointer ended up. Also clear AT_PKT_END /
+			 * BEYOND_PKT_END from a prior comparison, as any pointer
+ * arithmetic invalidates them.
+ */
+ if (!known || dst_reg->range < 0)
+ memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
}
break;
case BPF_SUB:
@@ -15025,19 +13873,6 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst);
return -EACCES;
}
- if (known && (ptr_reg->off - smin_val ==
- (s64)(s32)(ptr_reg->off - smin_val))) {
- /* pointer -= K. Subtract it from fixed offset */
- dst_reg->smin_value = smin_ptr;
- dst_reg->smax_value = smax_ptr;
- dst_reg->umin_value = umin_ptr;
- dst_reg->umax_value = umax_ptr;
- dst_reg->var_off = ptr_reg->var_off;
- dst_reg->id = ptr_reg->id;
- dst_reg->off = ptr_reg->off - smin_val;
- dst_reg->raw = ptr_reg->raw;
- break;
- }
/* A new variable offset is created. If the subtrahend is known
* nonnegative, then any reg->range we had before is still good.
*/
@@ -15057,12 +13892,18 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst_reg->umax_value = umax_ptr - umin_val;
}
dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
- dst_reg->off = ptr_reg->off;
dst_reg->raw = ptr_reg->raw;
if (reg_is_pkt_pointer(ptr_reg)) {
- dst_reg->id = ++env->id_gen;
- /* something was added to pkt_ptr, set range to zero */
- if (smin_val < 0)
+ if (!known)
+ dst_reg->id = ++env->id_gen;
+ /*
+ * Clear range if the subtrahend may be negative since
+			 * the pkt pointer could move past its bounds. A nonnegative
+			 * subtrahend moves it backwards, keeping any existing range
+			 * intact. Also clear AT_PKT_END / BEYOND_PKT_END from a
+			 * prior comparison, as arithmetic invalidates them.
+ */
+ if ((!known && smin_val < 0) || dst_reg->range < 0)
memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
}
break;
@@ -15080,7 +13921,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
return -EACCES;
}
- if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
+ if (!check_reg_sane_offset_ptr(env, dst_reg, ptr_reg->type))
return -EINVAL;
reg_bounds_sync(dst_reg);
bounds_ret = sanitize_check_bounds(env, insn, dst_reg);
@@ -15937,7 +14778,7 @@ static void scalar_byte_swap(struct bpf_reg_state *dst_reg, struct bpf_insn *ins
* any existing ties and avoid incorrect bounds propagation.
*/
if (need_bswap || insn->imm == 16 || insn->imm == 32)
- dst_reg->id = 0;
+ clear_scalar_id(dst_reg);
if (need_bswap) {
if (insn->imm == 16)
@@ -16210,11 +15051,20 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
int err;
dst_reg = &regs[insn->dst_reg];
- src_reg = NULL;
+ if (BPF_SRC(insn->code) == BPF_X)
+ src_reg = &regs[insn->src_reg];
+ else
+ src_reg = NULL;
- if (dst_reg->type == PTR_TO_ARENA) {
+	/* Case where at least one operand is an arena pointer. */
+ if (dst_reg->type == PTR_TO_ARENA || (src_reg && src_reg->type == PTR_TO_ARENA)) {
struct bpf_insn_aux_data *aux = cur_aux(env);
+ if (dst_reg->type != PTR_TO_ARENA)
+ *dst_reg = *src_reg;
+
+ dst_reg->subreg_def = env->insn_idx + 1;
+
if (BPF_CLASS(insn->code) == BPF_ALU64)
/*
* 32-bit operations zero upper bits automatically.
@@ -16230,7 +15080,6 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
ptr_reg = dst_reg;
if (BPF_SRC(insn->code) == BPF_X) {
- src_reg = &regs[insn->src_reg];
if (src_reg->type != SCALAR_VALUE) {
if (dst_reg->type != SCALAR_VALUE) {
/* Combining two pointers by any ALU op yields
@@ -16313,7 +15162,8 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
*/
if (env->bpf_capable &&
(BPF_OP(insn->code) == BPF_ADD || BPF_OP(insn->code) == BPF_SUB) &&
- dst_reg->id && is_reg_const(src_reg, alu32)) {
+ dst_reg->id && is_reg_const(src_reg, alu32) &&
+ !(BPF_SRC(insn->code) == BPF_X && insn->src_reg == insn->dst_reg)) {
u64 val = reg_const_value(src_reg, alu32);
s32 off;
@@ -16338,21 +15188,20 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
* we cannot accumulate another val into rx->off.
*/
clear_id:
- dst_reg->off = 0;
- dst_reg->id = 0;
+ clear_scalar_id(dst_reg);
} else {
if (alu32)
dst_reg->id |= BPF_ADD_CONST32;
else
dst_reg->id |= BPF_ADD_CONST64;
- dst_reg->off = off;
+ dst_reg->delta = off;
}
} else {
/*
* Make sure ID is cleared otherwise dst_reg min/max could be
* incorrectly propagated into other registers by sync_linked_regs()
*/
- dst_reg->id = 0;
+ clear_scalar_id(dst_reg);
}
return 0;
}
@@ -16365,23 +15214,6 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
int err;
if (opcode == BPF_END || opcode == BPF_NEG) {
- if (opcode == BPF_NEG) {
- if (BPF_SRC(insn->code) != BPF_K ||
- insn->src_reg != BPF_REG_0 ||
- insn->off != 0 || insn->imm != 0) {
- verbose(env, "BPF_NEG uses reserved fields\n");
- return -EINVAL;
- }
- } else {
- if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
- (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
- (BPF_CLASS(insn->code) == BPF_ALU64 &&
- BPF_SRC(insn->code) != BPF_TO_LE)) {
- verbose(env, "BPF_END uses reserved fields\n");
- return -EINVAL;
- }
- }
-
/* check src operand */
err = check_reg_arg(env, insn->dst_reg, SRC_OP);
if (err)
@@ -16394,8 +15226,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
/* check dest operand */
- if ((opcode == BPF_NEG || opcode == BPF_END) &&
- regs[insn->dst_reg].type == SCALAR_VALUE) {
+ if (regs[insn->dst_reg].type == SCALAR_VALUE) {
err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
err = err ?: adjust_scalar_min_max_vals(env, insn,
&regs[insn->dst_reg],
@@ -16409,38 +15240,17 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
} else if (opcode == BPF_MOV) {
if (BPF_SRC(insn->code) == BPF_X) {
- if (BPF_CLASS(insn->code) == BPF_ALU) {
- if ((insn->off != 0 && insn->off != 8 && insn->off != 16) ||
- insn->imm) {
- verbose(env, "BPF_MOV uses reserved fields\n");
- return -EINVAL;
- }
- } else if (insn->off == BPF_ADDR_SPACE_CAST) {
- if (insn->imm != 1 && insn->imm != 1u << 16) {
- verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
- return -EINVAL;
- }
+ if (insn->off == BPF_ADDR_SPACE_CAST) {
if (!env->prog->aux->arena) {
verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n");
return -EINVAL;
}
- } else {
- if ((insn->off != 0 && insn->off != 8 && insn->off != 16 &&
- insn->off != 32) || insn->imm) {
- verbose(env, "BPF_MOV uses reserved fields\n");
- return -EINVAL;
- }
}
/* check src operand */
err = check_reg_arg(env, insn->src_reg, SRC_OP);
if (err)
return err;
- } else {
- if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
- verbose(env, "BPF_MOV uses reserved fields\n");
- return -EINVAL;
- }
}
/* check dest operand, mark as required later */
@@ -16483,7 +15293,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
assign_scalar_id_before_mov(env, src_reg);
copy_register_state(dst_reg, src_reg);
if (!no_sext)
- dst_reg->id = 0;
+ clear_scalar_id(dst_reg);
coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
dst_reg->subreg_def = DEF_NOT_SUBREG;
} else {
@@ -16509,7 +15319,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
* propagated into src_reg by sync_linked_regs()
*/
if (!is_src_reg_u32)
- dst_reg->id = 0;
+ clear_scalar_id(dst_reg);
dst_reg->subreg_def = env->insn_idx + 1;
} else {
/* case: W1 = (s8, s16)W2 */
@@ -16519,7 +15329,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
assign_scalar_id_before_mov(env, src_reg);
copy_register_state(dst_reg, src_reg);
if (!no_sext)
- dst_reg->id = 0;
+ clear_scalar_id(dst_reg);
dst_reg->subreg_def = env->insn_idx + 1;
coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
}
@@ -16546,28 +15356,13 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
}
- } else if (opcode > BPF_END) {
- verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
- return -EINVAL;
-
} else { /* all other ALU ops: and, sub, xor, add, ... */
if (BPF_SRC(insn->code) == BPF_X) {
- if (insn->imm != 0 || (insn->off != 0 && insn->off != 1) ||
- (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
- verbose(env, "BPF_ALU uses reserved fields\n");
- return -EINVAL;
- }
/* check src1 operand */
err = check_reg_arg(env, insn->src_reg, SRC_OP);
if (err)
return err;
- } else {
- if (insn->src_reg != BPF_REG_0 || (insn->off != 0 && insn->off != 1) ||
- (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
- verbose(env, "BPF_ALU uses reserved fields\n");
- return -EINVAL;
- }
}
/* check src2 operand */
@@ -16610,19 +15405,17 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
struct bpf_reg_state *reg;
int new_range;
- if (dst_reg->off < 0 ||
- (dst_reg->off == 0 && range_right_open))
+ if (dst_reg->umax_value == 0 && range_right_open)
/* This doesn't give us any range */
return;
- if (dst_reg->umax_value > MAX_PACKET_OFF ||
- dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
+ if (dst_reg->umax_value > MAX_PACKET_OFF)
/* Risk of overflow. For instance, ptr + (1<<63) may be less
* than pkt_end, but that's because it's also less than pkt.
*/
return;
- new_range = dst_reg->off;
+ new_range = dst_reg->umax_value;
if (range_right_open)
new_range++;
@@ -16671,7 +15464,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
/* If our ids match, then we must have the same max_value. And we
* don't care about the other reg's fixed offset, since if it's too big
* the range won't allow anything.
- * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
+ * dst_reg->umax_value is known < MAX_PACKET_OFF, therefore it fits in a u16.
*/
bpf_for_each_reg_in_vstate(vstate, state, reg, ({
if (reg->type == type && reg->id == dst_reg->id)
@@ -16680,11 +15473,50 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
}));
}
+static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
+ u8 opcode, bool is_jmp32);
+static u8 rev_opcode(u8 opcode);
+
+/*
+ * Learn which branches are live by simulating bounds refinement on both branches.
+ * regs_refine_cond_op() is sound, so if refinement produces ill-formed register bounds
+ * for a branch, that branch is dead.
+ */
+static int simulate_both_branches_taken(struct bpf_verifier_env *env, u8 opcode, bool is_jmp32)
+{
+ /* Fallthrough (FALSE) branch */
+ regs_refine_cond_op(&env->false_reg1, &env->false_reg2, rev_opcode(opcode), is_jmp32);
+ reg_bounds_sync(&env->false_reg1);
+ reg_bounds_sync(&env->false_reg2);
+ /*
+ * If there is a range bounds violation in *any* of the abstract values in either
+ * reg_states in the FALSE branch (i.e. reg1, reg2), the FALSE branch must be dead. Only
+	 * reg_states in the FALSE branch (i.e. false_reg1, false_reg2), the FALSE branch
+	 * must be dead. Only the TRUE branch will be taken.
+ if (range_bounds_violation(&env->false_reg1) || range_bounds_violation(&env->false_reg2))
+ return 1;
+
+ /* Jump (TRUE) branch */
+ regs_refine_cond_op(&env->true_reg1, &env->true_reg2, opcode, is_jmp32);
+ reg_bounds_sync(&env->true_reg1);
+ reg_bounds_sync(&env->true_reg2);
+ /*
+ * If there is a range bounds violation in *any* of the abstract values in either
+ * reg_states in the TRUE branch (i.e. true_reg1, true_reg2), the TRUE branch must be dead.
+	 * Only the FALSE branch will be taken.
+ */
+ if (range_bounds_violation(&env->true_reg1) || range_bounds_violation(&env->true_reg2))
+ return 0;
+
+	/* Both branches are possible; we can't determine which one will be taken. */
+ return -1;
+}
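+
+/*
+ * Illustrative example: if r1's tnum says its low bit is set (r1 is
+ * odd) but plain min/max checks can't decide "if r1 == 4", refining
+ * the TRUE branch intersects r1 with the constant 4 and produces
+ * contradictory bounds, proving the TRUE branch dead (return 0).
+ */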
+
/*
* <reg1> <op> <reg2>, currently assuming reg2 is a constant
*/
-static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
- u8 opcode, bool is_jmp32)
+static int is_scalar_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *reg1,
+ struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32)
{
struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off;
struct tnum t2 = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off;
@@ -16836,7 +15668,7 @@ static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_sta
break;
}
- return -1;
+ return simulate_both_branches_taken(env, opcode, is_jmp32);
}
static int flip_opcode(u32 opcode)
@@ -16907,8 +15739,8 @@ static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
 * -1 - unknown. Example: "if (reg1 < 5)" is unknown when the register's
 * value range is [0,10]
*/
-static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
- u8 opcode, bool is_jmp32)
+static int is_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *reg1,
+ struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32)
{
if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32)
return is_pkt_ptr_branch_taken(reg1, reg2, opcode);
@@ -16946,7 +15778,7 @@ static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg
}
/* now deal with two scalars, but not necessarily constants */
- return is_scalar_branch_taken(reg1, reg2, opcode, is_jmp32);
+ return is_scalar_branch_taken(env, reg1, reg2, opcode, is_jmp32);
}
/* Opcode that corresponds to a *false* branch condition.
@@ -17037,8 +15869,8 @@ static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state
/* u32_min_value is not equal to 0xffffffff at this point,
* because otherwise u32_max_value is 0xffffffff as well,
* in such a case both reg1 and reg2 would be constants,
- * jump would be predicted and reg_set_min_max() won't
- * be called.
+ * jump would be predicted and regs_refine_cond_op()
+ * wouldn't be called.
*
* Same reasoning works for all {u,s}{min,max}{32,64} cases
* below.
@@ -17145,49 +15977,15 @@ static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state
}
}
-/* Adjusts the register min/max values in the case that the dst_reg and
- * src_reg are both SCALAR_VALUE registers (or we are simply doing a BPF_K
- * check, in which case we have a fake SCALAR_VALUE representing insn->imm).
- * Technically we can do similar adjustments for pointers to the same object,
- * but we don't support that right now.
- */
-static int reg_set_min_max(struct bpf_verifier_env *env,
- struct bpf_reg_state *true_reg1,
- struct bpf_reg_state *true_reg2,
- struct bpf_reg_state *false_reg1,
- struct bpf_reg_state *false_reg2,
- u8 opcode, bool is_jmp32)
+/* Check for invariant violations on the registers for both branches of a condition */
+static int regs_bounds_sanity_check_branches(struct bpf_verifier_env *env)
{
int err;
- /* If either register is a pointer, we can't learn anything about its
- * variable offset from the compare (unless they were a pointer into
- * the same object, but we don't bother with that).
- */
- if (false_reg1->type != SCALAR_VALUE || false_reg2->type != SCALAR_VALUE)
- return 0;
-
- /* We compute branch direction for same SCALAR_VALUE registers in
- * is_scalar_branch_taken(). For unknown branch directions (e.g., BPF_JSET)
- * on the same registers, we don't need to adjust the min/max values.
- */
- if (false_reg1 == false_reg2)
- return 0;
-
- /* fallthrough (FALSE) branch */
- regs_refine_cond_op(false_reg1, false_reg2, rev_opcode(opcode), is_jmp32);
- reg_bounds_sync(false_reg1);
- reg_bounds_sync(false_reg2);
-
- /* jump (TRUE) branch */
- regs_refine_cond_op(true_reg1, true_reg2, opcode, is_jmp32);
- reg_bounds_sync(true_reg1);
- reg_bounds_sync(true_reg2);
-
- err = reg_bounds_sanity_check(env, true_reg1, "true_reg1");
- err = err ?: reg_bounds_sanity_check(env, true_reg2, "true_reg2");
- err = err ?: reg_bounds_sanity_check(env, false_reg1, "false_reg1");
- err = err ?: reg_bounds_sanity_check(env, false_reg2, "false_reg2");
+ err = reg_bounds_sanity_check(env, &env->true_reg1, "true_reg1");
+ err = err ?: reg_bounds_sanity_check(env, &env->true_reg2, "true_reg2");
+ err = err ?: reg_bounds_sanity_check(env, &env->false_reg1, "false_reg1");
+ err = err ?: reg_bounds_sanity_check(env, &env->false_reg2, "false_reg2");
return err;
}
@@ -17197,29 +15995,24 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
{
if (type_may_be_null(reg->type) && reg->id == id &&
(is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
- /* Old offset (both fixed and variable parts) should have been
- * known-zero, because we don't allow pointer arithmetic on
- * pointers that might be NULL. If we see this happening, don't
- * convert the register.
+ /* Old offset should have been known-zero, because we don't
+ * allow pointer arithmetic on pointers that might be NULL.
+ * If we see this happening, don't convert the register.
*
* But in some cases, some helpers that return local kptrs
- * advance offset for the returned pointer. In those cases, it
- * is fine to expect to see reg->off.
+		 * advance the offset of the returned pointer. In those cases,
+		 * it is fine to see a nonzero reg->var_off.
*/
- if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
- return;
if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
- WARN_ON_ONCE(reg->off))
+ WARN_ON_ONCE(!tnum_equals_const(reg->var_off, 0)))
return;
-
if (is_null) {
- reg->type = SCALAR_VALUE;
/* We don't need id and ref_obj_id from this point
* onwards anymore, thus we should better reset it,
* so that state pruning has chances to take effect.
*/
- reg->id = 0;
- reg->ref_obj_id = 0;
+ __mark_reg_known_zero(reg);
+ reg->type = SCALAR_VALUE;
return;
}
@@ -17380,7 +16173,7 @@ static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_st
e->is_reg = is_reg;
e->regno = spi_or_reg;
} else {
- reg->id = 0;
+ clear_scalar_id(reg);
}
}
@@ -17401,7 +16194,7 @@ static void collect_linked_regs(struct bpf_verifier_env *env,
id = id & ~BPF_ADD_CONST;
for (i = vstate->curframe; i >= 0; i--) {
- live_regs = aux[frame_insn_idx(vstate, i)].live_regs_before;
+ live_regs = aux[bpf_frame_insn_idx(vstate, i)].live_regs_before;
func = vstate->frame[i];
for (j = 0; j < BPF_REG_FP; j++) {
if (!(live_regs & BIT(j)))
@@ -17410,7 +16203,7 @@ static void collect_linked_regs(struct bpf_verifier_env *env,
__collect_linked_regs(linked_regs, reg, id, i, j, true);
}
for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
- if (!is_spilled_reg(&func->stack[j]))
+ if (!bpf_is_spilled_reg(&func->stack[j]))
continue;
reg = &func->stack[j].spilled_ptr;
__collect_linked_regs(linked_regs, reg, id, i, j, false);
@@ -17444,18 +16237,18 @@ static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_s
if (((reg->id ^ known_reg->id) & BPF_ADD_CONST) == BPF_ADD_CONST)
continue;
if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
- reg->off == known_reg->off) {
+ reg->delta == known_reg->delta) {
s32 saved_subreg_def = reg->subreg_def;
copy_register_state(reg, known_reg);
reg->subreg_def = saved_subreg_def;
} else {
s32 saved_subreg_def = reg->subreg_def;
- s32 saved_off = reg->off;
+ s32 saved_off = reg->delta;
u32 saved_id = reg->id;
fake_reg.type = SCALAR_VALUE;
- __mark_reg_known(&fake_reg, (s64)reg->off - (s64)known_reg->off);
+ __mark_reg_known(&fake_reg, (s64)reg->delta - (s64)known_reg->delta);
/* reg = known_reg; reg += delta */
copy_register_state(reg, known_reg);
@@ -17463,7 +16256,7 @@ static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_s
* Must preserve off, id and subreg_def flag,
* otherwise another sync_linked_regs() will be incorrect.
*/
- reg->off = saved_off;
+ reg->delta = saved_off;
reg->id = saved_id;
reg->subreg_def = saved_subreg_def;
@@ -17506,12 +16299,6 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
int idx = *insn_idx;
- if (insn->code != (BPF_JMP | BPF_JCOND) ||
- insn->src_reg != BPF_MAY_GOTO ||
- insn->dst_reg || insn->imm) {
- verbose(env, "invalid may_goto imm %d\n", insn->imm);
- return -EINVAL;
- }
prev_st = find_prev_entry(env, cur_st->parent, idx);
/* branch out 'fallthrough' insn as a new state to explore */
@@ -17533,11 +16320,6 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
dst_reg = &regs[insn->dst_reg];
if (BPF_SRC(insn->code) == BPF_X) {
- if (insn->imm != 0) {
- verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
- return -EINVAL;
- }
-
/* check src1 operand */
err = check_reg_arg(env, insn->src_reg, SRC_OP);
if (err)
@@ -17556,10 +16338,6 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
if (dst_reg->type == PTR_TO_STACK)
insn_flags |= INSN_F_DST_REG_STACK;
} else {
- if (insn->src_reg != BPF_REG_0) {
- verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
- return -EINVAL;
- }
src_reg = &env->fake_reg[0];
memset(src_reg, 0, sizeof(*src_reg));
src_reg->type = SCALAR_VALUE;
@@ -17570,13 +16348,17 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
}
if (insn_flags) {
- err = push_jmp_history(env, this_branch, insn_flags, 0);
+ err = bpf_push_jmp_history(env, this_branch, insn_flags, 0);
if (err)
return err;
}
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32);
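+	/*
+	 * Work on scratch copies: is_branch_taken() may refine them via
+	 * simulate_both_branches_taken(), and the surviving branch states
+	 * are copied into the two branch states below.
+	 */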
+ copy_register_state(&env->false_reg1, dst_reg);
+ copy_register_state(&env->false_reg2, src_reg);
+ copy_register_state(&env->true_reg1, dst_reg);
+ copy_register_state(&env->true_reg2, src_reg);
+ pred = is_branch_taken(env, dst_reg, src_reg, opcode, is_jmp32);
if (pred >= 0) {
/* If we get here with a dst_reg pointer type it is because
* above is_branch_taken() special cased the 0 comparison.
@@ -17630,7 +16412,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
if (dst_reg->type == SCALAR_VALUE && dst_reg->id)
collect_linked_regs(env, this_branch, dst_reg->id, &linked_regs);
if (linked_regs.cnt > 1) {
- err = push_jmp_history(env, this_branch, 0, linked_regs_pack(&linked_regs));
+ err = bpf_push_jmp_history(env, this_branch, 0, linked_regs_pack(&linked_regs));
if (err)
return err;
}
@@ -17640,27 +16422,16 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
return PTR_ERR(other_branch);
other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
- if (BPF_SRC(insn->code) == BPF_X) {
- err = reg_set_min_max(env,
- &other_branch_regs[insn->dst_reg],
- &other_branch_regs[insn->src_reg],
- dst_reg, src_reg, opcode, is_jmp32);
- } else /* BPF_SRC(insn->code) == BPF_K */ {
- /* reg_set_min_max() can mangle the fake_reg. Make a copy
- * so that these are two different memory locations. The
- * src_reg is not used beyond here in context of K.
- */
- memcpy(&env->fake_reg[1], &env->fake_reg[0],
- sizeof(env->fake_reg[0]));
- err = reg_set_min_max(env,
- &other_branch_regs[insn->dst_reg],
- &env->fake_reg[0],
- dst_reg, &env->fake_reg[1],
- opcode, is_jmp32);
- }
+ err = regs_bounds_sanity_check_branches(env);
if (err)
return err;
+ copy_register_state(dst_reg, &env->false_reg1);
+ copy_register_state(src_reg, &env->false_reg2);
+ copy_register_state(&other_branch_regs[insn->dst_reg], &env->true_reg1);
+ if (BPF_SRC(insn->code) == BPF_X)
+ copy_register_state(&other_branch_regs[insn->src_reg], &env->true_reg2);
+
if (BPF_SRC(insn->code) == BPF_X &&
src_reg->type == SCALAR_VALUE && src_reg->id &&
!WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
@@ -17713,12 +16484,15 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
}
/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
+	 * Also perform the same detection when R is compared against a
+	 * register whose value is known to be 0.
* NOTE: these optimizations below are related with pointer comparison
* which will never be JMP32.
*/
- if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
- insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
- type_may_be_null(dst_reg->type)) {
+ if (!is_jmp32 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
+ type_may_be_null(dst_reg->type) &&
+ ((BPF_SRC(insn->code) == BPF_K && insn->imm == 0) ||
+ (BPF_SRC(insn->code) == BPF_X && bpf_register_is_null(src_reg)))) {
/* Mark all identical registers in each branch as either
* safe or unknown depending R == 0 or R != 0 conditional.
*/
@@ -17751,10 +16525,6 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
verbose(env, "invalid BPF_LD_IMM insn\n");
return -EINVAL;
}
- if (insn->off != 0) {
- verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
- return -EINVAL;
- }
err = check_reg_arg(env, insn->dst_reg, DST_OP);
if (err)
@@ -17794,8 +16564,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (insn->src_reg == BPF_PSEUDO_FUNC) {
struct bpf_prog_aux *aux = env->prog->aux;
- u32 subprogno = find_subprog(env,
- env->insn_idx + insn->imm + 1);
+ u32 subprogno = bpf_find_subprog(env,
+ env->insn_idx + insn->imm + 1);
if (!aux->func_info) {
verbose(env, "missing btf func_info\n");
@@ -17812,22 +16582,24 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
map = env->used_maps[aux->map_index];
- dst_reg->map_ptr = map;
if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
if (map->map_type == BPF_MAP_TYPE_ARENA) {
__mark_reg_unknown(env, dst_reg);
+ dst_reg->map_ptr = map;
return 0;
}
+ __mark_reg_known(dst_reg, aux->map_off);
dst_reg->type = PTR_TO_MAP_VALUE;
- dst_reg->off = aux->map_off;
+ dst_reg->map_ptr = map;
WARN_ON_ONCE(map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
map->max_entries != 1);
/* We want reg->id to be same (0) as map_value is not distinct */
} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
insn->src_reg == BPF_PSEUDO_MAP_IDX) {
dst_reg->type = CONST_PTR_TO_MAP;
+ dst_reg->map_ptr = map;
} else {
verifier_bug(env, "unexpected src reg value for ldimm64");
return -EFAULT;
@@ -17880,13 +16652,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EFAULT;
}
- if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
- BPF_SIZE(insn->code) == BPF_DW ||
- (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
- verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
- return -EINVAL;
- }
-
/* check whether implicit source operand (register R6) is readable */
err = check_reg_arg(env, ctx_reg, SRC_OP);
if (err)
@@ -17919,7 +16684,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
/* reset caller saved regs to unreadable */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
- mark_reg_not_init(env, regs, caller_saved[i]);
+ bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
}
@@ -17930,107 +16695,59 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
mark_reg_unknown(env, regs, BPF_REG_0);
/* ld_abs load up to 32-bit skb data. */
regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
+ /*
+	 * See bpf_gen_ld_abs(), which emits a hidden BPF_EXIT with r0=0
+	 * that must be explored by the verifier when in a subprog.
+ */
+ if (env->cur_state->curframe) {
+ struct bpf_verifier_state *branch;
+
+ mark_reg_scratched(env, BPF_REG_0);
+ branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
+ if (IS_ERR(branch))
+ return PTR_ERR(branch);
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ err = prepare_func_exit(env, &env->insn_idx);
+ if (err)
+ return err;
+ env->insn_idx--;
+ }
return 0;
}
-static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name)
+
+static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_range *range)
{
- const char *exit_ctx = "At program exit";
- struct tnum enforce_attach_type_range = tnum_unknown;
- const struct bpf_prog *prog = env->prog;
- struct bpf_reg_state *reg = reg_state(env, regno);
- struct bpf_retval_range range = retval_range(0, 1);
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
- int err;
- struct bpf_func_state *frame = env->cur_state->frame[0];
- const bool is_subprog = frame->subprogno;
- bool return_32bit = false;
- const struct btf_type *reg_type, *ret_type = NULL;
- /* LSM and struct_ops func-ptr's return type could be "void" */
- if (!is_subprog || frame->in_exception_callback_fn) {
- switch (prog_type) {
- case BPF_PROG_TYPE_LSM:
- if (prog->expected_attach_type == BPF_LSM_CGROUP)
- /* See below, can be 0 or 0-1 depending on hook. */
- break;
- if (!prog->aux->attach_func_proto->type)
- return 0;
- break;
- case BPF_PROG_TYPE_STRUCT_OPS:
- if (!prog->aux->attach_func_proto->type)
- return 0;
-
- if (frame->in_exception_callback_fn)
- break;
+ /* Default return value range. */
+ *range = retval_range(0, 1);
- /* Allow a struct_ops program to return a referenced kptr if it
- * matches the operator's return type and is in its unmodified
- * form. A scalar zero (i.e., a null pointer) is also allowed.
- */
- reg_type = reg->btf ? btf_type_by_id(reg->btf, reg->btf_id) : NULL;
- ret_type = btf_type_resolve_ptr(prog->aux->attach_btf,
- prog->aux->attach_func_proto->type,
- NULL);
- if (ret_type && ret_type == reg_type && reg->ref_obj_id)
- return __check_ptr_off_reg(env, reg, regno, false);
+ switch (prog_type) {
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+ switch (env->prog->expected_attach_type) {
+ case BPF_CGROUP_UDP4_RECVMSG:
+ case BPF_CGROUP_UDP6_RECVMSG:
+ case BPF_CGROUP_UNIX_RECVMSG:
+ case BPF_CGROUP_INET4_GETPEERNAME:
+ case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_UNIX_GETPEERNAME:
+ case BPF_CGROUP_INET4_GETSOCKNAME:
+ case BPF_CGROUP_INET6_GETSOCKNAME:
+ case BPF_CGROUP_UNIX_GETSOCKNAME:
+ *range = retval_range(1, 1);
+ break;
+ case BPF_CGROUP_INET4_BIND:
+ case BPF_CGROUP_INET6_BIND:
+ *range = retval_range(0, 3);
break;
default:
break;
}
- }
-
- /* eBPF calling convention is such that R0 is used
- * to return the value from eBPF program.
- * Make sure that it's readable at this time
- * of bpf_exit, which means that program wrote
- * something into it earlier
- */
- err = check_reg_arg(env, regno, SRC_OP);
- if (err)
- return err;
-
- if (is_pointer_value(env, regno)) {
- verbose(env, "R%d leaks addr as return value\n", regno);
- return -EACCES;
- }
-
- if (frame->in_async_callback_fn) {
- exit_ctx = "At async callback return";
- range = frame->callback_ret_range;
- goto enforce_retval;
- }
-
- if (is_subprog && !frame->in_exception_callback_fn) {
- if (reg->type != SCALAR_VALUE) {
- verbose(env, "At subprogram exit the register R%d is not a scalar value (%s)\n",
- regno, reg_type_str(env, reg->type));
- return -EINVAL;
- }
- return 0;
- }
-
- switch (prog_type) {
- case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
- if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
- env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
- env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG ||
- env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
- env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
- env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME ||
- env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
- env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME ||
- env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME)
- range = retval_range(1, 1);
- if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
- env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
- range = retval_range(0, 3);
break;
case BPF_PROG_TYPE_CGROUP_SKB:
- if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
- range = retval_range(0, 3);
- enforce_attach_type_range = tnum_range(2, 3);
- }
+ if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS)
+ *range = retval_range(0, 3);
break;
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_SOCK_OPS:
@@ -18040,72 +16757,164 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
break;
case BPF_PROG_TYPE_RAW_TRACEPOINT:
if (!env->prog->aux->attach_btf_id)
- return 0;
- range = retval_range(0, 0);
+ return false;
+ *range = retval_range(0, 0);
break;
case BPF_PROG_TYPE_TRACING:
switch (env->prog->expected_attach_type) {
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
case BPF_TRACE_FSESSION:
- range = retval_range(0, 0);
+ *range = retval_range(0, 0);
break;
case BPF_TRACE_RAW_TP:
case BPF_MODIFY_RETURN:
- return 0;
+ return false;
case BPF_TRACE_ITER:
- break;
default:
- return -ENOTSUPP;
+ break;
}
break;
case BPF_PROG_TYPE_KPROBE:
switch (env->prog->expected_attach_type) {
case BPF_TRACE_KPROBE_SESSION:
case BPF_TRACE_UPROBE_SESSION:
- range = retval_range(0, 1);
break;
default:
- return 0;
+ return false;
}
break;
case BPF_PROG_TYPE_SK_LOOKUP:
- range = retval_range(SK_DROP, SK_PASS);
+ *range = retval_range(SK_DROP, SK_PASS);
break;
case BPF_PROG_TYPE_LSM:
if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
/* no range found, any return value is allowed */
- if (!get_func_retval_range(env->prog, &range))
- return 0;
+ if (!get_func_retval_range(env->prog, range))
+ return false;
/* no restricted range, any return value is allowed */
- if (range.minval == S32_MIN && range.maxval == S32_MAX)
- return 0;
- return_32bit = true;
+ if (range->minval == S32_MIN && range->maxval == S32_MAX)
+ return false;
+ range->return_32bit = true;
} else if (!env->prog->aux->attach_func_proto->type) {
/* Make sure programs that attach to void
* hooks don't try to modify return value.
*/
- range = retval_range(1, 1);
+ *range = retval_range(1, 1);
}
break;
case BPF_PROG_TYPE_NETFILTER:
- range = retval_range(NF_DROP, NF_ACCEPT);
+ *range = retval_range(NF_DROP, NF_ACCEPT);
break;
case BPF_PROG_TYPE_STRUCT_OPS:
- if (!ret_type)
- return 0;
- range = retval_range(0, 0);
+ *range = retval_range(0, 0);
break;
case BPF_PROG_TYPE_EXT:
/* freplace program can return anything as its return value
* depends on the to-be-replaced kernel func or bpf program.
*/
default:
+ return false;
+ }
+
+ /* A range was selected above; the caller must enforce it. */
+
+ return true;
+}
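
/*
 * Editorial sketch, not kernel code: a minimal userspace model of the
 * bpf_retval_range selected above and of its enforcement.  The names
 * retval_range_sketch, range_of() and within() are illustrative
 * stand-ins for struct bpf_retval_range, retval_range() and
 * retval_range_within(); the return_32bit flag mirrors the field this
 * patch moves into the range itself for the BPF_LSM_CGROUP case.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct retval_range_sketch {
	int32_t minval;
	int32_t maxval;
	bool return_32bit;	/* assumption: only the low 32 bits are checked */
};

static struct retval_range_sketch range_of(int32_t lo, int32_t hi)
{
	return (struct retval_range_sketch){ .minval = lo, .maxval = hi };
}

/* the four bounds stand in for bpf_reg_state's smin/smax and s32 bounds */
static bool within(struct retval_range_sketch r,
		   int64_t smin, int64_t smax,
		   int32_t s32min, int32_t s32max)
{
	if (r.return_32bit)
		return r.minval <= s32min && s32max <= r.maxval;
	return r.minval <= smin && smax <= r.maxval;
}

int main(void)
{
	/* BPF_CGROUP_INET4/6_BIND and INET_EGRESS select [0, 3] */
	struct retval_range_sketch range = range_of(0, 3);

	assert(within(range, 0, 2, 0, 2));	/* R0 in [0, 2]: accepted */
	assert(!within(range, 0, 4, 0, 4));	/* R0 may be 4: rejected */
	return 0;
}
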
+
+static bool program_returns_void(struct bpf_verifier_env *env)
+{
+ const struct bpf_prog *prog = env->prog;
+ enum bpf_prog_type prog_type = prog->type;
+
+ switch (prog_type) {
+ case BPF_PROG_TYPE_LSM:
+ /* See return_retval_range(); for BPF_LSM_CGROUP the range is 0 or 0-1 depending on the hook. */
+ if (prog->expected_attach_type != BPF_LSM_CGROUP &&
+ !prog->aux->attach_func_proto->type)
+ return true;
+ break;
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ if (!prog->aux->attach_func_proto->type)
+ return true;
+ break;
+ case BPF_PROG_TYPE_EXT:
+ /*
+ * If the actual program is an extension, let it
+ * return void - attaching will succeed only if the
+ * program being replaced also returns void, and since
+ * it has passed verification its actual type doesn't matter.
+ */
+ if (subprog_returns_void(env, 0))
+ return true;
+ break;
+ default:
+ break;
+ }
+ return false;
+}
+
+static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name)
+{
+ const char *exit_ctx = "At program exit";
+ struct tnum enforce_attach_type_range = tnum_unknown;
+ const struct bpf_prog *prog = env->prog;
+ struct bpf_reg_state *reg = reg_state(env, regno);
+ struct bpf_retval_range range = retval_range(0, 1);
+ enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
+ struct bpf_func_state *frame = env->cur_state->frame[0];
+ const struct btf_type *reg_type, *ret_type = NULL;
+ int err;
+
+ /* LSM and struct_ops func-ptr's return type could be "void" */
+ if (!frame->in_async_callback_fn && program_returns_void(env))
return 0;
+
+ if (prog_type == BPF_PROG_TYPE_STRUCT_OPS) {
+ /* Allow a struct_ops program to return a referenced kptr if it
+ * matches the operator's return type and is in its unmodified
+ * form. A scalar zero (i.e., a null pointer) is also allowed.
+ */
+ reg_type = reg->btf ? btf_type_by_id(reg->btf, reg->btf_id) : NULL;
+ ret_type = btf_type_resolve_ptr(prog->aux->attach_btf,
+ prog->aux->attach_func_proto->type,
+ NULL);
+ if (ret_type && ret_type == reg_type && reg->ref_obj_id)
+ return __check_ptr_off_reg(env, reg, regno, false);
+ }
+
+ /* eBPF calling convention is such that R0 is used
+ * to return the value from eBPF program.
+ * Make sure that it's readable at this time
+ * of bpf_exit, which means that program wrote
+ * something into it earlier
+ */
+ err = check_reg_arg(env, regno, SRC_OP);
+ if (err)
+ return err;
+
+ if (is_pointer_value(env, regno)) {
+ verbose(env, "R%d leaks addr as return value\n", regno);
+ return -EACCES;
}
+ if (frame->in_async_callback_fn) {
+ exit_ctx = "At async callback return";
+ range = frame->callback_ret_range;
+ goto enforce_retval;
+ }
+
+ if (prog_type == BPF_PROG_TYPE_STRUCT_OPS && !ret_type)
+ return 0;
+
+ if (prog_type == BPF_PROG_TYPE_CGROUP_SKB && (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS))
+ enforce_attach_type_range = tnum_range(2, 3);
+
+ if (!return_retval_range(env, &range))
+ return 0;
+
enforce_retval:
if (reg->type != SCALAR_VALUE) {
verbose(env, "%s the register R%d is not a known value (%s)\n",
@@ -18117,10 +16926,9 @@ enforce_retval:
if (err)
return err;
- if (!retval_range_within(range, reg, return_32bit)) {
+ if (!retval_range_within(range, reg)) {
verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name);
- if (!is_subprog &&
- prog->expected_attach_type == BPF_LSM_CGROUP &&
+ if (prog->expected_attach_type == BPF_LSM_CGROUP &&
prog_type == BPF_PROG_TYPE_LSM &&
!prog->aux->attach_func_proto->type)
verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
@@ -18133,189 +16941,31 @@ enforce_retval:
return 0;
}
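
/*
 * Editorial sketch, not kernel code: the INET_EGRESS branch above also
 * arms enforce_attach_type_range = tnum_range(2, 3), and the verifier
 * later asks whether its knowledge of R0 fits that set via tnum_in().
 * Below are local reimplementations of those two helpers following
 * their documented semantics (tnum_range_sketch/tnum_in_sketch are
 * illustrative names, not the kernel's tnum.c).
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct tnum_sketch { uint64_t value, mask; };	/* mask bits are unknown */

static struct tnum_sketch tnum_range_sketch(uint64_t min, uint64_t max)
{
	uint64_t chi = min ^ max, delta;
	int bits = chi ? 64 - __builtin_clzll(chi) : 0;

	if (bits > 63)			/* 1ULL << 64 would be undefined */
		return (struct tnum_sketch){ 0, ~0ULL };
	delta = (1ULL << bits) - 1;
	return (struct tnum_sketch){ min & ~delta, delta };
}

/* is every value representable by b also representable by a? */
static bool tnum_in_sketch(struct tnum_sketch a, struct tnum_sketch b)
{
	if (b.mask & ~a.mask)		/* b unknown where a is known */
		return false;
	b.value &= ~a.mask;
	return a.value == b.value;
}

int main(void)
{
	struct tnum_sketch allowed = tnum_range_sketch(2, 3);

	/* {value=2, mask=1}: only bit 0 is unknown, i.e. 0b1x */
	assert(allowed.value == 2 && allowed.mask == 1);
	/* a constant 2 or 3 fits the range, a constant 1 does not */
	assert(tnum_in_sketch(allowed, (struct tnum_sketch){ 2, 0 }));
	assert(tnum_in_sketch(allowed, (struct tnum_sketch){ 3, 0 }));
	assert(!tnum_in_sketch(allowed, (struct tnum_sketch){ 1, 0 }));
	return 0;
}
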
-static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off)
-{
- struct bpf_subprog_info *subprog;
-
- subprog = bpf_find_containing_subprog(env, off);
- subprog->changes_pkt_data = true;
-}
-
-static void mark_subprog_might_sleep(struct bpf_verifier_env *env, int off)
-{
- struct bpf_subprog_info *subprog;
-
- subprog = bpf_find_containing_subprog(env, off);
- subprog->might_sleep = true;
-}
-
-/* 't' is an index of a call-site.
- * 'w' is a callee entry point.
- * Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED.
- * Rely on DFS traversal order and absence of recursive calls to guarantee that
- * callee's changes_pkt_data marks would be correct at that moment.
- */
-static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w)
-{
- struct bpf_subprog_info *caller, *callee;
-
- caller = bpf_find_containing_subprog(env, t);
- callee = bpf_find_containing_subprog(env, w);
- caller->changes_pkt_data |= callee->changes_pkt_data;
- caller->might_sleep |= callee->might_sleep;
-}
-
-/* non-recursive DFS pseudo code
- * 1 procedure DFS-iterative(G,v):
- * 2 label v as discovered
- * 3 let S be a stack
- * 4 S.push(v)
- * 5 while S is not empty
- * 6 t <- S.peek()
- * 7 if t is what we're looking for:
- * 8 return t
- * 9 for all edges e in G.adjacentEdges(t) do
- * 10 if edge e is already labelled
- * 11 continue with the next edge
- * 12 w <- G.adjacentVertex(t,e)
- * 13 if vertex w is not discovered and not explored
- * 14 label e as tree-edge
- * 15 label w as discovered
- * 16 S.push(w)
- * 17 continue at 5
- * 18 else if vertex w is discovered
- * 19 label e as back-edge
- * 20 else
- * 21 // vertex w is explored
- * 22 label e as forward- or cross-edge
- * 23 label t as explored
- * 24 S.pop()
- *
- * convention:
- * 0x10 - discovered
- * 0x11 - discovered and fall-through edge labelled
- * 0x12 - discovered and fall-through and branch edges labelled
- * 0x20 - explored
- */
-
-enum {
- DISCOVERED = 0x10,
- EXPLORED = 0x20,
- FALLTHROUGH = 1,
- BRANCH = 2,
-};
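
/*
 * Editorial sketch, not kernel code: the pseudo-code above, run over a
 * toy CFG.  The explicit successor table replaces real instruction
 * decoding (push_insn()/visit_insn() do that in the verifier), but the
 * DISCOVERED/EXPLORED state convention and the back-edge detection,
 * which is how check_cfg() spots loops, are the same.
 */
#include <stdio.h>

#define N 5
enum { DISC = 0x10, EXPL = 0x20 };

/* succ[v][0..1]: up to two successors per node, -1 = none */
static const int succ[N][2] = {
	{ 1, -1 },	/* 0 -> 1                          */
	{ 2,  3 },	/* 1 -> 2, 1 -> 3 (conditional jump) */
	{ 1, -1 },	/* 2 -> 1 (back-edge: a loop)        */
	{ 4, -1 },	/* 3 -> 4                          */
	{ -1, -1 },	/* 4: exit                         */
};

int main(void)
{
	int state[N] = { 0 }, stack[N], sp = 0;
	int edge[N] = { 0 };	/* how many out-edges of v are labelled */

	state[0] = DISC;
	stack[sp++] = 0;
	while (sp) {
		int t = stack[sp - 1];

		if (edge[t] < 2 && succ[t][edge[t]] >= 0) {
			int w = succ[t][edge[t]++];

			if (!state[w]) {		/* tree-edge */
				state[w] = DISC;
				stack[sp++] = w;
			} else if (state[w] == DISC) {	/* back-edge */
				printf("back-edge %d -> %d\n", t, w);
			}
			/* w EXPLORED: forward- or cross-edge, nothing to do */
		} else {
			state[t] = EXPL;		/* all edges labelled */
			sp--;
		}
	}
	return 0;	/* prints: back-edge 2 -> 1 */
}
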
-
-static void mark_prune_point(struct bpf_verifier_env *env, int idx)
-{
- env->insn_aux_data[idx].prune_point = true;
-}
-
-static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
-{
- return env->insn_aux_data[insn_idx].prune_point;
-}
-
-static void mark_force_checkpoint(struct bpf_verifier_env *env, int idx)
-{
- env->insn_aux_data[idx].force_checkpoint = true;
-}
-
-static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
-{
- return env->insn_aux_data[insn_idx].force_checkpoint;
-}
-
-static void mark_calls_callback(struct bpf_verifier_env *env, int idx)
-{
- env->insn_aux_data[idx].calls_callback = true;
-}
-
-bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx)
-{
- return env->insn_aux_data[insn_idx].calls_callback;
-}
-
-enum {
- DONE_EXPLORING = 0,
- KEEP_EXPLORING = 1,
-};
-
-/* t, w, e - match pseudo-code above:
- * t - index of current instruction
- * w - next instruction
- * e - edge
- */
-static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
+static int check_global_subprog_return_code(struct bpf_verifier_env *env)
{
- int *insn_stack = env->cfg.insn_stack;
- int *insn_state = env->cfg.insn_state;
-
- if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
- return DONE_EXPLORING;
+ struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
+ struct bpf_func_state *cur_frame = cur_func(env);
+ int err;
- if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
- return DONE_EXPLORING;
+ if (subprog_returns_void(env, cur_frame->subprogno))
+ return 0;
- if (w < 0 || w >= env->prog->len) {
- verbose_linfo(env, t, "%d: ", t);
- verbose(env, "jump out of range from insn %d to %d\n", t, w);
- return -EINVAL;
- }
+ err = check_reg_arg(env, BPF_REG_0, SRC_OP);
+ if (err)
+ return err;
- if (e == BRANCH) {
- /* mark branch target for state pruning */
- mark_prune_point(env, w);
- mark_jmp_point(env, w);
+ if (is_pointer_value(env, BPF_REG_0)) {
+ verbose(env, "R%d leaks addr as return value\n", BPF_REG_0);
+ return -EACCES;
}
- if (insn_state[w] == 0) {
- /* tree-edge */
- insn_state[t] = DISCOVERED | e;
- insn_state[w] = DISCOVERED;
- if (env->cfg.cur_stack >= env->prog->len)
- return -E2BIG;
- insn_stack[env->cfg.cur_stack++] = w;
- return KEEP_EXPLORING;
- } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
- if (env->bpf_capable)
- return DONE_EXPLORING;
- verbose_linfo(env, t, "%d: ", t);
- verbose_linfo(env, w, "%d: ", w);
- verbose(env, "back-edge from insn %d to %d\n", t, w);
+ if (reg->type != SCALAR_VALUE) {
+ verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
+ reg_type_str(env, reg->type));
return -EINVAL;
- } else if (insn_state[w] == EXPLORED) {
- /* forward- or cross-edge */
- insn_state[t] = DISCOVERED | e;
- } else {
- verifier_bug(env, "insn state internal bug");
- return -EFAULT;
}
- return DONE_EXPLORING;
-}
-static int visit_func_call_insn(int t, struct bpf_insn *insns,
- struct bpf_verifier_env *env,
- bool visit_callee)
-{
- int ret, insn_sz;
- int w;
-
- insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1;
- ret = push_insn(t, t + insn_sz, FALLTHROUGH, env);
- if (ret)
- return ret;
-
- mark_prune_point(env, t + insn_sz);
- /* when we exit from subprog, we need to record non-linear history */
- mark_jmp_point(env, t + insn_sz);
-
- if (visit_callee) {
- w = t + insns[t].imm + 1;
- mark_prune_point(env, t);
- merge_callee_effects(env, t, w);
- ret = push_insn(t, w, BRANCH, env);
- }
- return ret;
+ return 0;
}
/* Bitmask with 1s for all caller saved registers */
@@ -18325,7 +16975,7 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
* replacement patch is presumed to follow bpf_fastcall contract
* (see mark_fastcall_pattern_for_call() below).
*/
-static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
+bool bpf_verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
{
switch (imm) {
#ifdef CONFIG_X86_64
@@ -18341,17 +16991,11 @@ static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
}
}
-struct call_summary {
- u8 num_params;
- bool is_void;
- bool fastcall;
-};
-
/* If @call is a kfunc or helper call, fills @cs and returns true,
* otherwise returns false.
*/
-static bool get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
- struct call_summary *cs)
+bool bpf_get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
+ struct bpf_call_summary *cs)
{
struct bpf_kfunc_call_arg_meta meta;
const struct bpf_func_proto *fn;
@@ -18359,11 +17003,11 @@ static bool get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call
if (bpf_helper_call(call)) {
- if (get_helper_proto(env, call->imm, &fn) < 0)
+ if (bpf_get_helper_proto(env, call->imm, &fn) < 0)
/* error would be reported later */
return false;
cs->fastcall = fn->allow_fastcall &&
- (verifier_inlines_helper_call(env, call->imm) ||
+ (bpf_verifier_inlines_helper_call(env, call->imm) ||
bpf_jit_inlines_helper_call(call->imm));
cs->is_void = fn->ret_type == RET_VOID;
cs->num_params = 0;
@@ -18378,7 +17022,7 @@ static bool get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call
if (bpf_pseudo_kfunc_call(call)) {
int err;
- err = fetch_kfunc_arg_meta(env, call->imm, call->off, &meta);
+ err = bpf_fetch_kfunc_arg_meta(env, call->imm, call->off, &meta);
if (err < 0)
/* error would be reported later */
return false;
@@ -18472,12 +17116,12 @@ static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env,
struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
struct bpf_insn *call = &env->prog->insnsi[insn_idx];
u32 clobbered_regs_mask;
- struct call_summary cs;
+ struct bpf_call_summary cs;
u32 expected_regs_mask;
s16 off;
int i;
- if (!get_call_summary(env, call, &cs))
+ if (!bpf_get_call_summary(env, call, &cs))
return;
/* A bitmask specifying which caller saved registers are clobbered
@@ -18580,714 +17224,6 @@ static int mark_fastcall_patterns(struct bpf_verifier_env *env)
return 0;
}
-static struct bpf_iarray *iarray_realloc(struct bpf_iarray *old, size_t n_elem)
-{
- size_t new_size = sizeof(struct bpf_iarray) + n_elem * sizeof(old->items[0]);
- struct bpf_iarray *new;
-
- new = kvrealloc(old, new_size, GFP_KERNEL_ACCOUNT);
- if (!new) {
- /* this is what callers always want, so simplify the call site */
- kvfree(old);
- return NULL;
- }
-
- new->cnt = n_elem;
- return new;
-}
-
-static int copy_insn_array(struct bpf_map *map, u32 start, u32 end, u32 *items)
-{
- struct bpf_insn_array_value *value;
- u32 i;
-
- for (i = start; i <= end; i++) {
- value = map->ops->map_lookup_elem(map, &i);
- /*
- * map_lookup_elem of an array map will never return an error,
- * but not checking it makes some static analysers to worry
- */
- if (IS_ERR(value))
- return PTR_ERR(value);
- else if (!value)
- return -EINVAL;
- items[i - start] = value->xlated_off;
- }
- return 0;
-}
-
-static int cmp_ptr_to_u32(const void *a, const void *b)
-{
- return *(u32 *)a - *(u32 *)b;
-}
-
-static int sort_insn_array_uniq(u32 *items, int cnt)
-{
- int unique = 1;
- int i;
-
- sort(items, cnt, sizeof(items[0]), cmp_ptr_to_u32, NULL);
-
- for (i = 1; i < cnt; i++)
- if (items[i] != items[unique - 1])
- items[unique++] = items[i];
-
- return unique;
-}
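
/*
 * Editorial sketch, not kernel code: a userspace illustration of the
 * in-place sort-and-deduplicate step above, using qsort() in place of
 * the kernel's sort().  Duplicate jump-table targets collapse and the
 * returned count is the number of unique offsets now at the front of
 * the array (sort_uniq/cmp_u32 are illustrative names).
 */
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

static int cmp_u32(const void *a, const void *b)
{
	uint32_t x = *(const uint32_t *)a, y = *(const uint32_t *)b;

	return x < y ? -1 : x > y;	/* avoids overflow of x - y */
}

static int sort_uniq(uint32_t *items, int cnt)
{
	int unique = 1, i;

	qsort(items, cnt, sizeof(items[0]), cmp_u32);
	for (i = 1; i < cnt; i++)
		if (items[i] != items[unique - 1])
			items[unique++] = items[i];
	return unique;
}

int main(void)
{
	uint32_t jt[] = { 7, 3, 7, 3, 9 };

	assert(sort_uniq(jt, 5) == 3);
	assert(jt[0] == 3 && jt[1] == 7 && jt[2] == 9);
	return 0;
}
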
-
-/*
- * sort_unique({map[start], ..., map[end]}) into off
- */
-static int copy_insn_array_uniq(struct bpf_map *map, u32 start, u32 end, u32 *off)
-{
- u32 n = end - start + 1;
- int err;
-
- err = copy_insn_array(map, start, end, off);
- if (err)
- return err;
-
- return sort_insn_array_uniq(off, n);
-}
-
-/*
- * Copy all unique offsets from the map
- */
-static struct bpf_iarray *jt_from_map(struct bpf_map *map)
-{
- struct bpf_iarray *jt;
- int err;
- int n;
-
- jt = iarray_realloc(NULL, map->max_entries);
- if (!jt)
- return ERR_PTR(-ENOMEM);
-
- n = copy_insn_array_uniq(map, 0, map->max_entries - 1, jt->items);
- if (n < 0) {
- err = n;
- goto err_free;
- }
- if (n == 0) {
- err = -EINVAL;
- goto err_free;
- }
- jt->cnt = n;
- return jt;
-
-err_free:
- kvfree(jt);
- return ERR_PTR(err);
-}
-
-/*
- * Find and collect all maps which fit in the subprog. Return the result as one
- * combined jump table in jt->items (allocated with kvcalloc)
- */
-static struct bpf_iarray *jt_from_subprog(struct bpf_verifier_env *env,
- int subprog_start, int subprog_end)
-{
- struct bpf_iarray *jt = NULL;
- struct bpf_map *map;
- struct bpf_iarray *jt_cur;
- int i;
-
- for (i = 0; i < env->insn_array_map_cnt; i++) {
- /*
- * TODO (when needed): collect only jump tables, not static keys
- * or maps for indirect calls
- */
- map = env->insn_array_maps[i];
-
- jt_cur = jt_from_map(map);
- if (IS_ERR(jt_cur)) {
- kvfree(jt);
- return jt_cur;
- }
-
- /*
- * This is enough to check one element. The full table is
- * checked to fit inside the subprog later in create_jt()
- */
- if (jt_cur->items[0] >= subprog_start && jt_cur->items[0] < subprog_end) {
- u32 old_cnt = jt ? jt->cnt : 0;
- jt = iarray_realloc(jt, old_cnt + jt_cur->cnt);
- if (!jt) {
- kvfree(jt_cur);
- return ERR_PTR(-ENOMEM);
- }
- memcpy(jt->items + old_cnt, jt_cur->items, jt_cur->cnt << 2);
- }
-
- kvfree(jt_cur);
- }
-
- if (!jt) {
- verbose(env, "no jump tables found for subprog starting at %u\n", subprog_start);
- return ERR_PTR(-EINVAL);
- }
-
- jt->cnt = sort_insn_array_uniq(jt->items, jt->cnt);
- return jt;
-}
-
-static struct bpf_iarray *
-create_jt(int t, struct bpf_verifier_env *env)
-{
- static struct bpf_subprog_info *subprog;
- int subprog_start, subprog_end;
- struct bpf_iarray *jt;
- int i;
-
- subprog = bpf_find_containing_subprog(env, t);
- subprog_start = subprog->start;
- subprog_end = (subprog + 1)->start;
- jt = jt_from_subprog(env, subprog_start, subprog_end);
- if (IS_ERR(jt))
- return jt;
-
- /* Check that every element of the jump table fits within the given subprogram */
- for (i = 0; i < jt->cnt; i++) {
- if (jt->items[i] < subprog_start || jt->items[i] >= subprog_end) {
- verbose(env, "jump table for insn %d points outside of the subprog [%u,%u]\n",
- t, subprog_start, subprog_end);
- kvfree(jt);
- return ERR_PTR(-EINVAL);
- }
- }
-
- return jt;
-}
-
-/* "conditional jump with N edges" */
-static int visit_gotox_insn(int t, struct bpf_verifier_env *env)
-{
- int *insn_stack = env->cfg.insn_stack;
- int *insn_state = env->cfg.insn_state;
- bool keep_exploring = false;
- struct bpf_iarray *jt;
- int i, w;
-
- jt = env->insn_aux_data[t].jt;
- if (!jt) {
- jt = create_jt(t, env);
- if (IS_ERR(jt))
- return PTR_ERR(jt);
-
- env->insn_aux_data[t].jt = jt;
- }
-
- mark_prune_point(env, t);
- for (i = 0; i < jt->cnt; i++) {
- w = jt->items[i];
- if (w < 0 || w >= env->prog->len) {
- verbose(env, "indirect jump out of range from insn %d to %d\n", t, w);
- return -EINVAL;
- }
-
- mark_jmp_point(env, w);
-
- /* EXPLORED || DISCOVERED */
- if (insn_state[w])
- continue;
-
- if (env->cfg.cur_stack >= env->prog->len)
- return -E2BIG;
-
- insn_stack[env->cfg.cur_stack++] = w;
- insn_state[w] |= DISCOVERED;
- keep_exploring = true;
- }
-
- return keep_exploring ? KEEP_EXPLORING : DONE_EXPLORING;
-}
-
-static int visit_tailcall_insn(struct bpf_verifier_env *env, int t)
-{
- static struct bpf_subprog_info *subprog;
- struct bpf_iarray *jt;
-
- if (env->insn_aux_data[t].jt)
- return 0;
-
- jt = iarray_realloc(NULL, 2);
- if (!jt)
- return -ENOMEM;
-
- subprog = bpf_find_containing_subprog(env, t);
- jt->items[0] = t + 1;
- jt->items[1] = subprog->exit_idx;
- env->insn_aux_data[t].jt = jt;
- return 0;
-}
-
-/* Visits the instruction at index t and returns one of the following:
- * < 0 - an error occurred
- * DONE_EXPLORING - the instruction was fully explored
- * KEEP_EXPLORING - there is still work to be done before it is fully explored
- */
-static int visit_insn(int t, struct bpf_verifier_env *env)
-{
- struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
- int ret, off, insn_sz;
-
- if (bpf_pseudo_func(insn))
- return visit_func_call_insn(t, insns, env, true);
-
- /* All non-branch instructions have a single fall-through edge. */
- if (BPF_CLASS(insn->code) != BPF_JMP &&
- BPF_CLASS(insn->code) != BPF_JMP32) {
- insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
- return push_insn(t, t + insn_sz, FALLTHROUGH, env);
- }
-
- switch (BPF_OP(insn->code)) {
- case BPF_EXIT:
- return DONE_EXPLORING;
-
- case BPF_CALL:
- if (is_async_callback_calling_insn(insn))
- /* Mark this call insn as a prune point to trigger
- * is_state_visited() check before call itself is
- * processed by __check_func_call(). Otherwise new
- * async state will be pushed for further exploration.
- */
- mark_prune_point(env, t);
- /* For functions that invoke callbacks it is not known how many times
- * callback would be called. Verifier models callback calling functions
- * by repeatedly visiting callback bodies and returning to origin call
- * instruction.
- * In order to stop such iteration verifier needs to identify when a
- * state identical to some state from a previous iteration is reached.
- * Check below forces creation of checkpoint before callback calling
- * instruction to allow search for such identical states.
- */
- if (is_sync_callback_calling_insn(insn)) {
- mark_calls_callback(env, t);
- mark_force_checkpoint(env, t);
- mark_prune_point(env, t);
- mark_jmp_point(env, t);
- }
- if (bpf_helper_call(insn)) {
- const struct bpf_func_proto *fp;
-
- ret = get_helper_proto(env, insn->imm, &fp);
- /* If called in a non-sleepable context program will be
- * rejected anyway, so we should end up with precise
- * sleepable marks on subprogs, except for dead code
- * elimination.
- */
- if (ret == 0 && fp->might_sleep)
- mark_subprog_might_sleep(env, t);
- if (bpf_helper_changes_pkt_data(insn->imm))
- mark_subprog_changes_pkt_data(env, t);
- if (insn->imm == BPF_FUNC_tail_call)
- visit_tailcall_insn(env, t);
- } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
- struct bpf_kfunc_call_arg_meta meta;
-
- ret = fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
- if (ret == 0 && is_iter_next_kfunc(&meta)) {
- mark_prune_point(env, t);
- /* Checking and saving state checkpoints at iter_next() call
- * is crucial for fast convergence of open-coded iterator loop
- * logic, so we need to force it. If we don't do that,
- * is_state_visited() might skip saving a checkpoint, causing
- * unnecessarily long sequence of not checkpointed
- * instructions and jumps, leading to exhaustion of jump
- * history buffer, and potentially other undesired outcomes.
- * It is expected that with correct open-coded iterators
- * convergence will happen quickly, so we don't run a risk of
- * exhausting memory.
- */
- mark_force_checkpoint(env, t);
- }
- /* Same as helpers, if called in a non-sleepable context
- * program will be rejected anyway, so we should end up
- * with precise sleepable marks on subprogs, except for
- * dead code elimination.
- */
- if (ret == 0 && is_kfunc_sleepable(&meta))
- mark_subprog_might_sleep(env, t);
- if (ret == 0 && is_kfunc_pkt_changing(&meta))
- mark_subprog_changes_pkt_data(env, t);
- }
- return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
-
- case BPF_JA:
- if (BPF_SRC(insn->code) == BPF_X)
- return visit_gotox_insn(t, env);
-
- if (BPF_CLASS(insn->code) == BPF_JMP)
- off = insn->off;
- else
- off = insn->imm;
-
- /* unconditional jump with single edge */
- ret = push_insn(t, t + off + 1, FALLTHROUGH, env);
- if (ret)
- return ret;
-
- mark_prune_point(env, t + off + 1);
- mark_jmp_point(env, t + off + 1);
-
- return ret;
-
- default:
- /* conditional jump with two edges */
- mark_prune_point(env, t);
- if (is_may_goto_insn(insn))
- mark_force_checkpoint(env, t);
-
- ret = push_insn(t, t + 1, FALLTHROUGH, env);
- if (ret)
- return ret;
-
- return push_insn(t, t + insn->off + 1, BRANCH, env);
- }
-}
-
-/* non-recursive depth-first-search to detect loops in BPF program
- * loop == back-edge in directed graph
- */
-static int check_cfg(struct bpf_verifier_env *env)
-{
- int insn_cnt = env->prog->len;
- int *insn_stack, *insn_state;
- int ex_insn_beg, i, ret = 0;
-
- insn_state = env->cfg.insn_state = kvzalloc_objs(int, insn_cnt,
- GFP_KERNEL_ACCOUNT);
- if (!insn_state)
- return -ENOMEM;
-
- insn_stack = env->cfg.insn_stack = kvzalloc_objs(int, insn_cnt,
- GFP_KERNEL_ACCOUNT);
- if (!insn_stack) {
- kvfree(insn_state);
- return -ENOMEM;
- }
-
- ex_insn_beg = env->exception_callback_subprog
- ? env->subprog_info[env->exception_callback_subprog].start
- : 0;
-
- insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
- insn_stack[0] = 0; /* 0 is the first instruction */
- env->cfg.cur_stack = 1;
-
-walk_cfg:
- while (env->cfg.cur_stack > 0) {
- int t = insn_stack[env->cfg.cur_stack - 1];
-
- ret = visit_insn(t, env);
- switch (ret) {
- case DONE_EXPLORING:
- insn_state[t] = EXPLORED;
- env->cfg.cur_stack--;
- break;
- case KEEP_EXPLORING:
- break;
- default:
- if (ret > 0) {
- verifier_bug(env, "visit_insn internal bug");
- ret = -EFAULT;
- }
- goto err_free;
- }
- }
-
- if (env->cfg.cur_stack < 0) {
- verifier_bug(env, "pop stack internal bug");
- ret = -EFAULT;
- goto err_free;
- }
-
- if (ex_insn_beg && insn_state[ex_insn_beg] != EXPLORED) {
- insn_state[ex_insn_beg] = DISCOVERED;
- insn_stack[0] = ex_insn_beg;
- env->cfg.cur_stack = 1;
- goto walk_cfg;
- }
-
- for (i = 0; i < insn_cnt; i++) {
- struct bpf_insn *insn = &env->prog->insnsi[i];
-
- if (insn_state[i] != EXPLORED) {
- verbose(env, "unreachable insn %d\n", i);
- ret = -EINVAL;
- goto err_free;
- }
- if (bpf_is_ldimm64(insn)) {
- if (insn_state[i + 1] != 0) {
- verbose(env, "jump into the middle of ldimm64 insn %d\n", i);
- ret = -EINVAL;
- goto err_free;
- }
- i++; /* skip second half of ldimm64 */
- }
- }
- ret = 0; /* cfg looks good */
- env->prog->aux->changes_pkt_data = env->subprog_info[0].changes_pkt_data;
- env->prog->aux->might_sleep = env->subprog_info[0].might_sleep;
-
-err_free:
- kvfree(insn_state);
- kvfree(insn_stack);
- env->cfg.insn_state = env->cfg.insn_stack = NULL;
- return ret;
-}
-
-/*
- * For each subprogram 'i' fill the env->cfg.insn_postorder sub-range
- * [env->subprog_info[i].postorder_start, env->subprog_info[i+1].postorder_start)
- * with the indices of subprogram 'i' instructions in postorder.
- */
-static int compute_postorder(struct bpf_verifier_env *env)
-{
- u32 cur_postorder, i, top, stack_sz, s;
- int *stack = NULL, *postorder = NULL, *state = NULL;
- struct bpf_iarray *succ;
-
- postorder = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT);
- state = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT);
- stack = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT);
- if (!postorder || !state || !stack) {
- kvfree(postorder);
- kvfree(state);
- kvfree(stack);
- return -ENOMEM;
- }
- cur_postorder = 0;
- for (i = 0; i < env->subprog_cnt; i++) {
- env->subprog_info[i].postorder_start = cur_postorder;
- stack[0] = env->subprog_info[i].start;
- stack_sz = 1;
- do {
- top = stack[stack_sz - 1];
- state[top] |= DISCOVERED;
- if (state[top] & EXPLORED) {
- postorder[cur_postorder++] = top;
- stack_sz--;
- continue;
- }
- succ = bpf_insn_successors(env, top);
- for (s = 0; s < succ->cnt; ++s) {
- if (!state[succ->items[s]]) {
- stack[stack_sz++] = succ->items[s];
- state[succ->items[s]] |= DISCOVERED;
- }
- }
- state[top] |= EXPLORED;
- } while (stack_sz);
- }
- env->subprog_info[i].postorder_start = cur_postorder;
- env->cfg.insn_postorder = postorder;
- env->cfg.cur_postorder = cur_postorder;
- kvfree(stack);
- kvfree(state);
- return 0;
-}
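
/*
 * Editorial sketch, not kernel code: the postorder computation above
 * on a small diamond CFG, assuming a fixed successor table instead of
 * bpf_insn_successors().  Postorder places every node after all of its
 * descendants, which is the visit order the liveness data-flow pass
 * wants for fast convergence on acyclic parts of the CFG.
 */
#include <stdio.h>

#define N 4
enum { DISC = 1, EXPL = 2 };

static const int succ[N][2] = {
	{ 1, 2 },	/* 0 -> 1, 0 -> 2 */
	{ 3, -1 },	/* 1 -> 3 */
	{ 3, -1 },	/* 2 -> 3 */
	{ -1, -1 },	/* 3: exit */
};

int main(void)
{
	int state[N] = { 0 }, stack[N], postorder[N];
	int sp = 0, n = 0, s;

	state[0] = DISC;
	stack[sp++] = 0;
	while (sp) {
		int top = stack[sp - 1];

		if (state[top] & EXPL) {	/* successors done: emit */
			postorder[n++] = top;
			sp--;
			continue;
		}
		for (s = 0; s < 2; s++) {
			int w = succ[top][s];

			if (w >= 0 && !state[w]) {
				state[w] = DISC;
				stack[sp++] = w;
			}
		}
		state[top] |= EXPL;
	}
	for (s = 0; s < n; s++)		/* prints: 3 2 1 0 */
		printf("%d ", postorder[s]);
	printf("\n");
	return 0;
}
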
-
-static int check_abnormal_return(struct bpf_verifier_env *env)
-{
- int i;
-
- for (i = 1; i < env->subprog_cnt; i++) {
- if (env->subprog_info[i].has_ld_abs) {
- verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
- return -EINVAL;
- }
- if (env->subprog_info[i].has_tail_call) {
- verbose(env, "tail_call is not allowed in subprogs without BTF\n");
- return -EINVAL;
- }
- }
- return 0;
-}
-
-/* The minimum supported BTF func info size */
-#define MIN_BPF_FUNCINFO_SIZE 8
-#define MAX_FUNCINFO_REC_SIZE 252
-
-static int check_btf_func_early(struct bpf_verifier_env *env,
- const union bpf_attr *attr,
- bpfptr_t uattr)
-{
- u32 krec_size = sizeof(struct bpf_func_info);
- const struct btf_type *type, *func_proto;
- u32 i, nfuncs, urec_size, min_size;
- struct bpf_func_info *krecord;
- struct bpf_prog *prog;
- const struct btf *btf;
- u32 prev_offset = 0;
- bpfptr_t urecord;
- int ret = -ENOMEM;
-
- nfuncs = attr->func_info_cnt;
- if (!nfuncs) {
- if (check_abnormal_return(env))
- return -EINVAL;
- return 0;
- }
-
- urec_size = attr->func_info_rec_size;
- if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
- urec_size > MAX_FUNCINFO_REC_SIZE ||
- urec_size % sizeof(u32)) {
- verbose(env, "invalid func info rec size %u\n", urec_size);
- return -EINVAL;
- }
-
- prog = env->prog;
- btf = prog->aux->btf;
-
- urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
- min_size = min_t(u32, krec_size, urec_size);
-
- krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
- if (!krecord)
- return -ENOMEM;
-
- for (i = 0; i < nfuncs; i++) {
- ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
- if (ret) {
- if (ret == -E2BIG) {
- verbose(env, "nonzero tailing record in func info");
- /* set the size kernel expects so loader can zero
- * out the rest of the record.
- */
- if (copy_to_bpfptr_offset(uattr,
- offsetof(union bpf_attr, func_info_rec_size),
- &min_size, sizeof(min_size)))
- ret = -EFAULT;
- }
- goto err_free;
- }
-
- if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
- ret = -EFAULT;
- goto err_free;
- }
-
- /* check insn_off */
- ret = -EINVAL;
- if (i == 0) {
- if (krecord[i].insn_off) {
- verbose(env,
- "nonzero insn_off %u for the first func info record",
- krecord[i].insn_off);
- goto err_free;
- }
- } else if (krecord[i].insn_off <= prev_offset) {
- verbose(env,
- "same or smaller insn offset (%u) than previous func info record (%u)",
- krecord[i].insn_off, prev_offset);
- goto err_free;
- }
-
- /* check type_id */
- type = btf_type_by_id(btf, krecord[i].type_id);
- if (!type || !btf_type_is_func(type)) {
- verbose(env, "invalid type id %d in func info",
- krecord[i].type_id);
- goto err_free;
- }
-
- func_proto = btf_type_by_id(btf, type->type);
- if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
- /* btf_func_check() already verified it during BTF load */
- goto err_free;
-
- prev_offset = krecord[i].insn_off;
- bpfptr_add(&urecord, urec_size);
- }
-
- prog->aux->func_info = krecord;
- prog->aux->func_info_cnt = nfuncs;
- return 0;
-
-err_free:
- kvfree(krecord);
- return ret;
-}
-
-static int check_btf_func(struct bpf_verifier_env *env,
- const union bpf_attr *attr,
- bpfptr_t uattr)
-{
- const struct btf_type *type, *func_proto, *ret_type;
- u32 i, nfuncs, urec_size;
- struct bpf_func_info *krecord;
- struct bpf_func_info_aux *info_aux = NULL;
- struct bpf_prog *prog;
- const struct btf *btf;
- bpfptr_t urecord;
- bool scalar_return;
- int ret = -ENOMEM;
-
- nfuncs = attr->func_info_cnt;
- if (!nfuncs) {
- if (check_abnormal_return(env))
- return -EINVAL;
- return 0;
- }
- if (nfuncs != env->subprog_cnt) {
- verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
- return -EINVAL;
- }
-
- urec_size = attr->func_info_rec_size;
-
- prog = env->prog;
- btf = prog->aux->btf;
-
- urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
-
- krecord = prog->aux->func_info;
- info_aux = kzalloc_objs(*info_aux, nfuncs,
- GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
- if (!info_aux)
- return -ENOMEM;
-
- for (i = 0; i < nfuncs; i++) {
- /* check insn_off */
- ret = -EINVAL;
-
- if (env->subprog_info[i].start != krecord[i].insn_off) {
- verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
- goto err_free;
- }
-
- /* Already checked type_id */
- type = btf_type_by_id(btf, krecord[i].type_id);
- info_aux[i].linkage = BTF_INFO_VLEN(type->info);
- /* Already checked func_proto */
- func_proto = btf_type_by_id(btf, type->type);
-
- ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
- scalar_return =
- btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
- if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
- verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
- goto err_free;
- }
- if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
- verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
- goto err_free;
- }
-
- bpfptr_add(&urecord, urec_size);
- }
-
- prog->aux->func_info_aux = info_aux;
- return 0;
-
-err_free:
- kfree(info_aux);
- return ret;
-}
-
static void adjust_btf_func(struct bpf_verifier_env *env)
{
struct bpf_prog_aux *aux = env->prog->aux;
@@ -19301,414 +17237,6 @@ static void adjust_btf_func(struct bpf_verifier_env *env)
aux->func_info[i].insn_off = env->subprog_info[i].start;
}
-#define MIN_BPF_LINEINFO_SIZE offsetofend(struct bpf_line_info, line_col)
-#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
-
-static int check_btf_line(struct bpf_verifier_env *env,
- const union bpf_attr *attr,
- bpfptr_t uattr)
-{
- u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
- struct bpf_subprog_info *sub;
- struct bpf_line_info *linfo;
- struct bpf_prog *prog;
- const struct btf *btf;
- bpfptr_t ulinfo;
- int err;
-
- nr_linfo = attr->line_info_cnt;
- if (!nr_linfo)
- return 0;
- if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
- return -EINVAL;
-
- rec_size = attr->line_info_rec_size;
- if (rec_size < MIN_BPF_LINEINFO_SIZE ||
- rec_size > MAX_LINEINFO_REC_SIZE ||
- rec_size & (sizeof(u32) - 1))
- return -EINVAL;
-
- /* Need to zero it in case userspace passes in
- * a smaller bpf_line_info object.
- */
- linfo = kvzalloc_objs(struct bpf_line_info, nr_linfo,
- GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
- if (!linfo)
- return -ENOMEM;
-
- prog = env->prog;
- btf = prog->aux->btf;
-
- s = 0;
- sub = env->subprog_info;
- ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
- expected_size = sizeof(struct bpf_line_info);
- ncopy = min_t(u32, expected_size, rec_size);
- for (i = 0; i < nr_linfo; i++) {
- err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
- if (err) {
- if (err == -E2BIG) {
- verbose(env, "nonzero tailing record in line_info");
- if (copy_to_bpfptr_offset(uattr,
- offsetof(union bpf_attr, line_info_rec_size),
- &expected_size, sizeof(expected_size)))
- err = -EFAULT;
- }
- goto err_free;
- }
-
- if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
- err = -EFAULT;
- goto err_free;
- }
-
- /*
- * Check insn_off to ensure
- * 1) strictly increasing AND
- * 2) bounded by prog->len
- *
- * The linfo[0].insn_off == 0 check logically falls into
- * the later "missing bpf_line_info for func..." case
- * because the first linfo[0].insn_off must be the
- * first sub also and the first sub must have
- * subprog_info[0].start == 0.
- */
- if ((i && linfo[i].insn_off <= prev_offset) ||
- linfo[i].insn_off >= prog->len) {
- verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
- i, linfo[i].insn_off, prev_offset,
- prog->len);
- err = -EINVAL;
- goto err_free;
- }
-
- if (!prog->insnsi[linfo[i].insn_off].code) {
- verbose(env,
- "Invalid insn code at line_info[%u].insn_off\n",
- i);
- err = -EINVAL;
- goto err_free;
- }
-
- if (!btf_name_by_offset(btf, linfo[i].line_off) ||
- !btf_name_by_offset(btf, linfo[i].file_name_off)) {
- verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
- err = -EINVAL;
- goto err_free;
- }
-
- if (s != env->subprog_cnt) {
- if (linfo[i].insn_off == sub[s].start) {
- sub[s].linfo_idx = i;
- s++;
- } else if (sub[s].start < linfo[i].insn_off) {
- verbose(env, "missing bpf_line_info for func#%u\n", s);
- err = -EINVAL;
- goto err_free;
- }
- }
-
- prev_offset = linfo[i].insn_off;
- bpfptr_add(&ulinfo, rec_size);
- }
-
- if (s != env->subprog_cnt) {
- verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
- env->subprog_cnt - s, s);
- err = -EINVAL;
- goto err_free;
- }
-
- prog->aux->linfo = linfo;
- prog->aux->nr_linfo = nr_linfo;
-
- return 0;
-
-err_free:
- kvfree(linfo);
- return err;
-}
-
-#define MIN_CORE_RELO_SIZE sizeof(struct bpf_core_relo)
-#define MAX_CORE_RELO_SIZE MAX_FUNCINFO_REC_SIZE
-
-static int check_core_relo(struct bpf_verifier_env *env,
- const union bpf_attr *attr,
- bpfptr_t uattr)
-{
- u32 i, nr_core_relo, ncopy, expected_size, rec_size;
- struct bpf_core_relo core_relo = {};
- struct bpf_prog *prog = env->prog;
- const struct btf *btf = prog->aux->btf;
- struct bpf_core_ctx ctx = {
- .log = &env->log,
- .btf = btf,
- };
- bpfptr_t u_core_relo;
- int err;
-
- nr_core_relo = attr->core_relo_cnt;
- if (!nr_core_relo)
- return 0;
- if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
- return -EINVAL;
-
- rec_size = attr->core_relo_rec_size;
- if (rec_size < MIN_CORE_RELO_SIZE ||
- rec_size > MAX_CORE_RELO_SIZE ||
- rec_size % sizeof(u32))
- return -EINVAL;
-
- u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
- expected_size = sizeof(struct bpf_core_relo);
- ncopy = min_t(u32, expected_size, rec_size);
-
- /* Unlike func_info and line_info, copy and apply each CO-RE
- * relocation record one at a time.
- */
- for (i = 0; i < nr_core_relo; i++) {
- /* future proofing when sizeof(bpf_core_relo) changes */
- err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
- if (err) {
- if (err == -E2BIG) {
- verbose(env, "nonzero tailing record in core_relo");
- if (copy_to_bpfptr_offset(uattr,
- offsetof(union bpf_attr, core_relo_rec_size),
- &expected_size, sizeof(expected_size)))
- err = -EFAULT;
- }
- break;
- }
-
- if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
- err = -EFAULT;
- break;
- }
-
- if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
- verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
- i, core_relo.insn_off, prog->len);
- err = -EINVAL;
- break;
- }
-
- err = bpf_core_apply(&ctx, &core_relo, i,
- &prog->insnsi[core_relo.insn_off / 8]);
- if (err)
- break;
- bpfptr_add(&u_core_relo, rec_size);
- }
- return err;
-}
-
-static int check_btf_info_early(struct bpf_verifier_env *env,
- const union bpf_attr *attr,
- bpfptr_t uattr)
-{
- struct btf *btf;
- int err;
-
- if (!attr->func_info_cnt && !attr->line_info_cnt) {
- if (check_abnormal_return(env))
- return -EINVAL;
- return 0;
- }
-
- btf = btf_get_by_fd(attr->prog_btf_fd);
- if (IS_ERR(btf))
- return PTR_ERR(btf);
- if (btf_is_kernel(btf)) {
- btf_put(btf);
- return -EACCES;
- }
- env->prog->aux->btf = btf;
-
- err = check_btf_func_early(env, attr, uattr);
- if (err)
- return err;
- return 0;
-}
-
-static int check_btf_info(struct bpf_verifier_env *env,
- const union bpf_attr *attr,
- bpfptr_t uattr)
-{
- int err;
-
- if (!attr->func_info_cnt && !attr->line_info_cnt) {
- if (check_abnormal_return(env))
- return -EINVAL;
- return 0;
- }
-
- err = check_btf_func(env, attr, uattr);
- if (err)
- return err;
-
- err = check_btf_line(env, attr, uattr);
- if (err)
- return err;
-
- err = check_core_relo(env, attr, uattr);
- if (err)
- return err;
-
- return 0;
-}
-
-/* check %cur's range satisfies %old's */
-static bool range_within(const struct bpf_reg_state *old,
- const struct bpf_reg_state *cur)
-{
- return old->umin_value <= cur->umin_value &&
- old->umax_value >= cur->umax_value &&
- old->smin_value <= cur->smin_value &&
- old->smax_value >= cur->smax_value &&
- old->u32_min_value <= cur->u32_min_value &&
- old->u32_max_value >= cur->u32_max_value &&
- old->s32_min_value <= cur->s32_min_value &&
- old->s32_max_value >= cur->s32_max_value;
-}
-
-/* If in the old state two registers had the same id, then they need to have
- * the same id in the new state as well. But that id could be different from
- * the old state, so we need to track the mapping from old to new ids.
- * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
- * regs with old id 5 must also have new id 9 for the new state to be safe. But
- * regs with a different old id could still have new id 9, we don't care about
- * that.
- * So we look through our idmap to see if this old id has been seen before. If
- * so, we require the new id to match; otherwise, we add the id pair to the map.
- */
-static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
-{
- struct bpf_id_pair *map = idmap->map;
- unsigned int i;
-
- /* either both IDs should be set or both should be zero */
- if (!!old_id != !!cur_id)
- return false;
-
- if (old_id == 0) /* cur_id == 0 as well */
- return true;
-
- for (i = 0; i < idmap->cnt; i++) {
- if (map[i].old == old_id)
- return map[i].cur == cur_id;
- if (map[i].cur == cur_id)
- return false;
- }
-
- /* Reached the end of known mappings; haven't seen this id before */
- if (idmap->cnt < BPF_ID_MAP_SIZE) {
- map[idmap->cnt].old = old_id;
- map[idmap->cnt].cur = cur_id;
- idmap->cnt++;
- return true;
- }
-
- /* We ran out of idmap slots, which should be impossible */
- WARN_ON_ONCE(1);
- return false;
-}
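
/*
 * Editorial sketch, not kernel code: a compact userspace model of the
 * old->new id mapping that check_ids() maintains (fixed-size,
 * simplified; idmap_sketch/check_ids_sketch are illustrative names).
 * Once old id 5 is seen paired with new id 9, every later occurrence
 * of old id 5 must again map to 9, and no other old id may claim 9.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct idmap_sketch {
	struct { uint32_t old, cur; } map[16];
	unsigned int cnt;
};

static bool check_ids_sketch(uint32_t old_id, uint32_t cur_id,
			     struct idmap_sketch *m)
{
	unsigned int i;

	if (!!old_id != !!cur_id)	/* both set or both zero */
		return false;
	if (!old_id)
		return true;
	for (i = 0; i < m->cnt; i++) {
		if (m->map[i].old == old_id)
			return m->map[i].cur == cur_id;
		if (m->map[i].cur == cur_id)
			return false;
	}
	if (m->cnt == 16)
		return false;		/* out of slots */
	m->map[m->cnt].old = old_id;
	m->map[m->cnt].cur = cur_id;
	m->cnt++;
	return true;
}

int main(void)
{
	struct idmap_sketch m = { .cnt = 0 };

	assert(check_ids_sketch(5, 9, &m));	/* establish 5 -> 9 */
	assert(check_ids_sketch(5, 9, &m));	/* consistent reuse: ok */
	assert(!check_ids_sketch(5, 7, &m));	/* 5 already maps to 9 */
	assert(!check_ids_sketch(6, 9, &m));	/* 9 already taken by 5 */
	return 0;
}
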
-
-/*
- * Compare scalar register IDs for state equivalence.
- *
- * When old_id == 0, the old register is independent - not linked to any
- * other register. Any linking in the current state only adds constraints,
- * making it more restrictive. Since the old state didn't rely on any ID
- * relationships for this register, it's always safe to accept cur regardless
- * of its ID. Hence, return true immediately.
- *
- * When old_id != 0 but cur_id == 0, we need to ensure that different
- * independent registers in cur don't incorrectly satisfy the ID matching
- * requirements of linked registers in old.
- *
- * Example: if old has r6.id=X and r7.id=X (linked), but cur has r6.id=0
- * and r7.id=0 (both independent), without temp IDs both would map old_id=X
- * to cur_id=0 and pass. With temp IDs: r6 maps X->temp1, r7 tries to map
- * X->temp2, but X is already mapped to temp1, so the check fails correctly.
- */
-static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
-{
- if (!old_id)
- return true;
-
- cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
-
- return check_ids(old_id, cur_id, idmap);
-}
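
/*
 * Editorial follow-on sketch, not kernel code: the temp-id trick from
 * the comment above, self-contained (the tiny idmap mirrors the
 * previous sketch; zero ids are handled by the caller here).  Two
 * linked old registers (same old id) meeting two independent cur
 * registers (id 0) must fail: the second materialized temp id cannot
 * match the first mapping.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static struct { uint32_t old, cur; } map[16];
static unsigned int cnt, tmp_id_gen = 1000;	/* above the real id range */

static bool check_ids2(uint32_t old_id, uint32_t cur_id)
{
	unsigned int i;

	for (i = 0; i < cnt; i++) {
		if (map[i].old == old_id)
			return map[i].cur == cur_id;
		if (map[i].cur == cur_id)
			return false;
	}
	if (cnt == 16)
		return false;
	map[cnt].old = old_id;
	map[cnt].cur = cur_id;
	cnt++;
	return true;
}

static bool check_scalar_ids2(uint32_t old_id, uint32_t cur_id)
{
	if (!old_id)
		return true;
	if (!cur_id)
		cur_id = ++tmp_id_gen;	/* materialize a unique temp id */
	return check_ids2(old_id, cur_id);
}

int main(void)
{
	/* old: r6.id = 7 and r7.id = 7 (linked); cur: both id 0 */
	assert(check_scalar_ids2(7, 0));	/* 7 -> temp 1001 */
	assert(!check_scalar_ids2(7, 0));	/* 7 -> temp 1002: mismatch */
	return 0;
}
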
-
-static void clean_func_state(struct bpf_verifier_env *env,
- struct bpf_func_state *st,
- u32 ip)
-{
- u16 live_regs = env->insn_aux_data[ip].live_regs_before;
- int i, j;
-
- for (i = 0; i < BPF_REG_FP; i++) {
- /* liveness must not touch this register anymore */
- if (!(live_regs & BIT(i)))
- /* since the register is unused, clear its state
- * to make further comparison simpler
- */
- __mark_reg_not_init(env, &st->regs[i]);
- }
-
- for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
- if (!bpf_stack_slot_alive(env, st->frameno, i)) {
- __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
- for (j = 0; j < BPF_REG_SIZE; j++)
- st->stack[i].slot_type[j] = STACK_INVALID;
- }
- }
-}
-
-static void clean_verifier_state(struct bpf_verifier_env *env,
- struct bpf_verifier_state *st)
-{
- int i, ip;
-
- bpf_live_stack_query_init(env, st);
- st->cleaned = true;
- for (i = 0; i <= st->curframe; i++) {
- ip = frame_insn_idx(st, i);
- clean_func_state(env, st->frame[i], ip);
- }
-}
-
-/* the parentage chains form a tree.
- * the verifier states are added to state lists at given insn and
- * pushed into state stack for future exploration.
- * when the verifier reaches bpf_exit insn some of the verifier states
- * stored in the state lists have their final liveness state already,
- * but a lot of states will get revised from liveness point of view when
- * the verifier explores other branches.
- * Example:
- * 1: *(u64)(r10 - 8) = 1
- * 2: if r1 == 100 goto pc+1
- * 3: *(u64)(r10 - 8) = 2
- * 4: r0 = *(u64)(r10 - 8)
- * 5: exit
- * when the verifier reaches exit insn the stack slot -8 in the state list of
- * insn 2 is not yet marked alive. Then the verifier pops the other_branch
- * of insn 2 and goes exploring further. After the insn 4 read, liveness
- * analysis would propagate read mark for -8 at insn 2.
- *
- * Since the verifier pushes the branch states as it sees them while exploring
- * the program the condition of walking the branch instruction for the second
- * time means that all states below this branch were already explored and
- * their final liveness marks are already propagated.
- * Hence when the verifier completes the search of state list in is_state_visited()
- * we can call this clean_live_states() function to clear dead the registers and stack
- * slots to simplify state merging.
- *
- * Important note here that walking the same branch instruction in the callee
- * doesn't meant that the states are DONE. The verifier has to compare
- * the callsites
- */
-
/* Find id in idset and increment its count, or add new entry */
static void idset_cnt_inc(struct bpf_idset *idset, u32 id)
{
@@ -19745,8 +17273,8 @@ static u32 idset_cnt_get(struct bpf_idset *idset, u32 id)
* A register with a non-zero id is called singular if no other register shares
* the same base id. Such registers can be treated as independent (id=0).
*/
-static void clear_singular_ids(struct bpf_verifier_env *env,
- struct bpf_verifier_state *st)
+void bpf_clear_singular_ids(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st)
{
struct bpf_idset *idset = &env->idset_scratch;
struct bpf_func_state *func;
@@ -19767,1072 +17295,11 @@ static void clear_singular_ids(struct bpf_verifier_env *env,
continue;
if (!reg->id)
continue;
- if (idset_cnt_get(idset, reg->id & ~BPF_ADD_CONST) == 1) {
- reg->id = 0;
- reg->off = 0;
- }
+ if (idset_cnt_get(idset, reg->id & ~BPF_ADD_CONST) == 1)
+ clear_scalar_id(reg);
}));
}
-static void clean_live_states(struct bpf_verifier_env *env, int insn,
- struct bpf_verifier_state *cur)
-{
- struct bpf_verifier_state_list *sl;
- struct list_head *pos, *head;
-
- head = explored_state(env, insn);
- list_for_each(pos, head) {
- sl = container_of(pos, struct bpf_verifier_state_list, node);
- if (sl->state.branches)
- continue;
- if (sl->state.insn_idx != insn ||
- !same_callsites(&sl->state, cur))
- continue;
- if (sl->state.cleaned)
- /* all regs in this state in all frames were already marked */
- continue;
- if (incomplete_read_marks(env, &sl->state))
- continue;
- clean_verifier_state(env, &sl->state);
- }
-}
-
-static bool regs_exact(const struct bpf_reg_state *rold,
- const struct bpf_reg_state *rcur,
- struct bpf_idmap *idmap)
-{
- return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
- check_ids(rold->id, rcur->id, idmap) &&
- check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
-}
-
-enum exact_level {
- NOT_EXACT,
- EXACT,
- RANGE_WITHIN
-};
-
-/* Returns true if (rold safe implies rcur safe) */
-static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
- struct bpf_reg_state *rcur, struct bpf_idmap *idmap,
- enum exact_level exact)
-{
- if (exact == EXACT)
- return regs_exact(rold, rcur, idmap);
-
- if (rold->type == NOT_INIT)
- /* explored state can't have used this */
- return true;
-
- /* Enforce that register types have to match exactly, including their
- * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
- * rule.
- *
- * One can make a point that using a pointer register as unbounded
- * SCALAR would be technically acceptable, but this could lead to
- * pointer leaks because scalars are allowed to leak while pointers
- * are not. We could make this safe in special cases if root is
- * calling us, but it's probably not worth the hassle.
- *
- * Also, register types that are *not* MAYBE_NULL could technically be
- * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
- * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
- * to the same map).
- * However, if the old MAYBE_NULL register then got NULL checked,
- * doing so could have affected others with the same id, and we can't
- * check for that because we lost the id when we converted to
- * a non-MAYBE_NULL variant.
- * So, as a general rule we don't allow mixing MAYBE_NULL and
- * non-MAYBE_NULL registers as well.
- */
- if (rold->type != rcur->type)
- return false;
-
- switch (base_type(rold->type)) {
- case SCALAR_VALUE:
- if (env->explore_alu_limits) {
- /* explore_alu_limits disables tnum_in() and range_within()
- * logic and requires everything to be strict
- */
- return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
- check_scalar_ids(rold->id, rcur->id, idmap);
- }
- if (!rold->precise && exact == NOT_EXACT)
- return true;
- /*
- * Linked register tracking uses rold->id to detect relationships.
- * When rold->id == 0, the register is independent and any linking
- * in rcur only adds constraints. When rold->id != 0, we must verify
- * id mapping and (for BPF_ADD_CONST) offset consistency.
- *
- * +------------------+-----------+------------------+---------------+
- * | | rold->id | rold + ADD_CONST | rold->id == 0 |
- * |------------------+-----------+------------------+---------------|
- * | rcur->id | range,ids | false | range |
- * | rcur + ADD_CONST | false | range,ids,off | range |
- * | rcur->id == 0 | range,ids | false | range |
- * +------------------+-----------+------------------+---------------+
- *
- * Why check_ids() for scalar registers?
- *
- * Consider the following BPF code:
- * 1: r6 = ... unbound scalar, ID=a ...
- * 2: r7 = ... unbound scalar, ID=b ...
- * 3: if (r6 > r7) goto +1
- * 4: r6 = r7
- * 5: if (r6 > X) goto ...
- * 6: ... memory operation using r7 ...
- *
- * First verification path is [1-6]:
- * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
- * - at (5) r6 would be marked <= X, sync_linked_regs() would also mark
- * r7 <= X, because r6 and r7 share same id.
- * Next verification path is [1-4, 6].
- *
- * Instruction (6) would be reached in two states:
- * I. r6{.id=b}, r7{.id=b} via path 1-6;
- * II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
- *
- * Use check_ids() to distinguish these states.
- * ---
- * Also verify that new value satisfies old value range knowledge.
- */
-
- /*
- * ADD_CONST flags must match exactly: BPF_ADD_CONST32 and
- * BPF_ADD_CONST64 have different linking semantics in
- * sync_linked_regs() (alu32 zero-extends, alu64 does not),
- * so pruning across different flag types is unsafe.
- */
- if (rold->id &&
- (rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST))
- return false;
-
- /* Both have offset linkage: offsets must match */
- if ((rold->id & BPF_ADD_CONST) && rold->off != rcur->off)
- return false;
-
- if (!check_scalar_ids(rold->id, rcur->id, idmap))
- return false;
-
- return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
- case PTR_TO_MAP_KEY:
- case PTR_TO_MAP_VALUE:
- case PTR_TO_MEM:
- case PTR_TO_BUF:
- case PTR_TO_TP_BUFFER:
- /* If the new min/max/var_off satisfy the old ones and
- * everything else matches, we are OK.
- */
- return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
- range_within(rold, rcur) &&
- tnum_in(rold->var_off, rcur->var_off) &&
- check_ids(rold->id, rcur->id, idmap) &&
- check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
- case PTR_TO_PACKET_META:
- case PTR_TO_PACKET:
- /* We must have at least as much range as the old ptr
- * did, so that any accesses which were safe before are
- * still safe. This is true even if old range < old off,
- * since someone could have accessed through (ptr - k), or
- * even done ptr -= k in a register, to get a safe access.
- */
- if (rold->range < 0 || rcur->range < 0) {
- /* special case for [BEYOND|AT]_PKT_END */
- if (rold->range != rcur->range)
- return false;
- } else if (rold->range > rcur->range) {
- return false;
- }
- /* If the offsets don't match, we can't trust our alignment;
- * nor can we be sure that we won't fall out of range.
- */
- if (rold->off != rcur->off)
- return false;
- /* id relations must be preserved */
- if (!check_ids(rold->id, rcur->id, idmap))
- return false;
- /* new val must satisfy old val knowledge */
- return range_within(rold, rcur) &&
- tnum_in(rold->var_off, rcur->var_off);
- case PTR_TO_STACK:
- /* two stack pointers are equal only if they're pointing to
- * the same stack frame, since fp-8 in foo != fp-8 in bar
- */
- return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
- case PTR_TO_ARENA:
- return true;
- case PTR_TO_INSN:
- return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
- rold->off == rcur->off && range_within(rold, rcur) &&
- tnum_in(rold->var_off, rcur->var_off);
- default:
- return regs_exact(rold, rcur, idmap);
- }
-}
-
-static struct bpf_reg_state unbound_reg;
-
-static __init int unbound_reg_init(void)
-{
- __mark_reg_unknown_imprecise(&unbound_reg);
- return 0;
-}
-late_initcall(unbound_reg_init);
-
-static bool is_stack_all_misc(struct bpf_verifier_env *env,
- struct bpf_stack_state *stack)
-{
- u32 i;
-
- for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i) {
- if ((stack->slot_type[i] == STACK_MISC) ||
- (stack->slot_type[i] == STACK_INVALID && env->allow_uninit_stack))
- continue;
- return false;
- }
-
- return true;
-}
-
-static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env,
- struct bpf_stack_state *stack)
-{
- if (is_spilled_scalar_reg64(stack))
- return &stack->spilled_ptr;
-
- if (is_stack_all_misc(env, stack))
- return &unbound_reg;
-
- return NULL;
-}
-
-static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
- struct bpf_func_state *cur, struct bpf_idmap *idmap,
- enum exact_level exact)
-{
- int i, spi;
-
- /* walk slots of the explored stack and ignore any additional
- * slots in the current stack, since explored(safe) state
- * didn't use them
- */
- for (i = 0; i < old->allocated_stack; i++) {
- struct bpf_reg_state *old_reg, *cur_reg;
-
- spi = i / BPF_REG_SIZE;
-
- if (exact == EXACT &&
- (i >= cur->allocated_stack ||
- old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
- cur->stack[spi].slot_type[i % BPF_REG_SIZE]))
- return false;
-
- if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
- continue;
-
- if (env->allow_uninit_stack &&
- old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
- continue;
-
- /* explored stack has more populated slots than current stack
- * and these slots were used
- */
- if (i >= cur->allocated_stack)
- return false;
-
- /* 64-bit scalar spill vs all slots MISC and vice versa.
- * Load from all slots MISC produces unbound scalar.
- * Construct a fake register for such stack and call
- * regsafe() to ensure scalar ids are compared.
- */
- old_reg = scalar_reg_for_stack(env, &old->stack[spi]);
- cur_reg = scalar_reg_for_stack(env, &cur->stack[spi]);
- if (old_reg && cur_reg) {
- if (!regsafe(env, old_reg, cur_reg, idmap, exact))
- return false;
- i += BPF_REG_SIZE - 1;
- continue;
- }
-
- /* if old state was safe with misc data in the stack
- * it will be safe with zero-initialized stack.
- * The opposite is not true
- */
- if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
- cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
- continue;
- if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
- cur->stack[spi].slot_type[i % BPF_REG_SIZE])
- /* Ex: old explored (safe) state has STACK_SPILL in
- * this stack slot, but current has STACK_MISC ->
- * these verifier states are not equivalent,
- * return false to continue verification of this path
- */
- return false;
- if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
- continue;
- /* Both old and cur are having same slot_type */
- switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
- case STACK_SPILL:
- /* when explored and current stack slot are both storing
- * spilled registers, check that stored pointers types
- * are the same as well.
- * Ex: explored safe path could have stored
- * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
- * but current path has stored:
- * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
- * such verifier states are not equivalent.
- * return false to continue verification of this path
- */
- if (!regsafe(env, &old->stack[spi].spilled_ptr,
- &cur->stack[spi].spilled_ptr, idmap, exact))
- return false;
- break;
- case STACK_DYNPTR:
- old_reg = &old->stack[spi].spilled_ptr;
- cur_reg = &cur->stack[spi].spilled_ptr;
- if (old_reg->dynptr.type != cur_reg->dynptr.type ||
- old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
- !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
- return false;
- break;
- case STACK_ITER:
- old_reg = &old->stack[spi].spilled_ptr;
- cur_reg = &cur->stack[spi].spilled_ptr;
- /* iter.depth is not compared between states as it
- * doesn't matter for correctness and would otherwise
- * prevent convergence; we maintain it only to prevent
- * infinite loop check triggering, see
- * iter_active_depths_differ()
- */
- if (old_reg->iter.btf != cur_reg->iter.btf ||
- old_reg->iter.btf_id != cur_reg->iter.btf_id ||
- old_reg->iter.state != cur_reg->iter.state ||
- /* ignore {old_reg,cur_reg}->iter.depth, see above */
- !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
- return false;
- break;
- case STACK_IRQ_FLAG:
- old_reg = &old->stack[spi].spilled_ptr;
- cur_reg = &cur->stack[spi].spilled_ptr;
- if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap) ||
- old_reg->irq.kfunc_class != cur_reg->irq.kfunc_class)
- return false;
- break;
- case STACK_MISC:
- case STACK_ZERO:
- case STACK_INVALID:
- continue;
- /* Ensure that new unhandled slot types return false by default */
- default:
- return false;
- }
- }
- return true;
-}
-
-static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur,
- struct bpf_idmap *idmap)
-{
- int i;
-
- if (old->acquired_refs != cur->acquired_refs)
- return false;
-
- if (old->active_locks != cur->active_locks)
- return false;
-
- if (old->active_preempt_locks != cur->active_preempt_locks)
- return false;
-
- if (old->active_rcu_locks != cur->active_rcu_locks)
- return false;
-
- if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap))
- return false;
-
- if (!check_ids(old->active_lock_id, cur->active_lock_id, idmap) ||
- old->active_lock_ptr != cur->active_lock_ptr)
- return false;
-
- for (i = 0; i < old->acquired_refs; i++) {
- if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) ||
- old->refs[i].type != cur->refs[i].type)
- return false;
- switch (old->refs[i].type) {
- case REF_TYPE_PTR:
- case REF_TYPE_IRQ:
- break;
- case REF_TYPE_LOCK:
- case REF_TYPE_RES_LOCK:
- case REF_TYPE_RES_LOCK_IRQ:
- if (old->refs[i].ptr != cur->refs[i].ptr)
- return false;
- break;
- default:
- WARN_ONCE(1, "Unhandled enum type for reference state: %d\n", old->refs[i].type);
- return false;
- }
- }
-
- return true;
-}
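-
-/* Sketch (editor's illustration, not kernel code) of why IDs go through
- * check_ids() and an idmap instead of being compared directly: two
- * equivalent states may use different ID numbers for the same logical
- * object, so equivalence requires a consistent renaming, not equality:
- *
- * old: r1.id = 3, r2.id = 3 cur: r1.id = 7, r2.id = 7
- * check_ids(3, 7, idmap) -> true, records the 3 <-> 7 pairing
- * check_ids(3, 9, idmap) -> false, 3 is already paired with 7
- */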
-
-/* compare two verifier states
- *
- * all states stored in state_list are known to be valid, since
- * verifier reached 'bpf_exit' instruction through them
- *
- * this function is called when the verifier explores different branches of
- * execution popped from the state stack. If it sees an old state that has
- * a more strict register state and a more strict stack state, then this
- * execution branch doesn't need to be explored further, since the verifier
- * already concluded that the more strict state leads to a valid finish.
- *
- * Therefore two states are equivalent if the explored register state and
- * stack state are more conservative than the current ones.
- * Example:
- * explored current
- * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
- * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
- *
- * In other words, if the current stack state (the one being explored) has
- * more valid slots than the old one that already passed validation, the
- * verifier can stop exploring and conclude that the current state is valid too
- *
- * Similarly with registers. If the explored state has a register type marked
- * invalid while the register type in the current state is meaningful, it
- * means that the current state will reach the 'bpf_exit' instruction safely
- */
-static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
- struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact)
-{
- u16 live_regs = env->insn_aux_data[insn_idx].live_regs_before;
- u16 i;
-
- if (old->callback_depth > cur->callback_depth)
- return false;
-
- for (i = 0; i < MAX_BPF_REG; i++)
- if (((1 << i) & live_regs) &&
- !regsafe(env, &old->regs[i], &cur->regs[i],
- &env->idmap_scratch, exact))
- return false;
-
- if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
- return false;
-
- return true;
-}
-
-static void reset_idmap_scratch(struct bpf_verifier_env *env)
-{
- struct bpf_idmap *idmap = &env->idmap_scratch;
-
- idmap->tmp_id_gen = env->id_gen;
- idmap->cnt = 0;
-}
-
-static bool states_equal(struct bpf_verifier_env *env,
- struct bpf_verifier_state *old,
- struct bpf_verifier_state *cur,
- enum exact_level exact)
-{
- u32 insn_idx;
- int i;
-
- if (old->curframe != cur->curframe)
- return false;
-
- reset_idmap_scratch(env);
-
- /* Verification state from speculative execution simulation
- * must never prune a non-speculative execution one.
- */
- if (old->speculative && !cur->speculative)
- return false;
-
- if (old->in_sleepable != cur->in_sleepable)
- return false;
-
- if (!refsafe(old, cur, &env->idmap_scratch))
- return false;
-
- /* for states to be equal callsites have to be the same
- * and all frame states need to be equivalent
- */
- for (i = 0; i <= old->curframe; i++) {
- insn_idx = frame_insn_idx(old, i);
- if (old->frame[i]->callsite != cur->frame[i]->callsite)
- return false;
- if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact))
- return false;
- }
- return true;
-}
-
-/* find precise scalars in the previous equivalent state and
- * propagate them into the current state
- */
-static int propagate_precision(struct bpf_verifier_env *env,
- const struct bpf_verifier_state *old,
- struct bpf_verifier_state *cur,
- bool *changed)
-{
- struct bpf_reg_state *state_reg;
- struct bpf_func_state *state;
- int i, err = 0, fr;
- bool first;
-
- for (fr = old->curframe; fr >= 0; fr--) {
- state = old->frame[fr];
- state_reg = state->regs;
- first = true;
- for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
- if (state_reg->type != SCALAR_VALUE ||
- !state_reg->precise)
- continue;
- if (env->log.level & BPF_LOG_LEVEL2) {
- if (first)
- verbose(env, "frame %d: propagating r%d", fr, i);
- else
- verbose(env, ",r%d", i);
- }
- bt_set_frame_reg(&env->bt, fr, i);
- first = false;
- }
-
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- if (!is_spilled_reg(&state->stack[i]))
- continue;
- state_reg = &state->stack[i].spilled_ptr;
- if (state_reg->type != SCALAR_VALUE ||
- !state_reg->precise)
- continue;
- if (env->log.level & BPF_LOG_LEVEL2) {
- if (first)
- verbose(env, "frame %d: propagating fp%d",
- fr, (-i - 1) * BPF_REG_SIZE);
- else
- verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
- }
- bt_set_frame_slot(&env->bt, fr, i);
- first = false;
- }
- if (!first && (env->log.level & BPF_LOG_LEVEL2))
- verbose(env, "\n");
- }
-
- err = __mark_chain_precision(env, cur, -1, changed);
- if (err < 0)
- return err;
-
- return 0;
-}
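-
-/* Example of the BPF_LOG_LEVEL2 output produced by the loops above
- * (illustrative register/slot numbers):
- *
- * frame 0: propagating r1,r6,fp-8
- * frame 1: propagating r7
- *
- * i.e. one line per frame that had at least one precise scalar to propagate.
- */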
-
-#define MAX_BACKEDGE_ITERS 64
-
-/* Propagate read and precision marks from visit->backedges[*].state->equal_state
- * to the corresponding parent states of visit->backedges[*].state until a
- * fixed point is reached, then free visit->backedges.
- * After this function executes, incomplete_read_marks() will return false
- * for all states corresponding to @visit->callchain.
- */
-static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit)
-{
- struct bpf_scc_backedge *backedge;
- struct bpf_verifier_state *st;
- bool changed;
- int i, err;
-
- i = 0;
- do {
- if (i++ > MAX_BACKEDGE_ITERS) {
- if (env->log.level & BPF_LOG_LEVEL2)
- verbose(env, "%s: too many iterations\n", __func__);
- for (backedge = visit->backedges; backedge; backedge = backedge->next)
- mark_all_scalars_precise(env, &backedge->state);
- break;
- }
- changed = false;
- for (backedge = visit->backedges; backedge; backedge = backedge->next) {
- st = &backedge->state;
- err = propagate_precision(env, st->equal_state, st, &changed);
- if (err)
- return err;
- }
- } while (changed);
-
- free_backedges(visit);
- return 0;
-}
-
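-/* Cheap, conservative "might be looping" check: byte-compare the
- * value-tracking portion of every register in the top frame (all fields
- * laid out before 'frameno'); fields from 'frameno' onwards are irrelevant
- * here. False positives are fine -- states_equal(..., EXACT) at the call
- * site makes the final decision.
- */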
-static bool states_maybe_looping(struct bpf_verifier_state *old,
- struct bpf_verifier_state *cur)
-{
- struct bpf_func_state *fold, *fcur;
- int i, fr = cur->curframe;
-
- if (old->curframe != fr)
- return false;
-
- fold = old->frame[fr];
- fcur = cur->frame[fr];
- for (i = 0; i < MAX_BPF_REG; i++)
- if (memcmp(&fold->regs[i], &fcur->regs[i],
- offsetof(struct bpf_reg_state, frameno)))
- return false;
- return true;
-}
-
-static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
-{
- return env->insn_aux_data[insn_idx].is_iter_next;
-}
-
-/* is_state_visited() handles iter_next() (see process_iter_next_call() for
- * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
- * states to match, which otherwise would look like an infinite loop. So while
- * iter_next() calls are taken care of, we still need to be careful and
- * prevent erroneous and overly eager declarations of "infinite loop" when
- * iterators are involved.
- *
- * Here's a situation in pseudo-BPF assembly form:
- *
- * 0: again: ; set up iter_next() call args
- * 1: r1 = &it ; <CHECKPOINT HERE>
- * 2: call bpf_iter_num_next ; this is iter_next() call
- * 3: if r0 == 0 goto done
- * 4: ... something useful here ...
- * 5: goto again ; another iteration
- * 6: done:
- * 7: r1 = &it
- * 8: call bpf_iter_num_destroy ; clean up iter state
- * 9: exit
- *
- * This is a typical loop. Let's assume that we have a prune point at 1:,
- * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
- * again`, assuming other heuristics don't get in the way).
- *
- * When we first come to 1:, let's say we have some state X. We proceed
- * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
- * Now we come back to validate that forked ACTIVE state. We proceed through
- * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
- * are converging. But the problem is that we don't know that yet, as this
- * convergence has to happen at iter_next() call site only. So if nothing is
- * done, at 1: verifier will use bounded loop logic and declare infinite
- * looping (and would be *technically* correct, if not for iterator's
- * "eventual sticky NULL" contract, see process_iter_next_call()). But we
- * don't want that. So what we do in process_iter_next_call() when we go on
- * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's
- * a different iteration. So when we suspect an infinite loop, we additionally
- * check if any of the *ACTIVE* iterator states depths differ. If yes, we
- * pretend we are not looping and wait for next iter_next() call.
- *
- * This only applies to ACTIVE state. In DRAINED state we don't expect to
- * loop, because that would actually mean infinite loop, as DRAINED state is
- * "sticky", and so we'll keep returning into the same instruction with the
- * same state (at least in one of possible code paths).
- *
- * This approach allows keeping the infinite loop heuristic even in the face
- * of an active iterator. E.g., the C snippet below is (and will remain)
- * detected as infinitely looping:
- *
- * struct bpf_iter_num it;
- * int *p, x;
- *
- * bpf_iter_num_new(&it, 0, 10);
- * while ((p = bpf_iter_num_next(&it))) {
- * x = *p;
- * while (x--) {} // <<-- infinite loop here
- * }
- *
- */
-static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
-{
- struct bpf_reg_state *slot, *cur_slot;
- struct bpf_func_state *state;
- int i, fr;
-
- for (fr = old->curframe; fr >= 0; fr--) {
- state = old->frame[fr];
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- if (state->stack[i].slot_type[0] != STACK_ITER)
- continue;
-
- slot = &state->stack[i].spilled_ptr;
- if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
- continue;
-
- cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
- if (cur_slot->iter.depth != slot->iter.depth)
- return true;
- }
- }
- return false;
-}
-
-static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
-{
- struct bpf_verifier_state_list *new_sl;
- struct bpf_verifier_state_list *sl;
- struct bpf_verifier_state *cur = env->cur_state, *new;
- bool force_new_state, add_new_state, loop;
- int n, err, states_cnt = 0;
- struct list_head *pos, *tmp, *head;
-
- force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) ||
- /* Avoid accumulating infinitely long jmp history */
- cur->jmp_history_cnt > 40;
-
- /* bpf progs typically have pruning point every 4 instructions
- * http://vger.kernel.org/bpfconf2019.html#session-1
- * Do not add new state for future pruning if the verifier hasn't seen
- * at least 2 jumps and at least 8 instructions.
- * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
- * In tests it amounts to up to a 50% reduction in total verifier
- * memory consumption and a 20% verifier time speedup.
- */
- add_new_state = force_new_state;
- if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
- env->insn_processed - env->prev_insn_processed >= 8)
- add_new_state = true;
-
- clean_live_states(env, insn_idx, cur);
-
- loop = false;
- head = explored_state(env, insn_idx);
- list_for_each_safe(pos, tmp, head) {
- sl = container_of(pos, struct bpf_verifier_state_list, node);
- states_cnt++;
- if (sl->state.insn_idx != insn_idx)
- continue;
-
- if (sl->state.branches) {
- struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
-
- if (frame->in_async_callback_fn &&
- frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
- /* Different async_entry_cnt means that the verifier is
- * processing another entry into async callback.
- * Seeing the same state is not an indication of infinite
- * loop or infinite recursion.
- * But finding the same state doesn't mean that it's safe
- * to stop processing the current state. The previous state
- * hasn't yet reached bpf_exit, since state.branches > 0.
- * Checking in_async_callback_fn alone is not enough either,
- * since the verifier still needs to catch infinite loops
- * inside async callbacks.
- */
- goto skip_inf_loop_check;
- }
- /* BPF open-coded iterators loop detection is special.
- * states_maybe_looping() logic is too simplistic in detecting
- * states that *might* be equivalent, because it doesn't know
- * about ID remapping, so don't even perform it.
- * See process_iter_next_call() and iter_active_depths_differ()
- * for overview of the logic. When current and one of parent
- * states are detected as equivalent, it's a good thing: we prove
- * convergence and can stop simulating further iterations.
- * It's safe to assume that iterator loop will finish, taking into
- * account iter_next() contract of eventually returning
- * sticky NULL result.
- *
- * Note that states have to be compared exactly in this case because
- * read and precision marks might not be finalized inside the loop.
- * E.g. as in the program below:
- *
- * 1. r7 = -16
- * 2. r6 = bpf_get_prandom_u32()
- * 3. while (bpf_iter_num_next(&fp[-8])) {
- * 4. if (r6 != 42) {
- * 5. r7 = -32
- * 6. r6 = bpf_get_prandom_u32()
- * 7. continue
- * 8. }
- * 9. r0 = r10
- * 10. r0 += r7
- * 11. r8 = *(u64 *)(r0 + 0)
- * 12. r6 = bpf_get_prandom_u32()
- * 13. }
- *
- * Here the verifier would first visit path 1-3, create a checkpoint at 3
- * with r7=-16, then continue to 4-7,3. The existing checkpoint at 3 does
- * not have a read or precision mark for r7 yet, thus an inexact state
- * comparison would discard the current state with r7=-32
- * => the unsafe memory access at 11 would not be caught.
- */
- if (is_iter_next_insn(env, insn_idx)) {
- if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
- struct bpf_func_state *cur_frame;
- struct bpf_reg_state *iter_state, *iter_reg;
- int spi;
-
- cur_frame = cur->frame[cur->curframe];
- /* btf_check_iter_kfuncs() enforces that
- * iter state pointer is always the first arg
- */
- iter_reg = &cur_frame->regs[BPF_REG_1];
- /* current state is valid due to states_equal(),
- * so we can assume valid iter and reg state,
- * no need for extra (re-)validations
- */
- spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
- iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
- if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
- loop = true;
- goto hit;
- }
- }
- goto skip_inf_loop_check;
- }
- if (is_may_goto_insn_at(env, insn_idx)) {
- if (sl->state.may_goto_depth != cur->may_goto_depth &&
- states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
- loop = true;
- goto hit;
- }
- }
- if (bpf_calls_callback(env, insn_idx)) {
- if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
- loop = true;
- goto hit;
- }
- goto skip_inf_loop_check;
- }
- /* attempt to detect infinite loop to avoid unnecessary doomed work */
- if (states_maybe_looping(&sl->state, cur) &&
- states_equal(env, &sl->state, cur, EXACT) &&
- !iter_active_depths_differ(&sl->state, cur) &&
- sl->state.may_goto_depth == cur->may_goto_depth &&
- sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
- verbose_linfo(env, insn_idx, "; ");
- verbose(env, "infinite loop detected at insn %d\n", insn_idx);
- verbose(env, "cur state:");
- print_verifier_state(env, cur, cur->curframe, true);
- verbose(env, "old state:");
- print_verifier_state(env, &sl->state, cur->curframe, true);
- return -EINVAL;
- }
- /* if the verifier is processing a loop, avoid adding new state
- * too often, since different loop iterations have distinct
- * states and may not help future pruning.
- * This threshold shouldn't be too low to make sure that
- * a loop with large bound will be rejected quickly.
- * The most abusive loop will be:
- * r1 += 1
- * if r1 < 1000000 goto pc-2
- * 1M insn_processed limit / 100 == 10k peak states.
- * This threshold shouldn't be too high either, since states
- * at the end of the loop are likely to be useful in pruning.
- */
-skip_inf_loop_check:
- if (!force_new_state &&
- env->jmps_processed - env->prev_jmps_processed < 20 &&
- env->insn_processed - env->prev_insn_processed < 100)
- add_new_state = false;
- goto miss;
- }
- /* See comments for mark_all_regs_read_and_precise() */
- loop = incomplete_read_marks(env, &sl->state);
- if (states_equal(env, &sl->state, cur, loop ? RANGE_WITHIN : NOT_EXACT)) {
-hit:
- sl->hit_cnt++;
-
- /* if the previous state reached the exit with precision and
- * the current state is equivalent to it (except for precision marks),
- * the precision needs to be propagated back into
- * the current state.
- */
- err = 0;
- if (is_jmp_point(env, env->insn_idx))
- err = push_jmp_history(env, cur, 0, 0);
- err = err ? : propagate_precision(env, &sl->state, cur, NULL);
- if (err)
- return err;
- /* When processing iterator-based loops above, propagate_liveness and
- * propagate_precision calls are not sufficient to transfer all relevant
- * read and precision marks. E.g. consider the following case:
- *
- * .-> A --. Assume the states are visited in the order A, B, C.
- * | | | Assume that state B reaches a state equivalent to state A.
- * | v v At this point, state C is not processed yet, so state A
- * '-- B C has not received any read or precision marks from C.
- * Thus, marks propagated from A to B are incomplete.
- *
- * The verifier mitigates this by performing the following steps:
- *
- * - Prior to the main verification pass, strongly connected components
- * (SCCs) are computed over the program's control flow graph,
- * intraprocedurally.
- *
- * - During the main verification pass, `maybe_enter_scc()` checks
- * whether the current verifier state is entering an SCC. If so, an
- * instance of a `bpf_scc_visit` object is created, and the state
- * entering the SCC is recorded as the entry state.
- *
- * - This instance is associated not with the SCC itself, but with a
- * `bpf_scc_callchain`: a tuple consisting of the call sites leading to
- * the SCC and the SCC id. See `compute_scc_callchain()`.
- *
- * - When a verification path encounters a `states_equal(...,
- * RANGE_WITHIN)` condition, there exists a call chain describing the
- * current state and a corresponding `bpf_scc_visit` instance. A copy
- * of the current state is created and added to
- * `bpf_scc_visit->backedges`.
- *
- * - When a verification path terminates, `maybe_exit_scc()` is called
- * from `update_branch_counts()`. For states with `branches == 0`, it
- * checks whether the state is the entry state of any `bpf_scc_visit`
- * instance. If it is, this indicates that all paths originating from
- * this SCC visit have been explored. `propagate_backedges()` is then
- * called, which propagates read and precision marks through the
- * backedges until a fixed point is reached.
- * (In the earlier example, this would propagate marks from A to B,
- * from C to A, and then again from A to B.)
- *
- * A note on callchains
- * --------------------
- *
- * Consider the following example:
- *
- * void foo() { loop { ... SCC#1 ... } }
- * void main() {
- * A: foo();
- * B: ...
- * C: foo();
- * }
- *
- * Here, there are two distinct callchains leading to SCC#1:
- * - (A, SCC#1)
- * - (C, SCC#1)
- *
- * Each callchain identifies a separate `bpf_scc_visit` instance that
- * accumulates backedge states. The `propagate_{liveness,precision}()`
- * functions traverse the parent state of each backedge state, which
- * means these parent states must remain valid (i.e., not freed) while
- * the corresponding `bpf_scc_visit` instance exists.
- *
- * Associating `bpf_scc_visit` instances directly with SCCs instead of
- * callchains would break this invariant:
- * - States explored during `C: foo()` would contribute backedges to
- * SCC#1, but SCC#1 would only be exited once the exploration of
- * `A: foo()` completes.
- * - By that time, the states explored between `A: foo()` and `C: foo()`
- * (i.e., `B: ...`) may have already been freed, causing the parent
- * links for states from `C: foo()` to become invalid.
- */
- if (loop) {
- struct bpf_scc_backedge *backedge;
-
- backedge = kzalloc_obj(*backedge,
- GFP_KERNEL_ACCOUNT);
- if (!backedge)
- return -ENOMEM;
- err = copy_verifier_state(&backedge->state, cur);
- backedge->state.equal_state = &sl->state;
- backedge->state.insn_idx = insn_idx;
- err = err ?: add_scc_backedge(env, &sl->state, backedge);
- if (err) {
- free_verifier_state(&backedge->state, false);
- kfree(backedge);
- return err;
- }
- }
- return 1;
- }
-miss:
- /* when a new state is not going to be added, do not increase the miss
- * count. Otherwise several loop iterations will remove the state
- * recorded earlier. The goal of these heuristics is to keep
- * states from some iterations of the loop (some in the beginning
- * and some at the end) to help pruning.
- */
- if (add_new_state)
- sl->miss_cnt++;
- /* heuristic to determine whether this state is beneficial
- * to keep checking from a state equivalence point of view.
- * Higher numbers increase max_states_per_insn and verification time,
- * but do not meaningfully decrease insn_processed.
- * 'n' controls how many times state could miss before eviction.
- * Use bigger 'n' for checkpoints because evicting checkpoint states
- * too early would hinder iterator convergence.
- */
- n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
- if (sl->miss_cnt > sl->hit_cnt * n + n) {
- /* the state is unlikely to be useful. Remove it to
- * speed up verification
- */
- sl->in_free_list = true;
- list_del(&sl->node);
- list_add(&sl->node, &env->free_list);
- env->free_list_size++;
- env->explored_states_size--;
- maybe_free_verifier_state(env, sl);
- }
- }
-
- if (env->max_states_per_insn < states_cnt)
- env->max_states_per_insn = states_cnt;
-
- if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
- return 0;
-
- if (!add_new_state)
- return 0;
-
- /* There were no equivalent states, remember the current one.
- * Technically the current state is not proven to be safe yet,
- * but it will either reach the outermost bpf_exit (which means it's safe)
- * or it will be rejected. When there are no loops the verifier won't be
- * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
- * again on the way to bpf_exit.
- * When looping the sl->state.branches will be > 0 and this state
- * will not be considered for equivalence until branches == 0.
- */
- new_sl = kzalloc_obj(struct bpf_verifier_state_list, GFP_KERNEL_ACCOUNT);
- if (!new_sl)
- return -ENOMEM;
- env->total_states++;
- env->explored_states_size++;
- update_peak_states(env);
- env->prev_jmps_processed = env->jmps_processed;
- env->prev_insn_processed = env->insn_processed;
-
- /* forget precise markings we inherited, see __mark_chain_precision */
- if (env->bpf_capable)
- mark_all_scalars_imprecise(env, cur);
-
- clear_singular_ids(env, cur);
-
- /* add new state to the head of linked list */
- new = &new_sl->state;
- err = copy_verifier_state(new, cur);
- if (err) {
- free_verifier_state(new, false);
- kfree(new_sl);
- return err;
- }
- new->insn_idx = insn_idx;
- verifier_bug_if(new->branches != 1, env,
- "%s:branches_to_explore=%d insn %d",
- __func__, new->branches, insn_idx);
- err = maybe_enter_scc(env, new);
- if (err) {
- free_verifier_state(new, false);
- kfree(new_sl);
- return err;
- }
-
- cur->parent = new;
- cur->first_insn_idx = insn_idx;
- cur->dfs_depth = new->dfs_depth + 1;
- clear_jmp_history(cur);
- list_add(&new_sl->node, head);
- return 0;
-}
-
/* Return true if it's OK to have the same insn return a different type. */
static bool reg_type_mismatch_ok(enum bpf_reg_type type)
{
@@ -20933,13 +17400,16 @@ static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type typ
}
enum {
- PROCESS_BPF_EXIT = 1
+ PROCESS_BPF_EXIT = 1,
+ INSN_IDX_UPDATED = 2,
};
static int process_bpf_exit_full(struct bpf_verifier_env *env,
bool *do_print_state,
bool exception_exit)
{
+ struct bpf_func_state *cur_frame = cur_func(env);
+
/* We must do check_reference_leak here before
* prepare_func_exit to handle the case when
* state->curframe > 0, it may be a callback function,
@@ -20971,10 +17441,24 @@ static int process_bpf_exit_full(struct bpf_verifier_env *env,
if (err)
return err;
*do_print_state = true;
- return 0;
+ return INSN_IDX_UPDATED;
}
- err = check_return_code(env, BPF_REG_0, "R0");
+ /*
+ * Return from a regular global subprogram differs from return
+ * from the main program or async/exception callback.
+ * Main program exit implies return code restrictions
+ * that depend on program type.
+ * Exit from exception callback is equivalent to main program exit.
+ * Exit from async callback implies return code restrictions
+ * that depend on async scheduling mechanism.
+ */
+ if (cur_frame->subprogno &&
+ !cur_frame->in_async_callback_fn &&
+ !cur_frame->in_exception_callback_fn)
+ err = check_global_subprog_return_code(env);
+ else
+ err = check_return_code(env, BPF_REG_0, "R0");
if (err)
return err;
return PROCESS_BPF_EXIT;
@@ -20986,19 +17470,16 @@ static int indirect_jump_min_max_index(struct bpf_verifier_env *env,
u32 *pmin_index, u32 *pmax_index)
{
struct bpf_reg_state *reg = reg_state(env, regno);
- u64 min_index, max_index;
+ u64 min_index = reg->umin_value;
+ u64 max_index = reg->umax_value;
const u32 size = 8;
- if (check_add_overflow(reg->umin_value, reg->off, &min_index) ||
- (min_index > (u64) U32_MAX * size)) {
- verbose(env, "the sum of R%u umin_value %llu and off %u is too big\n",
- regno, reg->umin_value, reg->off);
+ if (min_index > (u64) U32_MAX * size) {
+ verbose(env, "the sum of R%u umin_value %llu is too big\n", regno, reg->umin_value);
return -ERANGE;
}
- if (check_add_overflow(reg->umax_value, reg->off, &max_index) ||
- (max_index > (u64) U32_MAX * size)) {
- verbose(env, "the sum of R%u umax_value %llu and off %u is too big\n",
- regno, reg->umax_value, reg->off);
+ if (max_index > (u64) U32_MAX * size) {
+ verbose(env, "the sum of R%u umax_value %llu is too big\n", regno, reg->umax_value);
return -ERANGE;
}
@@ -21048,13 +17529,13 @@ static int check_indirect_jump(struct bpf_verifier_env *env, struct bpf_insn *in
/* Ensure that the buffer is large enough */
if (!env->gotox_tmp_buf || env->gotox_tmp_buf->cnt < max_index - min_index + 1) {
- env->gotox_tmp_buf = iarray_realloc(env->gotox_tmp_buf,
- max_index - min_index + 1);
+ env->gotox_tmp_buf = bpf_iarray_realloc(env->gotox_tmp_buf,
+ max_index - min_index + 1);
if (!env->gotox_tmp_buf)
return -ENOMEM;
}
- n = copy_insn_array_uniq(map, min_index, max_index, env->gotox_tmp_buf->items);
+ n = bpf_copy_insn_array_uniq(map, min_index, max_index, env->gotox_tmp_buf->items);
if (n < 0)
return n;
if (n == 0) {
@@ -21070,7 +17551,7 @@ static int check_indirect_jump(struct bpf_verifier_env *env, struct bpf_insn *in
return PTR_ERR(other_branch);
}
env->insn_idx = env->gotox_tmp_buf->items[n-1];
- return 0;
+ return INSN_IDX_UPDATED;
}
static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state)
@@ -21079,81 +17560,48 @@ static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state)
struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx];
u8 class = BPF_CLASS(insn->code);
- if (class == BPF_ALU || class == BPF_ALU64) {
- err = check_alu_op(env, insn);
- if (err)
- return err;
-
- } else if (class == BPF_LDX) {
- bool is_ldsx = BPF_MODE(insn->code) == BPF_MEMSX;
+ switch (class) {
+ case BPF_ALU:
+ case BPF_ALU64:
+ return check_alu_op(env, insn);
- /* Check for reserved fields is already done in
- * resolve_pseudo_ldimm64().
- */
- err = check_load_mem(env, insn, false, is_ldsx, true, "ldx");
- if (err)
- return err;
- } else if (class == BPF_STX) {
- if (BPF_MODE(insn->code) == BPF_ATOMIC) {
- err = check_atomic(env, insn);
- if (err)
- return err;
- env->insn_idx++;
- return 0;
- }
+ case BPF_LDX:
+ return check_load_mem(env, insn, false,
+ BPF_MODE(insn->code) == BPF_MEMSX,
+ true, "ldx");
- if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
- verbose(env, "BPF_STX uses reserved fields\n");
- return -EINVAL;
- }
+ case BPF_STX:
+ if (BPF_MODE(insn->code) == BPF_ATOMIC)
+ return check_atomic(env, insn);
+ return check_store_reg(env, insn, false);
- err = check_store_reg(env, insn, false);
- if (err)
- return err;
- } else if (class == BPF_ST) {
+ case BPF_ST: {
enum bpf_reg_type dst_reg_type;
- if (BPF_MODE(insn->code) != BPF_MEM ||
- insn->src_reg != BPF_REG_0) {
- verbose(env, "BPF_ST uses reserved fields\n");
- return -EINVAL;
- }
- /* check src operand */
err = check_reg_arg(env, insn->dst_reg, SRC_OP);
if (err)
return err;
dst_reg_type = cur_regs(env)[insn->dst_reg].type;
- /* check that memory (dst_reg + off) is writeable */
err = check_mem_access(env, env->insn_idx, insn->dst_reg,
insn->off, BPF_SIZE(insn->code),
BPF_WRITE, -1, false, false);
if (err)
return err;
- err = save_aux_ptr_type(env, dst_reg_type, false);
- if (err)
- return err;
- } else if (class == BPF_JMP || class == BPF_JMP32) {
+ return save_aux_ptr_type(env, dst_reg_type, false);
+ }
+ case BPF_JMP:
+ case BPF_JMP32: {
u8 opcode = BPF_OP(insn->code);
env->jmps_processed++;
if (opcode == BPF_CALL) {
- if (BPF_SRC(insn->code) != BPF_K ||
- (insn->src_reg != BPF_PSEUDO_KFUNC_CALL &&
- insn->off != 0) ||
- (insn->src_reg != BPF_REG_0 &&
- insn->src_reg != BPF_PSEUDO_CALL &&
- insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
- insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
- verbose(env, "BPF_CALL uses reserved fields\n");
- return -EINVAL;
- }
-
if (env->cur_state->active_locks) {
if ((insn->src_reg == BPF_REG_0 &&
- insn->imm != BPF_FUNC_spin_unlock) ||
+ insn->imm != BPF_FUNC_spin_unlock &&
+ insn->imm != BPF_FUNC_kptr_xchg) ||
(insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
(insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) {
verbose(env,
@@ -21161,84 +17609,45 @@ static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state)
return -EINVAL;
}
}
- if (insn->src_reg == BPF_PSEUDO_CALL) {
- err = check_func_call(env, insn, &env->insn_idx);
- } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
- err = check_kfunc_call(env, insn, &env->insn_idx);
- if (!err && is_bpf_throw_kfunc(insn))
- return process_bpf_exit_full(env, do_print_state, true);
- } else {
- err = check_helper_call(env, insn, &env->insn_idx);
- }
- if (err)
- return err;
-
mark_reg_scratched(env, BPF_REG_0);
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ return check_func_call(env, insn, &env->insn_idx);
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
+ return check_kfunc_call(env, insn, &env->insn_idx);
+ return check_helper_call(env, insn, &env->insn_idx);
} else if (opcode == BPF_JA) {
- if (BPF_SRC(insn->code) == BPF_X) {
- if (insn->src_reg != BPF_REG_0 ||
- insn->imm != 0 || insn->off != 0) {
- verbose(env, "BPF_JA|BPF_X uses reserved fields\n");
- return -EINVAL;
- }
+ if (BPF_SRC(insn->code) == BPF_X)
return check_indirect_jump(env, insn);
- }
-
- if (BPF_SRC(insn->code) != BPF_K ||
- insn->src_reg != BPF_REG_0 ||
- insn->dst_reg != BPF_REG_0 ||
- (class == BPF_JMP && insn->imm != 0) ||
- (class == BPF_JMP32 && insn->off != 0)) {
- verbose(env, "BPF_JA uses reserved fields\n");
- return -EINVAL;
- }
if (class == BPF_JMP)
env->insn_idx += insn->off + 1;
else
env->insn_idx += insn->imm + 1;
- return 0;
+ return INSN_IDX_UPDATED;
} else if (opcode == BPF_EXIT) {
- if (BPF_SRC(insn->code) != BPF_K ||
- insn->imm != 0 ||
- insn->src_reg != BPF_REG_0 ||
- insn->dst_reg != BPF_REG_0 ||
- class == BPF_JMP32) {
- verbose(env, "BPF_EXIT uses reserved fields\n");
- return -EINVAL;
- }
return process_bpf_exit_full(env, do_print_state, false);
- } else {
- err = check_cond_jmp_op(env, insn, &env->insn_idx);
- if (err)
- return err;
}
- } else if (class == BPF_LD) {
+ return check_cond_jmp_op(env, insn, &env->insn_idx);
+ }
+ case BPF_LD: {
u8 mode = BPF_MODE(insn->code);
- if (mode == BPF_ABS || mode == BPF_IND) {
- err = check_ld_abs(env, insn);
- if (err)
- return err;
+ if (mode == BPF_ABS || mode == BPF_IND)
+ return check_ld_abs(env, insn);
- } else if (mode == BPF_IMM) {
+ if (mode == BPF_IMM) {
err = check_ld_imm(env, insn);
if (err)
return err;
env->insn_idx++;
sanitize_mark_insn_seen(env);
- } else {
- verbose(env, "invalid BPF_LD mode\n");
- return -EINVAL;
}
- } else {
- verbose(env, "unknown insn class %d\n", class);
- return -EINVAL;
+ return 0;
}
-
- env->insn_idx++;
- return 0;
+ }
+ /* All class values are handled above; silence the compiler warning. */
+ return -EFAULT;
}
static int do_check(struct bpf_verifier_env *env)
@@ -21253,7 +17662,7 @@ static int do_check(struct bpf_verifier_env *env)
for (;;) {
struct bpf_insn *insn;
struct bpf_insn_aux_data *insn_aux;
- int err, marks_err;
+ int err;
/* reset current history entry on each new instruction */
env->cur_hist_ent = NULL;
@@ -21278,8 +17687,8 @@ static int do_check(struct bpf_verifier_env *env)
state->last_insn_idx = env->prev_insn_idx;
state->insn_idx = env->insn_idx;
- if (is_prune_point(env, env->insn_idx)) {
- err = is_state_visited(env, env->insn_idx);
+ if (bpf_is_prune_point(env, env->insn_idx)) {
+ err = bpf_is_state_visited(env, env->insn_idx);
if (err < 0)
return err;
if (err == 1) {
@@ -21297,8 +17706,8 @@ static int do_check(struct bpf_verifier_env *env)
}
}
- if (is_jmp_point(env, env->insn_idx)) {
- err = push_jmp_history(env, state, 0, 0);
+ if (bpf_is_jmp_point(env, env->insn_idx)) {
+ err = bpf_push_jmp_history(env, state, 0, 0);
if (err)
return err;
}
@@ -21325,7 +17734,7 @@ static int do_check(struct bpf_verifier_env *env)
verbose_linfo(env, env->insn_idx, "; ");
env->prev_log_pos = env->log.end_pos;
verbose(env, "%d: ", env->insn_idx);
- verbose_insn(env, insn);
+ bpf_verbose_insn(env, insn);
env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
env->prev_log_pos = env->log.end_pos;
}
@@ -21340,21 +17749,34 @@ static int do_check(struct bpf_verifier_env *env)
sanitize_mark_insn_seen(env);
prev_insn_idx = env->insn_idx;
+ /* Sanity check: precomputed constants must match verifier state */
+ if (!state->speculative && insn_aux->const_reg_mask) {
+ struct bpf_reg_state *regs = cur_regs(env);
+ u16 mask = insn_aux->const_reg_mask;
+
+ for (int r = 0; r < ARRAY_SIZE(insn_aux->const_reg_vals); r++) {
+ u32 cval = insn_aux->const_reg_vals[r];
+
+ if (!(mask & BIT(r)))
+ continue;
+ if (regs[r].type != SCALAR_VALUE)
+ continue;
+ if (!tnum_is_const(regs[r].var_off))
+ continue;
+ if (verifier_bug_if((u32)regs[r].var_off.value != cval,
+ env, "const R%d: %u != %llu",
+ r, cval, regs[r].var_off.value))
+ return -EFAULT;
+ }
+ }
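+
+ /* Example (assumed const_fold.c semantics): if constant folding
+ * proved r2 == 5 before this insn, const_reg_mask has BIT(2) set
+ * and const_reg_vals[2] == 5; the loop above flags a verifier bug
+ * if the simulated state ever disagrees with that value.
+ */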
+
/* Reduce verification complexity by stopping speculative path
* verification when a nospec is encountered.
*/
if (state->speculative && insn_aux->nospec)
goto process_bpf_exit;
- err = bpf_reset_stack_write_marks(env, env->insn_idx);
- if (err)
- return err;
err = do_check_insn(env, &do_print_state);
- if (err >= 0 || error_recoverable_with_nospec(err)) {
- marks_err = bpf_commit_stack_write_marks(env);
- if (marks_err)
- return marks_err;
- }
if (error_recoverable_with_nospec(err) && state->speculative) {
/* Prevent this speculative path from ever reaching the
* insn that would have been unsafe to execute.
@@ -21369,8 +17791,10 @@ static int do_check(struct bpf_verifier_env *env)
return err;
} else if (err == PROCESS_BPF_EXIT) {
goto process_bpf_exit;
+ } else if (err == INSN_IDX_UPDATED) {
+ } else if (err == 0) {
+ env->insn_idx++;
}
- WARN_ON_ONCE(err);
if (state->speculative && insn_aux->nospec_result) {
/* If we are on a path that performed a jump-op, this
@@ -21394,10 +17818,7 @@ static int do_check(struct bpf_verifier_env *env)
return -EFAULT;
process_bpf_exit:
mark_verifier_state_scratched(env);
- err = update_branch_counts(env, env->cur_state);
- if (err)
- return err;
- err = bpf_update_live_stack(env);
+ err = bpf_update_branch_counts(env, env->cur_state);
if (err)
return err;
err = pop_stack(env, &prev_insn_idx, &env->insn_idx,
@@ -21790,14 +18211,199 @@ static int add_used_map(struct bpf_verifier_env *env, int fd)
return __add_used_map(env, map);
}
-/* find and rewrite pseudo imm in ld_imm64 instructions:
+static int check_alu_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
+{
+ u8 class = BPF_CLASS(insn->code);
+ u8 opcode = BPF_OP(insn->code);
+
+ switch (opcode) {
+ case BPF_NEG:
+ if (BPF_SRC(insn->code) != BPF_K || insn->src_reg != BPF_REG_0 ||
+ insn->off != 0 || insn->imm != 0) {
+ verbose(env, "BPF_NEG uses reserved fields\n");
+ return -EINVAL;
+ }
+ return 0;
+ case BPF_END:
+ if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
+ (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
+ (class == BPF_ALU64 && BPF_SRC(insn->code) != BPF_TO_LE)) {
+ verbose(env, "BPF_END uses reserved fields\n");
+ return -EINVAL;
+ }
+ return 0;
+ case BPF_MOV:
+ if (BPF_SRC(insn->code) == BPF_X) {
+ if (class == BPF_ALU) {
+ if ((insn->off != 0 && insn->off != 8 && insn->off != 16) ||
+ insn->imm) {
+ verbose(env, "BPF_MOV uses reserved fields\n");
+ return -EINVAL;
+ }
+ } else if (insn->off == BPF_ADDR_SPACE_CAST) {
+ if (insn->imm != 1 && insn->imm != 1u << 16) {
+ verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
+ return -EINVAL;
+ }
+ } else if ((insn->off != 0 && insn->off != 8 &&
+ insn->off != 16 && insn->off != 32) || insn->imm) {
+ verbose(env, "BPF_MOV uses reserved fields\n");
+ return -EINVAL;
+ }
+ } else if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
+ verbose(env, "BPF_MOV uses reserved fields\n");
+ return -EINVAL;
+ }
+ return 0;
+ case BPF_ADD:
+ case BPF_SUB:
+ case BPF_AND:
+ case BPF_OR:
+ case BPF_XOR:
+ case BPF_LSH:
+ case BPF_RSH:
+ case BPF_ARSH:
+ case BPF_MUL:
+ case BPF_DIV:
+ case BPF_MOD:
+ if (BPF_SRC(insn->code) == BPF_X) {
+ if (insn->imm != 0 || (insn->off != 0 && insn->off != 1) ||
+ (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
+ verbose(env, "BPF_ALU uses reserved fields\n");
+ return -EINVAL;
+ }
+ } else if (insn->src_reg != BPF_REG_0 ||
+ (insn->off != 0 && insn->off != 1) ||
+ (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
+ verbose(env, "BPF_ALU uses reserved fields\n");
+ return -EINVAL;
+ }
+ return 0;
+ default:
+ verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
+ return -EINVAL;
+ }
+}
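+
+/* Illustrative encodings (editor's sketch) against the checks above:
+ *
+ * BPF_ALU64 | BPF_NEG with imm != 0 -> "BPF_NEG uses reserved fields"
+ * BPF_ALU64 | BPF_END | BPF_TO_BE -> rejected: only BPF_TO_LE (bswap)
+ * is a valid ALU64 byte swap
+ * BPF_ALU64 | BPF_DIV | BPF_X, off == 1 -> accepted: signed division
+ * BPF_ALU64 | BPF_ADD | BPF_X, off == 1 -> rejected: off == 1 is defined
+ * only for BPF_DIV/BPF_MOD
+ */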
+
+static int check_jmp_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
+{
+ u8 class = BPF_CLASS(insn->code);
+ u8 opcode = BPF_OP(insn->code);
+
+ switch (opcode) {
+ case BPF_CALL:
+ if (BPF_SRC(insn->code) != BPF_K ||
+ (insn->src_reg != BPF_PSEUDO_KFUNC_CALL && insn->off != 0) ||
+ (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL &&
+ insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
+ insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
+ verbose(env, "BPF_CALL uses reserved fields\n");
+ return -EINVAL;
+ }
+ return 0;
+ case BPF_JA:
+ if (BPF_SRC(insn->code) == BPF_X) {
+ if (insn->src_reg != BPF_REG_0 || insn->imm != 0 || insn->off != 0) {
+ verbose(env, "BPF_JA|BPF_X uses reserved fields\n");
+ return -EINVAL;
+ }
+ } else if (insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 ||
+ (class == BPF_JMP && insn->imm != 0) ||
+ (class == BPF_JMP32 && insn->off != 0)) {
+ verbose(env, "BPF_JA uses reserved fields\n");
+ return -EINVAL;
+ }
+ return 0;
+ case BPF_EXIT:
+ if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 ||
+ insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 ||
+ class == BPF_JMP32) {
+ verbose(env, "BPF_EXIT uses reserved fields\n");
+ return -EINVAL;
+ }
+ return 0;
+ case BPF_JCOND:
+ if (insn->code != (BPF_JMP | BPF_JCOND) || insn->src_reg != BPF_MAY_GOTO ||
+ insn->dst_reg || insn->imm) {
+ verbose(env, "invalid may_goto imm %d\n", insn->imm);
+ return -EINVAL;
+ }
+ return 0;
+ default:
+ if (BPF_SRC(insn->code) == BPF_X) {
+ if (insn->imm != 0) {
+ verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
+ return -EINVAL;
+ }
+ } else if (insn->src_reg != BPF_REG_0) {
+ verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
+ return -EINVAL;
+ }
+ return 0;
+ }
+}
+
+static int check_insn_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
+{
+ switch (BPF_CLASS(insn->code)) {
+ case BPF_ALU:
+ case BPF_ALU64:
+ return check_alu_fields(env, insn);
+ case BPF_LDX:
+ if ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) ||
+ insn->imm != 0) {
+ verbose(env, "BPF_LDX uses reserved fields\n");
+ return -EINVAL;
+ }
+ return 0;
+ case BPF_STX:
+ if (BPF_MODE(insn->code) == BPF_ATOMIC)
+ return 0;
+ if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
+ verbose(env, "BPF_STX uses reserved fields\n");
+ return -EINVAL;
+ }
+ return 0;
+ case BPF_ST:
+ if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) {
+ verbose(env, "BPF_ST uses reserved fields\n");
+ return -EINVAL;
+ }
+ return 0;
+ case BPF_JMP:
+ case BPF_JMP32:
+ return check_jmp_fields(env, insn);
+ case BPF_LD: {
+ u8 mode = BPF_MODE(insn->code);
+
+ if (mode == BPF_ABS || mode == BPF_IND) {
+ if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
+ BPF_SIZE(insn->code) == BPF_DW ||
+ (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
+ verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
+ return -EINVAL;
+ }
+ } else if (mode != BPF_IMM) {
+ verbose(env, "invalid BPF_LD mode\n");
+ return -EINVAL;
+ }
+ return 0;
+ }
+ default:
+ verbose(env, "unknown insn class %d\n", BPF_CLASS(insn->code));
+ return -EINVAL;
+ }
+}
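+
+/* These field checks run once over the whole program, before the main
+ * verification pass; do_check_insn() can therefore assume well-formed
+ * encodings and no longer re-validates reserved fields on every
+ * simulated execution of an insn.
+ */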
+
+/*
+ * Check that insns are sane and rewrite pseudo imm in ld_imm64 instructions:
*
* 1. if it accesses map FD, replace it with actual map pointer.
* 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
*
* NOTE: btf_vmlinux is required for converting pseudo btf_id.
*/
-static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
+static int check_and_resolve_insns(struct bpf_verifier_env *env)
{
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
@@ -21808,13 +18414,14 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
return err;
for (i = 0; i < insn_cnt; i++, insn++) {
- if (BPF_CLASS(insn->code) == BPF_LDX &&
- ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) ||
- insn->imm != 0)) {
- verbose(env, "BPF_LDX uses reserved fields\n");
+ if (insn->dst_reg >= MAX_BPF_REG) {
+ verbose(env, "R%d is invalid\n", insn->dst_reg);
+ return -EINVAL;
+ }
+ if (insn->src_reg >= MAX_BPF_REG) {
+ verbose(env, "R%d is invalid\n", insn->src_reg);
return -EINVAL;
}
-
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
struct bpf_insn_aux_data *aux;
struct bpf_map *map;
@@ -21829,6 +18436,11 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
return -EINVAL;
}
+ if (insn[0].off != 0) {
+ verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
+ return -EINVAL;
+ }
+
if (insn[0].src_reg == 0)
/* valid generic load 64-bit imm */
goto next_insn;
@@ -21925,6 +18537,10 @@ next_insn:
verbose(env, "unknown opcode %02x\n", insn->code);
return -EINVAL;
}
+
+ err = check_insn_fields(env, insn);
+ if (err)
+ return err;
}
/* now all pseudo BPF_LD_IMM64 instructions load valid
@@ -21963,53 +18579,6 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
}
}
-/* single env->prog->insnsi[off] instruction was replaced with the range
- * insnsi[off, off + cnt). Adjust corresponding insn_aux_data by copying
- * [0, off) and [off, end) to new locations, so the patched range stays zero
- */
-static void adjust_insn_aux_data(struct bpf_verifier_env *env,
- struct bpf_prog *new_prog, u32 off, u32 cnt)
-{
- struct bpf_insn_aux_data *data = env->insn_aux_data;
- struct bpf_insn *insn = new_prog->insnsi;
- u32 old_seen = data[off].seen;
- u32 prog_len;
- int i;
-
- /* aux info at OFF always needs adjustment, no matter whether the fast
- * path (cnt == 1) is taken or not. There is no guarantee the insn at OFF
- * is the original insn of the old prog.
- */
- data[off].zext_dst = insn_has_def32(insn + off + cnt - 1);
-
- if (cnt == 1)
- return;
- prog_len = new_prog->len;
-
- memmove(data + off + cnt - 1, data + off,
- sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
- memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1));
- for (i = off; i < off + cnt - 1; i++) {
- /* Expand insni[off]'s seen count to the patched range. */
- data[i].seen = old_seen;
- data[i].zext_dst = insn_has_def32(insn + i);
- }
-}
-
-static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
-{
- int i;
-
- if (len == 1)
- return;
- /* NOTE: fake 'exit' subprog should be updated as well. */
- for (i = 0; i <= env->subprog_cnt; i++) {
- if (env->subprog_info[i].start <= off)
- continue;
- env->subprog_info[i].start += len - 1;
- }
-}
-
static void release_insn_arrays(struct bpf_verifier_env *env)
{
int i;
@@ -22018,281 +18587,7 @@ static void release_insn_arrays(struct bpf_verifier_env *env)
bpf_insn_array_release(env->insn_array_maps[i]);
}
-static void adjust_insn_arrays(struct bpf_verifier_env *env, u32 off, u32 len)
-{
- int i;
-
- if (len == 1)
- return;
-
- for (i = 0; i < env->insn_array_map_cnt; i++)
- bpf_insn_array_adjust(env->insn_array_maps[i], off, len);
-}
-
-static void adjust_insn_arrays_after_remove(struct bpf_verifier_env *env, u32 off, u32 len)
-{
- int i;
-
- for (i = 0; i < env->insn_array_map_cnt; i++)
- bpf_insn_array_adjust_after_remove(env->insn_array_maps[i], off, len);
-}
-
-static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
-{
- struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
- int i, sz = prog->aux->size_poke_tab;
- struct bpf_jit_poke_descriptor *desc;
-
- for (i = 0; i < sz; i++) {
- desc = &tab[i];
- if (desc->insn_idx <= off)
- continue;
- desc->insn_idx += len - 1;
- }
-}
-
-static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
- const struct bpf_insn *patch, u32 len)
-{
- struct bpf_prog *new_prog;
- struct bpf_insn_aux_data *new_data = NULL;
-
- if (len > 1) {
- new_data = vrealloc(env->insn_aux_data,
- array_size(env->prog->len + len - 1,
- sizeof(struct bpf_insn_aux_data)),
- GFP_KERNEL_ACCOUNT | __GFP_ZERO);
- if (!new_data)
- return NULL;
-
- env->insn_aux_data = new_data;
- }
-
- new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
- if (IS_ERR(new_prog)) {
- if (PTR_ERR(new_prog) == -ERANGE)
- verbose(env,
- "insn %d cannot be patched due to 16-bit range\n",
- env->insn_aux_data[off].orig_idx);
- return NULL;
- }
- adjust_insn_aux_data(env, new_prog, off, len);
- adjust_subprog_starts(env, off, len);
- adjust_insn_arrays(env, off, len);
- adjust_poke_descs(new_prog, off, len);
- return new_prog;
-}
-
-/*
- * For all jmp insns in a given 'prog' that point to the 'tgt_idx' insn,
- * adjust the jump offset by 'delta'.
- */
-static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
-{
- struct bpf_insn *insn = prog->insnsi;
- u32 insn_cnt = prog->len, i;
- s32 imm;
- s16 off;
-
- for (i = 0; i < insn_cnt; i++, insn++) {
- u8 code = insn->code;
-
- if (tgt_idx <= i && i < tgt_idx + delta)
- continue;
-
- if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
- BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
- continue;
-
- if (insn->code == (BPF_JMP32 | BPF_JA)) {
- if (i + 1 + insn->imm != tgt_idx)
- continue;
- if (check_add_overflow(insn->imm, delta, &imm))
- return -ERANGE;
- insn->imm = imm;
- } else {
- if (i + 1 + insn->off != tgt_idx)
- continue;
- if (check_add_overflow(insn->off, delta, &off))
- return -ERANGE;
- insn->off = off;
- }
- }
- return 0;
-}
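-
-/* Worked example (illustrative): suppose 2 insns were patched in at
- * tgt_idx = 5, so delta = 2. A jump at insn 2 with off = 2 used to land
- * on 2 + 1 + 2 == 5; its off is rewritten to 4 so that it still lands on
- * the original target insn, which now lives at index 7. Jumps that
- * originate inside [tgt_idx, tgt_idx + delta) are skipped by the first
- * check in the loop.
- */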
-
-static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
- u32 off, u32 cnt)
-{
- int i, j;
-
- /* find first prog starting at or after off (first to remove) */
- for (i = 0; i < env->subprog_cnt; i++)
- if (env->subprog_info[i].start >= off)
- break;
- /* find first prog starting at or after off + cnt (first to stay) */
- for (j = i; j < env->subprog_cnt; j++)
- if (env->subprog_info[j].start >= off + cnt)
- break;
- /* if j doesn't start exactly at off + cnt, we are just removing
- * the front of previous prog
- */
- if (env->subprog_info[j].start != off + cnt)
- j--;
-
- if (j > i) {
- struct bpf_prog_aux *aux = env->prog->aux;
- int move;
-
- /* move fake 'exit' subprog as well */
- move = env->subprog_cnt + 1 - j;
-
- memmove(env->subprog_info + i,
- env->subprog_info + j,
- sizeof(*env->subprog_info) * move);
- env->subprog_cnt -= j - i;
-
- /* remove func_info */
- if (aux->func_info) {
- move = aux->func_info_cnt - j;
-
- memmove(aux->func_info + i,
- aux->func_info + j,
- sizeof(*aux->func_info) * move);
- aux->func_info_cnt -= j - i;
- /* func_info->insn_off is set after all code rewrites,
- * in adjust_btf_func() - no need to adjust
- */
- }
- } else {
- /* convert i from "first prog to remove" to "first to adjust" */
- if (env->subprog_info[i].start == off)
- i++;
- }
-
- /* update fake 'exit' subprog as well */
- for (; i <= env->subprog_cnt; i++)
- env->subprog_info[i].start -= cnt;
-
- return 0;
-}
-
-static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
- u32 cnt)
-{
- struct bpf_prog *prog = env->prog;
- u32 i, l_off, l_cnt, nr_linfo;
- struct bpf_line_info *linfo;
-
- nr_linfo = prog->aux->nr_linfo;
- if (!nr_linfo)
- return 0;
-
- linfo = prog->aux->linfo;
-
- /* find first line info to remove, count lines to be removed */
- for (i = 0; i < nr_linfo; i++)
- if (linfo[i].insn_off >= off)
- break;
-
- l_off = i;
- l_cnt = 0;
- for (; i < nr_linfo; i++)
- if (linfo[i].insn_off < off + cnt)
- l_cnt++;
- else
- break;
-
- /* If the first live insn doesn't match the first live linfo, it needs to
- * "inherit" the last removed linfo. prog is already modified, so
- * prog->len == off means no live instructions remain (the tail of the
- * program was removed).
- */
- if (prog->len != off && l_cnt &&
- (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
- l_cnt--;
- linfo[--i].insn_off = off + cnt;
- }
-
- /* remove the line info entries which refer to the removed instructions */
- if (l_cnt) {
- memmove(linfo + l_off, linfo + i,
- sizeof(*linfo) * (nr_linfo - i));
-
- prog->aux->nr_linfo -= l_cnt;
- nr_linfo = prog->aux->nr_linfo;
- }
-
- /* pull all linfo[i].insn_off >= off + cnt in by cnt */
- for (i = l_off; i < nr_linfo; i++)
- linfo[i].insn_off -= cnt;
-
- /* fix up all subprogs (incl. 'exit') which start >= off */
- for (i = 0; i <= env->subprog_cnt; i++)
- if (env->subprog_info[i].linfo_idx > l_off) {
- /* program may have started in the removed region but
- * may not be fully removed
- */
- if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
- env->subprog_info[i].linfo_idx -= l_cnt;
- else
- env->subprog_info[i].linfo_idx = l_off;
- }
-
- return 0;
-}
-
-/*
- * Clean up dynamically allocated fields of aux data for instructions [start, ...]
- */
-static void clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len)
-{
- struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
- struct bpf_insn *insns = env->prog->insnsi;
- int end = start + len;
- int i;
- for (i = start; i < end; i++) {
- if (aux_data[i].jt) {
- kvfree(aux_data[i].jt);
- aux_data[i].jt = NULL;
- }
-
- if (bpf_is_ldimm64(&insns[i]))
- i++;
- }
-}
-
-static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
-{
- struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
- unsigned int orig_prog_len = env->prog->len;
- int err;
-
- if (bpf_prog_is_offloaded(env->prog->aux))
- bpf_prog_offload_remove_insns(env, off, cnt);
-
- /* Should be called before bpf_remove_insns, as it uses prog->insnsi */
- clear_insn_aux_data(env, off, cnt);
-
- err = bpf_remove_insns(env->prog, off, cnt);
- if (err)
- return err;
-
- err = adjust_subprog_starts_after_remove(env, off, cnt);
- if (err)
- return err;
-
- err = bpf_adj_linfo_after_remove(env, off, cnt);
- if (err)
- return err;
-
- adjust_insn_arrays_after_remove(env, off, cnt);
-
- memmove(aux_data + off, aux_data + off + cnt,
- sizeof(*aux_data) * (orig_prog_len - off - cnt));
-
- return 0;
-}
/* The verifier does more data flow analysis than llvm and will not
* explore branches that are dead at run time. Malicious programs can
@@ -22321,2210 +18616,7 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
}
}
-static bool insn_is_cond_jump(u8 code)
-{
- u8 op;
-
- op = BPF_OP(code);
- if (BPF_CLASS(code) == BPF_JMP32)
- return op != BPF_JA;
-
- if (BPF_CLASS(code) != BPF_JMP)
- return false;
-
- return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
-}
-
-static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
-{
- struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
- struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
- struct bpf_insn *insn = env->prog->insnsi;
- const int insn_cnt = env->prog->len;
- int i;
-
- for (i = 0; i < insn_cnt; i++, insn++) {
- if (!insn_is_cond_jump(insn->code))
- continue;
-
- if (!aux_data[i + 1].seen)
- ja.off = insn->off;
- else if (!aux_data[i + 1 + insn->off].seen)
- ja.off = 0;
- else
- continue;
-
- if (bpf_prog_is_offloaded(env->prog->aux))
- bpf_prog_offload_replace_insn(env, i, &ja);
-
- memcpy(insn, &ja, sizeof(ja));
- }
-}
-
-static int opt_remove_dead_code(struct bpf_verifier_env *env)
-{
- struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
- int insn_cnt = env->prog->len;
- int i, err;
-
- for (i = 0; i < insn_cnt; i++) {
- int j;
-
- j = 0;
- while (i + j < insn_cnt && !aux_data[i + j].seen)
- j++;
- if (!j)
- continue;
-
- err = verifier_remove_insns(env, i, j);
- if (err)
- return err;
- insn_cnt = env->prog->len;
- }
-
- return 0;
-}
-
-static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
-static const struct bpf_insn MAY_GOTO_0 = BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0, 0);
-
-static int opt_remove_nops(struct bpf_verifier_env *env)
-{
- struct bpf_insn *insn = env->prog->insnsi;
- int insn_cnt = env->prog->len;
- bool is_may_goto_0, is_ja;
- int i, err;
-
- for (i = 0; i < insn_cnt; i++) {
- is_may_goto_0 = !memcmp(&insn[i], &MAY_GOTO_0, sizeof(MAY_GOTO_0));
- is_ja = !memcmp(&insn[i], &NOP, sizeof(NOP));
-
- if (!is_may_goto_0 && !is_ja)
- continue;
-
- err = verifier_remove_insns(env, i, 1);
- if (err)
- return err;
- insn_cnt--;
- /* Go back one insn to catch may_goto +1; may_goto +0 sequence */
- i -= (is_may_goto_0 && i > 0) ? 2 : 1;
- }
-
- return 0;
-}
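-
-/* Worked example (illustrative): given the sequence
- *
- * insn i: may_goto +1
- * insn i+1: may_goto +0
- *
- * removing insn i+1 shrinks the first insn's offset to 0, turning it into
- * a new "may_goto +0"; stepping 'i' back by 2 lets the loop revisit and
- * remove it as well.
- */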
-
-static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
- const union bpf_attr *attr)
-{
- struct bpf_insn *patch;
- /* use env->insn_buf as two independent buffers */
- struct bpf_insn *zext_patch = env->insn_buf;
- struct bpf_insn *rnd_hi32_patch = &env->insn_buf[2];
- struct bpf_insn_aux_data *aux = env->insn_aux_data;
- int i, patch_len, delta = 0, len = env->prog->len;
- struct bpf_insn *insns = env->prog->insnsi;
- struct bpf_prog *new_prog;
- bool rnd_hi32;
-
- rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
- zext_patch[1] = BPF_ZEXT_REG(0);
- rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
- rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
- rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
- for (i = 0; i < len; i++) {
- int adj_idx = i + delta;
- struct bpf_insn insn;
- int load_reg;
-
- insn = insns[adj_idx];
- load_reg = insn_def_regno(&insn);
- if (!aux[adj_idx].zext_dst) {
- u8 code, class;
- u32 imm_rnd;
-
- if (!rnd_hi32)
- continue;
-
- code = insn.code;
- class = BPF_CLASS(code);
- if (load_reg == -1)
- continue;
-
- /* NOTE: arg "reg" (the fourth one) is only used for
- * BPF_STX + SRC_OP, so it is safe to pass NULL
- * here.
- */
- if (is_reg64(&insn, load_reg, NULL, DST_OP)) {
- if (class == BPF_LD &&
- BPF_MODE(code) == BPF_IMM)
- i++;
- continue;
- }
-
- /* a ctx load could be transformed into a wider load. */
- if (class == BPF_LDX &&
- aux[adj_idx].ptr_type == PTR_TO_CTX)
- continue;
-
- imm_rnd = get_random_u32();
- rnd_hi32_patch[0] = insn;
- rnd_hi32_patch[1].imm = imm_rnd;
- rnd_hi32_patch[3].dst_reg = load_reg;
- patch = rnd_hi32_patch;
- patch_len = 4;
- goto apply_patch_buffer;
- }
-
- /* Add in a zero-extend instruction if a) the JIT has requested
- * it or b) it's a CMPXCHG.
- *
- * The latter is because: BPF_CMPXCHG always loads a value into
- * R0, therefore always zero-extends. However some archs'
- * equivalent instruction only does this load when the
- * comparison is successful. This detail of CMPXCHG is
- * orthogonal to the general zero-extension behaviour of the
- * CPU, so it's treated independently of bpf_jit_needs_zext.
- */
- if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
- continue;
-
- /* Zero-extension is done by the caller. */
- if (bpf_pseudo_kfunc_call(&insn))
- continue;
-
- if (verifier_bug_if(load_reg == -1, env,
- "zext_dst is set, but no reg is defined"))
- return -EFAULT;
-
- zext_patch[0] = insn;
- zext_patch[1].dst_reg = load_reg;
- zext_patch[1].src_reg = load_reg;
- patch = zext_patch;
- patch_len = 2;
-apply_patch_buffer:
- new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
- if (!new_prog)
- return -ENOMEM;
- env->prog = new_prog;
- insns = new_prog->insnsi;
- aux = env->insn_aux_data;
- delta += patch_len - 1;
- }
-
- return 0;
-}
-
-/* convert load instructions that access fields of a context type into a
- * sequence of instructions that access fields of the underlying structure:
- * struct __sk_buff -> struct sk_buff
- * struct bpf_sock_ops -> struct sock
- */
-static int convert_ctx_accesses(struct bpf_verifier_env *env)
-{
- struct bpf_subprog_info *subprogs = env->subprog_info;
- const struct bpf_verifier_ops *ops = env->ops;
- int i, cnt, size, ctx_field_size, ret, delta = 0, epilogue_cnt = 0;
- const int insn_cnt = env->prog->len;
- struct bpf_insn *epilogue_buf = env->epilogue_buf;
- struct bpf_insn *insn_buf = env->insn_buf;
- struct bpf_insn *insn;
- u32 target_size, size_default, off;
- struct bpf_prog *new_prog;
- enum bpf_access_type type;
- bool is_narrower_load;
- int epilogue_idx = 0;
-
- if (ops->gen_epilogue) {
- epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
- -(subprogs[0].stack_depth + 8));
- if (epilogue_cnt >= INSN_BUF_SIZE) {
- verifier_bug(env, "epilogue is too long");
- return -EFAULT;
- } else if (epilogue_cnt) {
- /* Save the ARG_PTR_TO_CTX for the epilogue to use */
- cnt = 0;
- subprogs[0].stack_depth += 8;
- insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
- -subprogs[0].stack_depth);
- insn_buf[cnt++] = env->prog->insnsi[0];
- new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
- env->prog = new_prog;
- delta += cnt - 1;
-
- ret = add_kfunc_in_insns(env, epilogue_buf, epilogue_cnt - 1);
- if (ret < 0)
- return ret;
- }
- }
-
- if (ops->gen_prologue || env->seen_direct_write) {
- if (!ops->gen_prologue) {
- verifier_bug(env, "gen_prologue is null");
- return -EFAULT;
- }
- cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
- env->prog);
- if (cnt >= INSN_BUF_SIZE) {
- verifier_bug(env, "prologue is too long");
- return -EFAULT;
- } else if (cnt) {
- new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- env->prog = new_prog;
- delta += cnt - 1;
-
- ret = add_kfunc_in_insns(env, insn_buf, cnt - 1);
- if (ret < 0)
- return ret;
- }
- }
-
- if (delta)
- WARN_ON(adjust_jmp_off(env->prog, 0, delta));
-
- if (bpf_prog_is_offloaded(env->prog->aux))
- return 0;
-
- insn = env->prog->insnsi + delta;
- for (i = 0; i < insn_cnt; i++, insn++) {
- bpf_convert_ctx_access_t convert_ctx_access;
- u8 mode;
-
- if (env->insn_aux_data[i + delta].nospec) {
- WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state);
- struct bpf_insn *patch = insn_buf;
-
- *patch++ = BPF_ST_NOSPEC();
- *patch++ = *insn;
- cnt = patch - insn_buf;
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = new_prog;
- insn = new_prog->insnsi + i + delta;
-			/* This cannot be easily merged with the
- * nospec_result-case, because an insn may require a
- * nospec before and after itself. Therefore also do not
- * 'continue' here but potentially apply further
- * patching to insn. *insn should equal patch[1] now.
- */
- }
-
- if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
- insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
- insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
- insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
- insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
- insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
- insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
- type = BPF_READ;
- } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
- insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
- insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
- insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
- insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
- insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
- insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
- insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
- type = BPF_WRITE;
- } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
- insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
- insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
- insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
- env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
- insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
- env->prog->aux->num_exentries++;
- continue;
- } else if (insn->code == (BPF_JMP | BPF_EXIT) &&
- epilogue_cnt &&
- i + delta < subprogs[1].start) {
- /* Generate epilogue for the main prog */
- if (epilogue_idx) {
- /* jump back to the earlier generated epilogue */
- insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
- cnt = 1;
- } else {
- memcpy(insn_buf, epilogue_buf,
- epilogue_cnt * sizeof(*epilogue_buf));
- cnt = epilogue_cnt;
-				/* epilogue_idx cannot be 0: the prog must have
-				 * at least one ctx-ptr-saving insn before the
-				 * epilogue.
-				 */
- epilogue_idx = i + delta;
- }
- goto patch_insn_buf;
- } else {
- continue;
- }
-
- if (type == BPF_WRITE &&
- env->insn_aux_data[i + delta].nospec_result) {
- /* nospec_result is only used to mitigate Spectre v4 and
- * to limit verification-time for Spectre v1.
- */
- struct bpf_insn *patch = insn_buf;
-
- *patch++ = *insn;
- *patch++ = BPF_ST_NOSPEC();
- cnt = patch - insn_buf;
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- continue;
- }
-
- switch ((int)env->insn_aux_data[i + delta].ptr_type) {
- case PTR_TO_CTX:
- if (!ops->convert_ctx_access)
- continue;
- convert_ctx_access = ops->convert_ctx_access;
- break;
- case PTR_TO_SOCKET:
- case PTR_TO_SOCK_COMMON:
- convert_ctx_access = bpf_sock_convert_ctx_access;
- break;
- case PTR_TO_TCP_SOCK:
- convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
- break;
- case PTR_TO_XDP_SOCK:
- convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
- break;
- case PTR_TO_BTF_ID:
- case PTR_TO_BTF_ID | PTR_UNTRUSTED:
-		/* PTR_TO_BTF_ID | MEM_ALLOC, unlike plain PTR_TO_BTF_ID, always
-		 * has a valid lifetime and an active ref_obj_id, but the same
-		 * cannot be said once it is marked PTR_UNTRUSTED, hence we must
-		 * handle any faults on loads from such types. BPF_WRITE is
-		 * disallowed for this case.
-		 */
- case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
- case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
- if (type == BPF_READ) {
- if (BPF_MODE(insn->code) == BPF_MEM)
- insn->code = BPF_LDX | BPF_PROBE_MEM |
- BPF_SIZE((insn)->code);
- else
- insn->code = BPF_LDX | BPF_PROBE_MEMSX |
- BPF_SIZE((insn)->code);
- env->prog->aux->num_exentries++;
- }
- continue;
- case PTR_TO_ARENA:
- if (BPF_MODE(insn->code) == BPF_MEMSX) {
- if (!bpf_jit_supports_insn(insn, true)) {
- verbose(env, "sign extending loads from arena are not supported yet\n");
- return -EOPNOTSUPP;
- }
- insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32SX | BPF_SIZE(insn->code);
- } else {
- insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
- }
- env->prog->aux->num_exentries++;
- continue;
- default:
- continue;
- }
-
- ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
- size = BPF_LDST_BYTES(insn);
- mode = BPF_MODE(insn->code);
-
- /* If the read access is a narrower load of the field,
-	 * convert it to a 4/8-byte load, to minimize the program type
-	 * specific convert_ctx_access changes. If the conversion is
-	 * successful, we will apply the proper mask to the result.
- */
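-	/* For example (little endian), a 1-byte read at offset 2 of a
-	 * 4-byte field becomes a 4-byte read at offset 0; the result is
-	 * then shifted right by 16 (from bpf_ctx_narrow_access_offset())
-	 * and masked with 0xff further below.
-	 */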
- is_narrower_load = size < ctx_field_size;
- size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
- off = insn->off;
- if (is_narrower_load) {
- u8 size_code;
-
- if (type == BPF_WRITE) {
- verifier_bug(env, "narrow ctx access misconfigured");
- return -EFAULT;
- }
-
- size_code = BPF_H;
- if (ctx_field_size == 4)
- size_code = BPF_W;
- else if (ctx_field_size == 8)
- size_code = BPF_DW;
-
- insn->off = off & ~(size_default - 1);
- insn->code = BPF_LDX | BPF_MEM | size_code;
- }
-
- target_size = 0;
- cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
- &target_size);
- if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
- (ctx_field_size && !target_size)) {
- verifier_bug(env, "error during ctx access conversion (%d)", cnt);
- return -EFAULT;
- }
-
- if (is_narrower_load && size < target_size) {
- u8 shift = bpf_ctx_narrow_access_offset(
- off, size, size_default) * 8;
- if (shift && cnt + 1 >= INSN_BUF_SIZE) {
- verifier_bug(env, "narrow ctx load misconfigured");
- return -EFAULT;
- }
- if (ctx_field_size <= 4) {
- if (shift)
- insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
- insn->dst_reg,
- shift);
- insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
- (1 << size * 8) - 1);
- } else {
- if (shift)
- insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
- insn->dst_reg,
- shift);
- insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
- (1ULL << size * 8) - 1);
- }
- }
- if (mode == BPF_MEMSX)
- insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
- insn->dst_reg, insn->dst_reg,
- size * 8, 0);
-
-patch_insn_buf:
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
-
- /* keep walking new program and skip insns we just inserted */
- env->prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- }
-
- return 0;
-}
-
-static int jit_subprogs(struct bpf_verifier_env *env)
-{
- struct bpf_prog *prog = env->prog, **func, *tmp;
- int i, j, subprog_start, subprog_end = 0, len, subprog;
- struct bpf_map *map_ptr;
- struct bpf_insn *insn;
- void *old_bpf_func;
- int err, num_exentries;
- int old_len, subprog_start_adjustment = 0;
-
- if (env->subprog_cnt <= 1)
- return 0;
-
- for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
- if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
- continue;
-
- /* Upon error here we cannot fall back to interpreter but
- * need a hard reject of the program. Thus -EFAULT is
- * propagated in any case.
- */
- subprog = find_subprog(env, i + insn->imm + 1);
- if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d",
- i + insn->imm + 1))
- return -EFAULT;
- /* temporarily remember subprog id inside insn instead of
- * aux_data, since next loop will split up all insns into funcs
- */
- insn->off = subprog;
- /* remember original imm in case JIT fails and fallback
- * to interpreter will be needed
- */
- env->insn_aux_data[i].call_imm = insn->imm;
- /* point imm to __bpf_call_base+1 from JITs point of view */
- insn->imm = 1;
- if (bpf_pseudo_func(insn)) {
-#if defined(MODULES_VADDR)
- u64 addr = MODULES_VADDR;
-#else
- u64 addr = VMALLOC_START;
-#endif
- /* jit (e.g. x86_64) may emit fewer instructions
- * if it learns a u32 imm is the same as a u64 imm.
-			 * Set it close enough to a possible prog address.
- */
- insn[0].imm = (u32)addr;
- insn[1].imm = addr >> 32;
- }
- }
-
- err = bpf_prog_alloc_jited_linfo(prog);
- if (err)
- goto out_undo_insn;
-
- err = -ENOMEM;
- func = kzalloc_objs(prog, env->subprog_cnt);
- if (!func)
- goto out_undo_insn;
-
- for (i = 0; i < env->subprog_cnt; i++) {
- subprog_start = subprog_end;
- subprog_end = env->subprog_info[i + 1].start;
-
- len = subprog_end - subprog_start;
- /* bpf_prog_run() doesn't call subprogs directly,
- * hence main prog stats include the runtime of subprogs.
-		 * subprogs don't have IDs and are not reachable via prog_get_next_id,
-		 * so func[i]->stats will never be accessed and stays NULL
- */
- func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
- if (!func[i])
- goto out_free;
- memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
- len * sizeof(struct bpf_insn));
- func[i]->type = prog->type;
- func[i]->len = len;
- if (bpf_prog_calc_tag(func[i]))
- goto out_free;
- func[i]->is_func = 1;
- func[i]->sleepable = prog->sleepable;
- func[i]->aux->func_idx = i;
- /* Below members will be freed only at prog->aux */
- func[i]->aux->btf = prog->aux->btf;
- func[i]->aux->subprog_start = subprog_start + subprog_start_adjustment;
- func[i]->aux->func_info = prog->aux->func_info;
- func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
- func[i]->aux->poke_tab = prog->aux->poke_tab;
- func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
- func[i]->aux->main_prog_aux = prog->aux;
-
- for (j = 0; j < prog->aux->size_poke_tab; j++) {
- struct bpf_jit_poke_descriptor *poke;
-
- poke = &prog->aux->poke_tab[j];
- if (poke->insn_idx < subprog_end &&
- poke->insn_idx >= subprog_start)
- poke->aux = func[i]->aux;
- }
-
- func[i]->aux->name[0] = 'F';
- func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
- if (env->subprog_info[i].priv_stack_mode == PRIV_STACK_ADAPTIVE)
- func[i]->aux->jits_use_priv_stack = true;
-
- func[i]->jit_requested = 1;
- func[i]->blinding_requested = prog->blinding_requested;
- func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
- func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
- func[i]->aux->linfo = prog->aux->linfo;
- func[i]->aux->nr_linfo = prog->aux->nr_linfo;
- func[i]->aux->jited_linfo = prog->aux->jited_linfo;
- func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
- func[i]->aux->arena = prog->aux->arena;
- func[i]->aux->used_maps = env->used_maps;
- func[i]->aux->used_map_cnt = env->used_map_cnt;
- num_exentries = 0;
- insn = func[i]->insnsi;
- for (j = 0; j < func[i]->len; j++, insn++) {
- if (BPF_CLASS(insn->code) == BPF_LDX &&
- (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
- BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
- BPF_MODE(insn->code) == BPF_PROBE_MEM32SX ||
- BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
- num_exentries++;
- if ((BPF_CLASS(insn->code) == BPF_STX ||
- BPF_CLASS(insn->code) == BPF_ST) &&
- BPF_MODE(insn->code) == BPF_PROBE_MEM32)
- num_exentries++;
- if (BPF_CLASS(insn->code) == BPF_STX &&
- BPF_MODE(insn->code) == BPF_PROBE_ATOMIC)
- num_exentries++;
- }
- func[i]->aux->num_exentries = num_exentries;
- func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
- func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
- func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data;
- func[i]->aux->might_sleep = env->subprog_info[i].might_sleep;
- if (!i)
- func[i]->aux->exception_boundary = env->seen_exception;
-
-		/*
-		 * To properly pass the absolute subprog start to the JIT,
-		 * all instruction adjustments must be accumulated
-		 */
- old_len = func[i]->len;
- func[i] = bpf_int_jit_compile(func[i]);
- subprog_start_adjustment += func[i]->len - old_len;
-
- if (!func[i]->jited) {
- err = -ENOTSUPP;
- goto out_free;
- }
- cond_resched();
- }
-
- /* at this point all bpf functions were successfully JITed
- * now populate all bpf_calls with correct addresses and
- * run last pass of JIT
- */
- for (i = 0; i < env->subprog_cnt; i++) {
- insn = func[i]->insnsi;
- for (j = 0; j < func[i]->len; j++, insn++) {
- if (bpf_pseudo_func(insn)) {
- subprog = insn->off;
- insn[0].imm = (u32)(long)func[subprog]->bpf_func;
- insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
- continue;
- }
- if (!bpf_pseudo_call(insn))
- continue;
- subprog = insn->off;
- insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
- }
-
- /* we use the aux data to keep a list of the start addresses
- * of the JITed images for each function in the program
- *
- * for some architectures, such as powerpc64, the imm field
- * might not be large enough to hold the offset of the start
- * address of the callee's JITed image from __bpf_call_base
- *
- * in such cases, we can lookup the start address of a callee
- * by using its subprog id, available from the off field of
- * the call instruction, as an index for this list
- */
- func[i]->aux->func = func;
- func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
- func[i]->aux->real_func_cnt = env->subprog_cnt;
- }
- for (i = 0; i < env->subprog_cnt; i++) {
- old_bpf_func = func[i]->bpf_func;
- tmp = bpf_int_jit_compile(func[i]);
- if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
- verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
- err = -ENOTSUPP;
- goto out_free;
- }
- cond_resched();
- }
-
- /*
- * Cleanup func[i]->aux fields which aren't required
- * or can become invalid in future
- */
- for (i = 0; i < env->subprog_cnt; i++) {
- func[i]->aux->used_maps = NULL;
- func[i]->aux->used_map_cnt = 0;
- }
-
- /* finally lock prog and jit images for all functions and
-	 * populate kallsyms. Begin at the first subprogram, since
- * bpf_prog_load will add the kallsyms for the main program.
- */
- for (i = 1; i < env->subprog_cnt; i++) {
- err = bpf_prog_lock_ro(func[i]);
- if (err)
- goto out_free;
- }
-
- for (i = 1; i < env->subprog_cnt; i++)
- bpf_prog_kallsyms_add(func[i]);
-
- /* Last step: make now unused interpreter insns from main
- * prog consistent for later dump requests, so they can
- * later look the same as if they were interpreted only.
- */
- for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
- if (bpf_pseudo_func(insn)) {
- insn[0].imm = env->insn_aux_data[i].call_imm;
- insn[1].imm = insn->off;
- insn->off = 0;
- continue;
- }
- if (!bpf_pseudo_call(insn))
- continue;
- insn->off = env->insn_aux_data[i].call_imm;
- subprog = find_subprog(env, i + insn->off + 1);
- insn->imm = subprog;
- }
-
- prog->jited = 1;
- prog->bpf_func = func[0]->bpf_func;
- prog->jited_len = func[0]->jited_len;
- prog->aux->extable = func[0]->aux->extable;
- prog->aux->num_exentries = func[0]->aux->num_exentries;
- prog->aux->func = func;
- prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
- prog->aux->real_func_cnt = env->subprog_cnt;
- prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
- prog->aux->exception_boundary = func[0]->aux->exception_boundary;
- bpf_prog_jit_attempt_done(prog);
- return 0;
-out_free:
- /* We failed JIT'ing, so at this point we need to unregister poke
-	 * descriptors from subprogs, so that the kernel is not attempting
-	 * to patch them anymore as we're freeing the subprog JIT memory.
- */
- for (i = 0; i < prog->aux->size_poke_tab; i++) {
- map_ptr = prog->aux->poke_tab[i].tail_call.map;
- map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
- }
- /* At this point we're guaranteed that poke descriptors are not
-	 * live anymore. We can just unlink the descriptor table as it's
- * released with the main prog.
- */
- for (i = 0; i < env->subprog_cnt; i++) {
- if (!func[i])
- continue;
- func[i]->aux->poke_tab = NULL;
- bpf_jit_free(func[i]);
- }
- kfree(func);
-out_undo_insn:
- /* cleanup main prog to be interpreted */
- prog->jit_requested = 0;
- prog->blinding_requested = 0;
- for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
- if (!bpf_pseudo_call(insn))
- continue;
- insn->off = 0;
- insn->imm = env->insn_aux_data[i].call_imm;
- }
- bpf_prog_jit_attempt_done(prog);
- return err;
-}
-
-static int fixup_call_args(struct bpf_verifier_env *env)
-{
-#ifndef CONFIG_BPF_JIT_ALWAYS_ON
- struct bpf_prog *prog = env->prog;
- struct bpf_insn *insn = prog->insnsi;
- bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
- int i, depth;
-#endif
- int err = 0;
-
- if (env->prog->jit_requested &&
- !bpf_prog_is_offloaded(env->prog->aux)) {
- err = jit_subprogs(env);
- if (err == 0)
- return 0;
- if (err == -EFAULT)
- return err;
- }
-#ifndef CONFIG_BPF_JIT_ALWAYS_ON
- if (has_kfunc_call) {
-		verbose(env, "calling kernel functions is not allowed in non-JITed programs\n");
- return -EINVAL;
- }
- if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
- /* When JIT fails the progs with bpf2bpf calls and tail_calls
- * have to be rejected, since interpreter doesn't support them yet.
- */
- verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
- return -EINVAL;
- }
- for (i = 0; i < prog->len; i++, insn++) {
- if (bpf_pseudo_func(insn)) {
- /* When JIT fails the progs with callback calls
- * have to be rejected, since interpreter doesn't support them yet.
- */
- verbose(env, "callbacks are not allowed in non-JITed programs\n");
- return -EINVAL;
- }
-
- if (!bpf_pseudo_call(insn))
- continue;
- depth = get_callee_stack_depth(env, insn, i);
- if (depth < 0)
- return depth;
- bpf_patch_call_args(insn, depth);
- }
- err = 0;
-#endif
- return err;
-}
-
-/* replace a generic kfunc with a specialized version if necessary */
-static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx)
-{
- struct bpf_prog *prog = env->prog;
- bool seen_direct_write;
- void *xdp_kfunc;
- bool is_rdonly;
- u32 func_id = desc->func_id;
- u16 offset = desc->offset;
- unsigned long addr = desc->addr;
-
- if (offset) /* return if module BTF is used */
- return 0;
-
- if (bpf_dev_bound_kfunc_id(func_id)) {
- xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
- if (xdp_kfunc)
- addr = (unsigned long)xdp_kfunc;
- /* fallback to default kfunc when not supported by netdev */
- } else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
- seen_direct_write = env->seen_direct_write;
- is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
-
- if (is_rdonly)
- addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
-
- /* restore env->seen_direct_write to its original value, since
- * may_access_direct_pkt_data mutates it
- */
- env->seen_direct_write = seen_direct_write;
- } else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) {
- if (bpf_lsm_has_d_inode_locked(prog))
- addr = (unsigned long)bpf_set_dentry_xattr_locked;
- } else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) {
- if (bpf_lsm_has_d_inode_locked(prog))
- addr = (unsigned long)bpf_remove_dentry_xattr_locked;
- } else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
- if (!env->insn_aux_data[insn_idx].non_sleepable)
- addr = (unsigned long)bpf_dynptr_from_file_sleepable;
- } else if (func_id == special_kfunc_list[KF_bpf_arena_alloc_pages]) {
- if (env->insn_aux_data[insn_idx].non_sleepable)
- addr = (unsigned long)bpf_arena_alloc_pages_non_sleepable;
- } else if (func_id == special_kfunc_list[KF_bpf_arena_free_pages]) {
- if (env->insn_aux_data[insn_idx].non_sleepable)
- addr = (unsigned long)bpf_arena_free_pages_non_sleepable;
- }
- desc->addr = addr;
- return 0;
-}
-
-static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
- u16 struct_meta_reg,
- u16 node_offset_reg,
- struct bpf_insn *insn,
- struct bpf_insn *insn_buf,
- int *cnt)
-{
- struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
- struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
-
- insn_buf[0] = addr[0];
- insn_buf[1] = addr[1];
- insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
- insn_buf[3] = *insn;
- *cnt = 4;
-}
-
-static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
- struct bpf_insn *insn_buf, int insn_idx, int *cnt)
-{
- struct bpf_kfunc_desc *desc;
- int err;
-
- if (!insn->imm) {
- verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
- return -EINVAL;
- }
-
- *cnt = 0;
-
- /* insn->imm has the btf func_id. Replace it with an offset relative to
- * __bpf_call_base, unless the JIT needs to call functions that are
- * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
- */
- desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
- if (!desc) {
- verifier_bug(env, "kernel function descriptor not found for func_id %u",
- insn->imm);
- return -EFAULT;
- }
-
- err = specialize_kfunc(env, desc, insn_idx);
- if (err)
- return err;
-
- if (!bpf_jit_supports_far_kfunc_call())
- insn->imm = BPF_CALL_IMM(desc->addr);
-
- if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
- desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
- struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
- struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
- u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
-
- if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && kptr_struct_meta) {
- verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
- insn_idx);
- return -EFAULT;
- }
-
- insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
- insn_buf[1] = addr[0];
- insn_buf[2] = addr[1];
- insn_buf[3] = *insn;
- *cnt = 4;
- } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
- desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] ||
- desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
- struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
- struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
-
- if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] && kptr_struct_meta) {
- verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
- insn_idx);
- return -EFAULT;
- }
-
- if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
- !kptr_struct_meta) {
- verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
- insn_idx);
- return -EFAULT;
- }
-
- insn_buf[0] = addr[0];
- insn_buf[1] = addr[1];
- insn_buf[2] = *insn;
- *cnt = 3;
- } else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
- desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
- desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
- struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
- int struct_meta_reg = BPF_REG_3;
- int node_offset_reg = BPF_REG_4;
-
-		/* rbtree_add has an extra 'less' arg, so args-to-fixup are in different regs */
- if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
- struct_meta_reg = BPF_REG_4;
- node_offset_reg = BPF_REG_5;
- }
-
- if (!kptr_struct_meta) {
- verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
- insn_idx);
- return -EFAULT;
- }
-
- __fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
- node_offset_reg, insn, insn_buf, cnt);
- } else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
- desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
- insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
- *cnt = 1;
- } else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] &&
- env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
- /*
- * inline the bpf_session_is_return() for fsession:
- * bool bpf_session_is_return(void *ctx)
- * {
- * return (((u64 *)ctx)[-1] >> BPF_TRAMP_IS_RETURN_SHIFT) & 1;
- * }
- */
- insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
- insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_IS_RETURN_SHIFT);
- insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
- *cnt = 3;
- } else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
- env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
- /*
- * inline bpf_session_cookie() for fsession:
- * __u64 *bpf_session_cookie(void *ctx)
- * {
- * u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_COOKIE_INDEX_SHIFT) & 0xFF;
- * return &((u64 *)ctx)[-off];
- * }
- */
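-		/* The six insns below compute r0 = r1 - off * 8, i.e.
-		 * &((u64 *)ctx)[-off], as r0 = -(off * 8 - r1).
-		 */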
- insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
- insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_COOKIE_INDEX_SHIFT);
- insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
- insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
- insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
- insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
- *cnt = 6;
- }
-
- if (env->insn_aux_data[insn_idx].arg_prog) {
- u32 regno = env->insn_aux_data[insn_idx].arg_prog;
- struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(regno, (long)env->prog->aux) };
- int idx = *cnt;
-
- insn_buf[idx++] = ld_addrs[0];
- insn_buf[idx++] = ld_addrs[1];
- insn_buf[idx++] = *insn;
- *cnt = idx;
- }
- return 0;
-}
-
-/* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */
-static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
-{
- struct bpf_subprog_info *info = env->subprog_info;
- int cnt = env->subprog_cnt;
- struct bpf_prog *prog;
-
- /* We only reserve one slot for hidden subprogs in subprog_info. */
- if (env->hidden_subprog_cnt) {
- verifier_bug(env, "only one hidden subprog supported");
- return -EFAULT;
- }
- /* We're not patching any existing instruction, just appending the new
- * ones for the hidden subprog. Hence all of the adjustment operations
- * in bpf_patch_insn_data are no-ops.
- */
- prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
- if (!prog)
- return -ENOMEM;
- env->prog = prog;
- info[cnt + 1].start = info[cnt].start;
- info[cnt].start = prog->len - len + 1;
- env->subprog_cnt++;
- env->hidden_subprog_cnt++;
- return 0;
-}
-
-/* Do various post-verification rewrites in a single program pass.
- * These rewrites simplify JIT and interpreter implementations.
- */
-static int do_misc_fixups(struct bpf_verifier_env *env)
-{
- struct bpf_prog *prog = env->prog;
- enum bpf_attach_type eatype = prog->expected_attach_type;
- enum bpf_prog_type prog_type = resolve_prog_type(prog);
- struct bpf_insn *insn = prog->insnsi;
- const struct bpf_func_proto *fn;
- const int insn_cnt = prog->len;
- const struct bpf_map_ops *ops;
- struct bpf_insn_aux_data *aux;
- struct bpf_insn *insn_buf = env->insn_buf;
- struct bpf_prog *new_prog;
- struct bpf_map *map_ptr;
- int i, ret, cnt, delta = 0, cur_subprog = 0;
- struct bpf_subprog_info *subprogs = env->subprog_info;
- u16 stack_depth = subprogs[cur_subprog].stack_depth;
- u16 stack_depth_extra = 0;
-
- if (env->seen_exception && !env->exception_callback_subprog) {
- struct bpf_insn *patch = insn_buf;
-
- *patch++ = env->prog->insnsi[insn_cnt - 1];
- *patch++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
- *patch++ = BPF_EXIT_INSN();
- ret = add_hidden_subprog(env, insn_buf, patch - insn_buf);
- if (ret < 0)
- return ret;
- prog = env->prog;
- insn = prog->insnsi;
-
- env->exception_callback_subprog = env->subprog_cnt - 1;
- /* Don't update insn_cnt, as add_hidden_subprog always appends insns */
- mark_subprog_exc_cb(env, env->exception_callback_subprog);
- }
-
- for (i = 0; i < insn_cnt;) {
- if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) {
- if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) ||
- (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
- /* convert to 32-bit mov that clears upper 32-bit */
- insn->code = BPF_ALU | BPF_MOV | BPF_X;
- /* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
- insn->off = 0;
- insn->imm = 0;
- } /* cast from as(0) to as(1) should be handled by JIT */
- goto next_insn;
- }
-
- if (env->insn_aux_data[i + delta].needs_zext)
- /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
- insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
-
- /* Make sdiv/smod divide-by-minus-one exceptions impossible. */
- if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
- insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
- insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
- insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
- insn->off == 1 && insn->imm == -1) {
- bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
- bool isdiv = BPF_OP(insn->code) == BPF_DIV;
- struct bpf_insn *patch = insn_buf;
-
- if (isdiv)
- *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
- BPF_NEG | BPF_K, insn->dst_reg,
- 0, 0, 0);
- else
- *patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
-
- cnt = patch - insn_buf;
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-
- /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
- if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
- insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
- insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
- insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
- bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
- bool isdiv = BPF_OP(insn->code) == BPF_DIV;
- bool is_sdiv = isdiv && insn->off == 1;
- bool is_smod = !isdiv && insn->off == 1;
- struct bpf_insn *patch = insn_buf;
-
- if (is_sdiv) {
- /* [R,W]x sdiv 0 -> 0
- * LLONG_MIN sdiv -1 -> LLONG_MIN
- * INT_MIN sdiv -1 -> INT_MIN
- */
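-				/* Compute AX = src + 1, so the two special
-				 * divisors collapse onto small values: src == 0
-				 * gives AX == 1 and src == -1 gives AX == 0.  A
-				 * single unsigned "AX > 1" test then falls
-				 * through to the real sdiv for all other
-				 * divisors.
-				 */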
- *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
- *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
- BPF_ADD | BPF_K, BPF_REG_AX,
- 0, 0, 1);
- *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
- BPF_JGT | BPF_K, BPF_REG_AX,
- 0, 4, 1);
- *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
- BPF_JEQ | BPF_K, BPF_REG_AX,
- 0, 1, 0);
- *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
- BPF_MOV | BPF_K, insn->dst_reg,
- 0, 0, 0);
- /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
- *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
- BPF_NEG | BPF_K, insn->dst_reg,
- 0, 0, 0);
- *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
- *patch++ = *insn;
- cnt = patch - insn_buf;
- } else if (is_smod) {
- /* [R,W]x mod 0 -> [R,W]x */
- /* [R,W]x mod -1 -> 0 */
- *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
- *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
- BPF_ADD | BPF_K, BPF_REG_AX,
- 0, 0, 1);
- *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
- BPF_JGT | BPF_K, BPF_REG_AX,
- 0, 3, 1);
- *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
- BPF_JEQ | BPF_K, BPF_REG_AX,
- 0, 3 + (is64 ? 0 : 1), 1);
- *patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
- *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
- *patch++ = *insn;
-
- if (!is64) {
- *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
- *patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
- }
- cnt = patch - insn_buf;
- } else if (isdiv) {
- /* [R,W]x div 0 -> 0 */
- *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
- BPF_JNE | BPF_K, insn->src_reg,
- 0, 2, 0);
- *patch++ = BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg);
- *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
- *patch++ = *insn;
- cnt = patch - insn_buf;
- } else {
- /* [R,W]x mod 0 -> [R,W]x */
- *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
- BPF_JEQ | BPF_K, insn->src_reg,
- 0, 1 + (is64 ? 0 : 1), 0);
- *patch++ = *insn;
-
- if (!is64) {
- *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
- *patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
- }
- cnt = patch - insn_buf;
- }
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-
- /* Make it impossible to de-reference a userspace address */
- if (BPF_CLASS(insn->code) == BPF_LDX &&
- (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
- BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
- struct bpf_insn *patch = insn_buf;
- u64 uaddress_limit = bpf_arch_uaddress_limit();
-
- if (!uaddress_limit)
- goto next_insn;
-
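-			/* Compare only the upper 32 bits of src + off against
-			 * bpf_arch_uaddress_limit(): if they are <= the upper
-			 * bits of the limit, the address may be a userspace
-			 * one, so the load is skipped and dst is zeroed
-			 * instead.
-			 */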
- *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
- if (insn->off)
- *patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
- *patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
- *patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
- *patch++ = *insn;
- *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
- *patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);
-
- cnt = patch - insn_buf;
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-
- /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
- if (BPF_CLASS(insn->code) == BPF_LD &&
- (BPF_MODE(insn->code) == BPF_ABS ||
- BPF_MODE(insn->code) == BPF_IND)) {
- cnt = env->ops->gen_ld_abs(insn, insn_buf);
- if (cnt == 0 || cnt >= INSN_BUF_SIZE) {
- verifier_bug(env, "%d insns generated for ld_abs", cnt);
- return -EFAULT;
- }
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-
- /* Rewrite pointer arithmetic to mitigate speculation attacks. */
- if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
- insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
- const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
- const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
- struct bpf_insn *patch = insn_buf;
- bool issrc, isneg, isimm;
- u32 off_reg;
-
- aux = &env->insn_aux_data[i + delta];
- if (!aux->alu_state ||
- aux->alu_state == BPF_ALU_NON_POINTER)
- goto next_insn;
-
- isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
- issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
- BPF_ALU_SANITIZE_SRC;
- isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
-
- off_reg = issrc ? insn->src_reg : insn->dst_reg;
- if (isimm) {
- *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
- } else {
- if (isneg)
- *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
- *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
- *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
- *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
- *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
- *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
- *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
- }
- if (!issrc)
- *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
- insn->src_reg = BPF_REG_AX;
- if (isneg)
- insn->code = insn->code == code_add ?
- code_sub : code_add;
- *patch++ = *insn;
- if (issrc && isneg && !isimm)
- *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
- cnt = patch - insn_buf;
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-
- if (is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) {
- int stack_off_cnt = -stack_depth - 16;
-
- /*
- * Two 8 byte slots, depth-16 stores the count, and
- * depth-8 stores the start timestamp of the loop.
- *
- * The starting value of count is BPF_MAX_TIMED_LOOPS
- * (0xffff). Every iteration loads it and subs it by 1,
- * until the value becomes 0 in AX (thus, 1 in stack),
- * after which we call arch_bpf_timed_may_goto, which
- * either sets AX to 0xffff to keep looping, or to 0
- * upon timeout. AX is then stored into the stack. In
-			 * the next iteration, we either see 0 and break out, or
-			 * keep iterating until the value hits 0 again after the
-			 * subtraction; rinse and repeat.
- */
- stack_depth_extra = 16;
- insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off_cnt);
- if (insn->off >= 0)
- insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 5);
- else
- insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
- insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
- insn_buf[3] = BPF_JMP_IMM(BPF_JNE, BPF_REG_AX, 0, 2);
- /*
- * AX is used as an argument to pass in stack_off_cnt
- * (to add to r10/fp), and also as the return value of
- * the call to arch_bpf_timed_may_goto.
- */
- insn_buf[4] = BPF_MOV64_IMM(BPF_REG_AX, stack_off_cnt);
- insn_buf[5] = BPF_EMIT_CALL(arch_bpf_timed_may_goto);
- insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off_cnt);
- cnt = 7;
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- } else if (is_may_goto_insn(insn)) {
- int stack_off = -stack_depth - 8;
-
- stack_depth_extra = 8;
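-			/* A plain may_goto expands to: load the loop count
-			 * from the stack, branch to the target once it hits 0,
-			 * otherwise decrement it and store it back.  The slot
-			 * is initialized to BPF_MAX_LOOPS in the subprog
-			 * prologue patched further below.
-			 */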
- insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
- if (insn->off >= 0)
- insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
- else
- insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
- insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
- insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
- cnt = 4;
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-
- if (insn->code != (BPF_JMP | BPF_CALL))
- goto next_insn;
- if (insn->src_reg == BPF_PSEUDO_CALL)
- goto next_insn;
- if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
- ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
- if (ret)
- return ret;
- if (cnt == 0)
- goto next_insn;
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-
- /* Skip inlining the helper call if the JIT does it. */
- if (bpf_jit_inlines_helper_call(insn->imm))
- goto next_insn;
-
- if (insn->imm == BPF_FUNC_get_route_realm)
- prog->dst_needed = 1;
- if (insn->imm == BPF_FUNC_get_prandom_u32)
- bpf_user_rnd_init_once();
- if (insn->imm == BPF_FUNC_override_return)
- prog->kprobe_override = 1;
- if (insn->imm == BPF_FUNC_tail_call) {
- /* If we tail call into other programs, we
- * cannot make any assumptions since they can
- * be replaced dynamically during runtime in
- * the program array.
- */
- prog->cb_access = 1;
- if (!allow_tail_call_in_subprogs(env))
- prog->aux->stack_depth = MAX_BPF_STACK;
- prog->aux->max_pkt_offset = MAX_PACKET_OFF;
-
-			/* mark bpf_tail_call as a different opcode to avoid a
-			 * conditional branch in the interpreter for every normal
-			 * call and to prevent accidental JITing by a JIT compiler
-			 * that doesn't support bpf_tail_call yet
-			 */
- insn->imm = 0;
- insn->code = BPF_JMP | BPF_TAIL_CALL;
-
- aux = &env->insn_aux_data[i + delta];
- if (env->bpf_capable && !prog->blinding_requested &&
- prog->jit_requested &&
- !bpf_map_key_poisoned(aux) &&
- !bpf_map_ptr_poisoned(aux) &&
- !bpf_map_ptr_unpriv(aux)) {
- struct bpf_jit_poke_descriptor desc = {
- .reason = BPF_POKE_REASON_TAIL_CALL,
- .tail_call.map = aux->map_ptr_state.map_ptr,
- .tail_call.key = bpf_map_key_immediate(aux),
- .insn_idx = i + delta,
- };
-
- ret = bpf_jit_add_poke_descriptor(prog, &desc);
- if (ret < 0) {
- verbose(env, "adding tail call poke descriptor failed\n");
- return ret;
- }
-
- insn->imm = ret + 1;
- goto next_insn;
- }
-
- if (!bpf_map_ptr_unpriv(aux))
- goto next_insn;
-
- /* instead of changing every JIT dealing with tail_call
- * emit two extra insns:
- * if (index >= max_entries) goto out;
- * index &= array->index_mask;
- * to avoid out-of-bounds cpu speculation
- */
- if (bpf_map_ptr_poisoned(aux)) {
- verbose(env, "tail_call abusing map_ptr\n");
- return -EINVAL;
- }
-
- map_ptr = aux->map_ptr_state.map_ptr;
- insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
- map_ptr->max_entries, 2);
- insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
- container_of(map_ptr,
- struct bpf_array,
- map)->index_mask);
- insn_buf[2] = *insn;
- cnt = 3;
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-
- if (insn->imm == BPF_FUNC_timer_set_callback) {
- /* The verifier will process callback_fn as many times as necessary
- * with different maps and the register states prepared by
- * set_timer_callback_state will be accurate.
- *
- * The following use case is valid:
- * map1 is shared by prog1, prog2, prog3.
- * prog1 calls bpf_timer_init for some map1 elements
- * prog2 calls bpf_timer_set_callback for some map1 elements.
- * Those that were not bpf_timer_init-ed will return -EINVAL.
- * prog3 calls bpf_timer_start for some map1 elements.
- * Those that were not both bpf_timer_init-ed and
- * bpf_timer_set_callback-ed will return -EINVAL.
- */
- struct bpf_insn ld_addrs[2] = {
- BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
- };
-
- insn_buf[0] = ld_addrs[0];
- insn_buf[1] = ld_addrs[1];
- insn_buf[2] = *insn;
- cnt = 3;
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto patch_call_imm;
- }
-
- if (is_storage_get_function(insn->imm)) {
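-			/* Pass the right GFP flags in the hidden fifth arg:
-			 * GFP_ATOMIC when this call site was found to be
-			 * non-sleepable, GFP_KERNEL otherwise.
-			 */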
- if (env->insn_aux_data[i + delta].non_sleepable)
- insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
- else
- insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
- insn_buf[1] = *insn;
- cnt = 2;
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto patch_call_imm;
- }
-
- /* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
- if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
- /* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
- * bpf_mem_alloc() returns a ptr to the percpu data ptr.
- */
- insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
- insn_buf[1] = *insn;
- cnt = 2;
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto patch_call_imm;
- }
-
- /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
- * and other inlining handlers are currently limited to 64 bit
- * only.
- */
- if (prog->jit_requested && BITS_PER_LONG == 64 &&
- (insn->imm == BPF_FUNC_map_lookup_elem ||
- insn->imm == BPF_FUNC_map_update_elem ||
- insn->imm == BPF_FUNC_map_delete_elem ||
- insn->imm == BPF_FUNC_map_push_elem ||
- insn->imm == BPF_FUNC_map_pop_elem ||
- insn->imm == BPF_FUNC_map_peek_elem ||
- insn->imm == BPF_FUNC_redirect_map ||
- insn->imm == BPF_FUNC_for_each_map_elem ||
- insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
- aux = &env->insn_aux_data[i + delta];
- if (bpf_map_ptr_poisoned(aux))
- goto patch_call_imm;
-
- map_ptr = aux->map_ptr_state.map_ptr;
- ops = map_ptr->ops;
- if (insn->imm == BPF_FUNC_map_lookup_elem &&
- ops->map_gen_lookup) {
- cnt = ops->map_gen_lookup(map_ptr, insn_buf);
- if (cnt == -EOPNOTSUPP)
- goto patch_map_ops_generic;
- if (cnt <= 0 || cnt >= INSN_BUF_SIZE) {
- verifier_bug(env, "%d insns generated for map lookup", cnt);
- return -EFAULT;
- }
-
- new_prog = bpf_patch_insn_data(env, i + delta,
- insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-
- BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
- (void *(*)(struct bpf_map *map, void *key))NULL));
- BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
- (long (*)(struct bpf_map *map, void *key))NULL));
- BUILD_BUG_ON(!__same_type(ops->map_update_elem,
- (long (*)(struct bpf_map *map, void *key, void *value,
- u64 flags))NULL));
- BUILD_BUG_ON(!__same_type(ops->map_push_elem,
- (long (*)(struct bpf_map *map, void *value,
- u64 flags))NULL));
- BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
- (long (*)(struct bpf_map *map, void *value))NULL));
- BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
- (long (*)(struct bpf_map *map, void *value))NULL));
- BUILD_BUG_ON(!__same_type(ops->map_redirect,
- (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
- BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
- (long (*)(struct bpf_map *map,
- bpf_callback_t callback_fn,
- void *callback_ctx,
- u64 flags))NULL));
- BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
- (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
-
-patch_map_ops_generic:
- switch (insn->imm) {
- case BPF_FUNC_map_lookup_elem:
- insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
- goto next_insn;
- case BPF_FUNC_map_update_elem:
- insn->imm = BPF_CALL_IMM(ops->map_update_elem);
- goto next_insn;
- case BPF_FUNC_map_delete_elem:
- insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
- goto next_insn;
- case BPF_FUNC_map_push_elem:
- insn->imm = BPF_CALL_IMM(ops->map_push_elem);
- goto next_insn;
- case BPF_FUNC_map_pop_elem:
- insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
- goto next_insn;
- case BPF_FUNC_map_peek_elem:
- insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
- goto next_insn;
- case BPF_FUNC_redirect_map:
- insn->imm = BPF_CALL_IMM(ops->map_redirect);
- goto next_insn;
- case BPF_FUNC_for_each_map_elem:
- insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
- goto next_insn;
- case BPF_FUNC_map_lookup_percpu_elem:
- insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
- goto next_insn;
- }
-
- goto patch_call_imm;
- }
-
- /* Implement bpf_jiffies64 inline. */
- if (prog->jit_requested && BITS_PER_LONG == 64 &&
- insn->imm == BPF_FUNC_jiffies64) {
- struct bpf_insn ld_jiffies_addr[2] = {
- BPF_LD_IMM64(BPF_REG_0,
- (unsigned long)&jiffies),
- };
-
- insn_buf[0] = ld_jiffies_addr[0];
- insn_buf[1] = ld_jiffies_addr[1];
- insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
- BPF_REG_0, 0);
- cnt = 3;
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
- cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-
-#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
- /* Implement bpf_get_smp_processor_id() inline. */
- if (insn->imm == BPF_FUNC_get_smp_processor_id &&
- verifier_inlines_helper_call(env, insn->imm)) {
- /* BPF_FUNC_get_smp_processor_id inlining is an
- * optimization, so if cpu_number is ever
- * changed in some incompatible and hard to support
- * way, it's fine to back out this inlining logic
- */
-#ifdef CONFIG_SMP
- insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&cpu_number);
- insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
- insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0);
- cnt = 3;
-#else
- insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
- cnt = 1;
-#endif
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-
- /* Implement bpf_get_current_task() and bpf_get_current_task_btf() inline. */
- if ((insn->imm == BPF_FUNC_get_current_task || insn->imm == BPF_FUNC_get_current_task_btf) &&
- verifier_inlines_helper_call(env, insn->imm)) {
- insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&current_task);
- insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
- insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
- cnt = 3;
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-#endif
- /* Implement bpf_get_func_arg inline. */
- if (prog_type == BPF_PROG_TYPE_TRACING &&
- insn->imm == BPF_FUNC_get_func_arg) {
- if (eatype == BPF_TRACE_RAW_TP) {
- int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
-
- /* skip 'void *__data' in btf_trace_##name() and save to reg0 */
- insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
- cnt = 1;
- } else {
- /* Load nr_args from ctx - 8 */
- insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
- insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
- cnt = 2;
- }
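-			/* For trampoline-based attach types, ctx[-1] holds
-			 * nr_args in its low byte and ctx[0..nr_args-1] hold
-			 * the argument values, so arg n is read from
-			 * ctx + n * 8 below.
-			 */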
- insn_buf[cnt++] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
- insn_buf[cnt++] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
- insn_buf[cnt++] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
- insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
- insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
- insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, 0);
- insn_buf[cnt++] = BPF_JMP_A(1);
- insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-
- /* Implement bpf_get_func_ret inline. */
- if (prog_type == BPF_PROG_TYPE_TRACING &&
- insn->imm == BPF_FUNC_get_func_ret) {
- if (eatype == BPF_TRACE_FEXIT ||
- eatype == BPF_TRACE_FSESSION ||
- eatype == BPF_MODIFY_RETURN) {
- /* Load nr_args from ctx - 8 */
- insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
- insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
- insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
- insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
- insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
- insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
- insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
- cnt = 7;
- } else {
- insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
- cnt = 1;
- }
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-
- /* Implement get_func_arg_cnt inline. */
- if (prog_type == BPF_PROG_TYPE_TRACING &&
- insn->imm == BPF_FUNC_get_func_arg_cnt) {
- if (eatype == BPF_TRACE_RAW_TP) {
- int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
-
- /* skip 'void *__data' in btf_trace_##name() and save to reg0 */
- insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
- cnt = 1;
- } else {
- /* Load nr_args from ctx - 8 */
- insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
- insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
- cnt = 2;
- }
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-
- /* Implement bpf_get_func_ip inline. */
- if (prog_type == BPF_PROG_TYPE_TRACING &&
- insn->imm == BPF_FUNC_get_func_ip) {
- /* Load IP address from ctx - 16 */
- insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
- if (!new_prog)
- return -ENOMEM;
-
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-
- /* Implement bpf_get_branch_snapshot inline. */
- if (IS_ENABLED(CONFIG_PERF_EVENTS) &&
- prog->jit_requested && BITS_PER_LONG == 64 &&
- insn->imm == BPF_FUNC_get_branch_snapshot) {
- /* We are dealing with the following func protos:
- * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags);
- * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt);
- */
- const u32 br_entry_size = sizeof(struct perf_branch_entry);
-
-			/* struct perf_branch_entry is part of UAPI and is
-			 * used as an array element, so it is extremely
-			 * unlikely to ever grow or shrink
-			 */
- BUILD_BUG_ON(br_entry_size != 24);
-
- /* if (unlikely(flags)) return -EINVAL */
- insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 7);
-
- /* Transform size (bytes) into number of entries (cnt = size / 24).
- * But to avoid expensive division instruction, we implement
- * divide-by-3 through multiplication, followed by further
- * division by 8 through 3-bit right shift.
- * Refer to book "Hacker's Delight, 2nd ed." by Henry S. Warren, Jr.,
- * p. 227, chapter "Unsigned Division by 3" for details and proofs.
- *
- * N / 3 <=> M * N / 2^33, where M = (2^33 + 1) / 3 = 0xaaaaaaab.
- */
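-			/* Worked example: size = 72 (three entries) gives
-			 * 72 * 0xaaaaaaab = 0x3000000018, and
-			 * 0x3000000018 >> 36 = 3.  The >> 33 for the /3 and
-			 * the >> 3 for the /8 are folded into one >> 36 below.
-			 */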
- insn_buf[1] = BPF_MOV32_IMM(BPF_REG_0, 0xaaaaaaab);
- insn_buf[2] = BPF_ALU64_REG(BPF_MUL, BPF_REG_2, BPF_REG_0);
- insn_buf[3] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36);
-
- /* call perf_snapshot_branch_stack implementation */
- insn_buf[4] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack));
- /* if (entry_cnt == 0) return -ENOENT */
- insn_buf[5] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4);
- /* return entry_cnt * sizeof(struct perf_branch_entry) */
- insn_buf[6] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size);
- insn_buf[7] = BPF_JMP_A(3);
- /* return -EINVAL; */
- insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
- insn_buf[9] = BPF_JMP_A(1);
- /* return -ENOENT; */
- insn_buf[10] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT);
- cnt = 11;
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-
- /* Implement bpf_kptr_xchg inline */
- if (prog->jit_requested && BITS_PER_LONG == 64 &&
- insn->imm == BPF_FUNC_kptr_xchg &&
- bpf_jit_supports_ptr_xchg()) {
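-			/* bpf_kptr_xchg(dst, ptr) becomes a plain 8-byte
-			 * atomic BPF_XCHG on dst: r0 takes the new pointer
-			 * from r2 and comes back holding the old value.
-			 */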
- insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
- insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
- cnt = 2;
-
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- goto next_insn;
- }
-patch_call_imm:
- fn = env->ops->get_func_proto(insn->imm, env->prog);
-		/* all functions that have a prototype and that the verifier
-		 * allowed programs to call must be real in-kernel functions
-		 */
- if (!fn->func) {
- verifier_bug(env,
-				     "non-inlined function %s#%d is missing func",
- func_id_name(insn->imm), insn->imm);
- return -EFAULT;
- }
- insn->imm = fn->func - __bpf_call_base;
-next_insn:
- if (subprogs[cur_subprog + 1].start == i + delta + 1) {
- subprogs[cur_subprog].stack_depth += stack_depth_extra;
- subprogs[cur_subprog].stack_extra = stack_depth_extra;
-
- stack_depth = subprogs[cur_subprog].stack_depth;
- if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) {
- verbose(env, "stack size %d(extra %d) is too large\n",
- stack_depth, stack_depth_extra);
- return -EINVAL;
- }
- cur_subprog++;
- stack_depth = subprogs[cur_subprog].stack_depth;
- stack_depth_extra = 0;
- }
- i++;
- insn++;
- }
-
- env->prog->aux->stack_depth = subprogs[0].stack_depth;
- for (i = 0; i < env->subprog_cnt; i++) {
- int delta = bpf_jit_supports_timed_may_goto() ? 2 : 1;
- int subprog_start = subprogs[i].start;
- int stack_slots = subprogs[i].stack_extra / 8;
- int slots = delta, cnt = 0;
-
- if (!stack_slots)
- continue;
- /* We need two slots in case timed may_goto is supported. */
- if (stack_slots > slots) {
- verifier_bug(env, "stack_slots supports may_goto only");
- return -EFAULT;
- }
-
- stack_depth = subprogs[i].stack_depth;
- if (bpf_jit_supports_timed_may_goto()) {
- insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
- BPF_MAX_TIMED_LOOPS);
- insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth + 8, 0);
- } else {
- /* Add ST insn to subprog prologue to init extra stack */
- insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
- BPF_MAX_LOOPS);
- }
- /* Copy first actual insn to preserve it */
- insn_buf[cnt++] = env->prog->insnsi[subprog_start];
-
- new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, cnt);
- if (!new_prog)
- return -ENOMEM;
- env->prog = prog = new_prog;
- /*
-		 * If may_goto is the first insn of a prog, there could be a jmp
-		 * insn that points to it; hence adjust all such jmps to point
-		 * to the insn after the BPF_ST that inits the may_goto count.
- * Adjustment will succeed because bpf_patch_insn_data() didn't fail.
- */
- WARN_ON(adjust_jmp_off(env->prog, subprog_start, delta));
- }
-
- /* Since poke tab is now finalized, publish aux to tracker. */
- for (i = 0; i < prog->aux->size_poke_tab; i++) {
- map_ptr = prog->aux->poke_tab[i].tail_call.map;
- if (!map_ptr->ops->map_poke_track ||
- !map_ptr->ops->map_poke_untrack ||
- !map_ptr->ops->map_poke_run) {
- verifier_bug(env, "poke tab is misconfigured");
- return -EFAULT;
- }
-
- ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
- if (ret < 0) {
- verbose(env, "tracking tail call prog failed\n");
- return ret;
- }
- }
-
- ret = sort_kfunc_descs_by_imm_off(env);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
- int position,
- s32 stack_base,
- u32 callback_subprogno,
- u32 *total_cnt)
-{
- s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
- s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
- s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
- int reg_loop_max = BPF_REG_6;
- int reg_loop_cnt = BPF_REG_7;
- int reg_loop_ctx = BPF_REG_8;
-
- struct bpf_insn *insn_buf = env->insn_buf;
- struct bpf_prog *new_prog;
- u32 callback_start;
- u32 call_insn_offset;
- s32 callback_offset;
- u32 cnt = 0;
-
-	/* This represents an inlined version of bpf_iter.c:bpf_loop;
-	 * be careful to keep the two in sync when modifying either.
- */
-
-	/* Return an error and jump to the end of the patch if the
-	 * expected number of iterations is too big.
- */
- insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2);
- insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG);
- insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16);
- /* spill R6, R7, R8 to use these as loop vars */
- insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset);
- insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset);
- insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset);
- /* initialize loop vars */
- insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1);
- insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0);
- insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3);
- /* loop header,
- * if reg_loop_cnt >= reg_loop_max skip the loop body
- */
- insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5);
- /* callback call,
- * correct callback offset would be set after patching
- */
- insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt);
- insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx);
- insn_buf[cnt++] = BPF_CALL_REL(0);
- /* increment loop counter */
- insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1);
- /* jump to loop header if callback returned 0 */
- insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6);
- /* return value of bpf_loop,
- * set R0 to the number of iterations
- */
- insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt);
- /* restore original values of R6, R7, R8 */
- insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset);
- insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset);
- insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset);
-
- *total_cnt = cnt;
- new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt);
- if (!new_prog)
- return new_prog;
-
- /* callback start is known only after patching */
- callback_start = env->subprog_info[callback_subprogno].start;
-	/* Note: insn_buf[12] is the offset of the BPF_CALL_REL instruction */
- call_insn_offset = position + 12;
- callback_offset = callback_start - call_insn_offset - 1;
- new_prog->insnsi[call_insn_offset].imm = callback_offset;
-
- return new_prog;
-}
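The emitted sequence behaves like the following C sketch of bpf_loop() (semantics reconstructed from the instruction comments above, not the actual bpf_iter.c source):

	static long bpf_loop_inlined(u64 nr_loops,
				     long (*callback)(u64 idx, void *ctx),
				     void *ctx)
	{
		u64 i = 0;

		if (nr_loops > BPF_MAX_LOOPS)
			return -E2BIG;
		while (i < nr_loops) {
			long ret = callback(i, ctx);

			i++;		/* counter is bumped before the test */
			if (ret)	/* any non-zero return stops the loop */
				break;
		}
		return i;		/* R0 = number of iterations performed */
	}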
-
-static bool is_bpf_loop_call(struct bpf_insn *insn)
-{
- return insn->code == (BPF_JMP | BPF_CALL) &&
- insn->src_reg == 0 &&
- insn->imm == BPF_FUNC_loop;
-}
-
-/* For all sub-programs in the program (including main), check
- * insn_aux_data to see if there are bpf_loop calls that require
- * inlining. If such calls are found, they are replaced with the
- * instruction sequence produced by the `inline_bpf_loop` function,
- * and the subprog's stack_depth is increased by the size of three
- * registers. This stack space is used to spill R6, R7 and R8, which
- * hold the loop bound, counter and context variables.
- */
-static int optimize_bpf_loop(struct bpf_verifier_env *env)
-{
- struct bpf_subprog_info *subprogs = env->subprog_info;
- int i, cur_subprog = 0, cnt, delta = 0;
- struct bpf_insn *insn = env->prog->insnsi;
- int insn_cnt = env->prog->len;
- u16 stack_depth = subprogs[cur_subprog].stack_depth;
- u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
- u16 stack_depth_extra = 0;
-
- for (i = 0; i < insn_cnt; i++, insn++) {
- struct bpf_loop_inline_state *inline_state =
- &env->insn_aux_data[i + delta].loop_inline_state;
-
- if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
- struct bpf_prog *new_prog;
-
- stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
- new_prog = inline_bpf_loop(env,
- i + delta,
- -(stack_depth + stack_depth_extra),
- inline_state->callback_subprogno,
- &cnt);
- if (!new_prog)
- return -ENOMEM;
-
- delta += cnt - 1;
- env->prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- }
-
- if (subprogs[cur_subprog + 1].start == i + delta + 1) {
- subprogs[cur_subprog].stack_depth += stack_depth_extra;
- cur_subprog++;
- stack_depth = subprogs[cur_subprog].stack_depth;
- stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
- stack_depth_extra = 0;
- }
- }
-
- env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
-
- return 0;
-}
-
-/* Remove unnecessary spill/fill pairs that are members of the fastcall
- * pattern, and adjust subprogram stack depth when possible.
- */
-static int remove_fastcall_spills_fills(struct bpf_verifier_env *env)
-{
- struct bpf_subprog_info *subprog = env->subprog_info;
- struct bpf_insn_aux_data *aux = env->insn_aux_data;
- struct bpf_insn *insn = env->prog->insnsi;
- int insn_cnt = env->prog->len;
- u32 spills_num;
- bool modified = false;
- int i, j;
-
- for (i = 0; i < insn_cnt; i++, insn++) {
- if (aux[i].fastcall_spills_num > 0) {
- spills_num = aux[i].fastcall_spills_num;
- /* NOPs would be removed by opt_remove_nops() */
- for (j = 1; j <= spills_num; ++j) {
- *(insn - j) = NOP;
- *(insn + j) = NOP;
- }
- modified = true;
- }
- if ((subprog + 1)->start == i + 1) {
- if (modified && !subprog->keep_fastcall_stack)
- subprog->stack_depth = -subprog->fastcall_stack_off;
- subprog++;
- modified = false;
- }
- }
-
- return 0;
-}
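For illustration, the kind of pattern being erased (a hypothetical fragment; the spill at insn - j and the fill at insn + j sit symmetrically around the call):

	/* before:                              after:
	 *   *(u64 *)(r10 - 8) = r1;              nop
	 *   call bpf_get_smp_processor_id        call bpf_get_smp_processor_id
	 *   r1 = *(u64 *)(r10 - 8);              nop
	 *
	 * with the slot at r10 - 8 unused, the subprog's stack_depth can
	 * shrink to -fastcall_stack_off, as done above.
	 */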
static void free_states(struct bpf_verifier_env *env)
{
@@ -24533,13 +18625,13 @@ static void free_states(struct bpf_verifier_env *env)
struct bpf_scc_info *info;
int i, j;
- free_verifier_state(env->cur_state, true);
+ bpf_free_verifier_state(env->cur_state, true);
env->cur_state = NULL;
while (!pop_stack(env, NULL, NULL, false));
list_for_each_safe(pos, tmp, &env->free_list) {
sl = container_of(pos, struct bpf_verifier_state_list, node);
- free_verifier_state(&sl->state, false);
+ bpf_free_verifier_state(&sl->state, false);
kfree(sl);
}
INIT_LIST_HEAD(&env->free_list);
@@ -24549,7 +18641,7 @@ static void free_states(struct bpf_verifier_env *env)
if (!info)
continue;
for (j = 0; j < info->num_visits; j++)
- free_backedges(&info->visits[j]);
+ bpf_free_backedges(&info->visits[j]);
kvfree(info);
env->scc_info[i] = NULL;
}
@@ -24562,7 +18654,7 @@ static void free_states(struct bpf_verifier_env *env)
list_for_each_safe(pos, tmp, head) {
sl = container_of(pos, struct bpf_verifier_state_list, node);
- free_verifier_state(&sl->state, false);
+ bpf_free_verifier_state(&sl->state, false);
kfree(sl);
}
INIT_LIST_HEAD(&env->explored_states[i]);
@@ -24615,10 +18707,18 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
if (subprog_is_exc_cb(env, subprog)) {
state->frame[0]->in_exception_callback_fn = true;
- /* We have already ensured that the callback returns an integer, just
- * like all global subprogs. We need to determine it only has a single
- * scalar argument.
+
+ /*
+	 * Global functions return either a scalar or void; make sure
+	 * this callback returns a scalar.
*/
+ if (subprog_returns_void(env, subprog)) {
+ verbose(env, "exception cb cannot return void\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Also ensure the callback only has a single scalar argument. */
if (sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_ANYTHING) {
verbose(env, "exception cb only supports single integer argument\n");
ret = -EINVAL;
@@ -24735,7 +18835,7 @@ static int do_check_subprogs(struct bpf_verifier_env *env)
again:
new_cnt = 0;
for (i = 1; i < env->subprog_cnt; i++) {
- if (!subprog_is_global(env, i))
+ if (!bpf_subprog_is_global(env, i))
continue;
sub_aux = subprog_aux(env, i);
@@ -24897,7 +18997,7 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
}
for (i = 0; i < st_ops_desc->arg_info[member_idx].cnt; i++) {
- if (st_ops_desc->arg_info[member_idx].info->refcounted) {
+ if (st_ops_desc->arg_info[member_idx].info[i].refcounted) {
has_refcounted_arg = true;
break;
}
@@ -24925,14 +19025,7 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
}
#define SECURITY_PREFIX "security_"
-static int check_attach_modify_return(unsigned long addr, const char *func_name)
-{
- if (within_error_injection_list(addr) ||
- !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
- return 0;
-
- return -EINVAL;
-}
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
/* list of non-sleepable functions that are otherwise on
 * the ALLOW_ERROR_INJECTION list
@@ -24955,6 +19048,75 @@ static int check_non_sleepable_error_inject(u32 btf_id)
return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
}
+static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
+{
+ /* fentry/fexit/fmod_ret progs can be sleepable if they are
+	 * attached to functions on the ALLOW_ERROR_INJECTION list and
+	 * are not in the denylist.
+ */
+ if (!check_non_sleepable_error_inject(btf_id) &&
+ within_error_injection_list(addr))
+ return 0;
+
+ return -EINVAL;
+}
+
+static int check_attach_modify_return(unsigned long addr, const char *func_name)
+{
+ if (within_error_injection_list(addr) ||
+ !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
+ return 0;
+
+ return -EINVAL;
+}
+
+#else
+
+/* Unfortunately, the arch-specific prefixes are hard-coded in arch syscall code
+ * so we need to hard-code them, too. Ftrace has arch_syscall_match_sym_name()
+ * but that just compares two concrete function names.
+ */
+static bool has_arch_syscall_prefix(const char *func_name)
+{
+#if defined(__x86_64__)
+ return !strncmp(func_name, "__x64_", 6);
+#elif defined(__i386__)
+ return !strncmp(func_name, "__ia32_", 7);
+#elif defined(__s390x__)
+ return !strncmp(func_name, "__s390x_", 8);
+#elif defined(__aarch64__)
+ return !strncmp(func_name, "__arm64_", 8);
+#elif defined(__riscv)
+ return !strncmp(func_name, "__riscv_", 8);
+#elif defined(__powerpc__) || defined(__powerpc64__)
+ return !strncmp(func_name, "sys_", 4);
+#elif defined(__loongarch__)
+ return !strncmp(func_name, "sys_", 4);
+#else
+ return false;
+#endif
+}
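For example, on x86-64 the syscall entry for openat(2) carries the __x64_ prefix, so (illustrative values):

	has_arch_syscall_prefix("__x64_sys_openat");	/* true on x86-64 */
	has_arch_syscall_prefix("vfs_read");		/* false: not a syscall entry */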
+
+/* Without error injection, allow sleepable and fmod_ret progs on syscalls. */
+
+static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
+{
+ if (has_arch_syscall_prefix(func_name))
+ return 0;
+
+ return -EINVAL;
+}
+
+static int check_attach_modify_return(unsigned long addr, const char *func_name)
+{
+ if (has_arch_syscall_prefix(func_name) ||
+ !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
+ return 0;
+
+ return -EINVAL;
+}
+
+#endif /* CONFIG_FUNCTION_ERROR_INJECTION */
+
int bpf_check_attach_target(struct bpf_verifier_log *log,
const struct bpf_prog *prog,
const struct bpf_prog *tgt_prog,
@@ -24981,7 +19143,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
if (!btf) {
bpf_log(log,
- "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
+ "Tracing program can only be attached to another program annotated with BTF\n");
return -EINVAL;
}
t = btf_type_by_id(btf, btf_id);
@@ -25017,7 +19179,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
if (aux->func && aux->func[subprog]->aux->exception_cb) {
bpf_log(log,
"%s programs cannot attach to exception callback\n",
- prog_extension ? "Extension" : "FENTRY/FEXIT");
+ prog_extension ? "Extension" : "Tracing");
return -EINVAL;
}
conservative = aux->func_info_aux[subprog].unreliable;
@@ -25106,7 +19268,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
case BPF_TRACE_RAW_TP:
if (tgt_prog) {
bpf_log(log,
- "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
+ "Only FENTRY/FEXIT/FSESSION progs are attachable to another BPF prog\n");
return -EINVAL;
}
if (!btf_type_is_typedef(t)) {
@@ -25234,12 +19396,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
ret = -EINVAL;
switch (prog->type) {
case BPF_PROG_TYPE_TRACING:
-
- /* fentry/fexit/fmod_ret progs can be sleepable if they are
-	 * attached to functions on the ALLOW_ERROR_INJECTION list and
-	 * are not in the denylist.
- */
- if (!check_non_sleepable_error_inject(btf_id) &&
- within_error_injection_list(addr))
+ if (!check_attach_sleepable(btf_id, addr, tname))
ret = 0;
/* fentry/fexit/fmod_ret progs can also be sleepable if they are
* in the fmodret id set with the KF_SLEEPABLE flag.
@@ -25377,7 +19534,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
}
if (prog->sleepable && !can_be_sleepable(prog)) {
- verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
+ verbose(env, "Only fentry/fexit/fsession/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
return -EINVAL;
}
@@ -25525,430 +19682,209 @@ static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr,
return 0;
}
-/* Each field is a register bitmask */
-struct insn_live_regs {
- u16 use; /* registers read by instruction */
- u16 def; /* registers written by instruction */
- u16 in; /* registers that may be alive before instruction */
- u16 out; /* registers that may be alive after instruction */
-};
+/* replace a generic kfunc with a specialized version if necessary */
+static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx)
+{
+ struct bpf_prog *prog = env->prog;
+ bool seen_direct_write;
+ void *xdp_kfunc;
+ bool is_rdonly;
+ u32 func_id = desc->func_id;
+ u16 offset = desc->offset;
+ unsigned long addr = desc->addr;
-/* Bitmask with 1s for all caller saved registers */
-#define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
+ if (offset) /* return if module BTF is used */
+ return 0;
-/* Compute info->{use,def} fields for the instruction */
-static void compute_insn_live_regs(struct bpf_verifier_env *env,
- struct bpf_insn *insn,
- struct insn_live_regs *info)
-{
- struct call_summary cs;
- u8 class = BPF_CLASS(insn->code);
- u8 code = BPF_OP(insn->code);
- u8 mode = BPF_MODE(insn->code);
- u16 src = BIT(insn->src_reg);
- u16 dst = BIT(insn->dst_reg);
- u16 r0 = BIT(0);
- u16 def = 0;
- u16 use = 0xffff;
+ if (bpf_dev_bound_kfunc_id(func_id)) {
+ xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
+ if (xdp_kfunc)
+ addr = (unsigned long)xdp_kfunc;
+		/* fall back to the default kfunc when not supported by the netdev */
+ } else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
+ seen_direct_write = env->seen_direct_write;
+ is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
- switch (class) {
- case BPF_LD:
- switch (mode) {
- case BPF_IMM:
- if (BPF_SIZE(insn->code) == BPF_DW) {
- def = dst;
- use = 0;
- }
- break;
- case BPF_LD | BPF_ABS:
- case BPF_LD | BPF_IND:
- /* stick with defaults */
- break;
- }
- break;
- case BPF_LDX:
- switch (mode) {
- case BPF_MEM:
- case BPF_MEMSX:
- def = dst;
- use = src;
- break;
- }
- break;
- case BPF_ST:
- switch (mode) {
- case BPF_MEM:
- def = 0;
- use = dst;
- break;
- }
- break;
- case BPF_STX:
- switch (mode) {
- case BPF_MEM:
- def = 0;
- use = dst | src;
- break;
- case BPF_ATOMIC:
- switch (insn->imm) {
- case BPF_CMPXCHG:
- use = r0 | dst | src;
- def = r0;
- break;
- case BPF_LOAD_ACQ:
- def = dst;
- use = src;
- break;
- case BPF_STORE_REL:
- def = 0;
- use = dst | src;
- break;
- default:
- use = dst | src;
- if (insn->imm & BPF_FETCH)
- def = src;
- else
- def = 0;
- }
- break;
- }
- break;
- case BPF_ALU:
- case BPF_ALU64:
- switch (code) {
- case BPF_END:
- use = dst;
- def = dst;
- break;
- case BPF_MOV:
- def = dst;
- if (BPF_SRC(insn->code) == BPF_K)
- use = 0;
- else
- use = src;
- break;
- default:
- def = dst;
- if (BPF_SRC(insn->code) == BPF_K)
- use = dst;
- else
- use = dst | src;
- }
- break;
- case BPF_JMP:
- case BPF_JMP32:
- switch (code) {
- case BPF_JA:
- def = 0;
- if (BPF_SRC(insn->code) == BPF_X)
- use = dst;
- else
- use = 0;
- break;
- case BPF_JCOND:
- def = 0;
- use = 0;
- break;
- case BPF_EXIT:
- def = 0;
- use = r0;
- break;
- case BPF_CALL:
- def = ALL_CALLER_SAVED_REGS;
- use = def & ~BIT(BPF_REG_0);
- if (get_call_summary(env, insn, &cs))
- use = GENMASK(cs.num_params, 1);
- break;
- default:
- def = 0;
- if (BPF_SRC(insn->code) == BPF_K)
- use = dst;
- else
- use = dst | src;
- }
- break;
+ if (is_rdonly)
+ addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
+
+ /* restore env->seen_direct_write to its original value, since
+ * may_access_direct_pkt_data mutates it
+ */
+ env->seen_direct_write = seen_direct_write;
+ } else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) {
+ if (bpf_lsm_has_d_inode_locked(prog))
+ addr = (unsigned long)bpf_set_dentry_xattr_locked;
+ } else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) {
+ if (bpf_lsm_has_d_inode_locked(prog))
+ addr = (unsigned long)bpf_remove_dentry_xattr_locked;
+ } else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
+ if (!env->insn_aux_data[insn_idx].non_sleepable)
+ addr = (unsigned long)bpf_dynptr_from_file_sleepable;
+ } else if (func_id == special_kfunc_list[KF_bpf_arena_alloc_pages]) {
+ if (env->insn_aux_data[insn_idx].non_sleepable)
+ addr = (unsigned long)bpf_arena_alloc_pages_non_sleepable;
+ } else if (func_id == special_kfunc_list[KF_bpf_arena_free_pages]) {
+ if (env->insn_aux_data[insn_idx].non_sleepable)
+ addr = (unsigned long)bpf_arena_free_pages_non_sleepable;
}
+ desc->addr = addr;
+ return 0;
+}
+
+static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
+ u16 struct_meta_reg,
+ u16 node_offset_reg,
+ struct bpf_insn *insn,
+ struct bpf_insn *insn_buf,
+ int *cnt)
+{
+ struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
+ struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
- info->def = def;
- info->use = use;
+ insn_buf[0] = addr[0];
+ insn_buf[1] = addr[1];
+ insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
+ insn_buf[3] = *insn;
+ *cnt = 4;
}
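A few concrete use/def values produced by the relocated compute_insn_live_regs() logic (register choices are illustrative):

	/*
	 *   r1 += r2                   use = {r1, r2}   def = {r1}
	 *   r3 = 42                    use = {}         def = {r3}
	 *   *(u64 *)(r10 - 8) = r4     use = {r10, r4}  def = {}
	 *   call bpf_map_lookup_elem   use = {r1, r2}   def = {r0..r5}
	 */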
-/* Compute may-live registers before each instruction in the program.
- * A register is live before instruction I if it is read by I or by
- * some instruction S following I during program execution and is not
- * overwritten between I and S.
- *
- * Store result in env->insn_aux_data[i].live_regs_before.
- */
-static int compute_live_registers(struct bpf_verifier_env *env)
+int bpf_fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ struct bpf_insn *insn_buf, int insn_idx, int *cnt)
{
- struct bpf_insn_aux_data *insn_aux = env->insn_aux_data;
- struct bpf_insn *insns = env->prog->insnsi;
- struct insn_live_regs *state;
- int insn_cnt = env->prog->len;
- int err = 0, i, j;
- bool changed;
-
- /* Use the following algorithm:
- * - define the following:
- * - I.use : a set of all registers read by instruction I;
- * - I.def : a set of all registers written by instruction I;
- * - I.in : a set of all registers that may be alive before I execution;
- * - I.out : a set of all registers that may be alive after I execution;
- * - insn_successors(I): a set of instructions S that might immediately
- * follow I for some program execution;
- * - associate separate empty sets 'I.in' and 'I.out' with each instruction;
- * - visit each instruction in a postorder and update
- * state[i].in, state[i].out as follows:
- *
- * state[i].out = U [state[s].in for S in insn_successors(i)]
- * state[i].in = (state[i].out / state[i].def) U state[i].use
- *
- * (where U stands for set union, / stands for set difference)
-	 * - repeat the computation while the {in,out} fields change for
- * any instruction.
+ struct bpf_kfunc_desc *desc;
+ int err;
+
+ if (!insn->imm) {
+ verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
+ return -EINVAL;
+ }
+
+ *cnt = 0;
+
+ /* insn->imm has the btf func_id. Replace it with an offset relative to
+ * __bpf_call_base, unless the JIT needs to call functions that are
+ * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
*/
- state = kvzalloc_objs(*state, insn_cnt, GFP_KERNEL_ACCOUNT);
- if (!state) {
- err = -ENOMEM;
- goto out;
+ desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
+ if (!desc) {
+ verifier_bug(env, "kernel function descriptor not found for func_id %u",
+ insn->imm);
+ return -EFAULT;
}
- for (i = 0; i < insn_cnt; ++i)
- compute_insn_live_regs(env, &insns[i], &state[i]);
-
- changed = true;
- while (changed) {
- changed = false;
- for (i = 0; i < env->cfg.cur_postorder; ++i) {
- int insn_idx = env->cfg.insn_postorder[i];
- struct insn_live_regs *live = &state[insn_idx];
- struct bpf_iarray *succ;
- u16 new_out = 0;
- u16 new_in = 0;
-
- succ = bpf_insn_successors(env, insn_idx);
- for (int s = 0; s < succ->cnt; ++s)
- new_out |= state[succ->items[s]].in;
- new_in = (new_out & ~live->def) | live->use;
- if (new_out != live->out || new_in != live->in) {
- live->in = new_in;
- live->out = new_out;
- changed = true;
- }
+ err = specialize_kfunc(env, desc, insn_idx);
+ if (err)
+ return err;
+
+ if (!bpf_jit_supports_far_kfunc_call())
+ insn->imm = BPF_CALL_IMM(desc->addr);
+
+ if (is_bpf_obj_new_kfunc(desc->func_id) || is_bpf_percpu_obj_new_kfunc(desc->func_id)) {
+ struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
+ struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
+ u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
+
+ if (is_bpf_percpu_obj_new_kfunc(desc->func_id) && kptr_struct_meta) {
+ verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
+ insn_idx);
+ return -EFAULT;
}
- }
- for (i = 0; i < insn_cnt; ++i)
- insn_aux[i].live_regs_before = state[i].in;
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
+ insn_buf[1] = addr[0];
+ insn_buf[2] = addr[1];
+ insn_buf[3] = *insn;
+ *cnt = 4;
+ } else if (is_bpf_obj_drop_kfunc(desc->func_id) ||
+ is_bpf_percpu_obj_drop_kfunc(desc->func_id) ||
+ is_bpf_refcount_acquire_kfunc(desc->func_id)) {
+ struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
+ struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
- if (env->log.level & BPF_LOG_LEVEL2) {
- verbose(env, "Live regs before insn:\n");
- for (i = 0; i < insn_cnt; ++i) {
- if (env->insn_aux_data[i].scc)
- verbose(env, "%3d ", env->insn_aux_data[i].scc);
- else
- verbose(env, " ");
- verbose(env, "%3d: ", i);
- for (j = BPF_REG_0; j < BPF_REG_10; ++j)
- if (insn_aux[i].live_regs_before & BIT(j))
- verbose(env, "%d", j);
- else
- verbose(env, ".");
- verbose(env, " ");
- verbose_insn(env, &insns[i]);
- if (bpf_is_ldimm64(&insns[i]))
- i++;
+ if (is_bpf_percpu_obj_drop_kfunc(desc->func_id) && kptr_struct_meta) {
+ verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
+ insn_idx);
+ return -EFAULT;
}
- }
-out:
- kvfree(state);
- return err;
-}
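One round of the fixpoint on a straight-line tail makes the equations concrete (a hypothetical two-instruction fragment):

	/*
	 *   7: r0 = r7   use = {r7}  def = {r0}  out = {r0}  in = {r7}
	 *   8: exit      use = {r0}  def = {}    out = {}    in = {r0}
	 *
	 * insn 7's out is the union of its successors' in sets ({r0} from
	 * the exit); its in then drops the defined r0 and adds the used r7.
	 */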
+ if (is_bpf_refcount_acquire_kfunc(desc->func_id) && !kptr_struct_meta) {
+ verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
+ insn_idx);
+ return -EFAULT;
+ }
-/*
- * Compute strongly connected components (SCCs) on the CFG.
- * Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc.
- * If an instruction is the sole member of its SCC and there are no self edges,
- * assign it an SCC number of zero.
- * Uses a non-recursive adaptation of Tarjan's algorithm for SCC computation.
- */
-static int compute_scc(struct bpf_verifier_env *env)
-{
- const u32 NOT_ON_STACK = U32_MAX;
+ insn_buf[0] = addr[0];
+ insn_buf[1] = addr[1];
+ insn_buf[2] = *insn;
+ *cnt = 3;
+ } else if (is_bpf_list_push_kfunc(desc->func_id) ||
+ is_bpf_rbtree_add_kfunc(desc->func_id)) {
+ struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
+ int struct_meta_reg = BPF_REG_3;
+ int node_offset_reg = BPF_REG_4;
- struct bpf_insn_aux_data *aux = env->insn_aux_data;
- const u32 insn_cnt = env->prog->len;
- int stack_sz, dfs_sz, err = 0;
- u32 *stack, *pre, *low, *dfs;
- u32 i, j, t, w;
- u32 next_preorder_num;
- u32 next_scc_id;
- bool assign_scc;
- struct bpf_iarray *succ;
-
- next_preorder_num = 1;
- next_scc_id = 1;
- /*
- * - 'stack' accumulates vertices in DFS order, see invariant comment below;
- * - 'pre[t] == p' => preorder number of vertex 't' is 'p';
- * - 'low[t] == n' => smallest preorder number of the vertex reachable from 't' is 'n';
- * - 'dfs' DFS traversal stack, used to emulate explicit recursion.
- */
- stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
- pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
- low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
- dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL_ACCOUNT);
- if (!stack || !pre || !low || !dfs) {
- err = -ENOMEM;
- goto exit;
+		/* rbtree_add has an extra 'less' arg, so the args to fix up are in different regs */
+ if (is_bpf_rbtree_add_kfunc(desc->func_id)) {
+ struct_meta_reg = BPF_REG_4;
+ node_offset_reg = BPF_REG_5;
+ }
+
+ if (!kptr_struct_meta) {
+ verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
+ insn_idx);
+ return -EFAULT;
+ }
+
+ __fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
+ node_offset_reg, insn, insn_buf, cnt);
+ } else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
+ desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
+ insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
+ *cnt = 1;
+ } else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] &&
+ env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
+ /*
+ * inline the bpf_session_is_return() for fsession:
+ * bool bpf_session_is_return(void *ctx)
+ * {
+ * return (((u64 *)ctx)[-1] >> BPF_TRAMP_IS_RETURN_SHIFT) & 1;
+ * }
+ */
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+ insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_IS_RETURN_SHIFT);
+ insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
+ *cnt = 3;
+ } else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
+ env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
+ /*
+ * inline bpf_session_cookie() for fsession:
+ * __u64 *bpf_session_cookie(void *ctx)
+ * {
+ * u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_COOKIE_INDEX_SHIFT) & 0xFF;
+ * return &((u64 *)ctx)[-off];
+ * }
+ */
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+ insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_COOKIE_INDEX_SHIFT);
+ insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
+ insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
+ insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
+ insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
+ *cnt = 6;
}
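A worked example of the shift/mask/negate arithmetic above, assuming the trampoline stored cookie index 3 in the flags word:

	/*
	 *   r0 = ((u64 *)ctx)[-1]                    raw flags/index word
	 *   r0 >>= BPF_TRAMP_COOKIE_INDEX_SHIFT
	 *   r0 &= 0xFF                               r0 = 3 (the index)
	 *   r0 <<= 3                                 r0 = 24 (byte offset)
	 *   r0 -= r1; r0 = -r0                       r0 = ctx - 24
	 *                                               = &((u64 *)ctx)[-3]
	 */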
- /*
- * References:
- * [1] R. Tarjan "Depth-First Search and Linear Graph Algorithms"
- * [2] D. J. Pearce "A Space-Efficient Algorithm for Finding Strongly Connected Components"
- *
- * The algorithm maintains the following invariant:
- * - suppose there is a path 'u' ~> 'v', such that 'pre[v] < pre[u]';
- * - then, vertex 'u' remains on stack while vertex 'v' is on stack.
- *
- * Consequently:
- * - If 'low[v] < pre[v]', there is a path from 'v' to some vertex 'u',
- * such that 'pre[u] == low[v]'; vertex 'u' is currently on the stack,
- * and thus there is an SCC (loop) containing both 'u' and 'v'.
- * - If 'low[v] == pre[v]', loops containing 'v' have been explored,
- * and 'v' can be considered the root of some SCC.
- *
- * Here is a pseudo-code for an explicitly recursive version of the algorithm:
- *
- * NOT_ON_STACK = insn_cnt + 1
- * pre = [0] * insn_cnt
- * low = [0] * insn_cnt
- * scc = [0] * insn_cnt
- * stack = []
- *
- * next_preorder_num = 1
- * next_scc_id = 1
- *
- * def recur(w):
- * nonlocal next_preorder_num
- * nonlocal next_scc_id
- *
- * pre[w] = next_preorder_num
- * low[w] = next_preorder_num
- * next_preorder_num += 1
- * stack.append(w)
- * for s in successors(w):
-	 * #   Note: for the classic algorithm the block below would look as:
- * #
- * # if pre[s] == 0:
- * # recur(s)
- * # low[w] = min(low[w], low[s])
- * # elif low[s] != NOT_ON_STACK:
- * # low[w] = min(low[w], pre[s])
- * #
- * # But replacing both 'min' instructions with 'low[w] = min(low[w], low[s])'
-	 * #   does not break the invariant and makes the iterative version of the algorithm
- * # simpler. See 'Algorithm #3' from [2].
- *
- * # 's' not yet visited
- * if pre[s] == 0:
- * recur(s)
- * # if 's' is on stack, pick lowest reachable preorder number from it;
- * # if 's' is not on stack 'low[s] == NOT_ON_STACK > low[w]',
- * # so 'min' would be a noop.
- * low[w] = min(low[w], low[s])
- *
- * if low[w] == pre[w]:
- * # 'w' is the root of an SCC, pop all vertices
- * # below 'w' on stack and assign same SCC to them.
- * while True:
- * t = stack.pop()
- * low[t] = NOT_ON_STACK
- * scc[t] = next_scc_id
- * if t == w:
- * break
- * next_scc_id += 1
- *
- * for i in range(0, insn_cnt):
- * if pre[i] == 0:
- * recur(i)
- *
- * Below implementation replaces explicit recursion with array 'dfs'.
- */
- for (i = 0; i < insn_cnt; i++) {
- if (pre[i])
- continue;
- stack_sz = 0;
- dfs_sz = 1;
- dfs[0] = i;
-dfs_continue:
- while (dfs_sz) {
- w = dfs[dfs_sz - 1];
- if (pre[w] == 0) {
- low[w] = next_preorder_num;
- pre[w] = next_preorder_num;
- next_preorder_num++;
- stack[stack_sz++] = w;
- }
- /* Visit 'w' successors */
- succ = bpf_insn_successors(env, w);
- for (j = 0; j < succ->cnt; ++j) {
- if (pre[succ->items[j]]) {
- low[w] = min(low[w], low[succ->items[j]]);
- } else {
- dfs[dfs_sz++] = succ->items[j];
- goto dfs_continue;
- }
- }
- /*
-			 * Preserve the invariant: if some vertex higher up the
-			 * stack is reachable from 'w', keep 'w' on the stack.
- */
- if (low[w] < pre[w]) {
- dfs_sz--;
- goto dfs_continue;
- }
- /*
-			 * Assign an SCC number only if the component has two or more
-			 * elements, or has a self reference, or if the instruction is a
-			 * callback-calling function (an implicit loop).
- */
- assign_scc = stack[stack_sz - 1] != w; /* two or more elements? */
- for (j = 0; j < succ->cnt; ++j) { /* self reference? */
- if (succ->items[j] == w) {
- assign_scc = true;
- break;
- }
- }
- if (bpf_calls_callback(env, w)) /* implicit loop? */
- assign_scc = true;
- /* Pop component elements from stack */
- do {
- t = stack[--stack_sz];
- low[t] = NOT_ON_STACK;
- if (assign_scc)
- aux[t].scc = next_scc_id;
- } while (t != w);
- if (assign_scc)
- next_scc_id++;
- dfs_sz--;
- }
- }
- env->scc_info = kvzalloc_objs(*env->scc_info, next_scc_id,
- GFP_KERNEL_ACCOUNT);
- if (!env->scc_info) {
- err = -ENOMEM;
- goto exit;
- }
- env->scc_cnt = next_scc_id;
-exit:
- kvfree(stack);
- kvfree(pre);
- kvfree(low);
- kvfree(dfs);
- return err;
+
+ if (env->insn_aux_data[insn_idx].arg_prog) {
+ u32 regno = env->insn_aux_data[insn_idx].arg_prog;
+ struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(regno, (long)env->prog->aux) };
+ int idx = *cnt;
+
+ insn_buf[idx++] = ld_addrs[0];
+ insn_buf[idx++] = ld_addrs[1];
+ insn_buf[idx++] = *insn;
+ *cnt = idx;
+ }
+ return 0;
}
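To make the SCC assignment tangible, consider a hypothetical four-instruction program (indices and scc values are illustrative, following the rules of the relocated compute_scc() above):

	/*
	 *   0: r0 = 0                  scc = 0  (sole member, no self edge)
	 *   1: r0 += 1                 scc = 1  <-+
	 *   2: if r0 != 10 goto -2     scc = 1  --+  insns 1-2 form a loop
	 *   3: exit                    scc = 0
	 */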
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
@@ -25982,7 +19918,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
goto err_free_env;
for (i = 0; i < len; i++)
env->insn_aux_data[i].orig_idx = i;
- env->succ = iarray_realloc(NULL, 2);
+ env->succ = bpf_iarray_realloc(NULL, 2);
if (!env->succ)
goto err_free_env;
env->prog = *prog;
@@ -26043,7 +19979,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
INIT_LIST_HEAD(&env->explored_states[i]);
INIT_LIST_HEAD(&env->free_list);
- ret = check_btf_info_early(env, attr, uattr);
+ ret = bpf_check_btf_info_early(env, attr, uattr);
if (ret < 0)
goto skip_full_check;
@@ -26055,11 +19991,11 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
if (ret < 0)
goto skip_full_check;
- ret = check_btf_info(env, attr, uattr);
+ ret = bpf_check_btf_info(env, attr, uattr);
if (ret < 0)
goto skip_full_check;
- ret = resolve_pseudo_ldimm64(env);
+ ret = check_and_resolve_insns(env);
if (ret < 0)
goto skip_full_check;
@@ -26069,11 +20005,11 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
goto skip_full_check;
}
- ret = check_cfg(env);
+ ret = bpf_check_cfg(env);
if (ret < 0)
goto skip_full_check;
- ret = compute_postorder(env);
+ ret = bpf_compute_postorder(env);
if (ret < 0)
goto skip_full_check;
@@ -26085,11 +20021,23 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
if (ret)
goto skip_full_check;
- ret = compute_scc(env);
+ ret = bpf_compute_const_regs(env);
+ if (ret < 0)
+ goto skip_full_check;
+
+ ret = bpf_prune_dead_branches(env);
+ if (ret < 0)
+ goto skip_full_check;
+
+ ret = sort_subprogs_topo(env);
+ if (ret < 0)
+ goto skip_full_check;
+
+ ret = bpf_compute_scc(env);
if (ret < 0)
goto skip_full_check;
- ret = compute_live_registers(env);
+ ret = bpf_compute_live_registers(env);
if (ret < 0)
goto skip_full_check;
@@ -26110,22 +20058,22 @@ skip_full_check:
* allocate additional slots.
*/
if (ret == 0)
- ret = remove_fastcall_spills_fills(env);
+ ret = bpf_remove_fastcall_spills_fills(env);
if (ret == 0)
ret = check_max_stack_depth(env);
/* instruction rewrites happen after this point */
if (ret == 0)
- ret = optimize_bpf_loop(env);
+ ret = bpf_optimize_bpf_loop(env);
if (is_priv) {
if (ret == 0)
- opt_hard_wire_dead_code_branches(env);
+ bpf_opt_hard_wire_dead_code_branches(env);
if (ret == 0)
- ret = opt_remove_dead_code(env);
+ ret = bpf_opt_remove_dead_code(env);
if (ret == 0)
- ret = opt_remove_nops(env);
+ ret = bpf_opt_remove_nops(env);
} else {
if (ret == 0)
sanitize_dead_code(env);
@@ -26133,22 +20081,22 @@ skip_full_check:
if (ret == 0)
/* program is valid, convert *(u32*)(ctx + off) accesses */
- ret = convert_ctx_accesses(env);
+ ret = bpf_convert_ctx_accesses(env);
if (ret == 0)
- ret = do_misc_fixups(env);
+ ret = bpf_do_misc_fixups(env);
/* do 32-bit optimization after insn patching has done so those patched
* insns could be handled correctly.
*/
if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
- ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
+ ret = bpf_opt_subreg_zext_lo32_rnd_hi32(env, attr);
env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
: false;
}
if (ret == 0)
- ret = fixup_call_args(env);
+ ret = bpf_fixup_call_args(env);
env->verification_time = ktime_get_ns() - start_time;
print_verification_stats(env);
@@ -26230,7 +20178,7 @@ err_release_maps:
err_unlock:
if (!is_priv)
mutex_unlock(&bpf_verifier_lock);
- clear_insn_aux_data(env, 0, env->prog->len);
+ bpf_clear_insn_aux_data(env, 0, env->prog->len);
vfree(env->insn_aux_data);
err_free_env:
bpf_stack_liveness_free(env);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index a5dbb72528e0..058724c41c46 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -765,6 +765,14 @@ static unsigned int number_of_same_symbols(const char *mod, const char *func_nam
if (!mod)
kallsyms_on_each_match_symbol(count_symbols, func_name, &ctx.count);
+ /*
+ * If the symbol is found in vmlinux, use vmlinux resolution only.
+ * This prevents module symbols from shadowing vmlinux symbols
+ * and causing -EADDRNOTAVAIL for unqualified kprobe targets.
+ */
+ if (!mod && ctx.count > 0)
+ return ctx.count;
+
module_kallsyms_on_each_symbol(mod, count_mod_symbols, &ctx);
return ctx.count;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 178c4738e63b..2bc04feadfab 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -567,6 +567,23 @@ noinline void bpf_fentry_test_sinfo(struct skb_shared_info *sinfo)
{
}
+noinline void bpf_fentry_test_ppvoid(void **pp)
+{
+}
+
+noinline void bpf_fentry_test_pppvoid(void ***ppp)
+{
+}
+
+noinline void bpf_fentry_test_ppfile(struct file **ppf)
+{
+}
+
+noinline struct file **bpf_fexit_test_ret_ppfile(void)
+{
+ return (struct file **)NULL;
+}
+
__bpf_kfunc int bpf_modify_return_test(int a, int *b)
{
*b += 1;
@@ -1120,19 +1137,23 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
switch (skb->protocol) {
case htons(ETH_P_IP):
- sk->sk_family = AF_INET;
- if (sizeof(struct iphdr) <= skb_headlen(skb)) {
- sk->sk_rcv_saddr = ip_hdr(skb)->saddr;
- sk->sk_daddr = ip_hdr(skb)->daddr;
+ if (skb_headlen(skb) < sizeof(struct iphdr)) {
+ ret = -EINVAL;
+ goto out;
}
+ sk->sk_family = AF_INET;
+ sk->sk_rcv_saddr = ip_hdr(skb)->saddr;
+ sk->sk_daddr = ip_hdr(skb)->daddr;
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- sk->sk_family = AF_INET6;
- if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) {
- sk->sk_v6_rcv_saddr = ipv6_hdr(skb)->saddr;
- sk->sk_v6_daddr = ipv6_hdr(skb)->daddr;
+ if (skb_headlen(skb) < sizeof(struct ipv6hdr)) {
+ ret = -EINVAL;
+ goto out;
}
+ sk->sk_family = AF_INET6;
+ sk->sk_v6_rcv_saddr = ipv6_hdr(skb)->saddr;
+ sk->sk_v6_daddr = ipv6_hdr(skb)->daddr;
break;
#endif
default:
@@ -1156,6 +1177,18 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
skb->ip_summed = CHECKSUM_COMPLETE;
}
+ if (prog->type == BPF_PROG_TYPE_LWT_XMIT) {
+ if (!ipv6_mod_enabled()) {
+ pr_warn_once("Please test this program with IPv6 enabled kernel\n");
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ dst_hold(&net->ipv6.ip6_null_entry->dst);
+ skb_dst_set(skb, &net->ipv6.ip6_null_entry->dst);
+#endif
+ }
+
ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false);
if (ret)
goto out;
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index f8338acebf07..14eb7812bda4 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -68,7 +68,7 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
{
- return bpf_local_storage_map_alloc(attr, &sk_cache, false);
+ return bpf_local_storage_map_alloc(attr, &sk_cache);
}
static int notsupp_get_next_key(struct bpf_map *map, void *key,
@@ -106,7 +106,7 @@ static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
if (sock) {
sdata = bpf_local_storage_update(
sock->sk, (struct bpf_local_storage_map *)map, value,
- map_flags, false, GFP_ATOMIC);
+ map_flags, false);
sockfd_put(sock);
return PTR_ERR_OR_ZERO(sdata);
}
@@ -137,7 +137,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
{
struct bpf_local_storage_elem *copy_selem;
- copy_selem = bpf_selem_alloc(smap, newsk, NULL, false, GFP_ATOMIC);
+ copy_selem = bpf_selem_alloc(smap, newsk, NULL, false);
if (!copy_selem)
return NULL;
@@ -202,7 +202,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
}
bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
} else {
- ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
+ ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
if (ret) {
bpf_selem_free(copy_selem, true);
atomic_sub(smap->elem_size,
@@ -227,9 +227,8 @@ out:
return ret;
}
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
- void *, value, u64, flags, gfp_t, gfp_flags)
+BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
+ void *, value, u64, flags)
{
struct bpf_local_storage_data *sdata;
@@ -250,7 +249,7 @@ BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
refcount_inc_not_zero(&sk->sk_refcnt)) {
sdata = bpf_local_storage_update(
sk, (struct bpf_local_storage_map *)map, value,
- BPF_NOEXIST, false, gfp_flags);
+ BPF_NOEXIST, false);
/* sk must be a fullsock (guaranteed by verifier),
* so sock_gen_put() is unnecessary.
*/
@@ -383,16 +382,14 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
return false;
}
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
- void *, value, u64, flags, gfp_t, gfp_flags)
+BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
+ void *, value, u64, flags)
{
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (in_hardirq() || in_nmi())
return (unsigned long)NULL;
- return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
- gfp_flags);
+ return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
}
BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index dd0b4d80e0f8..22c5cdffeae7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -36,6 +36,7 @@
#define pr_fmt(fmt) "IPv6: " fmt
+#include <crypto/sha1.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/kernel.h>
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index c8ef9e427c9c..19fef1398f1c 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -160,6 +160,17 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len,
return 0;
}
+static void __xsk_rcv_zc_safe(struct xdp_sock *xs, struct xdp_buff_xsk *xskb,
+ u32 len, u32 flags)
+{
+ u64 addr;
+
+ addr = xp_get_handle(xskb, xskb->pool);
+ __xskq_prod_reserve_desc(xs->rx, addr, len, flags);
+
+ xp_release(xskb);
+}
+
static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
@@ -185,13 +196,13 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
goto err;
}
- __xsk_rcv_zc(xs, xskb, len, contd);
+ __xsk_rcv_zc_safe(xs, xskb, len, contd);
xskb_list = &xskb->pool->xskb_list;
list_for_each_entry_safe(pos, tmp, xskb_list, list_node) {
if (list_is_singular(xskb_list))
contd = 0;
len = pos->xdp.data_end - pos->xdp.data;
- __xsk_rcv_zc(xs, pos, len, contd);
+ __xsk_rcv_zc_safe(xs, pos, len, contd);
list_del_init(&pos->list_node);
}
@@ -298,7 +309,8 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
rem -= copied;
xskb = container_of(xsk_xdp, struct xdp_buff_xsk, xdp);
- __xsk_rcv_zc(xs, xskb, copied - meta_len, rem ? XDP_PKT_CONTD : 0);
+ __xsk_rcv_zc_safe(xs, xskb, copied - meta_len,
+ rem ? XDP_PKT_CONTD : 0);
meta_len = 0;
} while (rem);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index ec08d9c102b1..3e3fbb73d23e 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -445,20 +445,26 @@ static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_de
q->cached_prod = cached_prod;
}
-static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
- u64 addr, u32 len, u32 flags)
+static inline void __xskq_prod_reserve_desc(struct xsk_queue *q,
+ u64 addr, u32 len, u32 flags)
{
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
u32 idx;
- if (xskq_prod_is_full(q))
- return -ENOBUFS;
-
/* A, matches D */
idx = q->cached_prod++ & q->ring_mask;
ring->desc[idx].addr = addr;
ring->desc[idx].len = len;
ring->desc[idx].options = flags;
+}
+
+static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
+ u64 addr, u32 len, u32 flags)
+{
+ if (xskq_prod_is_full(q))
+ return -ENOBUFS;
+
+ __xskq_prod_reserve_desc(q, addr, len, flags);
return 0;
}
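A sketch of the intended caller contract for the unchecked variant (hypothetical fragment; the zero-copy receive path is assumed to have established ring capacity up front, e.g. via xskq_prod_nb_free()):

	if (xskq_prod_nb_free(xs->rx, nr_frags) < nr_frags)
		return -ENOBUFS;	/* capacity checked once */
	/* ... per-fragment loop ... */
	__xskq_prod_reserve_desc(xs->rx, addr, len, flags); /* no per-desc check */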
diff --git a/scripts/Makefile.btf b/scripts/Makefile.btf
index 562a04b40e06..e66e13e79653 100644
--- a/scripts/Makefile.btf
+++ b/scripts/Makefile.btf
@@ -18,6 +18,8 @@ pahole-flags-$(call test-ge, $(pahole-ver), 126) = -j$(JOBS) --btf_features=enc
pahole-flags-$(call test-ge, $(pahole-ver), 130) += --btf_features=attributes
+pahole-flags-$(call test-ge, $(pahole-ver), 131) += --btf_features=layout
+
endif
pahole-flags-$(CONFIG_PAHOLE_HAS_LANG_EXCLUDE) += --lang_exclude=rust
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index d47dddc2b4ee..cf75a7fa2d6b 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -27,7 +27,7 @@ BTF COMMANDS
| **bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*] [**root_id** *ROOT_ID*]
| **bpftool** **btf help**
|
-| *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* }
+| *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* [**file** *FILE*]... }
| *FORMAT* := { **raw** | **c** [**unsorted**] }
| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* }
@@ -58,9 +58,12 @@ bpftool btf dump *BTF_SRC* [format *FORMAT*] [root_id *ROOT_ID*]
 When **prog** is provided, it's expected that the program has an associated
 BTF object with BTF types.
- When specifying *FILE*, an ELF file is expected, containing .BTF section
- with well-defined BTF binary format data, typically produced by clang or
- pahole.
+ When specifying *FILE*, an ELF file or a raw BTF file (e.g. from
+ ``/sys/kernel/btf/``) is expected. Multiple **file** arguments may be
+ given to merge BTF from several kernel modules into a single output.
+ When sysfs paths are used, vmlinux BTF is loaded automatically as the
+ base; if vmlinux itself appears in the file list it is skipped.
+ A base BTF can also be specified explicitly with **-B**.
**format** option can be used to override default (raw) output format. Raw
(**raw**) or C-syntax (**c**) output formats are supported. With C-style
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 35aeeaf5f711..90fa2a48cc26 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -54,7 +54,7 @@ PROG COMMANDS
| **cgroup/sendmsg4** | **cgroup/sendmsg6** | **cgroup/sendmsg_unix** |
| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/recvmsg_unix** | **cgroup/sysctl** |
| **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** |
-| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
+| **struct_ops** | **fentry** | **fexit** | **fsession** | **freplace** | **sk_lookup**
| }
| *ATTACH_TYPE* := {
| **sk_msg_verdict** | **sk_skb_verdict** | **sk_skb_stream_verdict** |
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 519ea5cb8ab1..0febf60e1b64 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -97,6 +97,15 @@ RM ?= rm -f
FEATURE_USER = .bpftool
+# Skip optional dependencies: LLVM (JIT disasm), libbfd (fallback
+# disasm), libcrypto (program signing).
+SKIP_LLVM ?=
+SKIP_LIBBFD ?=
+SKIP_CRYPTO ?=
+ifneq ($(SKIP_CRYPTO),1)
+ CRYPTO_LIBS := -lcrypto
+endif
+
FEATURE_TESTS := clang-bpf-co-re
FEATURE_TESTS += llvm
FEATURE_TESTS += libcap
@@ -130,8 +139,8 @@ include $(FEATURES_DUMP)
endif
endif
-LIBS = $(LIBBPF) -lelf -lcrypto -lz
-LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lcrypto -lz
+LIBS = $(LIBBPF) -lelf $(CRYPTO_LIBS) -lz
+LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf $(CRYPTO_LIBS) -lz
ifeq ($(feature-libelf-zstd),1)
LIBS += -lzstd
@@ -150,7 +159,12 @@ all: $(OUTPUT)bpftool
SRCS := $(wildcard *.c)
ifeq ($(feature-llvm),1)
- # If LLVM is available, use it for JIT disassembly
+ifneq ($(SKIP_LLVM),1)
+HAS_LLVM := 1
+endif
+endif
+
+ifeq ($(HAS_LLVM),1)
CFLAGS += -DHAVE_LLVM_SUPPORT
LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets
# llvm-config always adds -D_GNU_SOURCE, however, it may already be in CFLAGS
@@ -165,6 +179,7 @@ ifeq ($(feature-llvm),1)
endif
LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags)
else
+ ifneq ($(SKIP_LIBBFD),1)
# Fall back on libbfd
ifeq ($(feature-libbfd),1)
LIBS += -lbfd -ldl -lopcodes
@@ -186,15 +201,22 @@ else
CFLAGS += -DDISASM_INIT_STYLED
endif
endif
+ endif # SKIP_LIBBFD
endif
ifeq ($(filter -DHAVE_LLVM_SUPPORT -DHAVE_LIBBFD_SUPPORT,$(CFLAGS)),)
# No support for JIT disassembly
SRCS := $(filter-out jit_disasm.c,$(SRCS))
endif
+ifeq ($(SKIP_CRYPTO),1)
+ CFLAGS += -DBPFTOOL_WITHOUT_CRYPTO
+ HOST_CFLAGS += -DBPFTOOL_WITHOUT_CRYPTO
+ SRCS := $(filter-out sign.c,$(SRCS))
+endif
+
BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
-BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o sign.o)
+BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o $(if $(CRYPTO_LIBS),sign.o))
$(BOOTSTRAP_OBJS): $(LIBBPF_BOOTSTRAP)
OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
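A minimal build that skips all three optional dependencies would then be, e.g., make -C tools/bpf/bpftool SKIP_LLVM=1 SKIP_LIBBFD=1 SKIP_CRYPTO=1 (a hypothetical invocation; each skip knob should drop the corresponding feature flag from the bpftool version output).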
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index a28f0cc522e4..75cbcb512eba 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -501,7 +501,7 @@ _bpftool()
cgroup/post_bind4 cgroup/post_bind6 \
cgroup/sysctl cgroup/getsockopt \
cgroup/setsockopt cgroup/sock_release struct_ops \
- fentry fexit freplace sk_lookup'
+ fentry fexit fsession freplace sk_lookup'
COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_LOAD_TYPES" -- "$cur" ) )
return 0
;;
@@ -961,10 +961,14 @@ _bpftool()
*)
# emit extra options
case ${words[3]} in
- id|file)
+ id)
COMPREPLY=( $( compgen -W "root_id" -- "$cur" ) )
_bpftool_once_attr 'format'
;;
+ file)
+ COMPREPLY=( $( compgen -W "root_id file" -- "$cur" ) )
+ _bpftool_once_attr 'format'
+ ;;
map|prog)
if [[ ${words[3]} == "map" ]] && [[ $cword == 6 ]]; then
COMPREPLY+=( $( compgen -W "key value kv all" -- "$cur" ) )
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 946612029dee..2e899e940034 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -28,6 +28,7 @@
#define FASTCALL_DECL_TAG "bpf_fastcall"
#define MAX_ROOT_IDS 16
+#define MAX_BTF_FILES 64
static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_UNKN] = "UNKNOWN",
@@ -878,6 +879,45 @@ static bool btf_is_kernel_module(__u32 btf_id)
return btf_info.kernel_btf && strncmp(btf_name, "vmlinux", sizeof(btf_name)) != 0;
}
+static struct btf *merge_btf_files(const char **files, int nr_files,
+ struct btf *vmlinux_base)
+{
+ struct btf *combined, *mod;
+ int ret;
+
+ combined = btf__new_empty_split(vmlinux_base);
+ if (!combined) {
+ p_err("failed to create combined BTF: %s", strerror(errno));
+ return NULL;
+ }
+
+ for (int j = 0; j < nr_files; j++) {
+ mod = btf__parse_split(files[j], vmlinux_base);
+ if (!mod) {
+ p_err("failed to load BTF from %s: %s", files[j], strerror(errno));
+ btf__free(combined);
+ return NULL;
+ }
+
+ ret = btf__add_btf(combined, mod);
+ btf__free(mod);
+ if (ret < 0) {
+ p_err("failed to merge BTF from %s: %s", files[j], strerror(-ret));
+ btf__free(combined);
+ return NULL;
+ }
+ }
+
+ ret = btf__dedup(combined, NULL);
+ if (ret) {
+ p_err("failed to dedup combined BTF: %s", strerror(-ret));
+ btf__free(combined);
+ return NULL;
+ }
+
+ return combined;
+}
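Usage sketch, with hypothetical module names: bpftool btf dump file /sys/kernel/btf/i915 file /sys/kernel/btf/drm format c would parse each split BTF against the auto-detected vmlinux base, concatenate them via btf__add_btf(), and dedup the combined result before dumping.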
+
static int do_dump(int argc, char **argv)
{
bool dump_c = false, sort_dump_c = true;
@@ -958,20 +998,76 @@ static int do_dump(int argc, char **argv)
NEXT_ARG();
} else if (is_prefix(src, "file")) {
const char sysfs_prefix[] = "/sys/kernel/btf/";
+ struct btf *vmlinux_base = base_btf;
+ const char *files[MAX_BTF_FILES];
+ int nr_files = 0;
- if (!base_btf &&
- strncmp(*argv, sysfs_prefix, sizeof(sysfs_prefix) - 1) == 0 &&
- strcmp(*argv, sysfs_vmlinux) != 0)
- base = get_vmlinux_btf_from_sysfs();
-
- btf = btf__parse_split(*argv, base ?: base_btf);
- if (!btf) {
- err = -errno;
- p_err("failed to load BTF from %s: %s",
- *argv, strerror(errno));
- goto done;
+ /* First grab our argument, filtering out the sysfs_vmlinux. */
+ if (strcmp(*argv, sysfs_vmlinux) != 0) {
+ files[nr_files++] = *argv;
+ } else {
+ p_info("skipping %s (will be loaded as base)", *argv);
}
NEXT_ARG();
+
+ while (argc && is_prefix(*argv, "file")) {
+ NEXT_ARG();
+ if (!REQ_ARGS(1)) {
+ err = -EINVAL;
+ goto done;
+ }
+ /* Filter out any sysfs vmlinux entries. */
+ if (strcmp(*argv, sysfs_vmlinux) == 0) {
+ p_info("skipping %s (will be loaded as base)", *argv);
+ NEXT_ARG();
+ continue;
+ }
+ if (nr_files >= MAX_BTF_FILES) {
+ p_err("too many BTF files (max %d)", MAX_BTF_FILES);
+ err = -E2BIG;
+ goto done;
+ }
+ files[nr_files++] = *argv;
+ NEXT_ARG();
+ }
+
+ /* Auto-detect vmlinux base if any file is from sysfs */
+ if (!vmlinux_base) {
+ for (int j = 0; j < nr_files; j++) {
+ if (strncmp(files[j], sysfs_prefix, sizeof(sysfs_prefix) - 1) == 0) {
+ base = get_vmlinux_btf_from_sysfs();
+ vmlinux_base = base;
+ break;
+ }
+ }
+ }
+
+ /* All files were the sysfs_vmlinux, handle it like we used to */
+ if (nr_files == 0) {
+ nr_files = 1;
+ files[0] = sysfs_vmlinux;
+ }
+
+ if (nr_files == 1) {
+ btf = btf__parse_split(files[0], base ?: base_btf);
+ if (!btf) {
+ err = -errno;
+ p_err("failed to load BTF from %s: %s", files[0], strerror(errno));
+ goto done;
+ }
+ } else {
+ if (!vmlinux_base) {
+ p_err("base BTF is required when merging multiple BTF files; use -B/--base-btf or use sysfs paths");
+ err = -EINVAL;
+ goto done;
+ }
+
+ btf = merge_btf_files(files, nr_files, vmlinux_base);
+ if (!btf) {
+ err = -errno;
+ goto done;
+ }
+ }
} else {
err = -1;
p_err("unrecognized BTF source specifier: '%s'", src);
@@ -1445,7 +1541,8 @@ static int do_help(int argc, char **argv)
" %1$s %2$s dump BTF_SRC [format FORMAT] [root_id ROOT_ID]\n"
" %1$s %2$s help\n"
"\n"
- " BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n"
+ " BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] |\n"
+ " file FILE [file FILE]... }\n"
" FORMAT := { raw | c [unsorted] }\n"
" " HELP_SPEC_MAP "\n"
" " HELP_SPEC_PROGRAM "\n"
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index 8895b4e1f690..04541155e9cc 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -93,7 +93,16 @@ init_context(disasm_ctx_t *ctx, const char *arch,
p_err("Failed to retrieve triple");
return -1;
}
- *ctx = LLVMCreateDisasm(triple, NULL, 0, NULL, symbol_lookup_callback);
+
+ /*
+ * Enable all aarch64 ISA extensions so the disassembler can handle any
+ * instruction the kernel JIT might emit (e.g. ARM64 LSE atomics).
+ */
+ if (!strncmp(triple, "aarch64", 7))
+ *ctx = LLVMCreateDisasmCPUFeatures(triple, "", "+all", NULL, 0, NULL,
+ symbol_lookup_callback);
+ else
+ *ctx = LLVMCreateDisasm(triple, NULL, 0, NULL, symbol_lookup_callback);
LLVMDisposeMessage(triple);
if (!*ctx) {
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index a829a6a49037..c91e1a6e1a1e 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -132,6 +132,11 @@ static int do_version(int argc, char **argv)
#else
const bool has_skeletons = true;
#endif
+#ifdef BPFTOOL_WITHOUT_CRYPTO
+ const bool has_crypto = false;
+#else
+ const bool has_crypto = true;
+#endif
bool bootstrap = false;
int i;
@@ -163,6 +168,7 @@ static int do_version(int argc, char **argv)
jsonw_start_object(json_wtr); /* features */
jsonw_bool_field(json_wtr, "libbfd", has_libbfd);
jsonw_bool_field(json_wtr, "llvm", has_llvm);
+ jsonw_bool_field(json_wtr, "crypto", has_crypto);
jsonw_bool_field(json_wtr, "skeletons", has_skeletons);
jsonw_bool_field(json_wtr, "bootstrap", bootstrap);
jsonw_end_object(json_wtr); /* features */
@@ -181,6 +187,7 @@ static int do_version(int argc, char **argv)
printf("features:");
print_feature("libbfd", has_libbfd, &nb_features);
print_feature("llvm", has_llvm, &nb_features);
+ print_feature("crypto", has_crypto, &nb_features);
print_feature("skeletons", has_skeletons, &nb_features);
print_feature("bootstrap", bootstrap, &nb_features);
printf("\n");
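
On a non-bootstrap build with all optional features compiled in, the plain output would now include a line along these lines (a sketch; the exact set depends on the build):

	features: libbfd, llvm, crypto, skeletons
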
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 1130299cede0..78b6e0ebb85d 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -293,6 +293,20 @@ struct kernel_config_option {
int read_kernel_config(const struct kernel_config_option *requested_options,
size_t num_options, char **out_values,
const char *define_prefix);
+#ifndef BPFTOOL_WITHOUT_CRYPTO
int bpftool_prog_sign(struct bpf_load_and_run_opts *opts);
__u32 register_session_key(const char *key_der_path);
+#else
+static inline int bpftool_prog_sign(struct bpf_load_and_run_opts *opts)
+{
+ p_err("bpftool was built without signing support");
+ return -ENOTSUP;
+}
+
+static inline __u32 register_session_key(const char *key_der_path)
+{
+ p_err("bpftool was built without signing support");
+ return -1;
+}
+#endif
#endif
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 6daf19809ca4..a9f730d407a9 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -2583,7 +2583,7 @@ static int do_help(int argc, char **argv)
" cgroup/getsockname_unix | cgroup/sendmsg4 | cgroup/sendmsg6 |\n"
" cgroup/sendmsg_unix | cgroup/recvmsg4 | cgroup/recvmsg6 | cgroup/recvmsg_unix |\n"
" cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
- " struct_ops | fentry | fexit | freplace | sk_lookup }\n"
+ " struct_ops | fentry | fexit | fsession | freplace | sk_lookup }\n"
" ATTACH_TYPE := { sk_msg_verdict | sk_skb_verdict | sk_skb_stream_verdict |\n"
" sk_skb_stream_parser | flow_dissector }\n"
" METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 5208f650080f..f8a91fa7584f 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -1065,6 +1065,7 @@ static bool is_kf_implicit_arg(const struct btf *btf, const struct btf_param *p)
{
static const char *const kf_implicit_arg_types[] = {
"bpf_prog_aux",
+ "btf_struct_meta",
};
const struct btf_type *t;
const char *name;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 5e38b4887de6..677be9a47347 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4645,7 +4645,9 @@ union bpf_attr {
* Description
* Discard reserved ring buffer sample, pointed to by *data*.
* If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification
- * of new data availability is sent.
+ * of new data availability is sent. Discarded records remain in
+ * the ring buffer until consumed by user space, so a later submit
+ * using adaptive wakeup might not wake up the consumer.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
* If **0** is specified in *flags*, an adaptive notification
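
A minimal BPF-side sketch of the documented interaction; the ringbuf map "rb", the event struct, and the skip_event/fill_event helpers are illustrative:

	struct event *e;

	e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
	if (!e)
		return 0;
	if (skip_event) {
		/* the record stays in the buffer until user space consumes it */
		bpf_ringbuf_discard(e, BPF_RB_NO_WAKEUP);
		return 0;
	}
	fill_event(e);
	/* adaptive wakeup: may see the pending discarded record and skip
	 * the notification, leaving the consumer asleep
	 */
	bpf_ringbuf_submit(e, 0);
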
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index 266d4ffa6c07..638615ebddc2 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -8,6 +8,16 @@
#define BTF_MAGIC 0xeB9F
#define BTF_VERSION 1
+/*
+ * The BTF layout section consists of one struct btf_layout for each
+ * kind known at BTF encoding time.
+ */
+struct btf_layout {
+ __u8 info_sz; /* size of singular element after btf_type */
+ __u8 elem_sz; /* size of each of btf_vlen(t) elements */
+ __u16 flags; /* currently unused */
+};
+
struct btf_header {
__u16 magic;
__u8 version;
@@ -19,6 +29,8 @@ struct btf_header {
__u32 type_len; /* length of type section */
__u32 str_off; /* offset of string section */
__u32 str_len; /* length of string section */
+ __u32 layout_off; /* offset of layout section */
+ __u32 layout_len; /* length of layout section */
};
/* Max # of type identifier */
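
For orientation, a sketch of how a parser of the extended format resolves the three data sections; all section offsets are relative to the end of the header:

	/* hdr points at a parsed struct btf_header, raw at the start of the blob */
	void *types   = raw + hdr->hdr_len + hdr->type_off;
	void *layout  = hdr->layout_len ? raw + hdr->hdr_len + hdr->layout_off : NULL;
	void *strings = raw + hdr->hdr_len + hdr->str_off;
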
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 83fe79ffcb8f..ceb57b46a878 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -29,6 +29,36 @@
static struct btf_type btf_void;
+/*
+ * Describe how kinds are laid out; some have a singular element following the "struct btf_type",
+ * some have BTF_INFO_VLEN(t->info) elements. Specify sizes for both. Flags are currently unused.
+ * The kind layout can optionally be added to the BTF representation in a dedicated section to
+ * facilitate parsing. New kinds must be added here.
+ */
+static struct btf_layout layouts[NR_BTF_KINDS] = {
+/* singular element size vlen element(s) size flags */
+[BTF_KIND_UNKN] = { 0, 0, 0 },
+[BTF_KIND_INT] = { sizeof(__u32), 0, 0 },
+[BTF_KIND_PTR] = { 0, 0, 0 },
+[BTF_KIND_ARRAY] = { sizeof(struct btf_array), 0, 0 },
+[BTF_KIND_STRUCT] = { 0, sizeof(struct btf_member), 0 },
+[BTF_KIND_UNION] = { 0, sizeof(struct btf_member), 0 },
+[BTF_KIND_ENUM] = { 0, sizeof(struct btf_enum), 0 },
+[BTF_KIND_FWD] = { 0, 0, 0 },
+[BTF_KIND_TYPEDEF] = { 0, 0, 0 },
+[BTF_KIND_VOLATILE] = { 0, 0, 0 },
+[BTF_KIND_CONST] = { 0, 0, 0 },
+[BTF_KIND_RESTRICT] = { 0, 0, 0 },
+[BTF_KIND_FUNC] = { 0, 0, 0 },
+[BTF_KIND_FUNC_PROTO] = { 0, sizeof(struct btf_param), 0 },
+[BTF_KIND_VAR] = { sizeof(struct btf_var), 0, 0 },
+[BTF_KIND_DATASEC] = { 0, sizeof(struct btf_var_secinfo), 0 },
+[BTF_KIND_FLOAT] = { 0, 0, 0 },
+[BTF_KIND_DECL_TAG] = { sizeof(struct btf_decl_tag), 0, 0 },
+[BTF_KIND_TYPE_TAG] = { 0, 0, 0 },
+[BTF_KIND_ENUM64] = { 0, sizeof(struct btf_enum64), 0 },
+};
+
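
Reading the table: BTF_KIND_STRUCT, for instance, has no singular element but btf_vlen(t) member records, so a struct type's total record size works out to

	sizeof(struct btf_type) + 0 + btf_vlen(t) * sizeof(struct btf_member)
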
struct btf {
/* raw BTF data in native endianness */
void *raw_data;
@@ -40,42 +70,53 @@ struct btf {
/*
* When BTF is loaded from an ELF or raw memory it is stored
- * in a contiguous memory block. The hdr, type_data, and, strs_data
+ * in a contiguous memory block. The type_data, layout and strs_data
* point inside that memory region to their respective parts of BTF
* representation:
*
- * +--------------------------------+
- * | Header | Types | Strings |
- * +--------------------------------+
- * ^ ^ ^
- * | | |
- * hdr | |
- * types_data-+ |
- * strs_data------------+
+ * +----------------------------------------+---------------+
+ * | Header | Types | Optional layout | Strings |
+ * +--------------------------------------------------------+
+ * ^ ^ ^ ^
+ * | | | |
+ * raw_data | | |
+ * types_data-+ | |
+ * layout---------------+ |
+ * strs_data--------------------------------+
+ *
+ * A separate struct btf_header is embedded as btf->hdr,
+ * and header information is copied into it. This allows us
+	 * to handle header data for various header formats: the original,
+ * the extended header with layout info, etc.
*
* If BTF data is later modified, e.g., due to types added or
* removed, BTF deduplication performed, etc, this contiguous
- * representation is broken up into three independently allocated
- * memory regions to be able to modify them independently.
+ * representation is broken up into four independent memory
+ * regions.
+ *
* raw_data is nulled out at that point, but can be later allocated
* and cached again if user calls btf__raw_data(), at which point
- * raw_data will contain a contiguous copy of header, types, and
- * strings:
+ * raw_data will contain a contiguous copy of header, types, optional
+ * layout and strings. layout optionally points to a
+ * btf_layout array - this allows us to encode information about
+ * the kinds known at encoding time. If layout is NULL no
+ * layout information is encoded.
*
- * +----------+ +---------+ +-----------+
- * | Header | | Types | | Strings |
- * +----------+ +---------+ +-----------+
- * ^ ^ ^
- * | | |
- * hdr | |
- * types_data----+ |
- * strset__data(strs_set)-----+
+ * +----------+ +---------+ +-----------+ +-----------+
+ * | Header | | Types | | Layout | | Strings |
+ * +----------+ +---------+ +-----------+ +-----------+
+ * ^ ^ ^ ^
+ * | | | |
+ * hdr | | |
+ * types_data----+ | |
+ * layout---------------------+ |
+ * strset__data(strs_set)---------------------+
*
- * +----------+---------+-----------+
- * | Header | Types | Strings |
- * raw_data----->+----------+---------+-----------+
+ * +----------+---------+-------------------+-----------+
+ * | Header | Types | Optional Layout | Strings |
+ * raw_data----->+----------+---------+-------------------+-----------+
*/
- struct btf_header *hdr;
+ struct btf_header hdr;
void *types_data;
size_t types_data_cap; /* used size stored in hdr->type_len */
@@ -125,6 +166,17 @@ struct btf {
/* whether raw_data is a (read-only) mmap */
bool raw_data_is_mmap;
+ /* is BTF modifiable? i.e. is it split into separate sections as described above? */
+ bool modifiable;
+ /* does BTF have header information we do not support? If so, disallow
+ * modification.
+ */
+ bool has_hdr_extra;
+ /* Points either at raw kind layout data in parsed BTF (if present), or
+ * at an allocated kind layout array when BTF is modifiable.
+ */
+ void *layout;
+
/* BTF object FD, if loaded into kernel */
int fd;
@@ -216,7 +268,7 @@ static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off)
return 0;
}
-static void btf_bswap_hdr(struct btf_header *h)
+static void btf_bswap_hdr(struct btf_header *h, __u32 hdr_len)
{
h->magic = bswap_16(h->magic);
h->hdr_len = bswap_32(h->hdr_len);
@@ -224,66 +276,115 @@ static void btf_bswap_hdr(struct btf_header *h)
h->type_len = bswap_32(h->type_len);
h->str_off = bswap_32(h->str_off);
h->str_len = bswap_32(h->str_len);
+	/* May be operating on raw data whose hdr_len does not include the fields below */
+ if (hdr_len >= sizeof(struct btf_header)) {
+ h->layout_off = bswap_32(h->layout_off);
+ h->layout_len = bswap_32(h->layout_len);
+ }
}
static int btf_parse_hdr(struct btf *btf)
{
- struct btf_header *hdr = btf->hdr;
- __u32 meta_left;
+ struct btf_header *hdr = btf->raw_data;
+ __u32 hdr_len, meta_left;
- if (btf->raw_size < sizeof(struct btf_header)) {
+ if (btf->raw_size < offsetofend(struct btf_header, str_len)) {
pr_debug("BTF header not found\n");
return -EINVAL;
}
+ hdr_len = hdr->hdr_len;
+
if (hdr->magic == bswap_16(BTF_MAGIC)) {
btf->swapped_endian = true;
- if (bswap_32(hdr->hdr_len) != sizeof(struct btf_header)) {
+ hdr_len = bswap_32(hdr->hdr_len);
+ if (hdr_len < offsetofend(struct btf_header, str_len)) {
pr_warn("Can't load BTF with non-native endianness due to unsupported header length %u\n",
- bswap_32(hdr->hdr_len));
+ hdr_len);
return -ENOTSUP;
}
- btf_bswap_hdr(hdr);
} else if (hdr->magic != BTF_MAGIC) {
pr_debug("Invalid BTF magic: %x\n", hdr->magic);
return -EINVAL;
}
- if (btf->raw_size < hdr->hdr_len) {
+ if (btf->raw_size < hdr_len) {
pr_debug("BTF header len %u larger than data size %u\n",
- hdr->hdr_len, btf->raw_size);
+ hdr_len, btf->raw_size);
return -EINVAL;
}
- meta_left = btf->raw_size - hdr->hdr_len;
- if (meta_left < (long long)hdr->str_off + hdr->str_len) {
+ if (btf->swapped_endian)
+ btf_bswap_hdr(hdr, hdr_len);
+
+ memcpy(&btf->hdr, hdr, min((size_t)hdr_len, sizeof(struct btf_header)));
+
+ /* If unknown header data is found, modification is prohibited in
+ * btf_ensure_modifiable().
+ */
+ if (hdr_len > sizeof(struct btf_header)) {
+ __u8 *h = (__u8 *)hdr;
+ __u32 i;
+
+ for (i = sizeof(struct btf_header); i < hdr_len; i++) {
+ if (!h[i])
+ continue;
+ btf->has_hdr_extra = true;
+ pr_debug("Unknown BTF header data at offset %u; modification is disallowed\n",
+ i);
+ break;
+ }
+ }
+
+ meta_left = btf->raw_size - hdr_len;
+ if (meta_left < (long long)btf->hdr.str_off + btf->hdr.str_len) {
pr_debug("Invalid BTF total size: %u\n", btf->raw_size);
return -EINVAL;
}
- if ((long long)hdr->type_off + hdr->type_len > hdr->str_off) {
+ if ((long long)btf->hdr.type_off + btf->hdr.type_len > btf->hdr.str_off) {
pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n",
- hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len);
+ btf->hdr.type_off, btf->hdr.type_len, btf->hdr.str_off,
+ btf->hdr.str_len);
return -EINVAL;
}
- if (hdr->type_off % 4) {
+ if (btf->hdr.type_off % 4) {
pr_debug("BTF type section is not aligned to 4 bytes\n");
return -EINVAL;
}
+ if (btf->hdr.layout_len == 0)
+ return 0;
+
+ /* optional layout section sits between types and strings */
+ if (btf->hdr.layout_off % 4) {
+ pr_debug("BTF layout section is not aligned to 4 bytes\n");
+ return -EINVAL;
+ }
+ if (btf->hdr.layout_off < (long long)btf->hdr.type_off + btf->hdr.type_len) {
+ pr_debug("Invalid BTF data sections layout: type data at %u + %u, layout data at %u + %u\n",
+ btf->hdr.type_off, btf->hdr.type_len,
+ btf->hdr.layout_off, btf->hdr.layout_len);
+ return -EINVAL;
+ }
+ if ((long long)btf->hdr.layout_off + btf->hdr.layout_len > btf->hdr.str_off ||
+ btf->hdr.layout_off > btf->hdr.str_off) {
+ pr_debug("Invalid BTF data sections layout: layout data at %u + %u, strings data at %u\n",
+ btf->hdr.layout_off, btf->hdr.layout_len, btf->hdr.str_off);
+ return -EINVAL;
+ }
return 0;
}
static int btf_parse_str_sec(struct btf *btf)
{
- const struct btf_header *hdr = btf->hdr;
const char *start = btf->strs_data;
- const char *end = start + btf->hdr->str_len;
+ const char *end = start + btf->hdr.str_len;
- if (btf->base_btf && hdr->str_len == 0)
+ if (btf->base_btf && btf->hdr.str_len == 0)
return 0;
- if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) {
+ if (!btf->hdr.str_len || btf->hdr.str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) {
pr_debug("Invalid BTF string section\n");
return -EINVAL;
}
@@ -294,7 +395,63 @@ static int btf_parse_str_sec(struct btf *btf)
return 0;
}
-static int btf_type_size(const struct btf_type *t)
+static int btf_parse_layout_sec(struct btf *btf)
+{
+ if (!btf->hdr.layout_len)
+ return 0;
+
+ if (btf->hdr.layout_len % sizeof(struct btf_layout) != 0) {
+ pr_debug("Invalid BTF kind layout section\n");
+ return -EINVAL;
+ }
+ btf->layout = btf->raw_data + btf->hdr.hdr_len + btf->hdr.layout_off;
+
+ if (btf->swapped_endian) {
+ struct btf_layout *l, *end = btf->layout + btf->hdr.layout_len;
+
+ for (l = btf->layout; l < end; l++)
+ l->flags = bswap_16(l->flags);
+ }
+
+ return 0;
+}
+
+/* For unknown kinds, consult the kind layout. */
+static int btf_type_size_unknown(const struct btf *btf, const struct btf_type *t)
+{
+ __u32 l_cnt = btf->hdr.layout_len / sizeof(struct btf_layout);
+ struct btf_layout *l = btf->layout;
+ __u16 vlen = btf_vlen(t);
+ __u32 kind = btf_kind(t);
+
+ /* Fall back to base BTF if needed as they share layout information */
+ if (!l) {
+ struct btf *base_btf = btf->base_btf;
+
+ if (base_btf) {
+ l = base_btf->layout;
+ l_cnt = base_btf->hdr.layout_len / sizeof(struct btf_layout);
+ }
+ }
+ if (!l || kind >= l_cnt) {
+ pr_debug("Unsupported BTF_KIND: %u\n", btf_kind(t));
+ return -EINVAL;
+ }
+ if (l[kind].info_sz % 4) {
+ pr_debug("Unsupported info_sz %u for kind %u\n",
+ l[kind].info_sz, kind);
+ return -EINVAL;
+ }
+ if (l[kind].elem_sz % 4) {
+ pr_debug("Unsupported elem_sz %u for kind %u\n",
+ l[kind].elem_sz, kind);
+ return -EINVAL;
+ }
+
+ return sizeof(struct btf_type) + l[kind].info_sz + vlen * l[kind].elem_sz;
+}
+
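
A worked example: for a hypothetical future kind registered with info_sz = 4 and elem_sz = 8, a type with vlen = 3 occupies

	12 (sizeof(struct btf_type)) + 4 + 3 * 8 = 40 bytes

so an older parser can skip the record without understanding the kind.
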
+static int btf_type_size(const struct btf *btf, const struct btf_type *t)
{
const int base_size = sizeof(struct btf_type);
__u16 vlen = btf_vlen(t);
@@ -330,8 +487,7 @@ static int btf_type_size(const struct btf_type *t)
case BTF_KIND_DECL_TAG:
return base_size + sizeof(struct btf_decl_tag);
default:
- pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
- return -EINVAL;
+ return btf_type_size_unknown(btf, t);
}
}
@@ -421,16 +577,15 @@ static int btf_bswap_type_rest(struct btf_type *t)
static int btf_parse_type_sec(struct btf *btf)
{
- struct btf_header *hdr = btf->hdr;
void *next_type = btf->types_data;
- void *end_type = next_type + hdr->type_len;
+ void *end_type = next_type + btf->hdr.type_len;
int err, type_size;
while (next_type + sizeof(struct btf_type) <= end_type) {
if (btf->swapped_endian)
btf_bswap_type_base(next_type);
- type_size = btf_type_size(next_type);
+ type_size = btf_type_size(btf, next_type);
if (type_size < 0)
return type_size;
if (next_type + type_size > end_type) {
@@ -591,8 +746,12 @@ static int btf_validate_type(const struct btf *btf, const struct btf_type *t, __
break;
}
default:
- pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind);
- return -EINVAL;
+ /* Kind may be represented in kind layout information. */
+ if (btf_type_size_unknown(btf, t) < 0) {
+ pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind);
+ return -EINVAL;
+ }
+ break;
}
return 0;
}
@@ -1012,7 +1171,8 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
static bool btf_is_modifiable(const struct btf *btf)
{
- return (void *)btf->hdr != btf->raw_data;
+ /* BTF is modifiable if split into multiple sections */
+ return btf->modifiable;
}
static void btf_free_raw_data(struct btf *btf)
@@ -1036,14 +1196,14 @@ void btf__free(struct btf *btf)
if (btf_is_modifiable(btf)) {
/* if BTF was modified after loading, it will have a split
- * in-memory representation for header, types, and strings
+ * in-memory representation for types, strings and layout
* sections, so we need to free all of them individually. It
* might still have a cached contiguous raw data present,
* which will be unconditionally freed below.
*/
- free(btf->hdr);
free(btf->types_data);
strset__free(btf->strs_set);
+ free(btf->layout);
}
btf_free_raw_data(btf);
free(btf->raw_data_swapped);
@@ -1053,8 +1213,11 @@ void btf__free(struct btf *btf)
free(btf);
}
-static struct btf *btf_new_empty(struct btf *base_btf)
+static struct btf *btf_new_empty(struct btf_new_opts *opts)
{
+ bool add_layout = OPTS_GET(opts, add_layout, false);
+ struct btf *base_btf = OPTS_GET(opts, base_btf, NULL);
+ struct btf_header *hdr;
struct btf *btf;
btf = calloc(1, sizeof(*btf));
@@ -1072,26 +1235,42 @@ static struct btf *btf_new_empty(struct btf *base_btf)
if (base_btf) {
btf->base_btf = base_btf;
btf->start_id = btf__type_cnt(base_btf);
- btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off;
+ btf->start_str_off = base_btf->hdr.str_len + base_btf->start_str_off;
btf->swapped_endian = base_btf->swapped_endian;
}
/* +1 for empty string at offset 0 */
btf->raw_size = sizeof(struct btf_header) + (base_btf ? 0 : 1);
+ if (add_layout)
+ btf->raw_size += sizeof(layouts);
btf->raw_data = calloc(1, btf->raw_size);
if (!btf->raw_data) {
free(btf);
return ERR_PTR(-ENOMEM);
}
- btf->hdr = btf->raw_data;
- btf->hdr->hdr_len = sizeof(struct btf_header);
- btf->hdr->magic = BTF_MAGIC;
- btf->hdr->version = BTF_VERSION;
+ hdr = btf->raw_data;
+ hdr->hdr_len = sizeof(struct btf_header);
+ hdr->magic = BTF_MAGIC;
+ hdr->version = BTF_VERSION;
- btf->types_data = btf->raw_data + btf->hdr->hdr_len;
- btf->strs_data = btf->raw_data + btf->hdr->hdr_len;
- btf->hdr->str_len = base_btf ? 0 : 1; /* empty string at offset 0 */
+ btf->types_data = btf->raw_data + hdr->hdr_len;
+ btf->strs_data = btf->raw_data + hdr->hdr_len;
+ hdr->str_len = base_btf ? 0 : 1; /* empty string at offset 0 */
+
+ if (add_layout) {
+ hdr->layout_len = sizeof(layouts);
+ btf->layout = layouts;
+ /*
+ * No need to swap endianness here as btf_get_raw_data()
+ * will do this for us if btf->swapped_endian is true.
+ */
+ memcpy(btf->raw_data + hdr->hdr_len, layouts, sizeof(layouts));
+ btf->strs_data += sizeof(layouts);
+ hdr->str_off += sizeof(layouts);
+ }
+
+ memcpy(&btf->hdr, hdr, sizeof(*hdr));
return btf;
}
@@ -1103,7 +1282,19 @@ struct btf *btf__new_empty(void)
struct btf *btf__new_empty_split(struct btf *base_btf)
{
- return libbpf_ptr(btf_new_empty(base_btf));
+ LIBBPF_OPTS(btf_new_opts, opts);
+
+ opts.base_btf = base_btf;
+
+ return libbpf_ptr(btf_new_empty(&opts));
+}
+
+struct btf *btf__new_empty_opts(struct btf_new_opts *opts)
+{
+ if (!OPTS_VALID(opts, btf_new_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ return libbpf_ptr(btf_new_empty(opts));
}
static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, bool is_mmap)
@@ -1124,7 +1315,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, b
if (base_btf) {
btf->base_btf = base_btf;
btf->start_id = btf__type_cnt(base_btf);
- btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off;
+ btf->start_str_off = base_btf->hdr.str_len + base_btf->start_str_off;
}
if (is_mmap) {
@@ -1141,15 +1332,15 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, b
btf->raw_size = size;
- btf->hdr = btf->raw_data;
err = btf_parse_hdr(btf);
if (err)
goto done;
- btf->strs_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->str_off;
- btf->types_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->type_off;
+ btf->strs_data = btf->raw_data + btf->hdr.hdr_len + btf->hdr.str_off;
+ btf->types_data = btf->raw_data + btf->hdr.hdr_len + btf->hdr.type_off;
err = btf_parse_str_sec(btf);
+ err = err ?: btf_parse_layout_sec(btf);
err = err ?: btf_parse_type_sec(btf);
err = err ?: btf_sanity_check(btf);
if (err)
@@ -1601,7 +1792,7 @@ static const void *btf_strs_data(const struct btf *btf)
static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian)
{
- struct btf_header *hdr = btf->hdr;
+ const struct btf_header *hdr = &btf->hdr;
struct btf_type *t;
void *data, *p;
__u32 data_sz;
@@ -1614,14 +1805,17 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi
}
data_sz = hdr->hdr_len + hdr->type_len + hdr->str_len;
+ if (btf->layout)
+ data_sz += hdr->layout_len;
+
data = calloc(1, data_sz);
if (!data)
return NULL;
p = data;
- memcpy(p, hdr, hdr->hdr_len);
+ memcpy(p, hdr, min((__u32)sizeof(struct btf_header), hdr->hdr_len));
if (swap_endian)
- btf_bswap_hdr(p);
+ btf_bswap_hdr(p, hdr->hdr_len);
p += hdr->hdr_len;
memcpy(p, btf->types_data, hdr->type_len);
@@ -1639,8 +1833,18 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi
}
p += hdr->type_len;
+ if (btf->layout) {
+ memcpy(p, btf->layout, hdr->layout_len);
+ if (swap_endian) {
+ struct btf_layout *l, *end = p + hdr->layout_len;
+
+			for (l = p; l < end; l++)
+ l->flags = bswap_16(l->flags);
+ }
+ p += hdr->layout_len;
+ }
+
memcpy(p, btf_strs_data(btf), hdr->str_len);
- p += hdr->str_len;
*size = data_sz;
return data;
@@ -1675,7 +1879,7 @@ const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
{
if (offset < btf->start_str_off)
return btf__str_by_offset(btf->base_btf, offset);
- else if (offset - btf->start_str_off < btf->hdr->str_len)
+ else if (offset - btf->start_str_off < btf->hdr.str_len)
return btf_strs_data(btf) + (offset - btf->start_str_off);
else
return errno = EINVAL, NULL;
@@ -1783,12 +1987,12 @@ static void btf_invalidate_raw_data(struct btf *btf)
}
/* Ensure BTF is ready to be modified (by splitting it into separate memory
- * regions for header, types, and strings). Also invalidate cached
+ * regions for types, strings, and layout). Also invalidate cached
* raw_data, if any.
*/
static int btf_ensure_modifiable(struct btf *btf)
{
- void *hdr, *types;
+ void *types, *layout = NULL;
struct strset *set = NULL;
int err = -ENOMEM;
@@ -1798,45 +2002,58 @@ static int btf_ensure_modifiable(struct btf *btf)
return 0;
}
- /* split raw data into three memory regions */
- hdr = malloc(btf->hdr->hdr_len);
- types = malloc(btf->hdr->type_len);
- if (!hdr || !types)
+ if (btf->has_hdr_extra) {
+ /* Additional BTF header data was found; not safe to modify. */
+ return -EOPNOTSUPP;
+ }
+
+	/* Split raw data into memory regions; btf->hdr is already a separate copy. */
+ types = malloc(btf->hdr.type_len);
+ if (!types)
goto err_out;
+ memcpy(types, btf->types_data, btf->hdr.type_len);
- memcpy(hdr, btf->hdr, btf->hdr->hdr_len);
- memcpy(types, btf->types_data, btf->hdr->type_len);
+ if (btf->hdr.layout_len) {
+ layout = malloc(btf->hdr.layout_len);
+ if (!layout)
+ goto err_out;
+ memcpy(layout, btf->raw_data + btf->hdr.hdr_len + btf->hdr.layout_off,
+ btf->hdr.layout_len);
+ }
/* build lookup index for all strings */
- set = strset__new(BTF_MAX_STR_OFFSET, btf->strs_data, btf->hdr->str_len);
+ set = strset__new(BTF_MAX_STR_OFFSET, btf->strs_data, btf->hdr.str_len);
if (IS_ERR(set)) {
err = PTR_ERR(set);
goto err_out;
}
/* only when everything was successful, update internal state */
- btf->hdr = hdr;
btf->types_data = types;
- btf->types_data_cap = btf->hdr->type_len;
+ btf->types_data_cap = btf->hdr.type_len;
btf->strs_data = NULL;
btf->strs_set = set;
+ if (layout)
+ btf->layout = layout;
/* if BTF was created from scratch, all strings are guaranteed to be
* unique and deduplicated
*/
- if (btf->hdr->str_len == 0)
+ if (btf->hdr.str_len == 0)
btf->strs_deduped = true;
- if (!btf->base_btf && btf->hdr->str_len == 1)
+ if (!btf->base_btf && btf->hdr.str_len == 1)
btf->strs_deduped = true;
/* invalidate raw_data representation */
btf_invalidate_raw_data(btf);
+ btf->modifiable = true;
+
return 0;
err_out:
strset__free(set);
- free(hdr);
free(types);
+ free(layout);
return err;
}
@@ -1849,6 +2066,7 @@ err_out:
int btf__find_str(struct btf *btf, const char *s)
{
int off;
+ int err;
if (btf->base_btf) {
off = btf__find_str(btf->base_btf, s);
@@ -1857,8 +2075,9 @@ int btf__find_str(struct btf *btf, const char *s)
}
/* BTF needs to be in a modifiable state to build string lookup index */
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
off = strset__find_str(btf->strs_set, s);
if (off < 0)
@@ -1875,6 +2094,7 @@ int btf__find_str(struct btf *btf, const char *s)
int btf__add_str(struct btf *btf, const char *s)
{
int off;
+ int err;
if (btf->base_btf) {
off = btf__find_str(btf->base_btf, s);
@@ -1882,14 +2102,15 @@ int btf__add_str(struct btf *btf, const char *s)
return off;
}
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
off = strset__add_str(btf->strs_set, s);
if (off < 0)
return libbpf_err(off);
- btf->hdr->str_len = strset__data_size(btf->strs_set);
+ btf->hdr.str_len = strset__data_size(btf->strs_set);
return btf->start_str_off + off;
}
@@ -1897,7 +2118,7 @@ int btf__add_str(struct btf *btf, const char *s)
static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
{
return libbpf_add_mem(&btf->types_data, &btf->types_data_cap, 1,
- btf->hdr->type_len, UINT_MAX, add_sz);
+ btf->hdr.type_len, UINT_MAX, add_sz);
}
static void btf_type_inc_vlen(struct btf_type *t)
@@ -1905,16 +2126,31 @@ static void btf_type_inc_vlen(struct btf_type *t)
t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t));
}
+static void btf_hdr_update_type_len(struct btf *btf, int new_len)
+{
+ btf->hdr.type_len = new_len;
+ if (btf->layout) {
+ btf->hdr.layout_off = btf->hdr.type_off + new_len;
+ btf->hdr.str_off = btf->hdr.layout_off + btf->hdr.layout_len;
+ } else {
+ btf->hdr.str_off = btf->hdr.type_off + new_len;
+ }
+}
+
+static void btf_hdr_update_str_len(struct btf *btf, int new_len)
+{
+ btf->hdr.str_len = new_len;
+}
+
static int btf_commit_type(struct btf *btf, int data_sz)
{
int err;
- err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ err = btf_add_type_idx_entry(btf, btf->hdr.type_len);
if (err)
return libbpf_err(err);
- btf->hdr->type_len += data_sz;
- btf->hdr->str_off += data_sz;
+ btf_hdr_update_type_len(btf, btf->hdr.type_len + data_sz);
btf->nr_types++;
return btf->start_id + btf->nr_types - 1;
}
@@ -1963,13 +2199,14 @@ static int btf_add_type(struct btf_pipe *p, const struct btf_type *src_type)
__u32 *str_off;
int sz, err;
- sz = btf_type_size(src_type);
+ sz = btf_type_size(p->src, src_type);
if (sz < 0)
return libbpf_err(sz);
/* deconstruct BTF, if necessary, and invalidate raw_data */
- if (btf_ensure_modifiable(p->dst))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(p->dst);
+ if (err)
+ return libbpf_err(err);
t = btf_add_type_mem(p->dst, sz);
if (!t)
@@ -2004,24 +2241,31 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
{
struct btf_pipe p = { .src = src_btf, .dst = btf };
int data_sz, sz, cnt, i, err, old_strs_len;
+ __u32 src_start_id;
__u32 *off;
void *t;
- /* appending split BTF isn't supported yet */
- if (src_btf->base_btf)
- return libbpf_err(-ENOTSUP);
+ /*
+ * When appending split BTF, the destination must share the same base
+ * BTF so that base type ID references remain valid.
+ */
+ if (src_btf->base_btf && src_btf->base_btf != btf->base_btf)
+ return libbpf_err(-EOPNOTSUPP);
+
+ src_start_id = src_btf->base_btf ? btf__type_cnt(src_btf->base_btf) : 1;
/* deconstruct BTF, if necessary, and invalidate raw_data */
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
/* remember original strings section size if we have to roll back
* partial strings section changes
*/
- old_strs_len = btf->hdr->str_len;
+ old_strs_len = btf->hdr.str_len;
- data_sz = src_btf->hdr->type_len;
- cnt = btf__type_cnt(src_btf) - 1;
+ data_sz = src_btf->hdr.type_len;
+ cnt = src_btf->nr_types;
/* pre-allocate enough memory for new types */
t = btf_add_type_mem(btf, data_sz);
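
A hedged sketch of what the relaxed rule permits (paths are illustrative):

	struct btf *base = btf__parse("/sys/kernel/btf/vmlinux", NULL);
	struct btf *dst  = btf__new_empty_split(base);
	struct btf *src  = btf__parse_split("/sys/kernel/btf/ext4", base);

	/* src and dst share the same base, so base type IDs and string
	 * offsets are left untouched during the append
	 */
	int first_new_id = btf__add_btf(dst, src);
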
@@ -2045,7 +2289,7 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
struct btf_field_iter it;
__u32 *type_id, *str_off;
- sz = btf_type_size(t);
+ sz = btf_type_size(src_btf, t);
if (sz < 0) {
/* unlikely, has to be corrupted src_btf */
err = sz;
@@ -2060,6 +2304,9 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
if (err)
goto err_out;
while ((str_off = btf_field_iter_next(&it))) {
+ /* don't remap strings from shared base BTF */
+ if (*str_off < src_btf->start_str_off)
+ continue;
err = btf_rewrite_str(&p, str_off);
if (err)
goto err_out;
@@ -2074,11 +2321,11 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
if (!*type_id) /* nothing to do for VOID references */
continue;
- /* we haven't updated btf's type count yet, so
- * btf->start_id + btf->nr_types - 1 is the type ID offset we should
- * add to all newly added BTF types
- */
- *type_id += btf->start_id + btf->nr_types - 1;
+ /* don't remap types from shared base BTF */
+ if (*type_id < src_start_id)
+ continue;
+
+ *type_id += btf->start_id + btf->nr_types - src_start_id;
}
/* go to next type data and type offset index entry */
@@ -2094,8 +2341,7 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
* update type count and various internal offsets and sizes to
* "commit" the changes and made them visible to the outside world.
*/
- btf->hdr->type_len += data_sz;
- btf->hdr->str_off += data_sz;
+ btf_hdr_update_type_len(btf, btf->hdr.type_len + data_sz);
btf->nr_types += cnt;
hashmap__free(p.str_off_map);
@@ -2106,13 +2352,14 @@ err_out:
/* zero out preallocated memory as if it was just allocated with
* libbpf_add_mem()
*/
- memset(btf->types_data + btf->hdr->type_len, 0, data_sz);
- memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len);
+ memset(btf->types_data + btf->hdr.type_len, 0, data_sz);
+ if (btf->strs_data)
+ memset(btf->strs_data + old_strs_len, 0, btf->hdr.str_len - old_strs_len);
/* and now restore original strings section size; types data size
* wasn't modified, so doesn't need restoring, see big comment above
*/
- btf->hdr->str_len = old_strs_len;
+ btf_hdr_update_str_len(btf, old_strs_len);
hashmap__free(p.str_off_map);
@@ -2132,6 +2379,7 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
{
struct btf_type *t;
int sz, name_off;
+ int err;
/* non-empty name */
if (str_is_empty(name))
@@ -2143,8 +2391,9 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
return libbpf_err(-EINVAL);
/* deconstruct BTF, if necessary, and invalidate raw_data */
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
sz = sizeof(struct btf_type) + sizeof(int);
t = btf_add_type_mem(btf, sz);
@@ -2180,6 +2429,7 @@ int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
{
struct btf_type *t;
int sz, name_off;
+ int err;
/* non-empty name */
if (str_is_empty(name))
@@ -2190,8 +2440,9 @@ int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
byte_sz != 16)
return libbpf_err(-EINVAL);
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
@@ -2225,12 +2476,14 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref
{
struct btf_type *t;
int sz, name_off = 0;
+ int err;
if (validate_type_id(ref_type_id))
return libbpf_err(-EINVAL);
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
@@ -2275,13 +2528,15 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n
{
struct btf_type *t;
struct btf_array *a;
+ int err;
int sz;
if (validate_type_id(index_type_id) || validate_type_id(elem_type_id))
return libbpf_err(-EINVAL);
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
sz = sizeof(struct btf_type) + sizeof(struct btf_array);
t = btf_add_type_mem(btf, sz);
@@ -2305,9 +2560,11 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
{
struct btf_type *t;
int sz, name_off = 0;
+ int err;
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
@@ -2387,6 +2644,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
struct btf_member *m;
bool is_bitfield;
int sz, name_off = 0;
+ int err;
/* last type should be union/struct */
if (btf->nr_types == 0)
@@ -2407,8 +2665,9 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
return libbpf_err(-EINVAL);
/* decompose and invalidate raw data */
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
sz = sizeof(struct btf_member);
m = btf_add_type_mem(btf, sz);
@@ -2430,8 +2689,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
/* update parent type's vlen and kflag */
t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t));
- btf->hdr->type_len += sz;
- btf->hdr->str_off += sz;
+ btf_hdr_update_type_len(btf, btf->hdr.type_len + sz);
return 0;
}
@@ -2440,13 +2698,15 @@ static int btf_add_enum_common(struct btf *btf, const char *name, __u32 byte_sz,
{
struct btf_type *t;
int sz, name_off = 0;
+ int err;
/* byte_sz must be power of 2 */
if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8)
return libbpf_err(-EINVAL);
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
@@ -2502,6 +2762,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
struct btf_type *t;
struct btf_enum *v;
int sz, name_off;
+ int err;
/* last type should be BTF_KIND_ENUM */
if (btf->nr_types == 0)
@@ -2517,8 +2778,9 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
return libbpf_err(-E2BIG);
/* decompose and invalidate raw data */
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
sz = sizeof(struct btf_enum);
v = btf_add_type_mem(btf, sz);
@@ -2540,8 +2802,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
if (value < 0)
t->info = btf_type_info(btf_kind(t), btf_vlen(t), true);
- btf->hdr->type_len += sz;
- btf->hdr->str_off += sz;
+ btf_hdr_update_type_len(btf, btf->hdr.type_len + sz);
return 0;
}
@@ -2579,6 +2840,7 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value)
struct btf_enum64 *v;
struct btf_type *t;
int sz, name_off;
+ int err;
/* last type should be BTF_KIND_ENUM64 */
if (btf->nr_types == 0)
@@ -2592,8 +2854,9 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value)
return libbpf_err(-EINVAL);
/* decompose and invalidate raw data */
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
sz = sizeof(struct btf_enum64);
v = btf_add_type_mem(btf, sz);
@@ -2612,8 +2875,7 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value)
t = btf_last_type(btf);
btf_type_inc_vlen(t);
- btf->hdr->type_len += sz;
- btf->hdr->str_off += sz;
+ btf_hdr_update_type_len(btf, btf->hdr.type_len + sz);
return 0;
}
@@ -2782,13 +3044,15 @@ int btf__add_func(struct btf *btf, const char *name,
int btf__add_func_proto(struct btf *btf, int ret_type_id)
{
struct btf_type *t;
+ int err;
int sz;
if (validate_type_id(ret_type_id))
return libbpf_err(-EINVAL);
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
@@ -2818,6 +3082,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
struct btf_type *t;
struct btf_param *p;
int sz, name_off = 0;
+ int err;
if (validate_type_id(type_id))
return libbpf_err(-EINVAL);
@@ -2830,8 +3095,9 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
return libbpf_err(-EINVAL);
/* decompose and invalidate raw data */
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
sz = sizeof(struct btf_param);
p = btf_add_type_mem(btf, sz);
@@ -2851,8 +3117,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
t = btf_last_type(btf);
btf_type_inc_vlen(t);
- btf->hdr->type_len += sz;
- btf->hdr->str_off += sz;
+ btf_hdr_update_type_len(btf, btf->hdr.type_len + sz);
return 0;
}
@@ -2871,6 +3136,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
struct btf_type *t;
struct btf_var *v;
int sz, name_off;
+ int err;
/* non-empty name */
if (str_is_empty(name))
@@ -2882,8 +3148,9 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
return libbpf_err(-EINVAL);
/* deconstruct BTF, if necessary, and invalidate raw_data */
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
sz = sizeof(struct btf_type) + sizeof(struct btf_var);
t = btf_add_type_mem(btf, sz);
@@ -2920,13 +3187,15 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
{
struct btf_type *t;
int sz, name_off;
+ int err;
/* non-empty name */
if (str_is_empty(name))
return libbpf_err(-EINVAL);
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
@@ -2959,6 +3228,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
{
struct btf_type *t;
struct btf_var_secinfo *v;
+ int err;
int sz;
/* last type should be BTF_KIND_DATASEC */
@@ -2972,8 +3242,9 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
return libbpf_err(-EINVAL);
/* decompose and invalidate raw data */
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
sz = sizeof(struct btf_var_secinfo);
v = btf_add_type_mem(btf, sz);
@@ -2988,8 +3259,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
t = btf_last_type(btf);
btf_type_inc_vlen(t);
- btf->hdr->type_len += sz;
- btf->hdr->str_off += sz;
+ btf_hdr_update_type_len(btf, btf->hdr.type_len + sz);
return 0;
}
@@ -2998,6 +3268,7 @@ static int btf_add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
{
struct btf_type *t;
int sz, value_off;
+ int err;
if (str_is_empty(value) || component_idx < -1)
return libbpf_err(-EINVAL);
@@ -3005,8 +3276,9 @@ static int btf_add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
if (validate_type_id(ref_type_id))
return libbpf_err(-EINVAL);
- if (btf_ensure_modifiable(btf))
- return libbpf_err(-ENOMEM);
+ err = btf_ensure_modifiable(btf);
+ if (err)
+ return libbpf_err(err);
sz = sizeof(struct btf_type) + sizeof(struct btf_decl_tag);
t = btf_add_type_mem(btf, sz);
@@ -3630,10 +3902,9 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts)
return libbpf_err(-EINVAL);
}
- if (btf_ensure_modifiable(btf)) {
- err = -ENOMEM;
+ err = btf_ensure_modifiable(btf);
+ if (err)
goto done;
- }
err = btf_dedup_prep(d);
if (err) {
@@ -3953,7 +4224,7 @@ static int btf_dedup_strings(struct btf_dedup *d)
/* replace BTF string data and hash with deduped ones */
strset__free(d->btf->strs_set);
- d->btf->hdr->str_len = strset__data_size(d->strs_set);
+ btf_hdr_update_str_len(d->btf, strset__data_size(d->strs_set));
d->btf->strs_set = d->strs_set;
d->strs_set = NULL;
d->btf->strs_deduped = true;
@@ -5386,7 +5657,7 @@ static int btf_dedup_compact_types(struct btf_dedup *d)
continue;
t = btf__type_by_id(d->btf, id);
- len = btf_type_size(t);
+ len = btf_type_size(d->btf, t);
if (len < 0)
return len;
@@ -5400,14 +5671,17 @@ static int btf_dedup_compact_types(struct btf_dedup *d)
/* shrink struct btf's internal types index and update btf_header */
d->btf->nr_types = next_type_id - d->btf->start_id;
d->btf->type_offs_cap = d->btf->nr_types;
- d->btf->hdr->type_len = p - d->btf->types_data;
+ d->btf->hdr.type_len = p - d->btf->types_data;
new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap,
sizeof(*new_offs));
if (d->btf->type_offs_cap && !new_offs)
return -ENOMEM;
d->btf->type_offs = new_offs;
- d->btf->hdr->str_off = d->btf->hdr->type_len;
- d->btf->raw_size = d->btf->hdr->hdr_len + d->btf->hdr->type_len + d->btf->hdr->str_len;
+ if (d->btf->layout)
+ d->btf->hdr.layout_off = d->btf->hdr.type_off + d->btf->hdr.type_len;
+ d->btf->hdr.str_off = d->btf->hdr.type_off + d->btf->hdr.type_len + d->btf->hdr.layout_len;
+ d->btf->raw_size = d->btf->hdr.hdr_len + d->btf->hdr.type_off + d->btf->hdr.type_len +
+ d->btf->hdr.layout_len + d->btf->hdr.str_len;
return 0;
}
@@ -5865,7 +6139,7 @@ int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf,
goto done;
}
dist.split_start_id = btf__type_cnt(old_base);
- dist.split_start_str = old_base->hdr->str_len;
+ dist.split_start_str = old_base->hdr.str_len;
/* Pass over src split BTF; generate the list of base BTF type ids it
* references; these will constitute our distilled BTF set to be
@@ -5934,14 +6208,14 @@ done:
const struct btf_header *btf_header(const struct btf *btf)
{
- return btf->hdr;
+ return &btf->hdr;
}
void btf_set_base_btf(struct btf *btf, const struct btf *base_btf)
{
btf->base_btf = (struct btf *)base_btf;
btf->start_id = btf__type_cnt(base_btf);
- btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off;
+ btf->start_str_off = base_btf->hdr.str_len + base_btf->start_str_off;
}
int btf__relocate(struct btf *btf, const struct btf *base_btf)
@@ -6008,16 +6282,15 @@ int btf__permute(struct btf *btf, __u32 *id_map, __u32 id_map_cnt,
goto done;
}
- new_types = calloc(btf->hdr->type_len, 1);
+ new_types = calloc(btf->hdr.type_len, 1);
if (!new_types) {
err = -ENOMEM;
goto done;
}
- if (btf_ensure_modifiable(btf)) {
- err = -ENOMEM;
+ err = btf_ensure_modifiable(btf);
+ if (err)
goto done;
- }
for (i = start_offs; i < id_map_cnt; i++) {
id = id_map[i];
@@ -6046,7 +6319,7 @@ int btf__permute(struct btf *btf, __u32 *id_map, __u32 id_map_cnt,
id = order_map[i];
t = btf__type_by_id(btf, id);
- type_size = btf_type_size(t);
+ type_size = btf_type_size(btf, t);
memcpy(nt, t, type_size);
/* fix up referenced IDs for BTF */
@@ -6072,7 +6345,7 @@ int btf__permute(struct btf *btf, __u32 *id_map, __u32 id_map_cnt,
for (nt = new_types, i = 0; i < id_map_cnt - start_offs; i++) {
btf->type_offs[i] = nt - new_types;
- nt += btf_type_size(nt);
+ nt += btf_type_size(btf, nt);
}
free(order_map);
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index b30008c267c0..a1f8deca2603 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -109,6 +109,26 @@ LIBBPF_API struct btf *btf__new_empty(void);
*/
LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf);
+struct btf_new_opts {
+ size_t sz;
+ struct btf *base_btf; /* optional base BTF */
+ bool add_layout; /* add BTF layout information */
+ size_t:0;
+};
+#define btf_new_opts__last_field add_layout
+
+/**
+ * @brief **btf__new_empty_opts()** creates an unpopulated BTF object with
+ * an optional *base_btf*, and with BTF kind layout information if
+ * *add_layout* is set
+ * @return new BTF object instance which has to be eventually freed with
+ * **btf__free()**
+ *
+ * On error, NULL is returned and the thread-local `errno` variable is
+ * set to the error code.
+ */
+LIBBPF_API struct btf *btf__new_empty_opts(struct btf_new_opts *opts);
+
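
A short usage sketch:

	LIBBPF_OPTS(btf_new_opts, opts, .add_layout = true);
	struct btf *btf = btf__new_empty_opts(&opts);

	if (!btf)
		return -errno; /* thread-local errno carries the error code */
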
/**
* @brief **btf__distill_base()** creates new versions of the split BTF
* *src_btf* and its base BTF. The new base BTF will only contain the types
diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
index 2fa434f09cce..4f19a0d79b0c 100644
--- a/tools/lib/bpf/features.c
+++ b/tools/lib/bpf/features.c
@@ -568,6 +568,53 @@ static int probe_ldimm64_full_range_off(int token_fd)
return 1;
}
+#ifdef __x86_64__
+
+#ifndef __NR_uprobe
+#define __NR_uprobe 336
+#endif
+
+static int probe_uprobe_syscall(int token_fd)
+{
+ /*
+ * If kernel supports uprobe() syscall, it will return -ENXIO when called
+	 * If the kernel supports the uprobe() syscall, it returns -ENXIO when
+	 * called from outside a kernel-generated uprobe trampoline.
+ return syscall(__NR_uprobe) < 0 && errno == ENXIO;
+}
+#else
+static int probe_uprobe_syscall(int token_fd)
+{
+ return 0;
+}
+#endif
+
+static int probe_kern_btf_layout(int token_fd)
+{
+ static const char strs[] = "\0int";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+ };
+ struct btf_layout layout[] = {
+ { 0, 0, 0 },
+ { sizeof(__u32), 0, 0 },
+ };
+ struct btf_header hdr = {
+ .magic = BTF_MAGIC,
+ .version = BTF_VERSION,
+ .hdr_len = sizeof(struct btf_header),
+ .type_len = sizeof(types),
+ .str_off = sizeof(types) + sizeof(layout),
+ .str_len = sizeof(strs),
+ .layout_off = sizeof(types),
+ .layout_len = sizeof(layout),
+ };
+
+ return probe_fd(libbpf__load_raw_btf_hdr(&hdr, (char *)types, strs,
+ (char *)layout, token_fd));
+}
+
typedef int (*feature_probe_fn)(int /* token_fd */);
static struct kern_feature_cache feature_cache;
@@ -646,6 +693,12 @@ static struct kern_feature_desc {
[FEAT_LDIMM64_FULL_RANGE_OFF] = {
"full range LDIMM64 support", probe_ldimm64_full_range_off,
},
+ [FEAT_UPROBE_SYSCALL] = {
+ "kernel supports uprobe syscall", probe_uprobe_syscall,
+ },
+ [FEAT_BTF_LAYOUT] = {
+ "kernel supports BTF layout", probe_kern_btf_layout,
+ },
};
bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 0be7017800fe..8b0c3246097f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -3138,12 +3138,14 @@ static bool btf_needs_sanitization(struct bpf_object *obj)
bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
+ bool has_layout = kernel_supports(obj, FEAT_BTF_LAYOUT);
return !has_func || !has_datasec || !has_func_global || !has_float ||
- !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec;
+ !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec ||
+ !has_layout;
}
-static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
+struct btf *bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *orig_btf)
{
bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
@@ -3153,9 +3155,64 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
+ bool has_layout = kernel_supports(obj, FEAT_BTF_LAYOUT);
int enum64_placeholder_id = 0;
+ const struct btf_header *hdr;
+ struct btf *btf = NULL;
+ const void *raw_data;
struct btf_type *t;
int i, j, vlen;
+ __u32 sz;
+ int err;
+
+ /* clone BTF to sanitize a copy and leave the original intact */
+ raw_data = btf__raw_data(orig_btf, &sz);
+ if (!raw_data)
+ return ERR_PTR(-ENOMEM);
+ /* btf_header() gives us endian-safe header info */
+ hdr = btf_header(orig_btf);
+
+ if (!has_layout && hdr->hdr_len >= sizeof(struct btf_header) &&
+ (hdr->layout_len != 0 || hdr->layout_off != 0)) {
+ const struct btf_header *old_hdr = raw_data;
+ struct btf_header *new_hdr;
+ void *new_raw_data;
+ __u32 new_str_off;
+
+ /*
+		 * Need to rewrite the BTF to exclude layout information and
+		 * move the string section to immediately after the types.
+ */
+ new_raw_data = malloc(sz);
+ if (!new_raw_data)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(new_raw_data, raw_data, sz);
+ new_hdr = new_raw_data;
+ new_hdr->layout_off = 0;
+ new_hdr->layout_len = 0;
+ new_str_off = hdr->type_off + hdr->type_len;
+ /* Handle swapped endian case */
+ if (old_hdr->magic != hdr->magic)
+ new_hdr->str_off = bswap_32(new_str_off);
+ else
+ new_hdr->str_off = new_str_off;
+
+ memmove(new_raw_data + hdr->hdr_len + new_str_off,
+ new_raw_data + hdr->hdr_len + hdr->str_off,
+ hdr->str_len);
+ sz = hdr->hdr_len + hdr->type_off + hdr->type_len + hdr->str_len;
+ btf = btf__new(new_raw_data, sz);
+ free(new_raw_data);
+ } else {
+ btf = btf__new(raw_data, sz);
+ }
+ err = libbpf_get_error(btf);
+ if (err)
+ return ERR_PTR(err);
+
+ /* enforce 8-byte pointers for BPF-targeted BTFs */
+ btf__set_pointer_size(btf, 8);
for (i = 1; i < btf__type_cnt(btf); i++) {
t = (struct btf_type *)btf__type_by_id(btf, i);
@@ -3233,9 +3290,10 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
if (enum64_placeholder_id == 0) {
enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
- if (enum64_placeholder_id < 0)
- return enum64_placeholder_id;
-
+ if (enum64_placeholder_id < 0) {
+ btf__free(btf);
+ return ERR_PTR(enum64_placeholder_id);
+ }
t = (struct btf_type *)btf__type_by_id(btf, i);
}
@@ -3249,7 +3307,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
}
}
- return 0;
+ return btf;
}
static bool libbpf_needs_btf(const struct bpf_object *obj)
@@ -3600,21 +3658,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
sanitize = btf_needs_sanitization(obj);
if (sanitize) {
- const void *raw_data;
- __u32 sz;
-
- /* clone BTF to sanitize a copy and leave the original intact */
- raw_data = btf__raw_data(obj->btf, &sz);
- kern_btf = btf__new(raw_data, sz);
- err = libbpf_get_error(kern_btf);
- if (err)
- return err;
-
- /* enforce 8-byte pointers for BPF-targeted BTFs */
- btf__set_pointer_size(obj->btf, 8);
- err = bpf_object__sanitize_btf(obj, kern_btf);
- if (err)
- return err;
+ kern_btf = bpf_object__sanitize_btf(obj, obj->btf);
+ if (IS_ERR(kern_btf))
+ return PTR_ERR(kern_btf);
}
if (obj->gen_loader) {
@@ -5157,12 +5203,20 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
*/
return true;
- if (obj->token_fd)
+ if (obj->feat_cache)
return feat_supported(obj->feat_cache, feat_id);
return feat_supported(NULL, feat_id);
}
+/* Used in testing to simulate missing features. */
+void bpf_object_set_feat_cache(struct bpf_object *obj, struct kern_feature_cache *cache)
+{
+ if (obj->feat_cache)
+ free(obj->feat_cache);
+ obj->feat_cache = cache;
+}
+
static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
{
struct bpf_map_info map_info;
@@ -9802,6 +9856,111 @@ __u32 bpf_program__line_info_cnt(const struct bpf_program *prog)
return prog->line_info_cnt;
}
+int bpf_program__clone(struct bpf_program *prog, const struct bpf_prog_load_opts *opts)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, attr);
+ struct bpf_object *obj;
+ const void *info;
+ __u32 info_cnt, info_rec_size;
+ int err, fd, prog_btf_fd;
+
+ if (!prog)
+ return libbpf_err(-EINVAL);
+
+ if (!OPTS_VALID(opts, bpf_prog_load_opts))
+ return libbpf_err(-EINVAL);
+
+ obj = prog->obj;
+ if (obj->state < OBJ_PREPARED)
+ return libbpf_err(-EINVAL);
+
+ /*
+ * Caller-provided opts take priority; fall back to
+ * prog/object defaults when the caller leaves them zero.
+ */
+ attr.attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0) ?: prog->attach_prog_fd;
+ attr.prog_flags = OPTS_GET(opts, prog_flags, 0) ?: prog->prog_flags;
+ attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0) ?: prog->prog_ifindex;
+ attr.kern_version = OPTS_GET(opts, kern_version, 0) ?: obj->kern_version;
+ attr.fd_array = OPTS_GET(opts, fd_array, NULL) ?: obj->fd_array;
+ attr.fd_array_cnt = OPTS_GET(opts, fd_array_cnt, 0) ?: obj->fd_array_cnt;
+ attr.token_fd = OPTS_GET(opts, token_fd, 0) ?: obj->token_fd;
+ if (attr.token_fd)
+ attr.prog_flags |= BPF_F_TOKEN_FD;
+
+ prog_btf_fd = OPTS_GET(opts, prog_btf_fd, 0);
+ if (!prog_btf_fd && obj->btf)
+ prog_btf_fd = btf__fd(obj->btf);
+
+ /* BTF func/line info: only pass if kernel supports it */
+ if (kernel_supports(obj, FEAT_BTF_FUNC) && prog_btf_fd > 0) {
+ attr.prog_btf_fd = prog_btf_fd;
+
+ /* func_info/line_info triples: all-or-nothing from caller */
+ info = OPTS_GET(opts, func_info, NULL);
+ info_cnt = OPTS_GET(opts, func_info_cnt, 0);
+ info_rec_size = OPTS_GET(opts, func_info_rec_size, 0);
+ if (!!info != !!info_cnt || !!info != !!info_rec_size) {
+ pr_warn("prog '%s': func_info, func_info_cnt, and func_info_rec_size must all be specified or all omitted\n",
+ prog->name);
+ return libbpf_err(-EINVAL);
+ }
+ attr.func_info = info ?: prog->func_info;
+ attr.func_info_cnt = info ? info_cnt : prog->func_info_cnt;
+ attr.func_info_rec_size = info ? info_rec_size : prog->func_info_rec_size;
+
+ info = OPTS_GET(opts, line_info, NULL);
+ info_cnt = OPTS_GET(opts, line_info_cnt, 0);
+ info_rec_size = OPTS_GET(opts, line_info_rec_size, 0);
+ if (!!info != !!info_cnt || !!info != !!info_rec_size) {
+ pr_warn("prog '%s': line_info, line_info_cnt, and line_info_rec_size must all be specified or all omitted\n",
+ prog->name);
+ return libbpf_err(-EINVAL);
+ }
+ attr.line_info = info ?: prog->line_info;
+ attr.line_info_cnt = info ? info_cnt : prog->line_info_cnt;
+ attr.line_info_rec_size = info ? info_rec_size : prog->line_info_rec_size;
+ }
+
+ /* Logging is caller-controlled; no fallback to prog/obj log settings */
+ attr.log_buf = OPTS_GET(opts, log_buf, NULL);
+ attr.log_size = OPTS_GET(opts, log_size, 0);
+ attr.log_level = OPTS_GET(opts, log_level, 0);
+
+ /*
+	 * The fields below may be mutated by prog_prepare_load_fn: seed them
+	 * from prog/obj defaults here, then re-apply any caller-provided
+	 * opts afterwards.
+ */
+ attr.expected_attach_type = prog->expected_attach_type;
+ attr.attach_btf_id = prog->attach_btf_id;
+ attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
+
+ if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
+ err = prog->sec_def->prog_prepare_load_fn(prog, &attr, prog->sec_def->cookie);
+ if (err)
+ return libbpf_err(err);
+ }
+
+ /* Re-apply caller overrides for output fields */
+ if (OPTS_GET(opts, expected_attach_type, 0))
+ attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0);
+ if (OPTS_GET(opts, attach_btf_id, 0))
+ attr.attach_btf_id = OPTS_GET(opts, attach_btf_id, 0);
+ if (OPTS_GET(opts, attach_btf_obj_fd, 0))
+ attr.attach_btf_obj_fd = OPTS_GET(opts, attach_btf_obj_fd, 0);
+
+ /*
+ * Unlike bpf_object_load_prog(), we intentionally do not call bpf_prog_bind_map()
+ * for RODATA maps here to avoid mutating the object's state. Callers can bind the
+ * required maps themselves using bpf_prog_bind_map().
+ */
+ fd = bpf_prog_load(prog->type, prog->name, obj->license, prog->insns, prog->insns_cnt,
+ &attr);
+
+ return libbpf_err(fd);
+}
+
#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \
.sec = (char *)sec_pfx, \
.prog_type = BPF_PROG_TYPE_##ptype, \
@@ -11692,6 +11851,8 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
default:
return libbpf_err_ptr(-EINVAL);
}
+ if (!func_name && legacy)
+ return libbpf_err_ptr(-EOPNOTSUPP);
if (!legacy) {
pfd = perf_event_open_probe(false /* uprobe */, retprobe,
@@ -11711,21 +11872,21 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
offset, -1 /* pid */);
}
if (pfd < 0) {
- err = -errno;
- pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
+ err = pfd;
+ pr_warn("prog '%s': failed to create %s '%s%s0x%zx' perf event: %s\n",
prog->name, retprobe ? "kretprobe" : "kprobe",
- func_name, offset,
- errstr(err));
+ func_name ?: "", func_name ? "+" : "",
+ offset, errstr(err));
goto err_out;
}
link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
err = libbpf_get_error(link);
if (err) {
close(pfd);
- pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
+ pr_warn("prog '%s': failed to attach to %s '%s%s0x%zx': %s\n",
prog->name, retprobe ? "kretprobe" : "kprobe",
- func_name, offset,
- errstr(err));
+ func_name ?: "", func_name ? "+" : "",
+ offset, errstr(err));
goto err_clean_legacy;
}
if (legacy) {
@@ -12041,7 +12202,16 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
if (addrs && syms)
return libbpf_err_ptr(-EINVAL);
- if (pattern) {
+	/*
+	 * Exact function name (no wildcards) without unique_match:
+	 * bypass kallsyms parsing and pass the symbol directly to the
+	 * kernel via the syms[] array. When unique_match is set, fall
+	 * through to the slow path, which detects duplicate symbols.
+	 */
+ if (pattern && !strpbrk(pattern, "*?") && !unique_match) {
+ syms = &pattern;
+ cnt = 1;
+ } else if (pattern) {
if (has_available_filter_functions_addrs())
err = libbpf_available_kprobes_parse(&res);
else
@@ -12084,6 +12254,14 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts);
if (link_fd < 0) {
err = -errno;
+		/*
+		 * Normalize the error code: when an exact name bypasses
+		 * kallsyms parsing, the kernel returns ESRCH from
+		 * ftrace_lookup_symbols(). Convert it to ENOENT for API
+		 * consistency with the pattern-matching path, which
+		 * returns ENOENT from userspace.
+		 */
+ if (err == -ESRCH)
+ err = -ENOENT;
pr_warn("prog '%s': failed to attach: %s\n",
prog->name, errstr(err));
goto error;
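/*
 * Illustrative sketch (not part of the patch): with the exact-name fast
 * path above, a wildcard-free pattern reaches the kernel directly, and a
 * missing symbol now reports ENOENT on both paths. The symbol name is
 * arbitrary.
 */
	LIBBPF_OPTS(bpf_kprobe_multi_opts, mopts);
	struct bpf_link *link;

	link = bpf_program__attach_kprobe_multi_opts(prog, "vfs_read", &mopts);
	if (!link && errno == ENOENT)
		fprintf(stderr, "symbol not found\n"); /* same errno on either path */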
@@ -12684,7 +12862,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
binary_path, func_offset, pid);
}
if (pfd < 0) {
- err = -errno;
+ err = pfd;
pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
prog->name, retprobe ? "uretprobe" : "uprobe",
binary_path, func_offset,
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index dfc37a615578..bba4e8464396 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -557,7 +557,7 @@ struct bpf_kprobe_opts {
size_t sz;
/* custom user-provided value fetchable through bpf_get_attach_cookie() */
__u64 bpf_cookie;
- /* function's offset to install kprobe to */
+ /* function offset, or raw address if func_name == NULL */
size_t offset;
/* kprobe is return probe */
bool retprobe;
@@ -565,11 +565,36 @@ struct bpf_kprobe_opts {
enum probe_attach_mode attach_mode;
size_t :0;
};
+
#define bpf_kprobe_opts__last_field attach_mode
+/**
+ * @brief **bpf_program__attach_kprobe()** attaches a BPF program to a
+ * kernel function entry or return.
+ *
+ * @param prog BPF program to attach
+ * @param retprobe Attach to function return
+ * @param func_name Name of the kernel function to attach to
+ * @return Reference to the newly created BPF link, or NULL on error,
+ * with the error code stored in errno
+ */
LIBBPF_API struct bpf_link *
bpf_program__attach_kprobe(const struct bpf_program *prog, bool retprobe,
const char *func_name);
+
+/**
+ * @brief **bpf_program__attach_kprobe_opts()** is just like
+ * bpf_program__attach_kprobe() except with an options struct
+ * for various configurations.
+ *
+ * @param prog BPF program to attach
+ * @param func_name Name of the kernel function to attach to. If NULL,
+ * opts->offset is treated as a raw kernel address. Raw-address attach
+ * is supported with PROBE_ATTACH_MODE_PERF and PROBE_ATTACH_MODE_LINK.
+ * @param opts Options for altering program attachment
+ * @return Reference to the newly created BPF link, or NULL on error,
+ * with the error code stored in errno
+ */
LIBBPF_API struct bpf_link *
bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
const char *func_name,
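/*
 * A minimal sketch of the new raw-address form, assuming func_addr was
 * resolved elsewhere (e.g. from /proc/kallsyms): func_name == NULL and
 * opts.offset carries the address.
 */
	LIBBPF_OPTS(bpf_kprobe_opts, kopts,
		.attach_mode = PROBE_ATTACH_MODE_PERF,	/* or PROBE_ATTACH_MODE_LINK */
		.offset = func_addr,			/* raw kernel address */
	);
	struct bpf_link *link = bpf_program__attach_kprobe_opts(prog, NULL, &kopts);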
@@ -2021,6 +2046,23 @@ LIBBPF_API int libbpf_register_prog_handler(const char *sec,
*/
LIBBPF_API int libbpf_unregister_prog_handler(int handler_id);
+/**
+ * @brief **bpf_program__clone()** loads a single BPF program from a prepared
+ * BPF object into the kernel, returning its file descriptor.
+ *
+ * The BPF object must have been previously prepared with
+ * **bpf_object__prepare()**. If @opts is provided, any non-zero field
+ * overrides the defaults derived from the program/object internals.
+ * If @opts is NULL, all fields are populated automatically.
+ *
+ * The returned FD is owned by the caller and must be closed with close().
+ *
+ * @param prog BPF program from a prepared object
+ * @param opts Optional load options; non-zero fields override defaults
+ * @return program FD (>= 0) on success; negative error code on failure
+ */
+LIBBPF_API int bpf_program__clone(struct bpf_program *prog, const struct bpf_prog_load_opts *opts);
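/*
 * A hedged usage sketch; the object path and program name are hypothetical
 * and error handling is elided. Rodata maps, if any, must be bound by the
 * caller via bpf_prog_bind_map().
 */
	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
	struct bpf_program *prog;
	char log_buf[64 * 1024];
	int fd1, fd2;

	bpf_object__prepare(obj);		/* object must be prepared first */
	prog = bpf_object__find_program_by_name(obj, "my_prog");

	fd1 = bpf_program__clone(prog, NULL);	/* defaults from prog/object */

	LIBBPF_OPTS(bpf_prog_load_opts, lopts,
		.log_buf = log_buf,
		.log_size = sizeof(log_buf),
		.log_level = 2,
	);
	fd2 = bpf_program__clone(prog, &lopts);	/* caller-controlled logging */

	close(fd1);
	close(fd2);				/* caller owns both FDs */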
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index d18fbcea7578..dfed8d60af05 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -455,3 +455,9 @@ LIBBPF_1.7.0 {
bpf_program__assoc_struct_ops;
btf__permute;
} LIBBPF_1.6.0;
+
+LIBBPF_1.8.0 {
+ global:
+ bpf_program__clone;
+ btf__new_empty_opts;
+} LIBBPF_1.7.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 974147e8a8aa..3781c45b46d3 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -394,6 +394,10 @@ enum kern_feature_id {
FEAT_BTF_QMARK_DATASEC,
/* Kernel supports LDIMM64 imm offsets past 512 MiB. */
FEAT_LDIMM64_FULL_RANGE_OFF,
+ /* Kernel supports uprobe syscall */
+ FEAT_UPROBE_SYSCALL,
+ /* Kernel supports BTF layout information */
+ FEAT_BTF_LAYOUT,
__FEAT_CNT,
};
@@ -410,6 +414,7 @@ struct kern_feature_cache {
bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id);
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);
+void bpf_object_set_feat_cache(struct bpf_object *obj, struct kern_feature_cache *cache);
int probe_kern_syscall_wrapper(int token_fd);
int probe_memcg_account(int token_fd);
@@ -420,6 +425,10 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
const char *str_sec, size_t str_len,
int token_fd);
+int libbpf__load_raw_btf_hdr(const struct btf_header *hdr,
+ const char *raw_types, const char *str_sec,
+ const char *layout_sec, int token_fd);
+struct btf *bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *orig_btf);
int btf_load_into_kernel(struct btf *btf,
char *log_buf, size_t log_sz, __u32 log_level,
int token_fd);
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index bccf4bb747e1..b70d9637ecf5 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -218,18 +218,10 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts)
return libbpf_err(ret);
}
-int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
- const char *str_sec, size_t str_len,
- int token_fd)
+int libbpf__load_raw_btf_hdr(const struct btf_header *hdr, const char *raw_types,
+ const char *str_sec, const char *layout_sec,
+ int token_fd)
{
- struct btf_header hdr = {
- .magic = BTF_MAGIC,
- .version = BTF_VERSION,
- .hdr_len = sizeof(struct btf_header),
- .type_len = types_len,
- .str_off = types_len,
- .str_len = str_len,
- };
LIBBPF_OPTS(bpf_btf_load_opts, opts,
.token_fd = token_fd,
.btf_flags = token_fd ? BPF_F_TOKEN_FD : 0,
@@ -237,14 +229,16 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
int btf_fd, btf_len;
__u8 *raw_btf;
- btf_len = hdr.hdr_len + hdr.type_len + hdr.str_len;
+ btf_len = hdr->hdr_len + hdr->type_off + hdr->type_len + hdr->str_len + hdr->layout_len;
raw_btf = malloc(btf_len);
if (!raw_btf)
return -ENOMEM;
- memcpy(raw_btf, &hdr, sizeof(hdr));
- memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len);
- memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len);
+ memcpy(raw_btf, hdr, sizeof(*hdr));
+ memcpy(raw_btf + hdr->hdr_len + hdr->type_off, raw_types, hdr->type_len);
+ memcpy(raw_btf + hdr->hdr_len + hdr->str_off, str_sec, hdr->str_len);
+ if (layout_sec)
+ memcpy(raw_btf + hdr->hdr_len + hdr->layout_off, layout_sec, hdr->layout_len);
btf_fd = bpf_btf_load(raw_btf, btf_len, &opts);
@@ -252,6 +246,22 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
return btf_fd;
}
+int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
+ const char *str_sec, size_t str_len,
+ int token_fd)
+{
+ struct btf_header hdr = {
+ .magic = BTF_MAGIC,
+ .version = BTF_VERSION,
+ .hdr_len = sizeof(struct btf_header),
+ .type_len = types_len,
+ .str_off = types_len,
+ .str_len = str_len,
+ };
+
+ return libbpf__load_raw_btf_hdr(&hdr, raw_types, str_sec, NULL, token_fd);
+}
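/*
 * Sketch (assumption-labeled, not from the patch) of driving the
 * header-based variant directly with an optional layout section; the
 * offsets follow the packing the length computation above assumes
 * (types, then strings, then layout, back-to-back after the header).
 */
	struct btf_header hdr2 = {
		.magic = BTF_MAGIC,
		.version = BTF_VERSION,
		.hdr_len = sizeof(struct btf_header),
		.type_off = 0,
		.type_len = types_len,
		.str_off = types_len,			/* strings follow types */
		.str_len = str_len,
		.layout_off = types_len + str_len,	/* layout follows strings */
		.layout_len = layout_len,
	};
	int btf_fd = libbpf__load_raw_btf_hdr(&hdr2, raw_types, str_sec,
					      layout_sec, 0);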
+
static int load_local_storage_btf(void)
{
const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l";
diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h
index 99331e317dee..c446c0cd8cf9 100644
--- a/tools/lib/bpf/libbpf_version.h
+++ b/tools/lib/bpf/libbpf_version.h
@@ -4,6 +4,6 @@
#define __LIBBPF_VERSION_H
#define LIBBPF_MAJOR_VERSION 1
-#define LIBBPF_MINOR_VERSION 7
+#define LIBBPF_MINOR_VERSION 8
#endif /* __LIBBPF_VERSION_H */
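/*
 * Downstream code that must build against both 1.7 and 1.8 can gate on
 * these macros; a minimal sketch:
 */
#include <bpf/libbpf_version.h>

#if LIBBPF_MAJOR_VERSION > 1 || \
    (LIBBPF_MAJOR_VERSION == 1 && LIBBPF_MINOR_VERSION >= 8)
/* bpf_program__clone() and btf__new_empty_opts() are available */
#endif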
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index 6eea5edba58a..0ccc8f548cba 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -292,6 +292,8 @@ int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
++spec_str;
if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
return -EINVAL;
+ if (access_idx < 0)
+ return -EINVAL;
if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
return -E2BIG;
spec_str += parsed_len;
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index d1524f6f54ae..e3710933fd52 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -262,6 +262,7 @@ struct usdt_manager {
bool has_bpf_cookie;
bool has_sema_refcnt;
bool has_uprobe_multi;
+ bool has_uprobe_syscall;
};
struct usdt_manager *usdt_manager_new(struct bpf_object *obj)
@@ -301,6 +302,13 @@ struct usdt_manager *usdt_manager_new(struct bpf_object *obj)
* usdt probes.
*/
man->has_uprobe_multi = kernel_supports(obj, FEAT_UPROBE_MULTI_LINK);
+
+	/*
+	 * Detect kernel support for the uprobe() syscall; its presence means
+	 * we can take advantage of faster nop5 uprobe handling.
+	 * Added in: 56101b69c919 ("uprobes/x86: Add uprobe syscall to speed up uprobe")
+	 */
+ man->has_uprobe_syscall = kernel_supports(obj, FEAT_UPROBE_SYSCALL);
return man;
}
@@ -585,13 +593,34 @@ static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off,
static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie);
-static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid,
- const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie,
- struct usdt_target **out_targets, size_t *out_target_cnt)
+#if defined(__x86_64__)
+static bool has_nop_combo(int fd, long off)
+{
+ unsigned char nop_combo[6] = {
+ 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 /* nop,nop5 */
+ };
+ unsigned char buf[6];
+
+ if (pread(fd, buf, 6, off) != 6)
+ return false;
+ return memcmp(buf, nop_combo, 6) == 0;
+}
+#else
+static bool has_nop_combo(int fd, long off)
+{
+ return false;
+}
+#endif
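/*
 * The pattern checked above is a 1-byte nop followed by the canonical
 * x86-64 5-byte nop; emitted by hand it would look like this (purely
 * illustrative):
 */
static void nop_combo_site(void)
{
	/* nop; nopl 0x0(%rax,%rax,1) */
	asm volatile(".byte 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00");
}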
+
+static int collect_usdt_targets(struct usdt_manager *man, struct elf_fd *elf_fd, const char *path,
+ pid_t pid, const char *usdt_provider, const char *usdt_name,
+ __u64 usdt_cookie, struct usdt_target **out_targets,
+ size_t *out_target_cnt)
{
size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0;
struct elf_seg *segs = NULL, *vma_segs = NULL;
struct usdt_target *targets = NULL, *target;
+ Elf *elf = elf_fd->elf;
long base_addr = 0;
Elf_Scn *notes_scn, *base_scn;
GElf_Shdr base_shdr, notes_shdr;
@@ -784,6 +813,16 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
target = &targets[target_cnt];
memset(target, 0, sizeof(*target));
+		/*
+		 * The kernel has the uprobe syscall and the USDT site uses the
+		 * nop,nop5 instruction combo, so we can place the uprobe directly
+		 * on the nop5 (+1) and get this probe optimized.
+		 */
+ if (man->has_uprobe_syscall && has_nop_combo(elf_fd->fd, usdt_rel_ip)) {
+ usdt_abs_ip++;
+ usdt_rel_ip++;
+ }
+
target->abs_ip = usdt_abs_ip;
target->rel_ip = usdt_rel_ip;
target->sema_off = usdt_sema_off;
@@ -998,7 +1037,7 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct
/* discover USDT in given binary, optionally limiting
* activations to a given PID, if pid > 0
*/
- err = collect_usdt_targets(man, elf_fd.elf, path, pid, usdt_provider, usdt_name,
+ err = collect_usdt_targets(man, &elf_fd, path, pid, usdt_provider, usdt_name,
usdt_cookie, &targets, &target_cnt);
if (err <= 0) {
err = (err == 0) ? -ENOENT : err;
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index a3ea98211ea6..bfdc5518ecc8 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -47,3 +47,5 @@ verification_cert.h
*.BTF
*.BTF_ids
*.BTF.base
+usdt_1
+usdt_2
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 52e05b256040..78e60040811e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -41,6 +41,8 @@ LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf)
SKIP_DOCS ?=
SKIP_LLVM ?=
+SKIP_LIBBFD ?=
+SKIP_CRYPTO ?=
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
@@ -111,14 +113,12 @@ TEST_PROGS := test_kmod.sh \
test_lirc_mode2.sh \
test_xdping.sh \
test_bpftool_build.sh \
- test_bpftool.sh \
test_doc_build.sh \
test_xsk.sh \
test_xdp_features.sh
TEST_PROGS_EXTENDED := \
- ima_setup.sh verify_sig_setup.sh \
- test_bpftool.py
+ ima_setup.sh verify_sig_setup.sh
TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
bpf_test_modorder_y.ko bpf_test_rqspinlock.ko
@@ -336,6 +336,9 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \
LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \
+ SKIP_LLVM=$(SKIP_LLVM) \
+ SKIP_LIBBFD=$(SKIP_LIBBFD) \
+ SKIP_CRYPTO=$(SKIP_CRYPTO) \
prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install-bin
ifneq ($(CROSS_COMPILE),)
@@ -348,6 +351,9 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
OUTPUT=$(BUILD_DIR)/bpftool/ \
LIBBPF_OUTPUT=$(BUILD_DIR)/libbpf/ \
LIBBPF_DESTDIR=$(SCRATCH_DIR)/ \
+ SKIP_LLVM=$(SKIP_LLVM) \
+ SKIP_LIBBFD=$(SKIP_LIBBFD) \
+ SKIP_CRYPTO=$(SKIP_CRYPTO) \
prefix= DESTDIR=$(SCRATCH_DIR)/ install-bin
endif
@@ -756,7 +762,8 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \
$(VERIFY_SIG_HDR) \
flow_dissector_load.h \
ip_check_defrag_frags.h \
- bpftool_helpers.c
+ bpftool_helpers.c \
+ usdt_1.c usdt_2.c
TRUNNER_LIB_SOURCES := find_bit.c
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
$(OUTPUT)/liburandom_read.so \
@@ -880,6 +887,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_bpf_crypto.o \
$(OUTPUT)/bench_sockmap.o \
$(OUTPUT)/bench_lpm_trie_map.o \
+ $(OUTPUT)/usdt_1.o \
+ $(OUTPUT)/usdt_2.o \
#
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 8368bd3a0665..029b3e21f438 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -541,6 +541,8 @@ extern const struct bench bench_trig_uprobe_nop5;
extern const struct bench bench_trig_uretprobe_nop5;
extern const struct bench bench_trig_uprobe_multi_nop5;
extern const struct bench bench_trig_uretprobe_multi_nop5;
+extern const struct bench bench_trig_usdt_nop;
+extern const struct bench bench_trig_usdt_nop5;
#endif
extern const struct bench bench_rb_libbpf;
@@ -617,6 +619,8 @@ static const struct bench *benchs[] = {
&bench_trig_uretprobe_nop5,
&bench_trig_uprobe_multi_nop5,
&bench_trig_uretprobe_multi_nop5,
+ &bench_trig_usdt_nop,
+ &bench_trig_usdt_nop5,
#endif
/* ringbuf/perfbuf benchmarks */
&bench_rb_libbpf,
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
index e2ff8ea1cb79..71e38000ee06 100644
--- a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
@@ -101,11 +101,6 @@ static void setup(void)
}
}
- if (!bpf_program__attach(skel->progs.kmalloc)) {
- fprintf(stderr, "Error attaching bpf program\n");
- exit(1);
- }
-
threads = calloc(env.producer_cnt, sizeof(*threads));
if (!threads) {
@@ -140,7 +135,6 @@ static void setup(void)
static void measure(struct bench_res *res)
{
res->hits = atomic_swap(&skel->bss->create_cnts, 0);
- res->drops = atomic_swap(&skel->bss->kmalloc_cnts, 0);
}
static void *sk_producer(void *input)
@@ -203,28 +197,25 @@ static void *producer(void *input)
static void report_progress(int iter, struct bench_res *res, long delta_ns)
{
- double creates_per_sec, kmallocs_per_create;
+ double creates_per_sec;
creates_per_sec = res->hits / 1000.0 / (delta_ns / 1000000000.0);
- kmallocs_per_create = (double)res->drops / res->hits;
printf("Iter %3d (%7.3lfus): ",
iter, (delta_ns - 1000000000) / 1000.0);
- printf("creates %8.3lfk/s (%7.3lfk/prod), ",
+ printf("creates %8.3lfk/s (%7.3lfk/prod)\n",
creates_per_sec, creates_per_sec / env.producer_cnt);
- printf("%3.2lf kmallocs/create\n", kmallocs_per_create);
}
static void report_final(struct bench_res res[], int res_cnt)
{
double creates_mean = 0.0, creates_stddev = 0.0;
- long total_creates = 0, total_kmallocs = 0;
+ long total_creates = 0;
int i;
for (i = 0; i < res_cnt; i++) {
creates_mean += res[i].hits / 1000.0 / (0.0 + res_cnt);
total_creates += res[i].hits;
- total_kmallocs += res[i].drops;
}
if (res_cnt > 1) {
@@ -234,9 +225,9 @@ static void report_final(struct bench_res res[], int res_cnt)
(res_cnt - 1.0);
creates_stddev = sqrt(creates_stddev);
}
- printf("Summary: creates %8.3lf \u00B1 %5.3lfk/s (%7.3lfk/prod), ",
- creates_mean, creates_stddev, creates_mean / env.producer_cnt);
- printf("%4.2lf kmallocs/create\n", (double)total_kmallocs / total_creates);
+ printf("Summary: creates %8.3lf \u00B1 %5.3lfk/s (%7.3lfk/prod), %ld total\n",
+ creates_mean, creates_stddev, creates_mean / env.producer_cnt,
+ total_creates);
if (create_owner_errs || skel->bss->create_errs)
printf("%s() errors %ld create_errs %ld\n",
storage_type == BPF_MAP_TYPE_SK_STORAGE ?
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index f74b313d6ae4..2f22ec61667b 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -407,6 +407,23 @@ static void *uprobe_producer_nop5(void *input)
uprobe_target_nop5();
return NULL;
}
+
+void usdt_1(void);
+void usdt_2(void);
+
+static void *uprobe_producer_usdt_nop(void *input)
+{
+ while (true)
+ usdt_1();
+ return NULL;
+}
+
+static void *uprobe_producer_usdt_nop5(void *input)
+{
+ while (true)
+ usdt_2();
+ return NULL;
+}
#endif
static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
@@ -544,6 +561,47 @@ static void uretprobe_multi_nop5_setup(void)
{
usetup(true, true /* use_multi */, &uprobe_target_nop5);
}
+
+static void usdt_setup(const char *name)
+{
+ struct bpf_link *link;
+ int err;
+
+ setup_libbpf();
+
+ ctx.skel = trigger_bench__open();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_usdt, true);
+
+ err = trigger_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to load skeleton\n");
+ exit(1);
+ }
+
+ link = bpf_program__attach_usdt(ctx.skel->progs.bench_trigger_usdt,
+ 0 /*self*/, "/proc/self/exe",
+ "optimized_attach", name, NULL);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "failed to attach optimized_attach:%s usdt probe\n", name);
+ exit(1);
+ }
+ ctx.skel->links.bench_trigger_usdt = link;
+}
+
+static void usdt_nop_setup(void)
+{
+ usdt_setup("usdt_1");
+}
+
+static void usdt_nop5_setup(void)
+{
+ usdt_setup("usdt_2");
+}
#endif
const struct bench bench_trig_syscall_count = {
@@ -611,4 +669,6 @@ BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5");
BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5");
BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5");
BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5");
+BENCH_TRIG_USERMODE(usdt_nop, usdt_nop, "usdt-nop");
+BENCH_TRIG_USERMODE(usdt_nop5, usdt_nop5, "usdt-nop5");
#endif
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
index 03f55405484b..9ec59423b949 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -2,7 +2,7 @@
set -eufo pipefail
-for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop5}
+for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop5} usdt-nop usdt-nop5
do
summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
printf "%-15s: %s\n" $i "$summary"
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 4b7210c318dd..2234bd6bc9d3 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -8,156 +8,11 @@
#define __contains(name, node) __attribute__((btf_decl_tag("contains:" #name ":" #node)))
-/* Description
- * Allocates an object of the type represented by 'local_type_id' in
- * program BTF. User may use the bpf_core_type_id_local macro to pass the
- * type ID of a struct in program BTF.
- *
- * The 'local_type_id' parameter must be a known constant.
- * The 'meta' parameter is rewritten by the verifier, no need for BPF
- * program to set it.
- * Returns
- * A pointer to an object of the type corresponding to the passed in
- * 'local_type_id', or NULL on failure.
- */
-extern void *bpf_obj_new_impl(__u64 local_type_id, void *meta) __ksym;
-
-/* Convenience macro to wrap over bpf_obj_new_impl */
-#define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL))
-
-/* Description
- * Free an allocated object. All fields of the object that require
- * destruction will be destructed before the storage is freed.
- *
- * The 'meta' parameter is rewritten by the verifier, no need for BPF
- * program to set it.
- * Returns
- * Void.
- */
-extern void bpf_obj_drop_impl(void *kptr, void *meta) __ksym;
-
-/* Convenience macro to wrap over bpf_obj_drop_impl */
-#define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL)
-
-/* Description
- * Increment the refcount on a refcounted local kptr, turning the
- * non-owning reference input into an owning reference in the process.
- *
- * The 'meta' parameter is rewritten by the verifier, no need for BPF
- * program to set it.
- * Returns
- * An owning reference to the object pointed to by 'kptr'
- */
-extern void *bpf_refcount_acquire_impl(void *kptr, void *meta) __ksym;
-
-/* Convenience macro to wrap over bpf_refcount_acquire_impl */
-#define bpf_refcount_acquire(kptr) bpf_refcount_acquire_impl(kptr, NULL)
-
-/* Description
- * Add a new entry to the beginning of the BPF linked list.
- *
- * The 'meta' and 'off' parameters are rewritten by the verifier, no need
- * for BPF programs to set them
- * Returns
- * 0 if the node was successfully added
- * -EINVAL if the node wasn't added because it's already in a list
- */
-extern int bpf_list_push_front_impl(struct bpf_list_head *head,
- struct bpf_list_node *node,
- void *meta, __u64 off) __ksym;
-
-/* Convenience macro to wrap over bpf_list_push_front_impl */
-#define bpf_list_push_front(head, node) bpf_list_push_front_impl(head, node, NULL, 0)
-
-/* Description
- * Add a new entry to the end of the BPF linked list.
- *
- * The 'meta' and 'off' parameters are rewritten by the verifier, no need
- * for BPF programs to set them
- * Returns
- * 0 if the node was successfully added
- * -EINVAL if the node wasn't added because it's already in a list
- */
-extern int bpf_list_push_back_impl(struct bpf_list_head *head,
- struct bpf_list_node *node,
- void *meta, __u64 off) __ksym;
-
-/* Convenience macro to wrap over bpf_list_push_back_impl */
-#define bpf_list_push_back(head, node) bpf_list_push_back_impl(head, node, NULL, 0)
-
-/* Description
- * Remove the entry at the beginning of the BPF linked list.
- * Returns
- * Pointer to bpf_list_node of deleted entry, or NULL if list is empty.
- */
-extern struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym;
+/* Convenience macro to wrap over bpf_obj_new */
+#define bpf_obj_new(type) ((type *)bpf_obj_new(bpf_core_type_id_local(type)))
-/* Description
- * Remove the entry at the end of the BPF linked list.
- * Returns
- * Pointer to bpf_list_node of deleted entry, or NULL if list is empty.
- */
-extern struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym;
-
-/* Description
- * Remove 'node' from rbtree with root 'root'
- * Returns
- * Pointer to the removed node, or NULL if 'root' didn't contain 'node'
- */
-extern struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
- struct bpf_rb_node *node) __ksym;
-
-/* Description
- * Add 'node' to rbtree with root 'root' using comparator 'less'
- *
- * The 'meta' and 'off' parameters are rewritten by the verifier, no need
- * for BPF programs to set them
- * Returns
- * 0 if the node was successfully added
- * -EINVAL if the node wasn't added because it's already in a tree
- */
-extern int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
- bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
- void *meta, __u64 off) __ksym;
-
-/* Convenience macro to wrap over bpf_rbtree_add_impl */
-#define bpf_rbtree_add(head, node, less) bpf_rbtree_add_impl(head, node, less, NULL, 0)
-
-/* Description
- * Return the first (leftmost) node in input tree
- * Returns
- * Pointer to the node, which is _not_ removed from the tree. If the tree
- * contains no nodes, returns NULL.
- */
-extern struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym;
-
-/* Description
- * Allocates a percpu object of the type represented by 'local_type_id' in
- * program BTF. User may use the bpf_core_type_id_local macro to pass the
- * type ID of a struct in program BTF.
- *
- * The 'local_type_id' parameter must be a known constant.
- * The 'meta' parameter is rewritten by the verifier, no need for BPF
- * program to set it.
- * Returns
- * A pointer to a percpu object of the type corresponding to the passed in
- * 'local_type_id', or NULL on failure.
- */
-extern void *bpf_percpu_obj_new_impl(__u64 local_type_id, void *meta) __ksym;
-
-/* Convenience macro to wrap over bpf_percpu_obj_new_impl */
-#define bpf_percpu_obj_new(type) ((type __percpu_kptr *)bpf_percpu_obj_new_impl(bpf_core_type_id_local(type), NULL))
-
-/* Description
- * Free an allocated percpu object. All fields of the object that require
- * destruction will be destructed before the storage is freed.
- *
- * The 'meta' parameter is rewritten by the verifier, no need for BPF
- * program to set it.
- * Returns
- * Void.
- */
-extern void bpf_percpu_obj_drop_impl(void *kptr, void *meta) __ksym;
+/* Convenience macro to wrap over bpf_percpu_obj_new */
+#define bpf_percpu_obj_new(type) ((type __percpu_kptr *)bpf_percpu_obj_new(bpf_core_type_id_local(type)))
struct bpf_iter_task_vma;
@@ -167,9 +22,6 @@ extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
extern struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym;
extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym;
-/* Convenience macro to wrap over bpf_obj_drop_impl */
-#define bpf_percpu_obj_drop(kptr) bpf_percpu_obj_drop_impl(kptr, NULL)
-
/* Description
* Throw a BPF exception from the program, immediately terminating its
* execution and unwinding the stack. The supplied 'cookie' parameter
@@ -627,6 +479,10 @@ struct task_struct___preempt_rt {
int softirq_disable_cnt;
} __attribute__((preserve_access_index));
+#ifdef bpf_target_s390
+extern struct lowcore *bpf_get_lowcore(void) __weak __ksym;
+#endif
+
static inline int get_preempt_count(void)
{
#if defined(bpf_target_x86)
@@ -645,6 +501,10 @@ static inline int get_preempt_count(void)
bpf_this_cpu_ptr(&pcpu_hot))->preempt_count;
#elif defined(bpf_target_arm64)
return bpf_get_current_task_btf()->thread_info.preempt.count;
+#elif defined(bpf_target_powerpc)
+ return bpf_get_current_task_btf()->thread_info.preempt_count;
+#elif defined(bpf_target_s390)
+ return bpf_get_lowcore()->preempt_count;
#endif
return 0;
}
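/*
 * A hedged example of a BPF program consuming these helpers now that
 * powerpc64 and s390x are covered; the section name and logic are
 * illustrative only.
 */
SEC("kprobe/do_nanosleep")
int check_ctx(struct pt_regs *ctx)
{
	if (bpf_in_interrupt())		/* x86, arm64, powerpc64, s390x */
		return 0;		/* skip work in interrupt context */
	/* ... task-context work ... */
	return 0;
}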
@@ -653,6 +513,8 @@ static inline int get_preempt_count(void)
* Report whether it is in interrupt context. Only works on the following archs:
* * x86
* * arm64
+ * * powerpc64
+ * * s390x
*/
static inline int bpf_in_interrupt(void)
{
@@ -672,6 +534,8 @@ static inline int bpf_in_interrupt(void)
* Report whether it is in NMI context. Only works on the following archs:
* * x86
* * arm64
+ * * powerpc64
+ * * s390x
*/
static inline int bpf_in_nmi(void)
{
@@ -682,6 +546,8 @@ static inline int bpf_in_nmi(void)
* Report whether it is in hard IRQ context. Only works on the following archs:
* * x86
* * arm64
+ * * powerpc64
+ * * s390x
*/
static inline int bpf_in_hardirq(void)
{
@@ -692,6 +558,8 @@ static inline int bpf_in_hardirq(void)
* Report whether it is in softirq context. Only works on the following archs:
* * x86
* * arm64
+ * * powerpc64
+ * * s390x
*/
static inline int bpf_in_serving_softirq(void)
{
@@ -710,6 +578,8 @@ static inline int bpf_in_serving_softirq(void)
* Report whether it is in task context. Only works on the following archs:
* * x86
* * arm64
+ * * powerpc64
+ * * s390x
*/
static inline int bpf_in_task(void)
{
diff --git a/tools/testing/selftests/bpf/bpftool_helpers.c b/tools/testing/selftests/bpf/bpftool_helpers.c
index 929fc257f431..0a2a4f0a2794 100644
--- a/tools/testing/selftests/bpf/bpftool_helpers.c
+++ b/tools/testing/selftests/bpf/bpftool_helpers.c
@@ -2,18 +2,18 @@
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
+#include <limits.h>
#include "bpf_util.h"
#include "bpftool_helpers.h"
-#define BPFTOOL_PATH_MAX_LEN 64
-#define BPFTOOL_FULL_CMD_MAX_LEN 512
+#define BPFTOOL_FULL_CMD_MAX_LEN (PATH_MAX * 2)
#define BPFTOOL_DEFAULT_PATH "tools/sbin/bpftool"
static int detect_bpftool_path(char *buffer, size_t size)
{
- char tmp[BPFTOOL_PATH_MAX_LEN];
+ char tmp[PATH_MAX];
const char *env_path;
/* First, check if BPFTOOL environment variable is set */
@@ -29,7 +29,7 @@ static int detect_bpftool_path(char *buffer, size_t size)
/* Check default bpftool location (will work if we are running the
* default flavor of test_progs)
*/
- snprintf(tmp, BPFTOOL_PATH_MAX_LEN, "./%s", BPFTOOL_DEFAULT_PATH);
+ snprintf(tmp, sizeof(tmp), "./%s", BPFTOOL_DEFAULT_PATH);
if (access(tmp, X_OK) == 0) {
strscpy(buffer, tmp, size);
return 0;
@@ -38,7 +38,7 @@ static int detect_bpftool_path(char *buffer, size_t size)
/* Check alternate bpftool location (will work if we are running a
* specific flavor of test_progs, e.g. cpuv4 or no_alu32)
*/
- snprintf(tmp, BPFTOOL_PATH_MAX_LEN, "../%s", BPFTOOL_DEFAULT_PATH);
+ snprintf(tmp, sizeof(tmp), "../%s", BPFTOOL_DEFAULT_PATH);
if (access(tmp, X_OK) == 0) {
strscpy(buffer, tmp, size);
return 0;
@@ -50,7 +50,7 @@ static int detect_bpftool_path(char *buffer, size_t size)
static int run_command(char *args, char *output_buf, size_t output_max_len)
{
- static char bpftool_path[BPFTOOL_PATH_MAX_LEN] = {0};
+ static char bpftool_path[PATH_MAX] = {};
bool suppress_output = !(output_buf && output_max_len);
char command[BPFTOOL_FULL_CMD_MAX_LEN];
FILE *f;
@@ -60,7 +60,7 @@ static int run_command(char *args, char *output_buf, size_t output_max_len)
if (bpftool_path[0] == 0 && detect_bpftool_path(bpftool_path, sizeof(bpftool_path)))
return 1;
- ret = snprintf(command, BPFTOOL_FULL_CMD_MAX_LEN, "%s %s%s",
+ ret = snprintf(command, sizeof(command), "%s %s%s",
bpftool_path, args,
suppress_output ? " > /dev/null 2>&1" : "");
@@ -84,4 +84,3 @@ int get_bpftool_command_output(char *args, char *output_buf, size_t output_max_l
{
return run_command(args, output_buf, output_max_len);
}
-
diff --git a/tools/testing/selftests/bpf/cgroup_iter_memcg.h b/tools/testing/selftests/bpf/cgroup_iter_memcg.h
index 3f59b127943b..ff20ec537164 100644
--- a/tools/testing/selftests/bpf/cgroup_iter_memcg.h
+++ b/tools/testing/selftests/bpf/cgroup_iter_memcg.h
@@ -9,8 +9,6 @@ struct memcg_query {
unsigned long nr_shmem;
unsigned long nr_file_pages;
unsigned long nr_file_mapped;
- /* some memcg_stat_item */
- unsigned long memcg_kmem;
/* some vm_event_item */
unsigned long pgfault;
};
diff --git a/tools/testing/selftests/bpf/prog_tests/access_variable_array.c b/tools/testing/selftests/bpf/prog_tests/access_variable_array.c
deleted file mode 100644
index 08131782437c..000000000000
--- a/tools/testing/selftests/bpf/prog_tests/access_variable_array.c
+++ /dev/null
@@ -1,16 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2022 Bytedance */
-
-#include <test_progs.h>
-#include "test_access_variable_array.skel.h"
-
-void test_access_variable_array(void)
-{
- struct test_access_variable_array *skel;
-
- skel = test_access_variable_array__open_and_load();
- if (!ASSERT_OK_PTR(skel, "test_access_variable_array__open_and_load"))
- return;
-
- test_access_variable_array__destroy(skel);
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
deleted file mode 100644
index 24c509ce4e5b..000000000000
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ /dev/null
@@ -1,712 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-
-#define MAX_INSNS 512
-#define MAX_MATCHES 24
-
-struct bpf_reg_match {
- unsigned int line;
- const char *reg;
- const char *match;
-};
-
-struct bpf_align_test {
- const char *descr;
- struct bpf_insn insns[MAX_INSNS];
- enum {
- UNDEF,
- ACCEPT,
- REJECT
- } result;
- enum bpf_prog_type prog_type;
- /* Matches must be in order of increasing line */
- struct bpf_reg_match matches[MAX_MATCHES];
-};
-
-static struct bpf_align_test tests[] = {
- /* Four tests of known constants. These aren't staggeringly
- * interesting since we track exact values now.
- */
- {
- .descr = "mov",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_3, 4),
- BPF_MOV64_IMM(BPF_REG_3, 8),
- BPF_MOV64_IMM(BPF_REG_3, 16),
- BPF_MOV64_IMM(BPF_REG_3, 32),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .matches = {
- {0, "R1", "ctx()"},
- {0, "R10", "fp0"},
- {0, "R3", "2"},
- {1, "R3", "4"},
- {2, "R3", "8"},
- {3, "R3", "16"},
- {4, "R3", "32"},
- },
- },
- {
- .descr = "shift",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, 1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_3, 4),
- BPF_MOV64_IMM(BPF_REG_4, 32),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .matches = {
- {0, "R1", "ctx()"},
- {0, "R10", "fp0"},
- {0, "R3", "1"},
- {1, "R3", "2"},
- {2, "R3", "4"},
- {3, "R3", "8"},
- {4, "R3", "16"},
- {5, "R3", "1"},
- {6, "R4", "32"},
- {7, "R4", "16"},
- {8, "R4", "8"},
- {9, "R4", "4"},
- {10, "R4", "2"},
- },
- },
- {
- .descr = "addsub",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, 4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .matches = {
- {0, "R1", "ctx()"},
- {0, "R10", "fp0"},
- {0, "R3", "4"},
- {1, "R3", "8"},
- {2, "R3", "10"},
- {3, "R4", "8"},
- {4, "R4", "12"},
- {5, "R4", "14"},
- },
- },
- {
- .descr = "mul",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, 7),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 1),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 2),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .matches = {
- {0, "R1", "ctx()"},
- {0, "R10", "fp0"},
- {0, "R3", "7"},
- {1, "R3", "7"},
- {2, "R3", "14"},
- {3, "R3", "56"},
- },
- },
-
- /* Tests using unknown values */
-#define PREP_PKT_POINTERS \
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \
- offsetof(struct __sk_buff, data)), \
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, \
- offsetof(struct __sk_buff, data_end))
-
-#define LOAD_UNKNOWN(DST_REG) \
- PREP_PKT_POINTERS, \
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), \
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), \
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 1), \
- BPF_EXIT_INSN(), \
- BPF_LDX_MEM(BPF_B, DST_REG, BPF_REG_2, 0)
-
- {
- .descr = "unknown shift",
- .insns = {
- LOAD_UNKNOWN(BPF_REG_3),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
- LOAD_UNKNOWN(BPF_REG_4),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 5),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .matches = {
- {6, "R0", "pkt(off=8,r=8)"},
- {6, "R3", "var_off=(0x0; 0xff)"},
- {7, "R3", "var_off=(0x0; 0x1fe)"},
- {8, "R3", "var_off=(0x0; 0x3fc)"},
- {9, "R3", "var_off=(0x0; 0x7f8)"},
- {10, "R3", "var_off=(0x0; 0xff0)"},
- {12, "R3", "pkt_end()"},
- {17, "R4", "var_off=(0x0; 0xff)"},
- {18, "R4", "var_off=(0x0; 0x1fe0)"},
- {19, "R4", "var_off=(0x0; 0xff0)"},
- {20, "R4", "var_off=(0x0; 0x7f8)"},
- {21, "R4", "var_off=(0x0; 0x3fc)"},
- {22, "R4", "var_off=(0x0; 0x1fe)"},
- },
- },
- {
- .descr = "unknown mul",
- .insns = {
- LOAD_UNKNOWN(BPF_REG_3),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 1),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 4),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 8),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .matches = {
- {6, "R3", "var_off=(0x0; 0xff)"},
- {7, "R4", "var_off=(0x0; 0xff)"},
- {8, "R4", "var_off=(0x0; 0xff)"},
- {9, "R4", "var_off=(0x0; 0xff)"},
- {10, "R4", "var_off=(0x0; 0x1fe)"},
- {11, "R4", "var_off=(0x0; 0xff)"},
- {12, "R4", "var_off=(0x0; 0x3fc)"},
- {13, "R4", "var_off=(0x0; 0xff)"},
- {14, "R4", "var_off=(0x0; 0x7f8)"},
- {15, "R4", "var_off=(0x0; 0xff0)"},
- },
- },
- {
- .descr = "packet const offset",
- .insns = {
- PREP_PKT_POINTERS,
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
-
- BPF_MOV64_IMM(BPF_REG_0, 0),
-
- /* Skip over ethernet header. */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
- BPF_EXIT_INSN(),
-
- BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 0),
- BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 2),
- BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 3),
- BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 0),
- BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 2),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
-
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .matches = {
- {2, "R5", "pkt(r=0)"},
- {4, "R5", "pkt(off=14,r=0)"},
- {5, "R4", "pkt(off=14,r=0)"},
- {9, "R2", "pkt(r=18)"},
- {10, "R5", "pkt(off=14,r=18)"},
- {10, "R4", "var_off=(0x0; 0xff)"},
- {13, "R4", "var_off=(0x0; 0xffff)"},
- {14, "R4", "var_off=(0x0; 0xffff)"},
- },
- },
- {
- .descr = "packet variable offset",
- .insns = {
- LOAD_UNKNOWN(BPF_REG_6),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
-
- /* First, add a constant to the R5 packet pointer,
- * then a variable with a known alignment.
- */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
-
- /* Now, test in the other direction. Adding first
- * the variable offset to R5, then the constant.
- */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
-
- /* Test multiple accumulations of unknown values
- * into a packet pointer.
- */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
-
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .matches = {
- /* Calculated offset in R6 has unknown value, but known
- * alignment of 4.
- */
- {6, "R2", "pkt(r=8)"},
- {7, "R6", "var_off=(0x0; 0x3fc)"},
- /* Offset is added to packet pointer R5, resulting in
- * known fixed offset, and variable offset from R6.
- */
- {11, "R5", "pkt(id=1,off=14,"},
- /* At the time the word size load is performed from R5,
- * it's total offset is NET_IP_ALIGN + reg->off (0) +
- * reg->aux_off (14) which is 16. Then the variable
- * offset is considered using reg->aux_off_align which
- * is 4 and meets the load's requirements.
- */
- {15, "R4", "var_off=(0x0; 0x3fc)"},
- {15, "R5", "var_off=(0x0; 0x3fc)"},
- /* Variable offset is added to R5 packet pointer,
- * resulting in auxiliary alignment of 4. To avoid BPF
- * verifier's precision backtracking logging
- * interfering we also have a no-op R4 = R5
- * instruction to validate R5 state. We also check
- * that R4 is what it should be in such case.
- */
- {18, "R4", "var_off=(0x0; 0x3fc)"},
- {18, "R5", "var_off=(0x0; 0x3fc)"},
- /* Constant offset is added to R5, resulting in
- * reg->off of 14.
- */
- {19, "R5", "pkt(id=2,off=14,"},
- /* At the time the word size load is performed from R5,
- * its total fixed offset is NET_IP_ALIGN + reg->off
- * (14) which is 16. Then the variable offset is 4-byte
- * aligned, so the total offset is 4-byte aligned and
- * meets the load's requirements.
- */
- {24, "R4", "var_off=(0x0; 0x3fc)"},
- {24, "R5", "var_off=(0x0; 0x3fc)"},
- /* Constant offset is added to R5 packet pointer,
- * resulting in reg->off value of 14.
- */
- {26, "R5", "pkt(off=14,r=8)"},
- /* Variable offset is added to R5, resulting in a
- * variable offset of (4n). See comment for insn #18
- * for R4 = R5 trick.
- */
- {28, "R4", "var_off=(0x0; 0x3fc)"},
- {28, "R5", "var_off=(0x0; 0x3fc)"},
- /* Constant is added to R5 again, setting reg->off to 18. */
- {29, "R5", "pkt(id=3,off=18,"},
- /* And once more we add a variable; resulting var_off
- * is still (4n), fixed offset is not changed.
- * Also, we create a new reg->id.
- */
- {31, "R4", "var_off=(0x0; 0x7fc)"},
- {31, "R5", "var_off=(0x0; 0x7fc)"},
- /* At the time the word size load is performed from R5,
- * its total fixed offset is NET_IP_ALIGN + reg->off (18)
- * which is 20. Then the variable offset is (4n), so
- * the total offset is 4-byte aligned and meets the
- * load's requirements.
- */
- {35, "R4", "var_off=(0x0; 0x7fc)"},
- {35, "R5", "var_off=(0x0; 0x7fc)"},
- },
- },
- {
- .descr = "packet variable offset 2",
- .insns = {
- /* Create an unknown offset, (4n+2)-aligned */
- LOAD_UNKNOWN(BPF_REG_6),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
- /* Add it to the packet pointer */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
- /* Check bounds and perform a read */
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
- /* Make a (4n) offset from the value we just read */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xff),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
- /* Add it to the packet pointer */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
- /* Check bounds and perform a read */
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .matches = {
- /* Calculated offset in R6 has unknown value, but known
- * alignment of 4.
- */
- {6, "R2", "pkt(r=8)"},
- {7, "R6", "var_off=(0x0; 0x3fc)"},
- /* Adding 14 makes R6 be (4n+2) */
- {8, "R6", "var_off=(0x2; 0x7fc)"},
- /* Packet pointer has (4n+2) offset */
- {11, "R5", "var_off=(0x2; 0x7fc)"},
- {12, "R4", "var_off=(0x2; 0x7fc)"},
- /* At the time the word size load is performed from R5,
- * its total fixed offset is NET_IP_ALIGN + reg->off (0)
- * which is 2. Then the variable offset is (4n+2), so
- * the total offset is 4-byte aligned and meets the
- * load's requirements.
- */
- {15, "R5", "var_off=(0x2; 0x7fc)"},
- /* Newly read value in R6 was shifted left by 2, so has
- * known alignment of 4.
- */
- {17, "R6", "var_off=(0x0; 0x3fc)"},
- /* Added (4n) to packet pointer's (4n+2) var_off, giving
- * another (4n+2).
- */
- {19, "R5", "var_off=(0x2; 0xffc)"},
- {20, "R4", "var_off=(0x2; 0xffc)"},
- /* At the time the word size load is performed from R5,
- * its total fixed offset is NET_IP_ALIGN + reg->off (0)
- * which is 2. Then the variable offset is (4n+2), so
- * the total offset is 4-byte aligned and meets the
- * load's requirements.
- */
- {23, "R5", "var_off=(0x2; 0xffc)"},
- },
- },
- {
- .descr = "dubious pointer arithmetic",
- .insns = {
- PREP_PKT_POINTERS,
- BPF_MOV64_IMM(BPF_REG_0, 0),
- /* (ptr - ptr) << 2 */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_2),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2),
- /* We have a (4n) value. Let's make a packet offset
- * out of it. First add 14, to make it a (4n+2)
- */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
- /* Then make sure it's nonnegative */
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_5, 0, 1),
- BPF_EXIT_INSN(),
- /* Add it to packet pointer */
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
- /* Check bounds and perform a read */
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .matches = {
- {3, "R5", "pkt_end()"},
- /* (ptr - ptr) << 2 == unknown, (4n) */
- {5, "R5", "var_off=(0x0; 0xfffffffffffffffc)"},
- /* (4n) + 14 == (4n+2). We blow our bounds, because
- * the add could overflow.
- */
- {6, "R5", "var_off=(0x2; 0xfffffffffffffffc)"},
- /* Checked s>=0 */
- {9, "R5", "var_off=(0x2; 0x7ffffffffffffffc)"},
- /* packet pointer + nonnegative (4n+2) */
- {11, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"},
- {12, "R4", "var_off=(0x2; 0x7ffffffffffffffc)"},
- /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
- * We checked the bounds, but it might have been able
- * to overflow if the packet pointer started in the
- * upper half of the address space.
- * So we did not get a 'range' on R6, and the access
- * attempt will fail.
- */
- {15, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"},
- }
- },
- {
- .descr = "variable subtraction",
- .insns = {
- /* Create an unknown offset, (4n+2)-aligned */
- LOAD_UNKNOWN(BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
- /* Create another unknown, (4n)-aligned, and subtract
- * it from the first one
- */
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_7),
- /* Bounds-check the result */
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_6, 0, 1),
- BPF_EXIT_INSN(),
- /* Add it to the packet pointer */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
- /* Check bounds and perform a read */
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .matches = {
- /* Calculated offset in R6 has unknown value, but known
- * alignment of 4.
- */
- {6, "R2", "pkt(r=8)"},
- {8, "R6", "var_off=(0x0; 0x3fc)"},
- /* Adding 14 makes R6 be (4n+2) */
- {9, "R6", "var_off=(0x2; 0x7fc)"},
- /* New unknown value in R7 is (4n) */
- {10, "R7", "var_off=(0x0; 0x3fc)"},
- /* Subtracting it from R6 blows our unsigned bounds */
- {11, "R6", "var_off=(0x2; 0xfffffffffffffffc)"},
- /* Checked s>= 0 */
- {14, "R6", "var_off=(0x2; 0x7fc)"},
- /* At the time the word size load is performed from R5,
- * its total fixed offset is NET_IP_ALIGN + reg->off (0)
- * which is 2. Then the variable offset is (4n+2), so
- * the total offset is 4-byte aligned and meets the
- * load's requirements.
- */
- {20, "R5", "var_off=(0x2; 0x7fc)"},
- },
- },
- {
- .descr = "pointer variable subtraction",
- .insns = {
- /* Create an unknown offset, (4n+2)-aligned and bounded
- * to [14,74]
- */
- LOAD_UNKNOWN(BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xf),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
- /* Subtract it from the packet pointer */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_6),
- /* Create another unknown, (4n)-aligned and >= 74.
- * That in fact means >= 76, since 74 % 4 == 2
- */
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 76),
- /* Add it to the packet pointer */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_7),
- /* Check bounds and perform a read */
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .matches = {
- /* Calculated offset in R6 has unknown value, but known
- * alignment of 4.
- */
- {6, "R2", "pkt(r=8)"},
- {9, "R6", "var_off=(0x0; 0x3c)"},
- /* Adding 14 makes R6 be (4n+2) */
- {10, "R6", "var_off=(0x2; 0x7c)"},
- /* Subtracting from packet pointer overflows ubounds */
- {13, "R5", "var_off=(0xffffffffffffff82; 0x7c)"},
- /* New unknown value in R7 is (4n), >= 76 */
- {14, "R7", "var_off=(0x0; 0x7fc)"},
- /* Adding it to packet pointer gives nice bounds again */
- {16, "R5", "var_off=(0x2; 0x7fc)"},
- /* At the time the word size load is performed from R5,
- * its total fixed offset is NET_IP_ALIGN + reg->off (0)
- * which is 2. Then the variable offset is (4n+2), so
- * the total offset is 4-byte aligned and meets the
- * load's requirements.
- */
- {20, "R5", "var_off=(0x2; 0x7fc)"},
- },
- },
-};
-
-static int probe_filter_length(const struct bpf_insn *fp)
-{
- int len;
-
- for (len = MAX_INSNS - 1; len > 0; --len)
- if (fp[len].code != 0 || fp[len].imm != 0)
- break;
- return len + 1;
-}
-
-static char bpf_vlog[32768];
-
-static int do_test_single(struct bpf_align_test *test)
-{
- struct bpf_insn *prog = test->insns;
- int prog_type = test->prog_type;
- char bpf_vlog_copy[32768];
- LIBBPF_OPTS(bpf_prog_load_opts, opts,
- .prog_flags = BPF_F_STRICT_ALIGNMENT,
- .log_buf = bpf_vlog,
- .log_size = sizeof(bpf_vlog),
- .log_level = 2,
- );
- const char *main_pass_start = "0: R1=ctx() R10=fp0";
- const char *line_ptr;
- int cur_line = -1;
- int prog_len, i;
- char *start;
- int fd_prog;
- int ret;
-
- prog_len = probe_filter_length(prog);
- fd_prog = bpf_prog_load(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
- prog, prog_len, &opts);
- if (fd_prog < 0 && test->result != REJECT) {
- printf("Failed to load program.\n");
- printf("%s", bpf_vlog);
- ret = 1;
- } else if (fd_prog >= 0 && test->result == REJECT) {
- printf("Unexpected success to load!\n");
- printf("%s", bpf_vlog);
- ret = 1;
- close(fd_prog);
- } else {
- ret = 0;
- /* We make a local copy so that we can strtok() it */
- strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy));
- start = strstr(bpf_vlog_copy, main_pass_start);
- if (!start) {
- ret = 1;
- printf("Can't find initial line '%s'\n", main_pass_start);
- goto out;
- }
- line_ptr = strtok(start, "\n");
- for (i = 0; i < MAX_MATCHES; i++) {
- struct bpf_reg_match m = test->matches[i];
- const char *p;
- int tmp;
-
- if (!m.match)
- break;
- while (line_ptr) {
- cur_line = -1;
- sscanf(line_ptr, "%u: ", &cur_line);
- if (cur_line == -1)
- sscanf(line_ptr, "from %u to %u: ", &tmp, &cur_line);
- if (cur_line == m.line)
- break;
- line_ptr = strtok(NULL, "\n");
- }
- if (!line_ptr) {
- printf("Failed to find line %u for match: %s=%s\n",
- m.line, m.reg, m.match);
- ret = 1;
- printf("%s", bpf_vlog);
- break;
- }
- /* Check the next line as well in case the previous line
- * did not have a corresponding bpf insn. Example:
- * func#0 @0
- * 0: R1=ctx() R10=fp0
- * 0: (b7) r3 = 2 ; R3_w=2
- *
- * Sometimes it's actually two lines below, e.g. when
- * searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))":
- * from 4 to 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0
- * 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0
- * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(r=8) R3_w=scalar(umax=255,var_off=(0x0; 0xff))
- */
- while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) {
- cur_line = -1;
- line_ptr = strtok(NULL, "\n");
- sscanf(line_ptr ?: "", "%u: ", &cur_line);
- if (!line_ptr || cur_line != m.line)
- break;
- }
- if (cur_line != m.line || !line_ptr || !(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) {
- printf("Failed to find match %u: %s=%s\n", m.line, m.reg, m.match);
- ret = 1;
- printf("%s", bpf_vlog);
- break;
- }
- }
-out:
- if (fd_prog >= 0)
- close(fd_prog);
- }
- return ret;
-}
-
-void test_align(void)
-{
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(tests); i++) {
- struct bpf_align_test *test = &tests[i];
-
- if (!test__start_subtest(test->descr))
- continue;
-
- ASSERT_OK(do_test_single(test), test->descr);
- }
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 9e77e5da7097..e8c1a619e330 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -123,6 +123,140 @@ cleanup:
test_attach_probe_manual__destroy(skel);
}
+/* manual address-based kprobe/kretprobe attach tests */
+static void test_attach_kprobe_by_addr(enum probe_attach_mode attach_mode)
+{
+ LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
+ struct test_attach_probe_manual *skel;
+ unsigned long func_addr;
+
+ if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
+ return;
+
+ func_addr = ksym_get_addr(SYS_NANOSLEEP_KPROBE_NAME);
+ if (!ASSERT_NEQ(func_addr, 0UL, "func_addr"))
+ return;
+
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
+ return;
+
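+	/*
+	 * Address-based attach, as exercised by this new test: with a NULL
+	 * function name, opts.offset is interpreted as the absolute kernel
+	 * address to probe.
+	 */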
+ kprobe_opts.attach_mode = attach_mode;
+ kprobe_opts.retprobe = false;
+ kprobe_opts.offset = func_addr;
+ skel->links.handle_kprobe =
+ bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ NULL, &kprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.handle_kprobe, "attach_kprobe_by_addr"))
+ goto cleanup;
+
+ kprobe_opts.retprobe = true;
+ skel->links.handle_kretprobe =
+ bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ NULL, &kprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.handle_kretprobe, "attach_kretprobe_by_addr"))
+ goto cleanup;
+
+ /* trigger & validate kprobe && kretprobe */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res");
+ ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res");
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
+/* reject legacy address-based kprobe attach */
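+/* libbpf does not support address-based attach via the legacy
+ * kprobe_events interface, so the attach below must fail with
+ * -EOPNOTSUPP.
+ */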
+static void test_attach_kprobe_legacy_by_addr_reject(void)
+{
+ LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
+ struct test_attach_probe_manual *skel;
+ unsigned long func_addr;
+
+ if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
+ return;
+
+ func_addr = ksym_get_addr(SYS_NANOSLEEP_KPROBE_NAME);
+ if (!ASSERT_NEQ(func_addr, 0UL, "func_addr"))
+ return;
+
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
+ return;
+
+ kprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY;
+ kprobe_opts.offset = func_addr;
+ skel->links.handle_kprobe =
+ bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ NULL, &kprobe_opts);
+ ASSERT_ERR_PTR(skel->links.handle_kprobe, "attach_kprobe_legacy_by_addr");
+ ASSERT_EQ(libbpf_get_error(skel->links.handle_kprobe),
+ -EOPNOTSUPP, "attach_kprobe_legacy_by_addr_err");
+
+ test_attach_probe_manual__destroy(skel);
+}
+
+/*
+ * bpf_fentry_shadow_test exists in both vmlinux (net/bpf/test_run.c) and
+ * bpf_testmod (bpf_testmod.c). When bpf_testmod is loaded the symbol is
+ * duplicated. Test that kprobe attachment handles this correctly:
+ * - Unqualified name ("bpf_fentry_shadow_test") attaches to vmlinux.
+ * - MOD:SYM name ("bpf_testmod:bpf_fentry_shadow_test") attaches to module.
+ *
+ * Note: bpf_fentry_shadow_test is not invoked via test_run, so we only
+ * verify that attach and detach succeed without triggering the probe.
+ */
+static void test_attach_probe_dup_sym(enum probe_attach_mode attach_mode)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
+ struct bpf_link *kprobe_link, *kretprobe_link;
+ struct test_attach_probe_manual *skel;
+
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_dup_sym_open_and_load"))
+ return;
+
+ kprobe_opts.attach_mode = attach_mode;
+
+ /* Unqualified: should attach to vmlinux symbol */
+ kprobe_opts.retprobe = false;
+ kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ "bpf_fentry_shadow_test",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe_vmlinux"))
+ goto cleanup;
+ bpf_link__destroy(kprobe_link);
+
+ kprobe_opts.retprobe = true;
+ kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ "bpf_fentry_shadow_test",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe_vmlinux"))
+ goto cleanup;
+ bpf_link__destroy(kretprobe_link);
+
+ /* MOD:SYM qualified: should attach to module symbol */
+ kprobe_opts.retprobe = false;
+ kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ "bpf_testmod:bpf_fentry_shadow_test",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe_module"))
+ goto cleanup;
+ bpf_link__destroy(kprobe_link);
+
+ kprobe_opts.retprobe = true;
+ kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ "bpf_testmod:bpf_fentry_shadow_test",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe_module"))
+ goto cleanup;
+ bpf_link__destroy(kretprobe_link);
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
/* attach uprobe/uretprobe long event name testings */
static void test_attach_uprobe_long_event_name(void)
{
@@ -220,11 +354,73 @@ static void test_attach_kprobe_write_ctx(void)
kprobe_write_ctx__destroy(skel);
}
+
+static void test_freplace_kprobe_write_ctx(void)
+{
+ struct bpf_program *prog_kprobe, *prog_ext, *prog_fentry;
+ struct kprobe_write_ctx *skel_kprobe, *skel_ext = NULL;
+ struct bpf_link *link_kprobe = NULL, *link_ext = NULL;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ skel_kprobe = kprobe_write_ctx__open();
+ if (!ASSERT_OK_PTR(skel_kprobe, "kprobe_write_ctx__open kprobe"))
+ return;
+
+ prog_kprobe = skel_kprobe->progs.kprobe_dummy;
+ bpf_program__set_autoload(prog_kprobe, true);
+
+ prog_fentry = skel_kprobe->progs.fentry;
+ bpf_program__set_autoload(prog_fentry, true);
+
+ err = kprobe_write_ctx__load(skel_kprobe);
+ if (!ASSERT_OK(err, "kprobe_write_ctx__load kprobe"))
+ goto out;
+
+ skel_ext = kprobe_write_ctx__open();
+ if (!ASSERT_OK_PTR(skel_ext, "kprobe_write_ctx__open ext"))
+ goto out;
+
+ prog_ext = skel_ext->progs.freplace_kprobe;
+ bpf_program__set_autoload(prog_ext, true);
+
+ prog_fd = bpf_program__fd(skel_kprobe->progs.kprobe_write_ctx);
+ bpf_program__set_attach_target(prog_ext, prog_fd, "kprobe_write_ctx");
+
+ err = kprobe_write_ctx__load(skel_ext);
+ if (!ASSERT_OK(err, "kprobe_write_ctx__load ext"))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog_kprobe);
+ link_ext = bpf_program__attach_freplace(prog_ext, prog_fd, "kprobe_dummy");
+ ASSERT_ERR_PTR(link_ext, "bpf_program__attach_freplace link");
+ ASSERT_EQ(libbpf_get_error(link_ext), -EINVAL, "bpf_program__attach_freplace error");
+
+ link_kprobe = bpf_program__attach_kprobe_opts(prog_kprobe, "bpf_fentry_test1",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(link_kprobe, "bpf_program__attach_kprobe_opts"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog_fentry), &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+out:
+ bpf_link__destroy(link_ext);
+ bpf_link__destroy(link_kprobe);
+ kprobe_write_ctx__destroy(skel_ext);
+ kprobe_write_ctx__destroy(skel_kprobe);
+}
#else
static void test_attach_kprobe_write_ctx(void)
{
test__skip();
}
+
+static void test_freplace_kprobe_write_ctx(void)
+{
+ test__skip();
+}
#endif
static void test_attach_probe_auto(struct test_attach_probe *skel)
@@ -416,6 +612,21 @@ void test_attach_probe(void)
test_attach_probe_manual(PROBE_ATTACH_MODE_PERF);
if (test__start_subtest("manual-link"))
test_attach_probe_manual(PROBE_ATTACH_MODE_LINK);
+ if (test__start_subtest("kprobe-perf-by-addr"))
+ test_attach_kprobe_by_addr(PROBE_ATTACH_MODE_PERF);
+ if (test__start_subtest("kprobe-link-by-addr"))
+ test_attach_kprobe_by_addr(PROBE_ATTACH_MODE_LINK);
+ if (test__start_subtest("kprobe-legacy-by-addr-reject"))
+ test_attach_kprobe_legacy_by_addr_reject();
+
+ if (test__start_subtest("dup-sym-default"))
+ test_attach_probe_dup_sym(PROBE_ATTACH_MODE_DEFAULT);
+ if (test__start_subtest("dup-sym-legacy"))
+ test_attach_probe_dup_sym(PROBE_ATTACH_MODE_LEGACY);
+ if (test__start_subtest("dup-sym-perf"))
+ test_attach_probe_dup_sym(PROBE_ATTACH_MODE_PERF);
+ if (test__start_subtest("dup-sym-link"))
+ test_attach_probe_dup_sym(PROBE_ATTACH_MODE_LINK);
if (test__start_subtest("auto"))
test_attach_probe_auto(skel);
@@ -434,6 +645,8 @@ void test_attach_probe(void)
test_attach_kprobe_long_event_name();
if (test__start_subtest("kprobe-write-ctx"))
test_attach_kprobe_write_ctx();
+ if (test__start_subtest("freplace-kprobe-write-ctx"))
+ test_freplace_kprobe_write_ctx();
cleanup:
test_attach_probe__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 75f4dff7d042..35adc3f6d443 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -6,6 +6,7 @@
#include <sys/syscall.h>
#include <sys/mman.h>
#include <unistd.h>
+#include <linux/compiler.h>
#include <test_progs.h>
#include <network_helpers.h>
#include <bpf/btf.h>
@@ -105,6 +106,11 @@ static void kprobe_multi_link_api_subtest(void)
unsigned long long addrs[8];
__u64 cookies[8];
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
goto cleanup;
@@ -192,6 +198,11 @@ static void kprobe_multi_attach_api_subtest(void)
};
__u64 cookies[8];
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
skel = kprobe_multi__open_and_load();
if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
goto cleanup;
@@ -421,11 +432,12 @@ cleanup:
bpf_link__destroy(link3);
}
-static void burn_cpu(void)
+static void burn_cpu(long loops)
{
- volatile int j = 0;
+ long j = 0;
cpu_set_t cpu_set;
- int i, err;
+ long i;
+ int err;
/* generate some branches on cpu 0 */
CPU_ZERO(&cpu_set);
@@ -433,9 +445,10 @@ static void burn_cpu(void)
err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
ASSERT_OK(err, "set_thread_affinity");
- /* spin the loop for a while (random high number) */
- for (i = 0; i < 1000000; ++i)
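+	/*
+	 * j is no longer volatile; barrier() (a compiler barrier from
+	 * linux/compiler.h) keeps the loop from being optimized away while
+	 * still letting it run at full speed.
+	 */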
+ for (i = 0; i < loops; ++i) {
++j;
+ barrier();
+ }
}
static void pe_subtest(struct test_bpf_cookie *skel)
@@ -451,7 +464,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
attr.type = PERF_TYPE_SOFTWARE;
attr.config = PERF_COUNT_SW_CPU_CLOCK;
attr.sample_period = 100000;
- pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
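+	/* pid = 0, cpu = -1: profile this process on any CPU rather than
+	 * all processes on CPU 0.
+	 */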
+ pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
if (!ASSERT_GE(pfd, 0, "perf_fd"))
goto cleanup;
@@ -460,7 +473,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
if (!ASSERT_OK_PTR(link, "link1"))
goto cleanup;
- burn_cpu(); /* trigger BPF prog */
+ burn_cpu(100000000L); /* trigger BPF prog */
ASSERT_EQ(skel->bss->pe_res, 0x100000, "pe_res1");
@@ -479,7 +492,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
if (!ASSERT_OK_PTR(link, "link2"))
goto cleanup;
- burn_cpu(); /* trigger BPF prog */
+ burn_cpu(100000000L); /* trigger BPF prog */
ASSERT_EQ(skel->bss->pe_res, 0x200000, "pe_res2");
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
index 75b0cf2467ab..73dc63882b7d 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
@@ -317,7 +317,7 @@ static void check_ldimm64_off_load(struct bpf_gotox *skel __always_unused)
static int __check_ldimm64_gotox_prog_load(struct bpf_insn *insns,
__u32 insn_cnt,
- __u32 off1, __u32 off2)
+ int off1, int off2, int off3)
{
const __u32 values[] = {5, 7, 9, 11, 13, 15};
const __u32 max_entries = ARRAY_SIZE(values);
@@ -349,16 +349,46 @@ static int __check_ldimm64_gotox_prog_load(struct bpf_insn *insns,
/* r1 += off2 */
insns[2].imm = off2;
+ /* r1 = *(r1 + off3) */
+ insns[3].off = off3;
+
ret = prog_load(insns, insn_cnt);
close(map_fd);
return ret;
}
-static void reject_offsets(struct bpf_insn *insns, __u32 insn_cnt, __u32 off1, __u32 off2)
+static void
+allow_offsets(struct bpf_insn *insns, __u32 insn_cnt, int off1, int off2, int off3)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int prog_fd, err;
+ char s[128] = "";
+
+ prog_fd = __check_ldimm64_gotox_prog_load(insns, insn_cnt, off1, off2, off3);
+ snprintf(s, sizeof(s), "__check_ldimm64_gotox_prog_load(%d,%d,%d)", off1, off2, off3);
+ if (!ASSERT_GE(prog_fd, 0, s))
+ return;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err")) {
+ close(prog_fd);
+ return;
+ }
+
+ if (!ASSERT_EQ(topts.retval, (off1 + off2 + off3) / 8, "test_run_opts retval")) {
+ close(prog_fd);
+ return;
+ }
+
+ close(prog_fd);
+}
+
+static void
+reject_offsets(struct bpf_insn *insns, __u32 insn_cnt, int off1, int off2, int off3)
{
int prog_fd;
- prog_fd = __check_ldimm64_gotox_prog_load(insns, insn_cnt, off1, off2);
+ prog_fd = __check_ldimm64_gotox_prog_load(insns, insn_cnt, off1, off2, off3);
if (!ASSERT_EQ(prog_fd, -EACCES, "__check_ldimm64_gotox_prog_load"))
close(prog_fd);
}
@@ -376,7 +406,7 @@ static void check_ldimm64_off_gotox(struct bpf_gotox *skel __always_unused)
* The program rewrites the offsets in the instructions below:
* r1 = &map + offset1
* r1 += offset2
- * r1 = *r1
+ * r1 = *(r1 + offset3)
* gotox r1
*/
BPF_LD_IMM64_RAW(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 0),
@@ -403,43 +433,55 @@ static void check_ldimm64_off_gotox(struct bpf_gotox *skel __always_unused)
BPF_MOV64_IMM(BPF_REG_0, 5),
BPF_EXIT_INSN(),
};
- int prog_fd, err;
- __u32 off1, off2;
-
- /* allow all combinations off1 + off2 < 6 */
- for (off1 = 0; off1 < 6; off1++) {
- for (off2 = 0; off1 + off2 < 6; off2++) {
- LIBBPF_OPTS(bpf_test_run_opts, topts);
-
- prog_fd = __check_ldimm64_gotox_prog_load(insns, ARRAY_SIZE(insns),
- off1 * 8, off2 * 8);
- if (!ASSERT_GE(prog_fd, 0, "__check_ldimm64_gotox_prog_load"))
- return;
-
- err = bpf_prog_test_run_opts(prog_fd, &topts);
- if (!ASSERT_OK(err, "test_run_opts err")) {
- close(prog_fd);
- return;
- }
-
- if (!ASSERT_EQ(topts.retval, off1 + off2, "test_run_opts retval")) {
- close(prog_fd);
- return;
- }
-
- close(prog_fd);
- }
- }
+ int off1, off2, off3;
+
+ /* allow all combinations off1 + off2 + off3 < 6 */
+ for (off1 = 0; off1 < 6; off1++)
+ for (off2 = 0; off1 + off2 < 6; off2++)
+ for (off3 = 0; off1 + off2 + off3 < 6; off3++)
+ allow_offsets(insns, ARRAY_SIZE(insns),
+ off1 * 8, off2 * 8, off3 * 8);
+
+ /* allow for some offsets to be negative */
+ allow_offsets(insns, ARRAY_SIZE(insns), 8 * 3, 0, -(8 * 3));
+ allow_offsets(insns, ARRAY_SIZE(insns), 8 * 3, -(8 * 3), 0);
+ allow_offsets(insns, ARRAY_SIZE(insns), 0, 8 * 3, -(8 * 3));
+ allow_offsets(insns, ARRAY_SIZE(insns), 8 * 4, 0, -(8 * 2));
+ allow_offsets(insns, ARRAY_SIZE(insns), 8 * 4, -(8 * 2), 0);
+ allow_offsets(insns, ARRAY_SIZE(insns), 0, 8 * 4, -(8 * 2));
+
+ /* disallow negative sums of offsets */
+ reject_offsets(insns, ARRAY_SIZE(insns), 8 * 3, 0, -(8 * 4));
+ reject_offsets(insns, ARRAY_SIZE(insns), 8 * 3, -(8 * 4), 0);
+ reject_offsets(insns, ARRAY_SIZE(insns), 0, 8 * 3, -(8 * 4));
+
+ /* disallow the off1 to be negative in any case */
+ reject_offsets(insns, ARRAY_SIZE(insns), -8 * 1, 0, 0);
+ reject_offsets(insns, ARRAY_SIZE(insns), -8 * 1, 8 * 1, 0);
+ reject_offsets(insns, ARRAY_SIZE(insns), -8 * 1, 8 * 1, 8 * 1);
+
+ /* reject off1 + off2 + off3 >= 6 */
+ reject_offsets(insns, ARRAY_SIZE(insns), 8 * 3, 8 * 3, 8 * 0);
+ reject_offsets(insns, ARRAY_SIZE(insns), 8 * 7, 8 * 0, 8 * 0);
+ reject_offsets(insns, ARRAY_SIZE(insns), 8 * 0, 8 * 7, 8 * 0);
+ reject_offsets(insns, ARRAY_SIZE(insns), 8 * 3, 8 * 0, 8 * 3);
+ reject_offsets(insns, ARRAY_SIZE(insns), 8 * 0, 8 * 3, 8 * 3);
+
+ /* reject (off1 + off2) % 8 != 0, off3 % 8 != 0 */
+ reject_offsets(insns, ARRAY_SIZE(insns), 3, 3, 0);
+ reject_offsets(insns, ARRAY_SIZE(insns), 7, 0, 0);
+ reject_offsets(insns, ARRAY_SIZE(insns), 0, 7, 0);
+ reject_offsets(insns, ARRAY_SIZE(insns), 0, 0, 7);
+}
- /* reject off1 + off2 >= 6 */
- reject_offsets(insns, ARRAY_SIZE(insns), 8 * 3, 8 * 3);
- reject_offsets(insns, ARRAY_SIZE(insns), 8 * 7, 8 * 0);
- reject_offsets(insns, ARRAY_SIZE(insns), 8 * 0, 8 * 7);
+static void check_ldimm64_off_gotox_llvm(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4};
+ __u64 out[] = {1, 1, 5, 1, 1};
+ int i;
- /* reject (off1 + off2) % 8 != 0 */
- reject_offsets(insns, ARRAY_SIZE(insns), 3, 3);
- reject_offsets(insns, ARRAY_SIZE(insns), 7, 0);
- reject_offsets(insns, ARRAY_SIZE(insns), 0, 7);
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.load_with_nonzero_offset, in[i], out[i]);
}
void test_bpf_gotox(void)
@@ -496,5 +538,8 @@ void test_bpf_gotox(void)
if (test__start_subtest("check-ldimm64-off-gotox"))
__subtest(skel, check_ldimm64_off_gotox);
+ if (test__start_subtest("check-ldimm64-off-gotox-llvm"))
+ __subtest(skel, check_ldimm64_off_gotox_llvm);
+
bpf_gotox__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c
index 482d38b9c29e..0222a9a5d076 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c
@@ -3,7 +3,7 @@
#include <bpf/bpf.h>
#include <test_progs.h>
-#if defined(__x86_64__) || defined(__powerpc__)
+#if defined(__x86_64__) || defined(__powerpc__) || defined(__aarch64__)
static int map_create(__u32 map_type, __u32 max_entries)
{
const char *map_name = "insn_array";
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_kind.c b/tools/testing/selftests/bpf/prog_tests/btf_kind.c
new file mode 100644
index 000000000000..f61afe6a79a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_kind.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+
+/* Verify kind encoding exists for each kind */
+static void test_btf_kind_encoding(void)
+{
+ LIBBPF_OPTS(btf_new_opts, opts);
+ const struct btf_header *hdr;
+ const void *raw_btf;
+ struct btf *btf;
+ __u32 raw_size;
+
+ opts.add_layout = true;
+ btf = btf__new_empty_opts(&opts);
+ if (!ASSERT_OK_PTR(btf, "btf_new"))
+ return;
+
+ raw_btf = btf__raw_data(btf, &raw_size);
+ if (!ASSERT_OK_PTR(raw_btf, "btf__raw_data"))
+ return;
+
+ hdr = raw_btf;
+
+ ASSERT_EQ(hdr->layout_off % 4, 0, "layout_aligned");
+ ASSERT_EQ(hdr->layout_len, sizeof(struct btf_layout) * NR_BTF_KINDS,
+ "layout_len");
+ ASSERT_EQ(hdr->str_off, hdr->layout_off + hdr->layout_len, "str_after_layout");
+ btf__free(btf);
+
+ opts.add_layout = false;
+ btf = btf__new_empty_opts(&opts);
+ if (!ASSERT_OK_PTR(btf, "btf_new"))
+ return;
+
+ raw_btf = btf__raw_data(btf, &raw_size);
+ if (!ASSERT_OK_PTR(raw_btf, "btf__raw_data"))
+ return;
+
+ hdr = raw_btf;
+
+ ASSERT_EQ(hdr->layout_off, 0, "no_layout_off");
+ ASSERT_EQ(hdr->layout_len, 0, "no_layout_len");
+ ASSERT_EQ(hdr->str_off, hdr->type_off + hdr->type_len, "strs_after_types");
+ btf__free(btf);
+}
+
+static int write_raw_btf(void *raw_btf, size_t raw_size, char *file)
+{
+ int fd = mkstemp(file);
+ ssize_t n;
+
+ if (!ASSERT_OK_FD(fd, "open_raw_btf"))
+ return -1;
+ n = write(fd, raw_btf, raw_size);
+ close(fd);
+ if (!ASSERT_EQ(n, (ssize_t)raw_size, "write_raw_btf"))
+ return -1;
+ return 0;
+}
+
+/*
+ * Fabricate an unrecognized kind at BTF_KIND_MAX + 1, add a layout
+ * description telling the parser how large its records are, and verify
+ * that BTF containing the unrecognized kind can still be parsed.
+ */
+static void test_btf_kind_decoding(void)
+{
+ char btf_kind_file1[] = "/tmp/test_btf_kind.XXXXXX";
+ char btf_kind_file2[] = "/tmp/test_btf_kind.XXXXXX";
+ char btf_kind_file3[] = "/tmp/test_btf_kind.XXXXXX";
+ struct btf *btf = NULL, *new_btf = NULL;
+ __s32 int_id, unrec_id, id, id2;
+ LIBBPF_OPTS(btf_new_opts, opts);
+ struct btf_layout *l;
+ struct btf_header *hdr;
+ const void *raw_btf;
+ struct btf_type *t;
+ void *new_raw_btf;
+ void *str_data;
+ __u32 raw_size;
+
+ opts.add_layout = true;
+ btf = btf__new_empty_opts(&opts);
+ if (!ASSERT_OK_PTR(btf, "btf_new"))
+ return;
+
+ int_id = btf__add_int(btf, "test_char", 1, BTF_INT_CHAR);
+ if (!ASSERT_GT(int_id, 0, "add_int_id"))
+ return;
+
+	/*
+	 * Create our type with an unrecognized kind by adding a typedef;
+	 * we will overwrite its kind with the unrecognized kind value below.
+	 */
+ unrec_id = btf__add_typedef(btf, "unrec_kind", int_id);
+ if (!ASSERT_GT(unrec_id, 0, "add_unrec_id"))
+ return;
+
+ /*
+ * Add an id after it that we will look up to verify we can parse
+ * beyond unrecognized kinds.
+ */
+ id = btf__add_typedef(btf, "test_lookup", int_id);
+ if (!ASSERT_GT(id, 0, "add_test_lookup_id"))
+ return;
+ id2 = btf__add_typedef(btf, "test_lookup2", int_id);
+ if (!ASSERT_GT(id2, 0, "add_test_lookup_id2"))
+ return;
+
+ raw_btf = (void *)btf__raw_data(btf, &raw_size);
+ if (!ASSERT_OK_PTR(raw_btf, "btf__raw_data"))
+ return;
+
+ new_raw_btf = calloc(1, raw_size + sizeof(*l));
+ if (!ASSERT_OK_PTR(new_raw_btf, "calloc_raw_btf"))
+ return;
+ memcpy(new_raw_btf, raw_btf, raw_size);
+
+ hdr = new_raw_btf;
+
+ /* Move strings to make space for one new layout description */
+ raw_size += sizeof(*l);
+ str_data = new_raw_btf + hdr->hdr_len + hdr->str_off;
+ memmove(str_data + sizeof(*l), str_data, hdr->str_len);
+ hdr->str_off += sizeof(*l);
+
+ /* Add new layout description */
+ hdr->layout_len += sizeof(*l);
+ l = new_raw_btf + hdr->hdr_len + hdr->layout_off;
+ l[NR_BTF_KINDS].info_sz = 0;
+ l[NR_BTF_KINDS].elem_sz = 0;
+ l[NR_BTF_KINDS].flags = 0;
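+	/*
+	 * The raw image is now: btf_header | types | layout[NR_BTF_KINDS + 1]
+	 * | strings, with the new all-zero entry describing the kind we are
+	 * about to fabricate below.
+	 */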
+
+ /* Now modify typedef added above to be an unrecognized kind. */
+ t = (void *)hdr + hdr->hdr_len + hdr->type_off + sizeof(struct btf_type) +
+ sizeof(__u32);
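+	/* btf_type.info packs vlen in bits 0-15, the kind in bits 24-28 and
+	 * kind_flag in bit 31, so this sets kind = NR_BTF_KINDS (one past
+	 * BTF_KIND_MAX) with vlen = 0.
+	 */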
+ t->info = (NR_BTF_KINDS << 24);
+
+ /* Write BTF to a raw file, ready for parsing. */
+ if (write_raw_btf(new_raw_btf, raw_size, btf_kind_file1))
+ goto out;
+
+ /*
+ * Verify parsing succeeds, and that we can read type info past
+ * the unrecognized kind.
+ */
+ new_btf = btf__parse_raw(btf_kind_file1);
+ if (ASSERT_OK_PTR(new_btf, "btf__parse_raw")) {
+ ASSERT_EQ(btf__find_by_name(new_btf, "unrec_kind"), unrec_id,
+ "unrec_kind_found");
+ ASSERT_EQ(btf__find_by_name_kind(new_btf, "test_lookup",
+ BTF_KIND_TYPEDEF), id,
+ "verify_id_lookup");
+ ASSERT_EQ(btf__find_by_name_kind(new_btf, "test_lookup2",
+ BTF_KIND_TYPEDEF), id2,
+ "verify_id2_lookup");
+ }
+ btf__free(new_btf);
+ new_btf = NULL;
+
+ /*
+ * Next, change info_sz to equal sizeof(struct btf_type); this means the
+ * "test_lookup" kind will be reinterpreted as a singular info element
+ * following the unrecognized kind.
+ */
+ l[NR_BTF_KINDS].info_sz = sizeof(struct btf_type);
+ if (write_raw_btf(new_raw_btf, raw_size, btf_kind_file2))
+ goto out;
+
+ new_btf = btf__parse_raw(btf_kind_file2);
+ if (ASSERT_OK_PTR(new_btf, "btf__parse_raw")) {
+ ASSERT_EQ(btf__find_by_name_kind(new_btf, "test_lookup",
+ BTF_KIND_TYPEDEF), -ENOENT,
+ "verify_id_not_found");
+		/* id of "test_lookup2" will be id2 - 1 as we have removed one type */
+ ASSERT_EQ(btf__find_by_name_kind(new_btf, "test_lookup2",
+ BTF_KIND_TYPEDEF), id2 - 1,
+ "verify_id_lookup2");
+	}
+ btf__free(new_btf);
+ new_btf = NULL;
+
+ /*
+ * Change elem_sz to equal sizeof(struct btf_type) and set vlen
+ * associated with unrecognized type to 1; this allows us to verify
+ * vlen-specified BTF can still be parsed.
+ */
+ l[NR_BTF_KINDS].info_sz = 0;
+ l[NR_BTF_KINDS].elem_sz = sizeof(struct btf_type);
+ t->info |= 1;
+ if (write_raw_btf(new_raw_btf, raw_size, btf_kind_file3))
+ goto out;
+
+ new_btf = btf__parse_raw(btf_kind_file3);
+ if (ASSERT_OK_PTR(new_btf, "btf__parse_raw")) {
+ ASSERT_EQ(btf__find_by_name_kind(new_btf, "test_lookup",
+ BTF_KIND_TYPEDEF), -ENOENT,
+ "verify_id_not_found");
+		/* id of "test_lookup2" will be id2 - 1 as we have removed one type */
+ ASSERT_EQ(btf__find_by_name_kind(new_btf, "test_lookup2",
+ BTF_KIND_TYPEDEF), id2 - 1,
+ "verify_id_lookup2");
+	}
+out:
+ btf__free(new_btf);
+ free(new_raw_btf);
+ unlink(btf_kind_file1);
+ unlink(btf_kind_file2);
+ unlink(btf_kind_file3);
+ btf__free(btf);
+}
+
+void test_btf_kind(void)
+{
+ if (test__start_subtest("btf_kind_encoding"))
+ test_btf_kind_encoding();
+ if (test__start_subtest("btf_kind_decoding"))
+ test_btf_kind_decoding();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_sanitize.c b/tools/testing/selftests/bpf/prog_tests/btf_sanitize.c
new file mode 100644
index 000000000000..652b51efafc2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_sanitize.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026, Oracle and/or its affiliates. */
+#include <test_progs.h>
+#include <linux/btf.h>
+#include "bpf/libbpf_internal.h"
+#include "../test_btf.h"
+#include "kfree_skb.skel.h"
+
+#define TYPE_LEN (sizeof(struct btf_type) + sizeof(__u32))
+#define MAX_NR_LAYOUT 2
+#define LAYOUT_LEN (sizeof(struct btf_layout) * MAX_NR_LAYOUT)
+#define STR_LEN sizeof("\0int")
+
+struct layout_btf {
+ struct btf_header hdr;
+ __u32 types[TYPE_LEN/sizeof(__u32)];
+ struct btf_layout layout[MAX_NR_LAYOUT];
+ char strs[STR_LEN];
+};
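+
+/*
+ * Section offsets in btf_header are relative to the end of the header,
+ * so type_off = 0 places the types first, the layout array at TYPE_LEN
+ * and the strings at TYPE_LEN + LAYOUT_LEN, matching layout_btf below.
+ */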
+
+static const struct layout_btf layout_btf = {
+ .hdr = {
+ .magic = BTF_MAGIC,
+ .version = BTF_VERSION,
+ .hdr_len = sizeof(struct btf_header),
+ .type_off = 0,
+ .type_len = TYPE_LEN,
+ .str_off = TYPE_LEN + LAYOUT_LEN,
+ .str_len = STR_LEN,
+ .layout_off = TYPE_LEN,
+ .layout_len = LAYOUT_LEN,
+ },
+ .types = {
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+ },
+ .layout = {
+ { .info_sz = 0, .elem_sz = 0, .flags = 0 },
+ { .info_sz = sizeof(__u32), .elem_sz = 0, .flags = 0 },
+ },
+ .strs = "\0int",
+};
+
+void test_btf_sanitize_layout(void)
+{
+ struct btf *orig = NULL, *sanitized = NULL;
+ struct kern_feature_cache *cache = NULL;
+ struct kfree_skb *skel = NULL;
+ const struct btf_header *hdr;
+ const void *raw;
+ __u32 raw_sz;
+
+ skel = kfree_skb__open();
+ if (!ASSERT_OK_PTR(skel, "kfree_skb_skel"))
+ return;
+ orig = btf__new(&layout_btf, sizeof(layout_btf));
+ if (!ASSERT_OK_PTR(orig, "btf_new_layout"))
+ goto out;
+ raw = btf__raw_data(orig, &raw_sz);
+ if (!ASSERT_OK_PTR(raw, "btf__raw_data_orig"))
+ goto out;
+ hdr = (struct btf_header *)raw;
+ ASSERT_EQ(hdr->layout_off, TYPE_LEN, "layout_off_nonzero");
+ ASSERT_EQ(hdr->layout_len, LAYOUT_LEN, "layout_len_nonzero");
+
+ cache = calloc(1, sizeof(*cache));
+ if (!ASSERT_OK_PTR(cache, "alloc_feat_cache"))
+ goto out;
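+	/* Simulate a kernel that supports every feature except BTF layout,
+	 * forcing bpf_object__sanitize_btf() to strip the layout section.
+	 */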
+ for (int i = 0; i < __FEAT_CNT; i++)
+ cache->res[i] = FEAT_SUPPORTED;
+ cache->res[FEAT_BTF_LAYOUT] = FEAT_MISSING;
+
+ bpf_object_set_feat_cache(skel->obj, cache);
+
+ if (!ASSERT_FALSE(kernel_supports(skel->obj, FEAT_BTF_LAYOUT), "layout_feature_missing"))
+ goto out;
+ if (!ASSERT_TRUE(kernel_supports(skel->obj, FEAT_BTF_FUNC), "other_feature_allowed"))
+ goto out;
+
+ sanitized = bpf_object__sanitize_btf(skel->obj, orig);
+ if (!ASSERT_OK_PTR(sanitized, "bpf_object__sanitize_btf"))
+ goto out;
+
+ raw = btf__raw_data(sanitized, &raw_sz);
+ if (!ASSERT_OK_PTR(raw, "btf__raw_data_sanitized"))
+ goto out;
+ hdr = (struct btf_header *)raw;
+ ASSERT_EQ(hdr->layout_off, 0, "layout_off_zero");
+ ASSERT_EQ(hdr->layout_len, 0, "layout_len_zero");
+ ASSERT_EQ(hdr->str_off, TYPE_LEN, "strs_after_types");
+ ASSERT_EQ(hdr->str_len, STR_LEN, "strs_len_unchanged");
+ ASSERT_EQ(raw_sz, hdr->hdr_len + hdr->type_len + hdr->str_len, "btf_raw_sz_reduced");
+out:
+ /* This will free the cache we allocated above */
+ kfree_skb__destroy(skel);
+ btf__free(sanitized);
+ btf__free(orig);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
index 6e36de1302fc..5c84723cf254 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_write.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -497,10 +497,121 @@ cleanup:
btf__free(btf2);
}
+static void test_btf_add_btf_split()
+{
+ struct btf *base = NULL, *split1 = NULL, *split2 = NULL;
+ struct btf *combined = NULL;
+ int id, err;
+
+ /* Create a base BTF with an INT and a PTR to it */
+ base = btf__new_empty();
+ if (!ASSERT_OK_PTR(base, "base"))
+ return;
+
+ id = btf__add_int(base, "int", 4, BTF_INT_SIGNED);
+ ASSERT_EQ(id, 1, "base_int_id");
+ id = btf__add_ptr(base, 1);
+ ASSERT_EQ(id, 2, "base_ptr_id");
+
+ /* base has 2 types, type IDs 1..2 */
+ ASSERT_EQ(btf__type_cnt(base), 3, "base_type_cnt");
+
+ /* Create split1 on base: a STRUCT referencing base's int (ID 1) */
+ split1 = btf__new_empty_split(base);
+ if (!ASSERT_OK_PTR(split1, "split1"))
+ goto cleanup;
+
+ id = btf__add_struct(split1, "s1", 4);
+ /* split types start at base_type_cnt = 3 */
+ ASSERT_EQ(id, 3, "split1_struct_id");
+ btf__add_field(split1, "x", 1, 0, 0); /* refers to base int */
+
+ id = btf__add_ptr(split1, 3);
+ ASSERT_EQ(id, 4, "split1_ptr_id"); /* ptr to the struct (split self-ref) */
+
+ /* Add a typedef "int_alias" -> base int in split1, which will be
+ * duplicated in split2 to test that btf__dedup() merges them.
+ */
+ id = btf__add_typedef(split1, "int_alias", 1);
+ ASSERT_EQ(id, 5, "split1_typedef_id");
+
+ /* Create split2 on base: a TYPEDEF referencing base's ptr (ID 2) */
+ split2 = btf__new_empty_split(base);
+ if (!ASSERT_OK_PTR(split2, "split2"))
+ goto cleanup;
+
+ id = btf__add_typedef(split2, "int_ptr", 2); /* refers to base ptr */
+ ASSERT_EQ(id, 3, "split2_typedef_id");
+
+ id = btf__add_struct(split2, "s2", 8);
+ ASSERT_EQ(id, 4, "split2_struct_id");
+ btf__add_field(split2, "p", 3, 0, 0); /* refers to split2's own typedef */
+
+ /* Same "int_alias" typedef as split1 - should be deduped away */
+ id = btf__add_typedef(split2, "int_alias", 1);
+ ASSERT_EQ(id, 5, "split2_dup_typedef_id");
+
+ /* Create combined split BTF on same base and merge both */
+ combined = btf__new_empty_split(base);
+ if (!ASSERT_OK_PTR(combined, "combined"))
+ goto cleanup;
+
+ /* Merge split1: its types (3,4,5) should land at IDs 3,4,5 */
+ id = btf__add_btf(combined, split1);
+ if (!ASSERT_GE(id, 0, "add_split1"))
+ goto cleanup;
+ ASSERT_EQ(id, 3, "split1_first_id");
+
+ /* Merge split2: its types (3,4,5) should be remapped to 6,7,8 */
+ id = btf__add_btf(combined, split2);
+ if (!ASSERT_GE(id, 0, "add_split2"))
+ goto cleanup;
+ ASSERT_EQ(id, 6, "split2_first_id");
+
+ /* Before dedup: base (2) + split1 (3) + split2 (3) = 8 types + void */
+ ASSERT_EQ(btf__type_cnt(combined), 9, "pre_dedup_type_cnt");
+
+ VALIDATE_RAW_BTF(
+ combined,
+ /* base types (IDs 1-2) */
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+
+ /* split1 types (IDs 3-5): base refs unchanged */
+ "[3] STRUCT 's1' size=4 vlen=1\n"
+ "\t'x' type_id=1 bits_offset=0", /* refers to base int=1 */
+ "[4] PTR '(anon)' type_id=3", /* refers to split1's struct=3 */
+ "[5] TYPEDEF 'int_alias' type_id=1", /* refers to base int=1 */
+
+ /* split2 types (IDs 6-8): remapped from 3,4,5 to 6,7,8 */
+ "[6] TYPEDEF 'int_ptr' type_id=2", /* base ptr=2, unchanged */
+ "[7] STRUCT 's2' size=8 vlen=1\n"
+ "\t'p' type_id=6 bits_offset=0", /* split2 typedef: 3->6 */
+ "[8] TYPEDEF 'int_alias' type_id=1"); /* dup of [5] */
+
+ /* Dedup to mirror the bpftool merge flow; should remove the
+ * duplicate "int_alias" typedef.
+ */
+ err = btf__dedup(combined, NULL);
+ if (!ASSERT_OK(err, "dedup"))
+ goto cleanup;
+
+ /* After dedup: one int_alias removed, so 7 types + void */
+ ASSERT_EQ(btf__type_cnt(combined), 8, "dedup_type_cnt");
+
+cleanup:
+ btf__free(combined);
+ btf__free(split2);
+ btf__free(split1);
+ btf__free(base);
+}
+
void test_btf_write()
{
if (test__start_subtest("btf_add"))
test_btf_add();
if (test__start_subtest("btf_add_btf"))
test_btf_add_btf();
+ if (test__start_subtest("btf_add_btf_split"))
+ test_btf_add_btf_split();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c
index a5afd16705f0..b7c18d590b99 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c
@@ -107,10 +107,10 @@ static void test_shmem(struct bpf_link *link, struct memcg_query *memcg_query)
/*
* Increase memcg shmem usage by creating and writing
- * to a shmem object.
+ * to a memfd backed by shmem/tmpfs.
*/
- fd = shm_open("/tmp_shmem", O_CREAT | O_RDWR, 0644);
- if (!ASSERT_OK_FD(fd, "shm_open"))
+ fd = memfd_create("tmp_shmem", 0);
+ if (!ASSERT_OK_FD(fd, "memfd_create"))
return;
if (!ASSERT_OK(fallocate(fd, 0, 0, len), "fallocate"))
@@ -123,33 +123,6 @@ static void test_shmem(struct bpf_link *link, struct memcg_query *memcg_query)
cleanup:
close(fd);
- shm_unlink("/tmp_shmem");
-}
-
-#define NR_PIPES 64
-static void test_kmem(struct bpf_link *link, struct memcg_query *memcg_query)
-{
- int fds[NR_PIPES][2], i;
-
- /*
- * Increase kmem value by creating pipes which will allocate some
- * kernel buffers.
- */
- for (i = 0; i < NR_PIPES; i++) {
- if (!ASSERT_OK(pipe(fds[i]), "pipe"))
- goto cleanup;
- }
-
- if (!ASSERT_OK(read_stats(link), "read stats"))
- goto cleanup;
-
- ASSERT_GT(memcg_query->memcg_kmem, 0, "kmem value");
-
-cleanup:
- for (i = i - 1; i >= 0; i--) {
- close(fds[i][0]);
- close(fds[i][1]);
- }
}
static void test_pgfault(struct bpf_link *link, struct memcg_query *memcg_query)
@@ -209,8 +182,6 @@ void test_cgroup_iter_memcg(void)
test_shmem(link, &skel->data_query->memcg_query);
if (test__start_subtest("cgroup_iter_memcg__file"))
test_file(link, &skel->data_query->memcg_query);
- if (test__start_subtest("cgroup_iter_memcg__kmem"))
- test_kmem(link, &skel->data_query->memcg_query);
if (test__start_subtest("cgroup_iter_memcg__pgfault"))
test_pgfault(link, &skel->data_query->memcg_query);
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_storage.c b/tools/testing/selftests/bpf/prog_tests/cgroup_storage.c
index cf395715ced4..8dab655db342 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_storage.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+#include <sys/socket.h>
#include <test_progs.h>
#include "cgroup_helpers.h"
#include "network_helpers.h"
@@ -86,6 +88,11 @@ void test_cgroup_storage(void)
err = SYS_NOFAIL(PING_CMD);
ASSERT_OK(err, "sixth ping");
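+
+	/* With a single attached cgroup the storage map holds exactly one
+	 * entry, so iterating past the first key must fail with ENOENT.
+	 */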
+ err = bpf_map__get_next_key(skel->maps.cgroup_storage, &key, &key,
+ sizeof(key));
+ ASSERT_ERR(err, "bpf_map__get_next_key should fail");
+ ASSERT_EQ(errno, ENOENT, "no second key");
+
cleanup_progs:
cgroup_storage__destroy(skel);
cleanup_network:
@@ -94,3 +101,43 @@ cleanup_cgroup:
close(cgroup_fd);
cleanup_cgroup_environment();
}
+
+void test_cgroup_storage_oob(void)
+{
+ struct cgroup_storage *skel;
+ int cgroup_fd, sock_fd;
+
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+ if (!ASSERT_OK_FD(cgroup_fd, "create cgroup"))
+ return;
+
+ /* Load and attach BPF program */
+ skel = cgroup_storage__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_storage__open_and_load"))
+ goto cleanup_cgroup;
+
+ skel->links.trigger_oob = bpf_program__attach_cgroup(skel->progs.trigger_oob,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.trigger_oob, "attach_cgroup"))
+ goto cleanup_skel;
+
+ /* Create a socket to trigger cgroup/sock_create hook.
+ * This will execute our BPF program and trigger the OOB read
+ * if the bug is present (before the fix).
+ */
+ sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (!ASSERT_OK_FD(sock_fd, "create socket"))
+ goto cleanup_skel;
+
+ close(sock_fd);
+
+ /* If we reach here without a kernel panic or KASAN report,
+ * the test passes (the fix is working).
+ */
+
+cleanup_skel:
+ cgroup_storage__destroy(skel);
+cleanup_cgroup:
+ close(cgroup_fd);
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/clone_attach_btf_id.c b/tools/testing/selftests/bpf/prog_tests/clone_attach_btf_id.c
new file mode 100644
index 000000000000..1c3e28e74606
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/clone_attach_btf_id.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta */
+#include <test_progs.h>
+#include "clone_attach_btf_id.skel.h"
+
+/*
+ * Test that bpf_program__clone() respects caller-provided attach_btf_id
+ * override via bpf_prog_load_opts.
+ *
+ * The BPF program has SEC("fentry/bpf_fentry_test1"). Clone it twice
+ * from the same prepared object: first with no opts (callback resolves
+ * attach_btf_id from sec_name), then with attach_btf_id overridden to
+ * bpf_fentry_test2. Verify each loaded program's attach_btf_id via
+ * bpf_prog_get_info_by_fd().
+ */
+
+static int get_prog_attach_btf_id(int prog_fd)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int err;
+
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ if (err)
+ return err;
+ return info.attach_btf_id;
+}
+
+void test_clone_attach_btf_id(void)
+{
+ struct clone_attach_btf_id *skel;
+ int fd1 = -1, fd2 = -1, err;
+ int btf_id_test1, btf_id_test2;
+
+ btf_id_test1 = libbpf_find_vmlinux_btf_id("bpf_fentry_test1", BPF_TRACE_FENTRY);
+ if (!ASSERT_GT(btf_id_test1, 0, "find_btf_id_test1"))
+ return;
+
+ btf_id_test2 = libbpf_find_vmlinux_btf_id("bpf_fentry_test2", BPF_TRACE_FENTRY);
+ if (!ASSERT_GT(btf_id_test2, 0, "find_btf_id_test2"))
+ return;
+
+ skel = clone_attach_btf_id__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ err = bpf_object__prepare(skel->obj);
+ if (!ASSERT_OK(err, "obj_prepare"))
+ goto out;
+
+ /* Clone with no opts — callback resolves BTF from sec_name */
+ fd1 = bpf_program__clone(skel->progs.fentry_handler, NULL);
+ if (!ASSERT_GE(fd1, 0, "clone_default"))
+ goto out;
+ ASSERT_EQ(get_prog_attach_btf_id(fd1), btf_id_test1,
+ "attach_btf_id_default");
+
+ /*
+ * Clone with attach_btf_id override pointing to a different
+ * function. The BPF program never accesses arguments, so the
+ * load succeeds regardless of signature mismatch.
+ */
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .attach_btf_id = btf_id_test2,
+ );
+ fd2 = bpf_program__clone(skel->progs.fentry_handler, &opts);
+ if (!ASSERT_GE(fd2, 0, "clone_override"))
+ goto out;
+ ASSERT_EQ(get_prog_attach_btf_id(fd2), btf_id_test2,
+ "attach_btf_id_override");
+
+out:
+ if (fd1 >= 0)
+ close(fd1);
+ if (fd2 >= 0)
+ close(fd2);
+ clone_attach_btf_id__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
index 24d553109f8d..dfb7f6cf3ee1 100644
--- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
+++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
@@ -53,6 +53,9 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
__u16 expected_peer_port = 60000;
struct bpf_program *prog;
struct bpf_object *obj;
+ struct bpf_map *map;
+ __u16 *port_ptr;
+ size_t port_size;
const char *obj_file = v4 ? "connect_force_port4.bpf.o" : "connect_force_port6.bpf.o";
int fd, err;
__u32 duration = 0;
@@ -61,6 +64,21 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
if (!ASSERT_OK_PTR(obj, "bpf_obj_open"))
return -1;
+ map = bpf_object__find_map_by_name(obj, ".bss");
+ if (!ASSERT_OK_PTR(map, "find bss map")) {
+ err = -EIO;
+ goto close_bpf_object;
+ }
+
+ port_ptr = bpf_map__initial_value(map, &port_size);
+ if (!ASSERT_OK_PTR(port_ptr, "get bss initial value")) {
+ err = -EIO;
+ goto close_bpf_object;
+ }
+
+	/* Tell the BPF program which port the kernel auto-assigned to the server */
+ *port_ptr = ntohs(get_socket_local_port(server_fd));
+
err = bpf_object__load(obj);
if (!ASSERT_OK(err, "bpf_obj_load")) {
err = -EIO;
@@ -138,25 +156,25 @@ void test_connect_force_port(void)
if (CHECK_FAIL(cgroup_fd < 0))
return;
- server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 60123, 0);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM));
close(server_fd);
- server_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 60124, 0);
+ server_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM));
close(server_fd);
- server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 60123, 0);
+ server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM));
close(server_fd);
- server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 60124, 0);
+ server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 0, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM));
diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
index 438583e1f2d1..c9fcb70cbafb 100644
--- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
@@ -10,8 +10,8 @@ void test_empty_skb(void)
struct empty_skb *bpf_obj = NULL;
struct nstoken *tok = NULL;
struct bpf_program *prog;
+ struct ethhdr eth_hlen;
char eth_hlen_pp[15];
- char eth_hlen[14];
int veth_ifindex;
int ipip_ifindex;
int err;
@@ -25,7 +25,9 @@ void test_empty_skb(void)
int err;
int ret;
int lwt_egress_ret; /* expected retval at lwt/egress */
+ __be16 h_proto;
bool success_on_tc;
+ bool adjust_room;
} tests[] = {
/* Empty packets are always rejected. */
@@ -46,6 +48,28 @@ void test_empty_skb(void)
.err = -EINVAL,
},
+ /* ETH_HLEN-sized packets with IPv4/IPv6 EtherType but
+ * no L3 header are rejected.
+ */
+ {
+ .msg = "veth short IPv4 ingress packet",
+ .data_in = &eth_hlen,
+ .data_size_in = sizeof(eth_hlen),
+ .ifindex = &veth_ifindex,
+ .err = -EINVAL,
+ .h_proto = htons(ETH_P_IP),
+ .adjust_room = true,
+ },
+ {
+ .msg = "veth short IPv6 ingress packet",
+ .data_in = &eth_hlen,
+ .data_size_in = sizeof(eth_hlen),
+ .ifindex = &veth_ifindex,
+ .err = -EINVAL,
+ .h_proto = htons(ETH_P_IPV6),
+ .adjust_room = true,
+ },
+
/* ETH_HLEN-sized packets:
* - can not be redirected at LWT_XMIT
* - can be redirected at TC to non-tunneling dest
@@ -54,7 +78,7 @@ void test_empty_skb(void)
{
/* __bpf_redirect_common */
.msg = "veth ETH_HLEN packet ingress",
- .data_in = eth_hlen,
+ .data_in = &eth_hlen,
.data_size_in = sizeof(eth_hlen),
.ifindex = &veth_ifindex,
.ret = -ERANGE,
@@ -68,7 +92,7 @@ void test_empty_skb(void)
* tc: skb->len=14 <= skb_network_offset=14
*/
.msg = "ipip ETH_HLEN packet ingress",
- .data_in = eth_hlen,
+ .data_in = &eth_hlen,
.data_size_in = sizeof(eth_hlen),
.ifindex = &ipip_ifindex,
.ret = -ERANGE,
@@ -108,17 +132,27 @@ void test_empty_skb(void)
SYS(out, "ip addr add 192.168.1.1/16 dev ipip0");
ipip_ifindex = if_nametoindex("ipip0");
+ memset(eth_hlen_pp, 0, sizeof(eth_hlen_pp));
+ memset(&eth_hlen, 0, sizeof(eth_hlen));
+
bpf_obj = empty_skb__open_and_load();
if (!ASSERT_OK_PTR(bpf_obj, "open skeleton"))
goto out;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (tests[i].data_in == &eth_hlen)
+ eth_hlen.h_proto = tests[i].h_proto;
+
bpf_object__for_each_program(prog, bpf_obj->obj) {
bool at_egress = strstr(bpf_program__name(prog), "egress") != NULL;
bool at_tc = !strncmp(bpf_program__section_name(prog), "tc", 2);
+ bool is_adjust_room = !strcmp(bpf_program__name(prog), "tc_adjust_room");
int expected_ret;
char buf[128];
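+			/* Pair the adjust_room test vectors with the
+			 * tc_adjust_room program only, and vice versa.
+			 */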
+ if (tests[i].adjust_room != is_adjust_room)
+ continue;
+
expected_ret = at_egress && !at_tc ? tests[i].lwt_egress_ret : tests[i].ret;
tattr.data_in = tests[i].data_in;
diff --git a/tools/testing/selftests/bpf/prog_tests/exceptions.c b/tools/testing/selftests/bpf/prog_tests/exceptions.c
index 516f4a13013c..e8cbaf2a3e82 100644
--- a/tools/testing/selftests/bpf/prog_tests/exceptions.c
+++ b/tools/testing/selftests/bpf/prog_tests/exceptions.c
@@ -83,6 +83,7 @@ static void test_exceptions_success(void)
RUN_SUCCESS(exception_assert_range_with, 1);
RUN_SUCCESS(exception_bad_assert_range, 0);
RUN_SUCCESS(exception_bad_assert_range_with, 10);
+ RUN_SUCCESS(exception_throw_from_void_global, 11);
#define RUN_EXT(load_ret, attach_err, expr, msg, after_link) \
{ \
@@ -127,7 +128,7 @@ static void test_exceptions_success(void)
bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
"exception_cb_mod"), "set_attach_target"))
goto done;
- }), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+ }), "Tracing programs cannot attach to exception callback", 0);
if (test__start_subtest("throwing fentry -> exception_cb"))
RUN_EXT(-EINVAL, true, ({
@@ -137,7 +138,7 @@ static void test_exceptions_success(void)
bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
"exception_cb_mod"), "set_attach_target"))
goto done;
- }), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+ }), "Tracing programs cannot attach to exception callback", 0);
if (test__start_subtest("non-throwing fexit -> exception_cb"))
RUN_EXT(-EINVAL, true, ({
@@ -147,7 +148,7 @@ static void test_exceptions_success(void)
bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
"exception_cb_mod"), "set_attach_target"))
goto done;
- }), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+ }), "Tracing programs cannot attach to exception callback", 0);
if (test__start_subtest("throwing fexit -> exception_cb"))
RUN_EXT(-EINVAL, true, ({
@@ -157,7 +158,7 @@ static void test_exceptions_success(void)
bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
"exception_cb_mod"), "set_attach_target"))
goto done;
- }), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+ }), "Tracing programs cannot attach to exception callback", 0);
if (test__start_subtest("throwing extension (with custom cb) -> exception_cb"))
RUN_EXT(-EINVAL, true, ({
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index f29fc789c14b..92c20803ea76 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -111,7 +111,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
struct bpf_link_info link_info;
struct bpf_program *pos;
const char *pos_sec_name;
- char *tgt_name;
+ const char *tgt_name;
__s32 btf_id;
tgt_name = strstr(prog_name[i], "/");
@@ -347,6 +347,17 @@ static void test_func_sockmap_update(void)
prog_name, false, NULL);
}
+static void test_func_replace_void(void)
+{
+ const char *prog_name[] = {
+ "freplace/foo",
+ };
+ test_fexit_bpf2bpf_common("./freplace_void.bpf.o",
+ "./test_global_func7.bpf.o",
+ ARRAY_SIZE(prog_name),
+ prog_name, false, NULL);
+}
+
static void test_obj_load_failure_common(const char *obj_file,
const char *target_obj_file,
const char *exp_msg)
@@ -432,6 +443,15 @@ static void test_func_replace_global_func(void)
prog_name, false, NULL);
}
+static void test_func_replace_int_with_void(void)
+{
+ /* Make sure we can't freplace with the wrong type */
+ test_obj_load_failure_common("freplace_int_with_void.bpf.o",
+ "./test_global_func2.bpf.o",
+ "Return type UNKNOWN of test_freplace_int_with_void()"
+ " doesn't match type INT of global_func2()");
+}
+
static int find_prog_btf_id(const char *name, __u32 attach_prog_fd)
{
struct bpf_prog_info info = {};
@@ -597,4 +617,8 @@ void serial_test_fexit_bpf2bpf(void)
test_fentry_to_cgroup_bpf();
if (test__start_subtest("func_replace_progmap"))
test_func_replace_progmap();
+ if (test__start_subtest("freplace_int_with_void"))
+ test_func_replace_int_with_void();
+ if (test__start_subtest("freplace_void"))
+ test_func_replace_void();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
index 96b27de05524..7bf8adc41e99 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "get_func_args_test.skel.h"
+#include "get_func_args_fsession_test.skel.h"
void test_get_func_args_test(void)
{
@@ -41,8 +42,30 @@ void test_get_func_args_test(void)
ASSERT_EQ(skel->bss->test4_result, 1, "test4_result");
ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
ASSERT_EQ(skel->bss->test6_result, 1, "test6_result");
- ASSERT_EQ(skel->bss->test7_result, 1, "test7_result");
cleanup:
get_func_args_test__destroy(skel);
}
+
+void test_get_func_args_fsession_test(void)
+{
+ struct get_func_args_fsession_test *skel = NULL;
+ int err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ skel = get_func_args_fsession_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "get_func_args_fsession_test__open_and_load"))
+ return;
+
+ err = get_func_args_fsession_test__attach(skel);
+ if (!ASSERT_OK(err, "get_func_args_fsession_test__attach"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test1), &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
+cleanup:
+ get_func_args_fsession_test__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
index 7772a0f288d3..357fdedfea93 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
@@ -2,6 +2,7 @@
#include <test_progs.h>
#include "get_func_ip_test.skel.h"
#include "get_func_ip_uprobe_test.skel.h"
+#include "get_func_ip_fsession_test.skel.h"
static noinline void uprobe_trigger(void)
{
@@ -46,8 +47,6 @@ static void test_function_entry(void)
ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
ASSERT_EQ(skel->bss->test7_result, 1, "test7_result");
ASSERT_EQ(skel->bss->test8_result, 1, "test8_result");
- ASSERT_EQ(skel->bss->test9_entry_result, 1, "test9_entry_result");
- ASSERT_EQ(skel->bss->test9_exit_result, 1, "test9_exit_result");
cleanup:
get_func_ip_test__destroy(skel);
@@ -139,3 +138,28 @@ void test_get_func_ip_test(void)
test_function_entry();
test_function_body();
}
+
+void test_get_func_ip_fsession_test(void)
+{
+ struct get_func_ip_fsession_test *skel = NULL;
+ int err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ skel = get_func_ip_fsession_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "get_func_ip_fsession_test__open_and_load"))
+ return;
+
+ err = get_func_ip_fsession_test__attach(skel);
+ if (!ASSERT_OK(err, "get_func_ip_fsession_test__attach"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test1), &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
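+	/* An fsession program fires on both function entry and exit, so a
+	 * single trigger should set both result flags.
+	 */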
+ ASSERT_EQ(skel->bss->test1_entry_result, 1, "test1_entry_result");
+ ASSERT_EQ(skel->bss->test1_exit_result, 1, "test1_exit_result");
+
+cleanup:
+ get_func_ip_fsession_test__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/htab_reuse.c b/tools/testing/selftests/bpf/prog_tests/htab_reuse.c
index a742dd994d60..d7c3df165adc 100644
--- a/tools/testing/selftests/bpf/prog_tests/htab_reuse.c
+++ b/tools/testing/selftests/bpf/prog_tests/htab_reuse.c
@@ -59,7 +59,7 @@ static void *htab_update_fn(void *arg)
return NULL;
}
-void test_htab_reuse(void)
+static void test_htab_reuse_basic(void)
{
unsigned int i, wr_nr = 1, rd_nr = 4;
pthread_t tids[wr_nr + rd_nr];
@@ -99,3 +99,170 @@ reap:
}
htab_reuse__destroy(skel);
}
+
+/*
+ * Write-consistency test for BPF_F_LOCK updates.
+ *
+ * The race:
+ * 1. Thread A: BPF_F_LOCK|BPF_EXIST update
+ * 2. Thread B: delete the element, then re-create it with BPF_ANY | BPF_F_LOCK
+ */
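+/*
+ * Locked updaters fill the payload with 0xAA, delete/re-create threads
+ * with 0xBB; a BPF_F_LOCK lookup must observe a uniform payload, so any
+ * mixed bytes indicate a torn write.
+ */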
+
+struct htab_val_large {
+ struct bpf_spin_lock lock;
+ __u32 seq;
+ __u64 data[256];
+};
+
+struct consistency_ctx {
+ int fd;
+ int start_fd;
+ int loop;
+ volatile bool torn_write;
+};
+
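+/* Block until the write end of the pipe is closed; read() then returns
+ * 0 (EOF), releasing all waiting threads at once.
+ */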
+static void wait_for_start(int fd)
+{
+ char buf;
+
+ read(fd, &buf, 1);
+}
+
+static void *locked_update_fn(void *arg)
+{
+ struct consistency_ctx *ctx = arg;
+ struct htab_val_large value;
+ unsigned int key = 1;
+ int i;
+
+ memset(&value, 0xAA, sizeof(value));
+ wait_for_start(ctx->start_fd);
+
+ for (i = 0; i < ctx->loop; i++) {
+ value.seq = i;
+ bpf_map_update_elem(ctx->fd, &key, &value,
+ BPF_F_LOCK | BPF_EXIST);
+ }
+
+ return NULL;
+}
+
+/* Delete + update: removes the element then re-creates it with BPF_ANY | BPF_F_LOCK. */
+static void *delete_update_fn(void *arg)
+{
+ struct consistency_ctx *ctx = arg;
+ struct htab_val_large value;
+ unsigned int key = 1;
+ int i;
+
+ memset(&value, 0xBB, sizeof(value));
+
+ wait_for_start(ctx->start_fd);
+
+ for (i = 0; i < ctx->loop; i++) {
+ value.seq = i;
+ bpf_map_delete_elem(ctx->fd, &key);
+ bpf_map_update_elem(ctx->fd, &key, &value, BPF_ANY | BPF_F_LOCK);
+ }
+
+ return NULL;
+}
+
+static void *locked_lookup_fn(void *arg)
+{
+ struct consistency_ctx *ctx = arg;
+ struct htab_val_large value;
+ unsigned int key = 1;
+ int i, j;
+
+ wait_for_start(ctx->start_fd);
+
+ for (i = 0; i < ctx->loop && !ctx->torn_write; i++) {
+ if (bpf_map_lookup_elem_flags(ctx->fd, &key, &value, BPF_F_LOCK))
+ continue;
+
+ for (j = 0; j < 256; j++) {
+ if (value.data[j] != value.data[0]) {
+ ctx->torn_write = true;
+ return NULL;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static void test_htab_reuse_consistency(void)
+{
+ int threads_total = 6, threads = 2;
+ pthread_t tids[threads_total];
+ struct consistency_ctx ctx;
+ struct htab_val_large seed;
+ struct htab_reuse *skel;
+ unsigned int key = 1, i;
+ int pipefd[2];
+ int err;
+
+ skel = htab_reuse__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "htab_reuse__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(pipe(pipefd), "pipe"))
+ goto out;
+
+ ctx.fd = bpf_map__fd(skel->maps.htab_lock_consistency);
+ ctx.start_fd = pipefd[0];
+ ctx.loop = 100000;
+ ctx.torn_write = false;
+
+ /* Seed the element so locked updaters have something to find */
+ memset(&seed, 0xBB, sizeof(seed));
+ err = bpf_map_update_elem(ctx.fd, &key, &seed, BPF_ANY);
+ if (!ASSERT_OK(err, "seed_element"))
+ goto close_pipe;
+
+ memset(tids, 0, sizeof(tids));
+ for (i = 0; i < threads; i++) {
+ err = pthread_create(&tids[i], NULL, locked_update_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto stop;
+ }
+ for (i = 0; i < threads; i++) {
+ err = pthread_create(&tids[threads + i], NULL, delete_update_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto stop;
+ }
+ for (i = 0; i < threads; i++) {
+ err = pthread_create(&tids[threads * 2 + i], NULL, locked_lookup_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto stop;
+ }
+
+ /* Release all threads simultaneously */
+ close(pipefd[1]);
+ pipefd[1] = -1;
+
+stop:
+ for (i = 0; i < threads_total; i++) {
+ if (!tids[i])
+ continue;
+ pthread_join(tids[i], NULL);
+ }
+
+ ASSERT_FALSE(ctx.torn_write, "no torn writes detected");
+
+close_pipe:
+ if (pipefd[1] >= 0)
+ close(pipefd[1]);
+ close(pipefd[0]);
+out:
+ htab_reuse__destroy(skel);
+}
+
+void test_htab_reuse(void)
+{
+ if (test__start_subtest("basic"))
+ test_htab_reuse_basic();
+ if (test__start_subtest("consistency"))
+ test_htab_reuse_consistency();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/iter_buf_null_fail.c b/tools/testing/selftests/bpf/prog_tests/iter_buf_null_fail.c
new file mode 100644
index 000000000000..ea97787b870d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/iter_buf_null_fail.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "iter_buf_null_fail.skel.h"
+
+void test_iter_buf_null_fail(void)
+{
+ RUN_TESTS(iter_buf_null_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index f79c8e53cb3e..62f3fb79f5d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -74,6 +74,8 @@ static struct kfunc_test_params kfunc_tests[] = {
TC_TEST(kfunc_call_test1, 12),
TC_TEST(kfunc_call_test2, 3),
TC_TEST(kfunc_call_test4, -1234),
+ TC_TEST(kfunc_call_test5, 0),
+ TC_TEST(kfunc_call_test5_asm, 0),
TC_TEST(kfunc_call_test_ref_btf_id, 0),
TC_TEST(kfunc_call_test_get_mem, 42),
SYSCALL_TEST(kfunc_syscall_test, 0),
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index f81dcd609ee9..2e0ddef77ba5 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -10,6 +10,7 @@
#include "kprobe_multi_session_cookie.skel.h"
#include "kprobe_multi_verifier.skel.h"
#include "kprobe_write_ctx.skel.h"
+#include "kprobe_multi_sleepable.skel.h"
#include "bpf/libbpf_internal.h"
#include "bpf/hashmap.h"
@@ -220,7 +221,9 @@ static void test_attach_api_syms(void)
static void test_attach_api_fails(void)
{
LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
struct kprobe_multi *skel = NULL;
+ struct kprobe_multi_sleepable *sl_skel = NULL;
struct bpf_link *link = NULL;
unsigned long long addrs[2];
const char *syms[2] = {
@@ -228,7 +231,7 @@ static void test_attach_api_fails(void)
"bpf_fentry_test2",
};
__u64 cookies[2];
- int saved_error;
+ int saved_error, err;
addrs[0] = ksym_get_addr("bpf_fentry_test1");
addrs[1] = ksym_get_addr("bpf_fentry_test2");
@@ -327,9 +330,63 @@ static void test_attach_api_fails(void)
if (!ASSERT_EQ(saved_error, -E2BIG, "fail_6_error"))
goto cleanup;
+ /* fail_7 - non-existent wildcard pattern (slow path) */
+ LIBBPF_OPTS_RESET(opts);
+
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ "__nonexistent_func_xyz_*",
+ &opts);
+ saved_error = -errno;
+ if (!ASSERT_ERR_PTR(link, "fail_7"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(saved_error, -ENOENT, "fail_7_error"))
+ goto cleanup;
+
+ /* fail_8 - non-existent exact name (fast path), same error as wildcard */
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ "__nonexistent_func_xyz_123",
+ &opts);
+ saved_error = -errno;
+ if (!ASSERT_ERR_PTR(link, "fail_8"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(saved_error, -ENOENT, "fail_8_error"))
+ goto cleanup;
+
+ /* fail_9 - sleepable kprobe multi should not attach */
+ sl_skel = kprobe_multi_sleepable__open();
+ if (!ASSERT_OK_PTR(sl_skel, "sleep_skel_open"))
+ goto cleanup;
+
+ sl_skel->bss->user_ptr = sl_skel;
+
+ err = bpf_program__set_flags(sl_skel->progs.handle_kprobe_multi_sleepable,
+ BPF_F_SLEEPABLE);
+ if (!ASSERT_OK(err, "sleep_skel_set_flags"))
+ goto cleanup;
+
+ err = kprobe_multi_sleepable__load(sl_skel);
+ if (!ASSERT_OK(err, "sleep_skel_load"))
+ goto cleanup;
+
+ link = bpf_program__attach_kprobe_multi_opts(sl_skel->progs.handle_kprobe_multi_sleepable,
+ "bpf_fentry_test1", NULL);
+ saved_error = -errno;
+
+ if (!ASSERT_ERR_PTR(link, "fail_9"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(saved_error, -EINVAL, "fail_9_error"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(sl_skel->progs.fentry), &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
cleanup:
bpf_link__destroy(link);
kprobe_multi__destroy(skel);
+ kprobe_multi_sleepable__destroy(sl_skel);
}
static void test_session_skel_api(void)
@@ -355,8 +412,13 @@ static void test_session_skel_api(void)
ASSERT_OK(err, "test_run");
ASSERT_EQ(topts.retval, 0, "test_run");
- /* bpf_fentry_test1-4 trigger return probe, result is 2 */
- for (i = 0; i < 4; i++)
+ /*
+ * bpf_fentry_test1 is hit by both the wildcard probe and the exact
+ * name probe (test_kprobe_syms), so entry + return fires twice: 4.
+ * bpf_fentry_test2-4 are hit only by the wildcard probe: 2.
+ */
+ ASSERT_EQ(skel->bss->kprobe_session_result[0], 4, "kprobe_session_result");
+ for (i = 1; i < 4; i++)
ASSERT_EQ(skel->bss->kprobe_session_result[i], 2, "kprobe_session_result");
/* bpf_fentry_test5-8 trigger only entry probe, result is 1 */
@@ -604,6 +666,44 @@ static void test_attach_write_ctx(void)
}
#endif
+/*
+ * Test that kprobe_multi handles shadow symbols (vmlinux + module duplicate).
+ * bpf_fentry_shadow_test exists in both vmlinux and bpf_testmod.
+ * kprobe_multi resolves via ftrace_lookup_symbols(), which finds the
+ * vmlinux symbol first and stops, so this should always succeed.
+ */
+static void test_attach_probe_dup_sym(void)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ const char *syms[1] = { "bpf_fentry_shadow_test" };
+ struct kprobe_multi *skel = NULL;
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi__open_and_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+ opts.syms = syms;
+ opts.cnt = ARRAY_SIZE(syms);
+
+ link1 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ NULL, &opts);
+ if (!ASSERT_OK_PTR(link1, "attach_kprobe_multi_dup_sym"))
+ goto cleanup;
+
+ opts.retprobe = true;
+ link2 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kretprobe_manual,
+ NULL, &opts);
+ if (!ASSERT_OK_PTR(link2, "attach_kretprobe_multi_dup_sym"))
+ goto cleanup;
+
+cleanup:
+ bpf_link__destroy(link2);
+ bpf_link__destroy(link1);
+ kprobe_multi__destroy(skel);
+}
+
void serial_test_kprobe_multi_bench_attach(void)
{
if (test__start_subtest("kernel"))
@@ -647,5 +747,7 @@ void test_kprobe_multi_test(void)
test_unique_match();
if (test__start_subtest("attach_write_ctx"))
test_attach_write_ctx();
+ if (test__start_subtest("dup_sym"))
+ test_attach_probe_dup_sym();
RUN_TESTS(kprobe_multi_verifier);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 14c5a7ef0e87..6f25b5f39a79 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -87,12 +87,12 @@ static struct {
{ "incorrect_value_type",
"operation on bpf_list_head expects arg#1 bpf_list_node at offset=48 in struct foo, "
"but arg is at offset=0 in struct bar" },
- { "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" },
+ { "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0x1ffffffff) disallowed" },
{ "incorrect_node_off1", "bpf_list_node not found at offset=49" },
{ "incorrect_node_off2", "arg#1 offset=0, but expected bpf_list_node at offset=48 in struct foo" },
{ "no_head_type", "bpf_list_head not found at offset=0" },
{ "incorrect_head_var_off1", "R1 doesn't have constant offset" },
- { "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" },
+ { "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0x1ffffffff) disallowed" },
{ "incorrect_head_off1", "bpf_list_head not found at offset=25" },
{ "incorrect_head_off2", "bpf_list_head not found at offset=1" },
{ "pop_front_off", "off 48 doesn't point to 'struct bpf_spin_lock' that is at 40" },
diff --git a/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c
index 72aa5376c30e..0a12af924a99 100644
--- a/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c
+++ b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c
@@ -5,6 +5,8 @@
#include "testing_helpers.h"
#include "livepatch_trampoline.skel.h"
+#define LIVEPATCH_ENABLED_PATH "/sys/kernel/livepatch/livepatch_sample/enabled"
+
static int load_livepatch(void)
{
char path[4096];
@@ -19,7 +21,8 @@ static int load_livepatch(void)
static void unload_livepatch(void)
{
/* Disable the livepatch before unloading the module */
- system("echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled");
+ if (!access(LIVEPATCH_ENABLED_PATH, F_OK))
+ system("echo 0 > " LIVEPATCH_ENABLED_PATH);
unload_module("livepatch_sample", env_verbosity > VERBOSE_NONE);
}
@@ -81,9 +84,22 @@ out:
void test_livepatch_trampoline(void)
{
int retry_cnt = 0;
+ int err;
+
+ /* Skip if kernel was built without CONFIG_LIVEPATCH */
+ if (access("/sys/kernel/livepatch", F_OK)) {
+ test__skip();
+ return;
+ }
retry:
- if (load_livepatch()) {
+ err = load_livepatch();
+ if (err) {
+ if (err == -ENOENT) {
+ test__skip();
+ return;
+ }
+
if (retry_cnt) {
ASSERT_OK(1, "load_livepatch");
goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_bdev.c b/tools/testing/selftests/bpf/prog_tests/lsm_bdev.c
new file mode 100644
index 000000000000..a970798e1173
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lsm_bdev.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Christian Brauner <brauner@kernel.org> */
+
+/*
+ * Test BPF LSM block device integrity hooks with dm-verity.
+ *
+ * Creates a dm-verity device over loopback, which triggers
+ * security_bdev_setintegrity() during verity_preresume().
+ * Verifies that the BPF program correctly tracks the integrity
+ * metadata in its hashmap.
+ */
+
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include "lsm_bdev.skel.h"
+
+/* Must match the definition in progs/lsm_bdev.c. */
+struct verity_info {
+ __u8 has_roothash;
+ __u8 sig_valid;
+ __u32 setintegrity_cnt;
+};
+
+#define DATA_SIZE_MB 8
+#define HASH_SIZE_MB 1
+#define DM_NAME "bpf_test_verity"
+#define DM_DEV_PATH "/dev/mapper/" DM_NAME
+
+/* Run a command and optionally capture the first line of stdout. */
+static int run_cmd(const char *cmd, char *out, size_t out_sz)
+{
+ FILE *fp;
+ int ret;
+
+ fp = popen(cmd, "r");
+ if (!fp)
+ return -1;
+
+ if (out && out_sz > 0) {
+ if (!fgets(out, out_sz, fp))
+ out[0] = '\0';
+ /* strip trailing newline */
+ out[strcspn(out, "\n")] = '\0';
+ }
+
+ ret = pclose(fp);
+ return WIFEXITED(ret) ? WEXITSTATUS(ret) : -1;
+}
+
+static bool has_prerequisites(void)
+{
+ if (getuid() != 0) {
+ printf("SKIP: must be root\n");
+ return false;
+ }
+
+ if (run_cmd("modprobe loop 2>/dev/null", NULL, 0) &&
+ run_cmd("ls /dev/loop-control 2>/dev/null", NULL, 0)) {
+ printf("SKIP: no loop device support\n");
+ return false;
+ }
+
+ if (run_cmd("modprobe dm-verity 2>/dev/null", NULL, 0) &&
+ run_cmd("dmsetup targets 2>/dev/null | grep -q verity", NULL, 0)) {
+ printf("SKIP: dm-verity module not available\n");
+ return false;
+ }
+
+ if (run_cmd("which veritysetup >/dev/null 2>&1", NULL, 0)) {
+ printf("SKIP: veritysetup not found\n");
+ return false;
+ }
+
+ return true;
+}
+
+void test_lsm_bdev(void)
+{
+ char data_img[] = "/tmp/bpf_verity_data_XXXXXX";
+ char hash_img[] = "/tmp/bpf_verity_hash_XXXXXX";
+ char data_loop[64] = {};
+ char hash_loop[64] = {};
+ char roothash[256] = {};
+ char cmd[512];
+ int data_fd = -1, hash_fd = -1;
+ struct lsm_bdev *skel = NULL;
+ struct verity_info val;
+ struct stat st;
+ __u32 dev_key;
+ int err;
+
+ if (!has_prerequisites()) {
+ test__skip();
+ return;
+ }
+
+ /* Clean up any stale device from a previous crashed run. */
+ snprintf(cmd, sizeof(cmd), "dmsetup remove %s 2>/dev/null", DM_NAME);
+ run_cmd(cmd, NULL, 0);
+
+ /* Create temporary image files. */
+ data_fd = mkstemp(data_img);
+ if (!ASSERT_OK_FD(data_fd, "mkstemp data"))
+ return;
+
+ hash_fd = mkstemp(hash_img);
+ if (!ASSERT_OK_FD(hash_fd, "mkstemp hash"))
+ goto cleanup;
+
+ if (!ASSERT_OK(ftruncate(data_fd, DATA_SIZE_MB * 1024 * 1024),
+ "truncate data"))
+ goto cleanup;
+
+ if (!ASSERT_OK(ftruncate(hash_fd, HASH_SIZE_MB * 1024 * 1024),
+ "truncate hash"))
+ goto cleanup;
+
+ close(data_fd);
+ data_fd = -1;
+ close(hash_fd);
+ hash_fd = -1;
+
+ /* Set up loop devices. */
+ snprintf(cmd, sizeof(cmd),
+ "losetup --find --show %s 2>/dev/null", data_img);
+ if (!ASSERT_OK(run_cmd(cmd, data_loop, sizeof(data_loop)),
+ "losetup data"))
+ goto teardown;
+
+ snprintf(cmd, sizeof(cmd),
+ "losetup --find --show %s 2>/dev/null", hash_img);
+ if (!ASSERT_OK(run_cmd(cmd, hash_loop, sizeof(hash_loop)),
+ "losetup hash"))
+ goto teardown;
+
+ /* Format the dm-verity device and capture the root hash. */
+ snprintf(cmd, sizeof(cmd),
+ "veritysetup format %s %s 2>/dev/null | "
+ "grep -i 'root hash' | awk '{print $NF}'",
+ data_loop, hash_loop);
+ if (!ASSERT_OK(run_cmd(cmd, roothash, sizeof(roothash)),
+ "veritysetup format"))
+ goto teardown;
+
+ if (!ASSERT_GT((int)strlen(roothash), 0, "roothash not empty"))
+ goto teardown;
+
+ /* Load and attach BPF program before activating dm-verity. */
+ skel = lsm_bdev__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+ goto teardown;
+
+ err = lsm_bdev__attach(skel);
+ if (!ASSERT_OK(err, "skel attach"))
+ goto teardown;
+
+ /* Activate dm-verity, which triggers verity_preresume() hooks. */
+ snprintf(cmd, sizeof(cmd),
+ "veritysetup open %s %s %s %s 2>/dev/null",
+ data_loop, DM_NAME, hash_loop, roothash);
+ if (!ASSERT_OK(run_cmd(cmd, NULL, 0), "veritysetup open"))
+ goto teardown;
+
+ /* Get the dm device's dev_t. */
+ if (!ASSERT_OK(stat(DM_DEV_PATH, &st), "stat dm dev"))
+ goto remove_dm;
+
+ dev_key = (__u32)st.st_rdev;
+
+ /* Look up the device in the BPF map and verify. */
+ err = bpf_map__lookup_elem(skel->maps.verity_devices,
+ &dev_key, sizeof(dev_key),
+ &val, sizeof(val), 0);
+ if (!ASSERT_OK(err, "map lookup"))
+ goto remove_dm;
+
+ ASSERT_EQ(val.has_roothash, 1, "has_roothash");
+ ASSERT_EQ(val.sig_valid, 0, "sig_valid (unsigned)");
+ /*
+ * verity_preresume() always calls security_bdev_setintegrity()
+ * for the roothash. The signature-validity call only happens
+ * when CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG is enabled.
+ */
+ ASSERT_GE(val.setintegrity_cnt, 1, "setintegrity_cnt min");
+ ASSERT_LE(val.setintegrity_cnt, 2, "setintegrity_cnt max");
+
+ /* Verify that the alloc hook fired at least once. */
+ ASSERT_GT(skel->bss->alloc_count, 0, "alloc_count");
+
+remove_dm:
+ snprintf(cmd, sizeof(cmd), "dmsetup remove %s 2>/dev/null", DM_NAME);
+ run_cmd(cmd, NULL, 0);
+
+teardown:
+ if (data_loop[0]) {
+ snprintf(cmd, sizeof(cmd), "losetup -d %s 2>/dev/null",
+ data_loop);
+ run_cmd(cmd, NULL, 0);
+ }
+ if (hash_loop[0]) {
+ snprintf(cmd, sizeof(cmd), "losetup -d %s 2>/dev/null",
+ hash_loop);
+ run_cmd(cmd, NULL, 0);
+ }
+
+cleanup:
+ lsm_bdev__destroy(skel);
+ if (data_fd >= 0)
+ close(data_fd);
+ if (hash_fd >= 0)
+ close(hash_fd);
+ unlink(data_img);
+ unlink(hash_img);
+}
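
The BPF side of this test (progs/lsm_bdev.c) is not part of this hunk. For orientation, a rough sketch of what such a program might look like, assuming the bdev LSM hooks (bdev_alloc_security, bdev_setintegrity) and the LSM_INT_DMVERITY_ROOTHASH integrity type; the map and field names mirror the userspace expectations above, everything else is illustrative:

// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct verity_info {
	__u8 has_roothash;
	__u8 sig_valid;
	__u32 setintegrity_cnt;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 16);
	__type(key, __u32);
	__type(value, struct verity_info);
} verity_devices SEC(".maps");

int alloc_count;

SEC("lsm/bdev_alloc_security")
int BPF_PROG(bdev_alloc, struct block_device *bdev)
{
	__sync_fetch_and_add(&alloc_count, 1);
	return 0;
}

SEC("lsm/bdev_setintegrity")
int BPF_PROG(bdev_setintegrity, struct block_device *bdev,
	     enum lsm_integrity_type type, const void *value, size_t size)
{
	struct verity_info zero = {}, *info;
	__u32 key = bdev->bd_dev;

	/* Create the entry on first sight of this device, then update it. */
	bpf_map_update_elem(&verity_devices, &key, &zero, BPF_NOEXIST);
	info = bpf_map_lookup_elem(&verity_devices, &key);
	if (!info)
		return 0;
	if (type == LSM_INT_DMVERITY_ROOTHASH)
		info->has_roothash = 1;
	__sync_fetch_and_add(&info->setintegrity_cnt, 1);
	return 0;
}

char _license[] SEC("license") = "GPL";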
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_misc.c b/tools/testing/selftests/bpf/prog_tests/lwt_misc.c
new file mode 100644
index 000000000000..6940fca38512
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_misc.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "lwt_misc.skel.h"
+
+void test_lwt_misc(void)
+{
+ RUN_TESTS(lwt_misc);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c
index a70c99c2f8c8..4661d77ebdfc 100644
--- a/tools/testing/selftests/bpf/prog_tests/modify_return.c
+++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c
@@ -5,6 +5,7 @@
*/
#include <test_progs.h>
+#include <unistd.h>
#include "modify_return.skel.h"
#define LOWER(x) ((x) & 0xffff)
@@ -23,11 +24,13 @@ static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret)
if (!ASSERT_OK_PTR(skel, "skel_load"))
goto cleanup;
+ skel->bss->input_retval = input_retval;
+ skel->bss->test_pid = getpid();
+
err = modify_return__attach(skel);
if (!ASSERT_OK(err, "modify_return__attach failed"))
goto cleanup;
- skel->bss->input_retval = input_retval;
prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "test_run");
@@ -49,8 +52,7 @@ cleanup:
modify_return__destroy(skel);
}
-/* TODO: conflict with get_func_ip_test */
-void serial_test_modify_return(void)
+void test_modify_return(void)
{
run_test(0 /* input_retval */,
2 /* want_side_effect */,
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index 70fa7ae93173..92c336333fcb 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -6,7 +6,22 @@
#include "test_module_attach.skel.h"
#include "testing_helpers.h"
-static int duration;
+static const char * const read_tests[] = {
+ "handle_raw_tp",
+ "handle_tp_btf",
+ "handle_fentry",
+ "handle_fentry_explicit",
+ "handle_fmod_ret",
+};
+
+static const char * const detach_tests[] = {
+ "handle_fentry",
+ "handle_fexit",
+ "kprobe_multi",
+};
+
+static const int READ_SZ = 456;
+static const int WRITE_SZ = 457;
static int trigger_module_test_writable(int *val)
{
@@ -33,53 +48,73 @@ static int trigger_module_test_writable(int *val)
return 0;
}
-void test_module_attach(void)
+static void test_module_attach_prog(const char *prog_name, int sz,
+ const char *attach_target, int ret)
{
- const int READ_SZ = 456;
- const int WRITE_SZ = 457;
- struct test_module_attach* skel;
- struct test_module_attach__bss *bss;
- struct bpf_link *link;
+ struct test_module_attach *skel;
+ struct bpf_program *prog;
int err;
- int writable_val = 0;
skel = test_module_attach__open();
- if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ if (!ASSERT_OK_PTR(skel, "module_attach open"))
return;
- err = bpf_program__set_attach_target(skel->progs.handle_fentry_manual,
- 0, "bpf_testmod_test_read");
- ASSERT_OK(err, "set_attach_target");
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "module_attach find_program"))
+ goto cleanup;
+ bpf_program__set_autoload(prog, true);
- err = bpf_program__set_attach_target(skel->progs.handle_fentry_explicit_manual,
- 0, "bpf_testmod:bpf_testmod_test_read");
- ASSERT_OK(err, "set_attach_target_explicit");
+ if (attach_target) {
+ err = bpf_program__set_attach_target(prog, 0, attach_target);
+ if (!ASSERT_OK(err, attach_target))
+ goto cleanup;
+ }
err = test_module_attach__load(skel);
- if (CHECK(err, "skel_load", "failed to load skeleton\n"))
+ if (!ASSERT_OK(err, "module_attach load"))
+ goto cleanup;
+
+ err = test_module_attach__attach(skel);
+ if (!ASSERT_OK(err, "module_attach attach"))
+ goto cleanup;
+
+ if (sz) {
+ /* trigger both read and write, though each test uses only one */
+ ASSERT_OK(trigger_module_test_read(sz), "trigger_read");
+ ASSERT_OK(trigger_module_test_write(sz), "trigger_write");
+
+ ASSERT_EQ(skel->bss->sz, sz, prog_name);
+ }
+
+ if (ret)
+ ASSERT_EQ(skel->bss->retval, ret, "ret");
+cleanup:
+ test_module_attach__destroy(skel);
+}
+
+static void test_module_attach_writable(void)
+{
+ struct test_module_attach__bss *bss;
+ struct test_module_attach *skel;
+ int writable_val = 0;
+ int err;
+
+ skel = test_module_attach__open();
+ if (!ASSERT_OK_PTR(skel, "module_attach open"))
return;
+ bpf_program__set_autoload(skel->progs.handle_raw_tp_writable_bare, true);
+
+ err = test_module_attach__load(skel);
+ if (!ASSERT_OK(err, "module_attach load"))
+ goto cleanup;
+
bss = skel->bss;
err = test_module_attach__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "module_attach attach"))
goto cleanup;
- /* trigger tracepoint */
- ASSERT_OK(trigger_module_test_read(READ_SZ), "trigger_read");
- ASSERT_OK(trigger_module_test_write(WRITE_SZ), "trigger_write");
-
- ASSERT_EQ(bss->raw_tp_read_sz, READ_SZ, "raw_tp");
- ASSERT_EQ(bss->raw_tp_bare_write_sz, WRITE_SZ, "raw_tp_bare");
- ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf");
- ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry");
- ASSERT_EQ(bss->fentry_manual_read_sz, READ_SZ, "fentry_manual");
- ASSERT_EQ(bss->fentry_explicit_read_sz, READ_SZ, "fentry_explicit");
- ASSERT_EQ(bss->fentry_explicit_manual_read_sz, READ_SZ, "fentry_explicit_manual");
- ASSERT_EQ(bss->fexit_read_sz, READ_SZ, "fexit");
- ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet");
- ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret");
-
bss->raw_tp_writable_bare_early_ret = true;
bss->raw_tp_writable_bare_out_val = 0xf1f2f3f4;
ASSERT_OK(trigger_module_test_writable(&writable_val),
@@ -87,31 +122,73 @@ void test_module_attach(void)
ASSERT_EQ(bss->raw_tp_writable_bare_in_val, 1024, "writable_test_in");
ASSERT_EQ(bss->raw_tp_writable_bare_out_val, writable_val,
"writable_test_out");
+cleanup:
+ test_module_attach__destroy(skel);
+}
- test_module_attach__detach(skel);
-
- /* attach fentry/fexit and make sure it gets module reference */
- link = bpf_program__attach(skel->progs.handle_fentry);
- if (!ASSERT_OK_PTR(link, "attach_fentry"))
- goto cleanup;
+static void test_module_attach_detach(const char *prog_name)
+{
+ struct test_module_attach *skel;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ int err;
- ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod");
- bpf_link__destroy(link);
+ skel = test_module_attach__open();
+ if (!ASSERT_OK_PTR(skel, "module_attach open"))
+ return;
- link = bpf_program__attach(skel->progs.handle_fexit);
- if (!ASSERT_OK_PTR(link, "attach_fexit"))
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "module_attach find_program"))
goto cleanup;
+ bpf_program__set_autoload(prog, true);
- ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod");
- bpf_link__destroy(link);
+ err = test_module_attach__load(skel);
+ if (!ASSERT_OK(err, "module_attach load"))
+ goto cleanup;
- link = bpf_program__attach(skel->progs.kprobe_multi);
- if (!ASSERT_OK_PTR(link, "attach_kprobe_multi"))
+ /* attach and make sure it gets module reference */
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "module_attach attach"))
goto cleanup;
- ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod");
+ ASSERT_ERR(try_unload_module("bpf_testmod", 1, false), "try_unload_module");
bpf_link__destroy(link);
-
cleanup:
test_module_attach__destroy(skel);
}
+
+void test_module_attach(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(read_tests); i++) {
+ if (!test__start_subtest(read_tests[i]))
+ continue;
+ test_module_attach_prog(read_tests[i], READ_SZ, NULL, 0);
+ }
+ if (test__start_subtest("handle_raw_tp_bare"))
+ test_module_attach_prog("handle_raw_tp_bare", WRITE_SZ, NULL, 0);
+ if (test__start_subtest("handle_raw_tp_writable_bare"))
+ test_module_attach_writable();
+ if (test__start_subtest("handle_fentry_manual")) {
+ test_module_attach_prog("handle_fentry_manual", READ_SZ,
+ "bpf_testmod_test_read", 0);
+ }
+ if (test__start_subtest("handle_fentry_explicit_manual")) {
+ test_module_attach_prog("handle_fentry_explicit_manual",
+ READ_SZ,
+ "bpf_testmod:bpf_testmod_test_read", 0);
+ }
+ if (test__start_subtest("handle_fexit"))
+ test_module_attach_prog("handle_fexit", READ_SZ, NULL, -EIO);
+ if (test__start_subtest("handle_fexit_ret"))
+ test_module_attach_prog("handle_fexit_ret", 0, NULL, 0);
+ for (i = 0; i < ARRAY_SIZE(detach_tests); i++) {
+ char test_name[50];
+
+ snprintf(test_name, sizeof(test_name), "%s_detach", detach_tests[i]);
+ if (!test__start_subtest(test_name))
+ continue;
+ test_module_attach_detach(detach_tests[i]);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c
index d940ff87fa08..9e3a0d217af8 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#define _GNU_SOURCE
-#include <pthread.h>
-#include <sched.h>
+#include <linux/compiler.h>
#include <test_progs.h>
#include "testing_helpers.h"
#include "test_perf_link.skel.h"
@@ -12,23 +11,14 @@
static void burn_cpu(void)
{
- volatile int j = 0;
- cpu_set_t cpu_set;
- int i, err;
-
- /* generate some branches on cpu 0 */
- CPU_ZERO(&cpu_set);
- CPU_SET(0, &cpu_set);
- err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
- ASSERT_OK(err, "set_thread_affinity");
+ int i;
/* spin the loop for a while (random high number) */
for (i = 0; i < 1000000; ++i)
- ++j;
+ barrier();
}
-/* TODO: often fails in concurrent mode */
-void serial_test_perf_link(void)
+void test_perf_link(void)
{
struct test_perf_link *skel = NULL;
struct perf_event_attr attr;
@@ -45,7 +35,7 @@ void serial_test_perf_link(void)
attr.config = PERF_COUNT_SW_CPU_CLOCK;
attr.freq = 1;
attr.sample_freq = 1000;
- pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
if (!ASSERT_GE(pfd, 0, "perf_fd"))
goto cleanup;
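
A note on the burn_cpu() rewrite above: the volatile counter is replaced by barrier(), a compiler barrier that keeps the otherwise-empty loop from being optimized away without forcing a memory store each iteration. Roughly, the macro boils down to something like this (simplified sketch, not the exact linux/compiler.h definition):

/* An empty asm statement that clobbers memory: the compiler may not
 * delete the loop or hoist work across it, yet the barrier itself
 * emits no instructions.
 */
#define barrier() __asm__ __volatile__("" ::: "memory")

static void burn_cpu_sketch(void)
{
	int i;

	/* Still runs ~1M iterations of branch work for perf to sample. */
	for (i = 0; i < 1000000; ++i)
		barrier();
}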
diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
index 8721671321de..7a9d13aa2c87 100644
--- a/tools/testing/selftests/bpf/prog_tests/probe_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
-/* TODO: corrupts other tests uses connect() */
-void serial_test_probe_user(void)
+void test_probe_user(void)
{
static const char *const prog_names[] = {
"handle_sys_connect",
@@ -20,6 +19,11 @@ void serial_test_probe_user(void)
struct bpf_program *kprobe_progs[prog_count];
struct bpf_object *obj;
static const int zero = 0;
+ struct test_pro_bss {
+ struct sockaddr_in old;
+ __u32 test_pid;
+ };
+ struct test_pro_bss results = {};
size_t i;
obj = bpf_object__open_file(obj_file, &opts);
@@ -34,6 +38,23 @@ void serial_test_probe_user(void)
goto cleanup;
}
+ {
+ struct bpf_map *bss_map;
+ struct test_pro_bss bss_init = {};
+
+ bss_init.test_pid = getpid();
+ bss_map = bpf_object__find_map_by_name(obj, "test_pro.bss");
+ if (!ASSERT_OK_PTR(bss_map, "find_bss_map"))
+ goto cleanup;
+ if (!ASSERT_EQ(bpf_map__value_size(bss_map), sizeof(bss_init),
+ "bss_size"))
+ goto cleanup;
+ err = bpf_map__set_initial_value(bss_map, &bss_init,
+ sizeof(bss_init));
+ if (!ASSERT_OK(err, "set_bss_init"))
+ goto cleanup;
+ }
+
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d\n", err))
goto cleanup;
@@ -62,11 +83,13 @@ void serial_test_probe_user(void)
connect(sock_fd, &curr, sizeof(curr));
close(sock_fd);
- err = bpf_map_lookup_elem(results_map_fd, &zero, &tmp);
+ err = bpf_map_lookup_elem(results_map_fd, &zero, &results);
if (CHECK(err, "get_kprobe_res",
"failed to get kprobe res: %d\n", err))
goto cleanup;
+ memcpy(&tmp, &results.old, sizeof(tmp));
+
in = (struct sockaddr_in *)&tmp;
if (CHECK(memcmp(&tmp, &orig, sizeof(orig)), "check_kprobe_res",
"wrong kprobe res from probe read: %s:%u\n",
diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c
index d8f3d7a45fe9..a854fb38e418 100644
--- a/tools/testing/selftests/bpf/prog_tests/rbtree.c
+++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c
@@ -9,6 +9,7 @@
#include "rbtree_btf_fail__wrong_node_type.skel.h"
#include "rbtree_btf_fail__add_wrong_type.skel.h"
#include "rbtree_search.skel.h"
+#include "rbtree_search_kptr.skel.h"
static void test_rbtree_add_nodes(void)
{
@@ -193,3 +194,8 @@ void test_rbtree_search(void)
{
RUN_TESTS(rbtree_search);
}
+
+void test_rbtree_search_kptr(void)
+{
+ RUN_TESTS(rbtree_search_kptr);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
index cb8dd2f63296..71f5240cc5b7 100644
--- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
+++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -500,6 +500,39 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t,
(s64)x.a >= S32_MIN && (s64)x.b <= S32_MAX)
return range_intersection(x_t, x, y_cast);
+ if (y_t == U32 && x_t == U64) {
+ u64 xmin_swap, xmax_swap, xmin_lower32, xmax_lower32;
+
+ xmin_lower32 = x.a & 0xffffffff;
+ xmax_lower32 = x.b & 0xffffffff;
+ if (xmin_lower32 < y.a || xmin_lower32 > y.b) {
+ /* The lower 32 bits of umin64 are outside the u32
+ * range. Let's update umin64 to match the u32 range.
+ * We want to *increase* umin64 to the *minimum*
+ * value that matches the u32 range.
+ */
+ xmin_swap = swap_low32(x.a, y.a);
+ /* We must only ever increase the minimum, so if
+ * the new value is lower than before, we need to
+ * increase the upper 32 bits by 1.
+ */
+ if (xmin_swap < x.a)
+ xmin_swap += 0x100000000;
+ if (xmin_swap == x.b)
+ return range(x_t, x.b, x.b);
+ } else if (xmax_lower32 < y.a || xmax_lower32 > y.b) {
+ /* Same for the umax64, but we want to *decrease*
+ * umax64 to the *maximum* value that matches the u32
+ * range.
+ */
+ xmax_swap = swap_low32(x.b, y.b);
+ if (xmax_swap > x.b)
+ xmax_swap -= 0x100000000;
+ if (xmax_swap == x.a)
+ return range(x_t, x.a, x.a);
+ }
+ }
+
/* the case when new range knowledge, *y*, is a 32-bit subregister
* range, while previous range knowledge, *x*, is a full register
* 64-bit range, needs special treatment to take into account upper 32
@@ -1217,7 +1250,23 @@ static int parse_range_cmp_log(const char *log_buf, struct case_spec spec,
spec.compare_subregs ? "w0" : "r0",
spec.compare_subregs ? "w" : "r", specs[i].reg_idx);
- q = strstr(p, buf);
+ /*
+ * In the verifier log, look for lines like:
+ * 18: (bf) r0 = r6 ; R0=... R6=...
+ * Different verifier passes may also print
+ * 18: (bf) r0 = r6
+ * but never followed by a ';'.
+ */
+ q = p;
+ while ((q = strstr(q, buf)) != NULL) {
+ const char *s = q + strlen(buf);
+
+ while (*s == ' ' || *s == '\t')
+ s++;
+ if (*s == ';')
+ break;
+ q = s;
+ }
if (!q) {
*specs[i].state = (struct reg_state){.valid = false};
continue;
@@ -2129,6 +2178,8 @@ static struct subtest_case crafted_cases[] = {
{U64, S64, {0x7fffffff00000001ULL, 0xffffffff00000000ULL}, {0, 0}},
{U64, S64, {0, 0xffffffffULL}, {1, 1}},
{U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 0x7fffffff}},
+ {U64, S32, {0xfffffffe00000001, 0xffffffff00000000}, {S64_MIN, S64_MIN}},
+ {U64, U32, {0xfffffffe00000000, U64_MAX - 1}, {U64_MAX, U64_MAX}},
{U64, U32, {0, 0x100000000}, {0, 0}},
{U64, U32, {0xfffffffe, 0x300000000}, {0x80000000, 0x80000000}},
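
To make the new U64xU32 refinement concrete, consider the crafted case {0xfffffffe00000000, U64_MAX - 1} against the u32 point range derived from {U64_MAX, U64_MAX}, i.e. low 32 bits fixed at 0xffffffff. The low half of umin64 is 0x00000000, outside the u32 range, so the minimum gets raised. A standalone sketch of that step; swap_low32() is defined elsewhere in reg_bounds.c and is assumed here to keep x's high 32 bits while taking y's low 32 bits:

#include <stdio.h>
#include <stdint.h>

/* Assumed helper: high 32 bits of x, low 32 bits of y. */
static uint64_t swap_low32(uint64_t x, uint64_t y)
{
	return (x & 0xffffffff00000000ULL) | (uint32_t)y;
}

int main(void)
{
	uint64_t xmin = 0xfffffffe00000000ULL; /* low 32 bits: 0x00000000 */
	uint64_t y_min = 0xffffffffULL;        /* u32 range is a single point */
	uint64_t xmin_swap = swap_low32(xmin, y_min);

	/* If swapping produced a smaller value, bump the high half by 1. */
	if (xmin_swap < xmin)
		xmin_swap += 0x100000000ULL;

	/* Prints 0xfffffffeffffffff: the tightened umin64. */
	printf("tightened umin64 = 0x%016llx\n",
	       (unsigned long long)xmin_swap);
	return 0;
}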
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index dd3c757859f6..d2846579285f 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -1298,10 +1298,23 @@ static void test_sockmap_multi_channels(int sotype)
avail = wait_for_fionread(p1, expected, IO_TIMEOUT_SEC);
ASSERT_EQ(avail, expected, "ioctl(FIONREAD) full return");
- recvd = recv_timeout(p1, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
- if (!ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(p1)") ||
+ recvd = recv_timeout(p1, rcv, expected, MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(recvd, expected, "recv_timeout(p1)") ||
!ASSERT_OK(memcmp(buf, rcv, recvd), "data mismatch"))
goto end;
+
+ /* process the remaining UDP data if a second chunk is available */
+ expected = sizeof(buf) - expected;
+ if (expected) {
+ avail = wait_for_fionread(p1, expected, IO_TIMEOUT_SEC);
+ ASSERT_EQ(avail, expected, "second ioctl(FIONREAD) full return");
+
+ recvd = recv_timeout(p1, rcv, expected, MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(recvd, expected, "second recv_timeout(p1)") ||
+ !ASSERT_OK(memcmp(buf + sizeof(buf) - expected, rcv, recvd),
+ "second data mismatch"))
+ goto end;
+ }
end:
if (c0 >= 0)
close(c0);
diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
index 254fbfeab06a..bbe476f4c47d 100644
--- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -13,8 +13,9 @@ static struct {
const char *err_msg;
} spin_lock_fail_tests[] = {
{ "lock_id_kptr_preserve",
- "5: (bf) r1 = r0 ; R0=ptr_foo(id=2,ref_obj_id=2) "
- "R1=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
+ "[0-9]\\+: (bf) r1 = r0 ; R0=ptr_foo(id=2,ref_obj_id=2)"
+ " R1=ptr_foo(id=2,ref_obj_id=2) refs=2\n"
+ "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=ptr_ expected=percpu_ptr_" },
{ "lock_id_global_zero",
"; R1=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n"
diff --git a/tools/testing/selftests/bpf/prog_tests/summarization.c b/tools/testing/selftests/bpf/prog_tests/summarization.c
index 5dd6c120a838..6951786044ca 100644
--- a/tools/testing/selftests/bpf/prog_tests/summarization.c
+++ b/tools/testing/selftests/bpf/prog_tests/summarization.c
@@ -58,7 +58,7 @@ static void test_aux(const char *main_prog_name,
* this particular combination can be enabled.
*/
if (!strcmp("might_sleep", replacement) && err) {
- ASSERT_HAS_SUBSTR(log, "helper call might sleep in a non-sleepable prog", "error log");
+ ASSERT_HAS_SUBSTR(log, "sleepable helper bpf_copy_from_user#", "error log");
ASSERT_EQ(err, -EINVAL, "err");
test__skip();
goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_data.h b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
index 8342e2fe5260..1e5c67c78ffb 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_local_data.h
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#ifndef __TASK_LOCAL_DATA_H
#define __TASK_LOCAL_DATA_H
@@ -22,14 +22,17 @@
/*
* OPTIONS
*
- * Define the option before including the header
+ * Define the option before including the header. Using different options in
+ * different translation units is strongly discouraged.
*
* TLD_FREE_DATA_ON_THREAD_EXIT - Frees memory on thread exit automatically
*
* Thread-specific memory for storing TLD is allocated lazily on the first call to
* tld_get_data(). The thread that calls it must also call tld_free() on thread exit
* to prevent memory leak. Pthread will be included if the option is defined. A pthread
- * key will be registered with a destructor that calls tld_free().
+ * key will be registered with a destructor that calls tld_free(). The handler is
+ * registered only when the option is defined and TLD_DEFINE_KEY()/tld_create_key()
+ * is called in the same translation unit.
*
*
* TLD_DYN_DATA_SIZE - The maximum size of memory allocated for TLDs created dynamically
@@ -47,19 +50,16 @@
* TLD_NAME_LEN - The maximum length of the name of a TLD (default: 62)
*
* Setting TLD_NAME_LEN will affect the maximum number of TLDs a process can store,
- * TLD_MAX_DATA_CNT.
+ * TLD_MAX_DATA_CNT. Must be consistent with task_local_data.bpf.h.
*
*
- * TLD_DATA_USE_ALIGNED_ALLOC - Always use aligned_alloc() instead of malloc()
+ * TLD_DONT_ROUND_UP_DATA_SIZE - Don't round up memory size allocated for data if
+ * the memory allocator has low overhead aligned_alloc() implementation.
*
- * When allocating the memory for storing TLDs, we need to make sure there is a memory
- * region of the X bytes within a page. This is due to the limit posed by UPTR: memory
- * pinned to the kernel cannot exceed a page nor can it cross the page boundary. The
- * library normally calls malloc(2*X) given X bytes of total TLDs, and only uses
- * aligned_alloc(PAGE_SIZE, X) when X >= PAGE_SIZE / 2. This is to reduce memory wastage
- * as not all memory allocator can use the exact amount of memory requested to fulfill
- * aligned_alloc(). For example, some may round the size up to the alignment. Enable the
- * option to always use aligned_alloc() if the implementation has low memory overhead.
+ * For some memory allocators, aligned_alloc(alignment, size) does not require
+ * size to be an integral multiple of alignment and can fulfill the request
+ * without consuming round_up(size, alignment) bytes of memory. Enable this
+ * option to reduce memory usage with such allocators.
*/
#define TLD_PAGE_SIZE getpagesize()
@@ -68,7 +68,7 @@
#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1))
#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1)
-#define TLD_READ_ONCE(x) (*(volatile typeof(x) *)&(x))
+#define TLD_ROUND_UP_POWER_OF_TWO(x) (1UL << (sizeof(x) * 8 - __builtin_clzl(x - 1)))
#ifndef TLD_DYN_DATA_SIZE
#define TLD_DYN_DATA_SIZE 64
@@ -90,7 +90,7 @@ typedef struct {
struct tld_metadata {
char name[TLD_NAME_LEN];
- _Atomic __u16 size;
+ _Atomic __u16 size; /* size of tld_data_u->data */
};
struct tld_meta_u {
@@ -101,7 +101,7 @@ struct tld_meta_u {
struct tld_data_u {
__u64 start; /* offset of tld_data_u->data in a page */
- char data[];
+ char data[] __attribute__((aligned(8)));
};
struct tld_map_value {
@@ -111,15 +111,16 @@ struct tld_map_value {
struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak));
__thread struct tld_data_u *tld_data_p __attribute__((weak));
-__thread void *tld_data_alloc_p __attribute__((weak));
#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+bool _Atomic tld_pthread_key_init __attribute__((weak));
pthread_key_t tld_pthread_key __attribute__((weak));
static void tld_free(void);
static void __tld_thread_exit_handler(void *unused)
{
+ (void)unused;
tld_free();
}
#endif
@@ -143,20 +144,16 @@ static int __tld_init_meta_p(void)
goto out;
}
-#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
- pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler);
-#endif
out:
return err;
}
static int __tld_init_data_p(int map_fd)
{
- bool use_aligned_alloc = false;
struct tld_map_value map_val;
struct tld_data_u *data;
- void *data_alloc = NULL;
int err, tid_fd = -1;
+ size_t size, size_pot;
tid_fd = syscall(SYS_pidfd_open, sys_gettid(), O_EXCL);
if (tid_fd < 0) {
@@ -164,47 +161,37 @@ static int __tld_init_data_p(int map_fd)
goto out;
}
-#ifdef TLD_DATA_USE_ALIGNED_ALLOC
- use_aligned_alloc = true;
-#endif
-
/*
* tld_meta_p->size = TLD_DYN_DATA_SIZE +
* total size of TLDs defined via TLD_DEFINE_KEY()
*/
- data_alloc = (use_aligned_alloc || tld_meta_p->size * 2 >= TLD_PAGE_SIZE) ?
- aligned_alloc(TLD_PAGE_SIZE, tld_meta_p->size) :
- malloc(tld_meta_p->size * 2);
- if (!data_alloc) {
+ size = tld_meta_p->size + sizeof(struct tld_data_u);
+ size_pot = TLD_ROUND_UP_POWER_OF_TWO(size);
+#ifdef TLD_DONT_ROUND_UP_DATA_SIZE
+ data = (struct tld_data_u *)aligned_alloc(size_pot, size);
+#else
+ data = (struct tld_data_u *)aligned_alloc(size_pot, size_pot);
+#endif
+ if (!data) {
err = -ENOMEM;
goto out;
}
/*
* Always pass a page-aligned address to UPTR since the size of tld_map_value::data
- * is a page in BTF. If data_alloc spans across two pages, use the page that contains large
- * enough memory.
+ * is a page in BTF.
*/
- if (TLD_PAGE_SIZE - (~TLD_PAGE_MASK & (intptr_t)data_alloc) >= tld_meta_p->size) {
- map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data_alloc);
- data = data_alloc;
- data->start = (~TLD_PAGE_MASK & (intptr_t)data_alloc) +
- offsetof(struct tld_data_u, data);
- } else {
- map_val.data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE));
- data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE));
- data->start = offsetof(struct tld_data_u, data);
- }
- map_val.meta = TLD_READ_ONCE(tld_meta_p);
+ map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data);
+ data->start = (~TLD_PAGE_MASK & (intptr_t)data) + sizeof(struct tld_data_u);
+ map_val.meta = tld_meta_p;
err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0);
if (err) {
- free(data_alloc);
+ free(data);
goto out;
}
tld_data_p = data;
- tld_data_alloc_p = data_alloc;
#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
pthread_setspecific(tld_pthread_key, (void *)1);
#endif
@@ -217,15 +204,24 @@ out:
static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data)
{
int err, i, sz, off = 0;
+ bool uninit = false;
__u16 cnt;
- if (!TLD_READ_ONCE(tld_meta_p)) {
+ if (!tld_meta_p) {
err = __tld_init_meta_p();
if (err)
- return (tld_key_t){err};
+ return (tld_key_t){(__s16)err};
}
- for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+ if (atomic_compare_exchange_strong(&tld_pthread_key_init, &uninit, true)) {
+ err = pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler);
+ if (err)
+ return (tld_key_t){(__s16)err};
+ }
+#endif
+
+ for (i = 0; i < (int)TLD_MAX_DATA_CNT; i++) {
retry:
cnt = atomic_load(&tld_meta_p->cnt);
if (i < cnt) {
@@ -290,7 +286,7 @@ retry:
#define TLD_DEFINE_KEY(key, name, size) \
tld_key_t key; \
\
-__attribute__((constructor)) \
+__attribute__((constructor(101))) \
void __tld_define_key_##key(void) \
{ \
key = __tld_create_key(name, size, false); \
@@ -350,7 +346,7 @@ static inline int tld_key_err_or_zero(tld_key_t key)
__attribute__((unused))
static void *tld_get_data(int map_fd, tld_key_t key)
{
- if (!TLD_READ_ONCE(tld_meta_p))
+ if (!tld_meta_p)
return NULL;
/* tld_data_p is allocated on the first invocation of tld_get_data() */
@@ -367,14 +363,14 @@ static void *tld_get_data(int map_fd, tld_key_t key)
*
* Users must call tld_free() on thread exit to prevent memory leak. Alternatively,
* define TLD_FREE_DATA_ON_THREAD_EXIT and a thread exit handler will be registered
- * to free the memory automatically.
+ * to free the memory automatically. Calling tld_free() before thread exit is
+ * undefined behavior, which may lead to a null-pointer dereference.
*/
__attribute__((unused))
static void tld_free(void)
{
- if (tld_data_alloc_p) {
- free(tld_data_alloc_p);
- tld_data_alloc_p = NULL;
+ if (tld_data_p) {
+ free(tld_data_p);
tld_data_p = NULL;
}
}
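
The reworked allocation relies on one invariant: memory pinned via UPTR must fit within a single page, and an allocation aligned to the next power of two >= its size can never straddle a page boundary (for sizes up to a page). A standalone sketch of the arithmetic with a hypothetical size; the round-up expression mirrors TLD_ROUND_UP_POWER_OF_TWO:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* Next power of two >= x, for x > 1 (mirrors TLD_ROUND_UP_POWER_OF_TWO). */
#define ROUND_UP_POT(x) \
	(1UL << (sizeof(unsigned long) * 8 - __builtin_clzl((x) - 1)))

int main(void)
{
	size_t size = 200;                 /* hypothetical TLD area size */
	size_t pot = ROUND_UP_POT(size);   /* 256 */
	char *p = aligned_alloc(pot, pot); /* the default (rounded-up) variant */

	if (!p)
		return 1;
	/* p's offset within its 4096-byte page is a multiple of 256, so
	 * [p, p + 256) always ends at or before the page boundary and the
	 * whole region can be pinned through a single UPTR page.
	 */
	printf("offset in page: %zu\n", (size_t)((uintptr_t)p & 4095));
	free(p);
	return 0;
}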
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
index 7bee33797c71..1b26c12f255a 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -25,24 +25,30 @@
static void test_sys_enter_exit(void)
{
struct task_local_storage *skel;
+ pid_t pid = sys_gettid();
int err;
skel = task_local_storage__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
- skel->bss->target_pid = sys_gettid();
-
err = task_local_storage__attach(skel);
if (!ASSERT_OK(err, "skel_attach"))
goto out;
+ /* Set target_pid after attach so that syscalls made during
+ * attach are not counted.
+ */
+ skel->bss->target_pid = pid;
+
sys_gettid();
sys_gettid();
- /* 3x syscalls: 1x attach and 2x gettid */
- ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt");
- ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt");
+ skel->bss->target_pid = 0;
+
+ /* 2x gettid syscalls */
+ ASSERT_EQ(skel->bss->enter_cnt, 2, "enter_cnt");
+ ASSERT_EQ(skel->bss->exit_cnt, 2, "exit_cnt");
ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt");
out:
task_local_storage__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c
index de22734abc4d..40d38280c091 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c
@@ -131,8 +131,10 @@ static bool get_smc_nl_family_id(void)
goto fail;
ret = recv(fd, &msg, sizeof(msg), 0);
- if (!ASSERT_FALSE(msg.n.nlmsg_type == NLMSG_ERROR || ret < 0 ||
- !NLMSG_OK(&msg.n, ret), "nl_family response"))
+ if (msg.n.nlmsg_type == NLMSG_ERROR)
+ goto fail;
+ if (!ASSERT_FALSE(ret < 0 || !NLMSG_OK(&msg.n, ret),
+ "nl_family response"))
goto fail;
nl = (struct nlattr *)GENLMSG_DATA(&msg);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
index e905cbaf6b3d..500446808908 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -18,6 +18,7 @@
#include "test_global_func15.skel.h"
#include "test_global_func16.skel.h"
#include "test_global_func17.skel.h"
+#include "test_global_func_deep_stack.skel.h"
#include "test_global_func_ctx_args.skel.h"
#include "bpf/libbpf_internal.h"
@@ -155,6 +156,7 @@ void test_test_global_funcs(void)
RUN_TESTS(test_global_func15);
RUN_TESTS(test_global_func16);
RUN_TESTS(test_global_func17);
+ RUN_TESTS(test_global_func_deep_stack);
RUN_TESTS(test_global_func_ctx_args);
if (test__start_subtest("ctx_arg_rewrite"))
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_args.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_args.c
new file mode 100644
index 000000000000..0f321e889862
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_args.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "struct_ops_multi_args.skel.h"
+
+void test_struct_ops_multi_args(void)
+{
+ RUN_TESTS(struct_ops_multi_args);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
index 9556ad3d986f..e219ff506b56 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
@@ -26,7 +26,7 @@ TLD_DEFINE_KEY(value0_key, "value0", sizeof(int));
*/
static void reset_tld(void)
{
- if (TLD_READ_ONCE(tld_meta_p)) {
+ if (tld_meta_p) {
/* Remove TLDs created by tld_create_key() */
tld_meta_p->cnt = 1;
tld_meta_p->size = TLD_DYN_DATA_SIZE;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
index 7fc4d7dd70ef..1aa7c9463980 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
@@ -168,7 +168,7 @@ static int check_server_rx_data(struct subtest_cfg *cfg,
static struct connection *connect_client_to_server(struct subtest_cfg *cfg)
{
- struct network_helper_opts opts = {.timeout_ms = 500};
+ struct network_helper_opts opts = {.timeout_ms = 1000};
int family = cfg->ipproto == 6 ? AF_INET6 : AF_INET;
struct connection *conn = NULL;
int client_fd, server_fd;
@@ -206,18 +206,13 @@ static void disconnect_client_from_server(struct subtest_cfg *cfg,
free(conn);
}
-static int send_and_test_data(struct subtest_cfg *cfg, bool must_succeed)
+static int send_and_test_data(struct subtest_cfg *cfg)
{
struct connection *conn;
int err, res = -1;
conn = connect_client_to_server(cfg);
- if (!must_succeed && !ASSERT_ERR_PTR(conn, "connection that must fail"))
- goto end;
- else if (!must_succeed)
- return 0;
-
- if (!ASSERT_OK_PTR(conn, "connection that must succeed"))
+ if (!ASSERT_OK_PTR(conn, "connect to server"))
return -1;
err = send(conn->client_fd, tx_buffer, DEFAULT_TEST_DATA_SIZE, 0);
@@ -391,7 +386,7 @@ static void run_test(struct subtest_cfg *cfg)
goto fail;
/* Basic communication must work */
- if (!ASSERT_OK(send_and_test_data(cfg, true), "connect without any encap"))
+ if (!ASSERT_OK(send_and_test_data(cfg), "connect without any encap"))
goto fail;
/* Attach encapsulation program to client */
@@ -403,7 +398,7 @@ static void run_test(struct subtest_cfg *cfg)
if (!ASSERT_OK(configure_kernel_decapsulation(cfg),
"configure kernel decapsulation"))
goto fail;
- if (!ASSERT_OK(send_and_test_data(cfg, true),
+ if (!ASSERT_OK(send_and_test_data(cfg),
"connect with encap prog and kern decap"))
goto fail;
}
@@ -411,7 +406,7 @@ static void run_test(struct subtest_cfg *cfg)
/* Replace kernel decapsulation with BPF decapsulation, test must pass */
if (!ASSERT_OK(configure_ebpf_decapsulation(cfg), "configure ebpf decapsulation"))
goto fail;
- ASSERT_OK(send_and_test_data(cfg, true), "connect with encap and decap progs");
+ ASSERT_OK(send_and_test_data(cfg), "connect with encap and decap progs");
fail:
close_netns(nstoken);
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index 6cd7349d4a2b..7321850db75f 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -30,16 +30,14 @@ static struct bpf_program *load_prog(char *file, char *name, struct inst *inst)
return prog;
}
-/* TODO: use different target function to run in concurrent mode */
-void serial_test_trampoline_count(void)
+void test_trampoline_count(void)
{
char *file = "test_trampoline_count.bpf.o";
char *const progs[] = { "fentry_test", "fmod_ret_test", "fexit_test" };
- int bpf_max_tramp_links, err, i, prog_fd;
+ int bpf_max_tramp_links, i;
struct bpf_program *prog;
struct bpf_link *link;
struct inst *inst;
- LIBBPF_OPTS(bpf_test_run_opts, opts);
bpf_max_tramp_links = get_bpf_max_tramp_links();
if (!ASSERT_GE(bpf_max_tramp_links, 1, "bpf_max_tramp_links"))
@@ -80,16 +78,7 @@ void serial_test_trampoline_count(void)
goto cleanup;
/* and finally execute the probe */
- prog_fd = bpf_program__fd(prog);
- if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd"))
- goto cleanup;
-
- err = bpf_prog_test_run_opts(prog_fd, &opts);
- if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
- goto cleanup;
-
- ASSERT_EQ(opts.retval & 0xffff, 33, "bpf_modify_return_test.result");
- ASSERT_EQ(opts.retval >> 16, 2, "bpf_modify_return_test.side_effect");
+ ASSERT_OK(trigger_module_test_read(256), "trigger_module_test_read");
cleanup:
for (; i >= 0; i--) {
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index f4be5269fa90..69759b27794d 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -247,6 +247,96 @@ cleanup:
#undef TRIGGER
}
+#ifdef __x86_64__
+extern void usdt_1(void);
+extern void usdt_2(void);
+
+static unsigned char nop1[1] = { 0x90 };
+static unsigned char nop1_nop5_combo[6] = { 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 };
+
+static void *find_instr(void *fn, unsigned char *instr, size_t cnt)
+{
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ if (!memcmp(instr, fn + i, cnt))
+ return fn + i;
+ }
+ return NULL;
+}
+
+static void subtest_optimized_attach(void)
+{
+ struct test_usdt *skel;
+ __u8 *addr_1, *addr_2;
+
+ /* usdt_1 USDT probe has single nop instruction */
+ addr_1 = find_instr(usdt_1, nop1_nop5_combo, 6);
+ if (!ASSERT_NULL(addr_1, "usdt_1_find_nop1_nop5_combo"))
+ return;
+
+ addr_1 = find_instr(usdt_1, nop1, 1);
+ if (!ASSERT_OK_PTR(addr_1, "usdt_1_find_nop1"))
+ return;
+
+ /* the usdt_2 USDT probe has the nop,nop5 instruction combo */
+ addr_2 = find_instr(usdt_2, nop1_nop5_combo, 6);
+ if (!ASSERT_OK_PTR(addr_2, "usdt_2_find_nop1_nop5_combo"))
+ return;
+
+ skel = test_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_usdt__open_and_load"))
+ return;
+
+ skel->bss->expected_ip = (unsigned long) addr_1;
+
+ /*
+ * Attach the program on top of usdt_1, which is a single-nop probe,
+ * so the probe won't get optimized.
+ */
+ skel->links.usdt_executed = bpf_program__attach_usdt(skel->progs.usdt_executed,
+ 0 /*self*/, "/proc/self/exe",
+ "optimized_attach", "usdt_1", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_executed, "bpf_program__attach_usdt"))
+ goto cleanup;
+
+ usdt_1();
+ usdt_1();
+
+ /* int3 is on addr_1 address */
+ ASSERT_EQ(*addr_1, 0xcc, "int3");
+ ASSERT_EQ(skel->bss->executed, 2, "executed");
+
+ bpf_link__destroy(skel->links.usdt_executed);
+
+ /* we expect the nop5 ip */
+ skel->bss->expected_ip = (unsigned long) addr_2 + 1;
+
+ /*
+ * Attach the program on top of usdt_2, which is a probe defined on top
+ * of the nop1,nop5 combo, so the probe gets optimized to use the nop5.
+ */
+ skel->links.usdt_executed = bpf_program__attach_usdt(skel->progs.usdt_executed,
+ 0 /*self*/, "/proc/self/exe",
+ "optimized_attach", "usdt_2", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_executed, "bpf_program__attach_usdt"))
+ goto cleanup;
+
+ usdt_2();
+ usdt_2();
+
+ /* nop stays on addr_2 address */
+ ASSERT_EQ(*addr_2, 0x90, "nop");
+
+ /* call is on addr_2 + 1 address */
+ ASSERT_EQ(*(addr_2 + 1), 0xe8, "call");
+ ASSERT_EQ(skel->bss->executed, 4, "executed");
+
+cleanup:
+ test_usdt__destroy(skel);
+}
+#endif
+
unsigned short test_usdt_100_semaphore SEC(".probes");
unsigned short test_usdt_300_semaphore SEC(".probes");
unsigned short test_usdt_400_semaphore SEC(".probes");
@@ -516,6 +606,8 @@ void test_usdt(void)
#ifdef __x86_64__
if (test__start_subtest("basic_optimized"))
subtest_basic_usdt(true);
+ if (test__start_subtest("optimized_attach"))
+ subtest_optimized_attach();
#endif
if (test__start_subtest("multispec"))
subtest_multispec_usdt();
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 302286a80154..a96b25ebff23 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -3,6 +3,7 @@
#include <test_progs.h>
#include "cap_helpers.h"
+#include "verifier_align.skel.h"
#include "verifier_and.skel.h"
#include "verifier_arena.skel.h"
#include "verifier_arena_large.skel.h"
@@ -53,6 +54,7 @@
#include "verifier_leak_ptr.skel.h"
#include "verifier_linked_scalars.skel.h"
#include "verifier_live_stack.skel.h"
+#include "verifier_liveness_exp.skel.h"
#include "verifier_load_acquire.skel.h"
#include "verifier_loops1.skel.h"
#include "verifier_lwt.skel.h"
@@ -92,6 +94,7 @@
#include "verifier_stack_ptr.skel.h"
#include "verifier_store_release.skel.h"
#include "verifier_subprog_precision.skel.h"
+#include "verifier_subprog_topo.skel.h"
#include "verifier_subreg.skel.h"
#include "verifier_tailcall.skel.h"
#include "verifier_tailcall_jit.skel.h"
@@ -114,6 +117,7 @@
#include "verifier_lsm.skel.h"
#include "verifier_jit_inline.skel.h"
#include "irq.skel.h"
+#include "verifier_ctx_ptr_param.skel.h"
#define MAX_ENTRIES 11
@@ -149,6 +153,7 @@ static void run_tests_aux(const char *skel_name,
#define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL)
+void test_verifier_align(void) { RUN(verifier_align); }
void test_verifier_and(void) { RUN(verifier_and); }
void test_verifier_arena(void) { RUN(verifier_arena); }
void test_verifier_arena_large(void) { RUN(verifier_arena_large); }
@@ -171,7 +176,7 @@ void test_verifier_cgroup_skb(void) { RUN(verifier_cgroup_skb); }
void test_verifier_cgroup_storage(void) { RUN(verifier_cgroup_storage); }
void test_verifier_const(void) { RUN(verifier_const); }
void test_verifier_const_or(void) { RUN(verifier_const_or); }
-void test_verifier_ctx(void) { RUN(verifier_ctx); }
+void test_verifier_ctx(void) { RUN_TESTS(verifier_ctx); }
void test_verifier_ctx_sk_msg(void) { RUN(verifier_ctx_sk_msg); }
void test_verifier_d_path(void) { RUN(verifier_d_path); }
void test_verifier_default_trusted_ptr(void) { RUN_TESTS(verifier_default_trusted_ptr); }
@@ -198,6 +203,7 @@ void test_verifier_ldsx(void) { RUN(verifier_ldsx); }
void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); }
void test_verifier_linked_scalars(void) { RUN(verifier_linked_scalars); }
void test_verifier_live_stack(void) { RUN(verifier_live_stack); }
+void test_verifier_liveness_exp(void) { RUN(verifier_liveness_exp); }
void test_verifier_loops1(void) { RUN(verifier_loops1); }
void test_verifier_lwt(void) { RUN(verifier_lwt); }
void test_verifier_map_in_map(void) { RUN(verifier_map_in_map); }
@@ -235,6 +241,7 @@ void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); }
void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); }
void test_verifier_store_release(void) { RUN(verifier_store_release); }
void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); }
+void test_verifier_subprog_topo(void) { RUN(verifier_subprog_topo); }
void test_verifier_subreg(void) { RUN(verifier_subreg); }
void test_verifier_tailcall(void) { RUN(verifier_tailcall); }
void test_verifier_tailcall_jit(void) { RUN(verifier_tailcall_jit); }
@@ -257,6 +264,7 @@ void test_verifier_lsm(void) { RUN(verifier_lsm); }
void test_irq(void) { RUN(irq); }
void test_verifier_mtu(void) { RUN(verifier_mtu); }
void test_verifier_jit_inline(void) { RUN(verifier_jit_inline); }
+void test_verifier_ctx_ptr_param(void) { RUN(verifier_ctx_ptr_param); }
static int init_test_val_map(struct bpf_object *obj, char *map_name)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_log.c b/tools/testing/selftests/bpf/prog_tests/verifier_log.c
index aaa2854974c0..c01c0114af1b 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier_log.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier_log.c
@@ -25,10 +25,10 @@ static bool check_prog_load(int prog_fd, bool expect_err, const char *tag)
static struct {
/* strategically placed before others to avoid accidental modification by kernel */
- char filler[1024];
- char buf[1024];
+ char filler[16384];
+ char buf[16384];
/* strategically placed after buf[] to catch more accidental corruptions */
- char reference[1024];
+ char reference[16384];
} logs;
static const struct bpf_insn *insns;
static size_t insn_cnt;
diff --git a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
index c8ec0d0368e4..25ca6045fea3 100644
--- a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
+++ b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
@@ -8,7 +8,6 @@
long create_errs = 0;
long create_cnts = 0;
-long kmalloc_cnts = 0;
__u32 bench_pid = 0;
struct storage {
@@ -29,16 +28,6 @@ struct {
__type(value, struct storage);
} task_storage_map SEC(".maps");
-SEC("raw_tp/kmalloc")
-int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
- size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags,
- int node)
-{
- __sync_fetch_and_add(&kmalloc_cnts, 1);
-
- return 0;
-}
-
SEC("tp_btf/sched_process_fork")
int BPF_PROG(sched_process_fork, struct task_struct *parent, struct task_struct *child)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_gotox.c b/tools/testing/selftests/bpf/progs/bpf_gotox.c
index 216c71b94c64..99b3c9c9a01c 100644
--- a/tools/testing/selftests/bpf/progs/bpf_gotox.c
+++ b/tools/testing/selftests/bpf/progs/bpf_gotox.c
@@ -421,6 +421,36 @@ int use_nonstatic_global_other_sec(void *ctx)
return __nonstatic_global(in_user);
}
+SEC("syscall")
+int load_with_nonzero_offset(struct simple_ctx *ctx)
+{
+ void *jj[] = { &&l1, &&l2, &&l3 };
+
+ /*
+ * This makes LLVM generate a load from the jj map with an offset:
+ * r1 = 0x0 ll
+ * r1 = *(u64 *)(r1 + 0x10)
+ * gotox r1
+ */
+ if (ctx->x == 2)
+ goto *jj[ctx->x];
+
+ ret_user = 1;
+ return 1;
+
+l1:
+ /* never reached, but leave it here to outsmart LLVM */
+ ret_user = 0;
+ return 0;
+l2:
+ /* never reached, but leave it here to outsmart LLVM */
+ ret_user = 3;
+ return 3;
+l3:
+ ret_user = 5;
+ return 5;
+}
+
#else /* __BPF_FEATURE_GOTOX */
#define SKIP_TEST(TEST_NAME) \
@@ -442,6 +472,7 @@ SKIP_TEST(use_static_global_other_sec);
SKIP_TEST(use_nonstatic_global1);
SKIP_TEST(use_nonstatic_global2);
SKIP_TEST(use_nonstatic_global_other_sec);
+SKIP_TEST(load_with_nonzero_offset);
#endif /* __BPF_FEATURE_GOTOX */
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index c9bfbe1bafc1..dcd78a3a9052 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -103,8 +103,8 @@
* - TEST_DATA_LEN
* __retval_unpriv Same, but load program in unprivileged mode.
*
- * __description Text to be used instead of a program name for display
- * and filtering purposes.
+ * __description Text to be used for display and as an additional filter
+ * alias, while the original program name stays matchable.
*
* __log_level Log level to use for the program, numeric value expected.
*
@@ -130,39 +130,41 @@
* __linear_size Specify the size of the linear area of non-linear skbs, or
* 0 for linear skbs.
*/
-#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg)))
-#define __not_msg(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg=" XSTR(__COUNTER__) "=" msg)))
-#define __xlated(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated=" XSTR(__COUNTER__) "=" msg)))
-#define __jited(msg) __attribute__((btf_decl_tag("comment:test_jited=" XSTR(__COUNTER__) "=" msg)))
-#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure")))
-#define __success __attribute__((btf_decl_tag("comment:test_expect_success")))
-#define __description(desc) __attribute__((btf_decl_tag("comment:test_description=" desc)))
-#define __msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_msg_unpriv=" XSTR(__COUNTER__) "=" msg)))
-#define __not_msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg_unpriv=" XSTR(__COUNTER__) "=" msg)))
-#define __xlated_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated_unpriv=" XSTR(__COUNTER__) "=" msg)))
-#define __jited_unpriv(msg) __attribute__((btf_decl_tag("comment:test_jited=" XSTR(__COUNTER__) "=" msg)))
-#define __failure_unpriv __attribute__((btf_decl_tag("comment:test_expect_failure_unpriv")))
-#define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv")))
-#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
-#define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag)))
-#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="XSTR(val))))
-#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="XSTR(val))))
-#define __auxiliary __attribute__((btf_decl_tag("comment:test_auxiliary")))
-#define __auxiliary_unpriv __attribute__((btf_decl_tag("comment:test_auxiliary_unpriv")))
-#define __btf_path(path) __attribute__((btf_decl_tag("comment:test_btf_path=" path)))
-#define __arch(arch) __attribute__((btf_decl_tag("comment:test_arch=" arch)))
+#define __test_tag(tag) __attribute__((btf_decl_tag("comment:" XSTR(__COUNTER__) ":" tag)))
+
+#define __msg(msg) __test_tag("test_expect_msg=" msg)
+#define __not_msg(msg) __test_tag("test_expect_not_msg=" msg)
+#define __xlated(msg) __test_tag("test_expect_xlated=" msg)
+#define __jited(msg) __test_tag("test_jited=" msg)
+#define __failure __test_tag("test_expect_failure")
+#define __success __test_tag("test_expect_success")
+#define __description(desc) __test_tag("test_description=" desc)
+#define __msg_unpriv(msg) __test_tag("test_expect_msg_unpriv=" msg)
+#define __not_msg_unpriv(msg) __test_tag("test_expect_not_msg_unpriv=" msg)
+#define __xlated_unpriv(msg) __test_tag("test_expect_xlated_unpriv=" msg)
+#define __jited_unpriv(msg) __test_tag("test_jited_unpriv=" msg)
+#define __failure_unpriv __test_tag("test_expect_failure_unpriv")
+#define __success_unpriv __test_tag("test_expect_success_unpriv")
+#define __log_level(lvl) __test_tag("test_log_level=" #lvl)
+#define __flag(flag) __test_tag("test_prog_flags=" #flag)
+#define __retval(val) __test_tag("test_retval=" XSTR(val))
+#define __retval_unpriv(val) __test_tag("test_retval_unpriv=" XSTR(val))
+#define __auxiliary __test_tag("test_auxiliary")
+#define __auxiliary_unpriv __test_tag("test_auxiliary_unpriv")
+#define __btf_path(path) __test_tag("test_btf_path=" path)
+#define __arch(arch) __test_tag("test_arch=" arch)
#define __arch_x86_64 __arch("X86_64")
#define __arch_arm64 __arch("ARM64")
#define __arch_riscv64 __arch("RISCV64")
#define __arch_s390x __arch("s390x")
-#define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps))))
-#define __load_if_JITed() __attribute__((btf_decl_tag("comment:load_mode=jited")))
-#define __load_if_no_JITed() __attribute__((btf_decl_tag("comment:load_mode=no_jited")))
-#define __stderr(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr=" XSTR(__COUNTER__) "=" msg)))
-#define __stderr_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr_unpriv=" XSTR(__COUNTER__) "=" msg)))
-#define __stdout(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout=" XSTR(__COUNTER__) "=" msg)))
-#define __stdout_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout_unpriv=" XSTR(__COUNTER__) "=" msg)))
-#define __linear_size(sz) __attribute__((btf_decl_tag("comment:test_linear_size=" XSTR(sz))))
+#define __caps_unpriv(caps) __test_tag("test_caps_unpriv=" EXPAND_QUOTE(caps))
+#define __load_if_JITed() __test_tag("load_mode=jited")
+#define __load_if_no_JITed() __test_tag("load_mode=no_jited")
+#define __stderr(msg) __test_tag("test_expect_stderr=" msg)
+#define __stderr_unpriv(msg) __test_tag("test_expect_stderr_unpriv=" msg)
+#define __stdout(msg) __test_tag("test_expect_stdout=" msg)
+#define __stdout_unpriv(msg) __test_tag("test_expect_stdout_unpriv=" msg)
+#define __linear_size(sz) __test_tag("test_linear_size=" XSTR(sz))
/* Define common capabilities tested using __caps_unpriv */
#define CAP_NET_ADMIN 12
@@ -188,6 +190,10 @@
#define POINTER_VALUE 0xbadcafe
#define TEST_DATA_LEN 64
+#ifndef __aligned
+#define __aligned(x) __attribute__((aligned(x)))
+#endif
+
#ifndef __used
#define __used __attribute__((used))
#endif
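+
+/*
+ * Illustrative use of the decl-tag macros above; the program body and the
+ * expected bits below are hypothetical, chosen only to show the shape:
+ *
+ *	SEC("?tc")
+ *	__description("demo: trivial success case")
+ *	__log_level(2)
+ *	__success __retval(0)
+ *	int demo_prog(struct __sk_buff *skb)
+ *	{
+ *		return 0;
+ *	}
+ */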
diff --git a/tools/testing/selftests/bpf/progs/bpf_smc.c b/tools/testing/selftests/bpf/progs/bpf_smc.c
index 70d8b08f5914..6263a45bf006 100644
--- a/tools/testing/selftests/bpf/progs/bpf_smc.c
+++ b/tools/testing/selftests/bpf/progs/bpf_smc.c
@@ -8,6 +8,10 @@
char _license[] SEC("license") = "GPL";
+#ifndef SMC_HS_CTRL_NAME_MAX
+#define SMC_HS_CTRL_NAME_MAX 16
+#endif
+
enum {
BPF_SMC_LISTEN = 10,
};
@@ -18,6 +22,20 @@ struct smc_sock___local {
bool use_fallback;
} __attribute__((preserve_access_index));
+struct smc_hs_ctrl___local {
+ char name[SMC_HS_CTRL_NAME_MAX];
+ int (*syn_option)(struct tcp_sock *);
+ int (*synack_option)(const struct tcp_sock *, struct inet_request_sock *);
+} __attribute__((preserve_access_index));
+
+struct netns_smc___local {
+ struct smc_hs_ctrl___local *hs_ctrl;
+} __attribute__((preserve_access_index));
+
+struct net___local {
+ struct netns_smc___local smc;
+} __attribute__((preserve_access_index));
+
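+/*
+ * The ___local suffix marks these types as CO-RE "flavors": libbpf strips
+ * the triple underscore and everything after it when matching against
+ * kernel BTF, so the definitions only need the fields this program
+ * actually touches.
+ */
+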
int smc_cnt = 0;
int fallback_cnt = 0;
@@ -88,8 +106,14 @@ int BPF_PROG(smc_run, int family, int type, int protocol)
task = bpf_get_current_task_btf();
 	/* Prevent this from affecting other tests */
- if (!task || !task->nsproxy->net_ns->smc.hs_ctrl)
+ if (!task) {
return protocol;
+ } else {
+ struct net___local *net = (struct net___local *)task->nsproxy->net_ns;
+
+ if (!bpf_core_field_exists(struct net___local, smc) || !net->smc.hs_ctrl)
+ return protocol;
+ }
return IPPROTO_SMC;
}
@@ -110,7 +134,7 @@ int BPF_PROG(bpf_smc_set_tcp_option, struct tcp_sock *tp)
}
SEC(".struct_ops")
-struct smc_hs_ctrl linkcheck = {
+struct smc_hs_ctrl___local linkcheck = {
.name = "linkcheck",
.syn_option = (void *)bpf_smc_set_tcp_option,
.synack_option = (void *)bpf_smc_set_tcp_option_cond,
diff --git a/tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c b/tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c
index 59fb70a3cc50..06a385c9d85b 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c
@@ -26,12 +26,18 @@ int cgroup_memcg_query(struct bpf_iter__cgroup *ctx)
bpf_mem_cgroup_flush_stats(memcg);
- memcg_query.nr_anon_mapped = bpf_mem_cgroup_page_state(memcg, NR_ANON_MAPPED);
- memcg_query.nr_shmem = bpf_mem_cgroup_page_state(memcg, NR_SHMEM);
- memcg_query.nr_file_pages = bpf_mem_cgroup_page_state(memcg, NR_FILE_PAGES);
- memcg_query.nr_file_mapped = bpf_mem_cgroup_page_state(memcg, NR_FILE_MAPPED);
- memcg_query.memcg_kmem = bpf_mem_cgroup_page_state(memcg, MEMCG_KMEM);
- memcg_query.pgfault = bpf_mem_cgroup_vm_events(memcg, PGFAULT);
+ memcg_query.nr_anon_mapped = bpf_mem_cgroup_page_state(
+ memcg,
+ bpf_core_enum_value(enum node_stat_item, NR_ANON_MAPPED));
+ memcg_query.nr_shmem = bpf_mem_cgroup_page_state(
+ memcg, bpf_core_enum_value(enum node_stat_item, NR_SHMEM));
+ memcg_query.nr_file_pages = bpf_mem_cgroup_page_state(
+ memcg, bpf_core_enum_value(enum node_stat_item, NR_FILE_PAGES));
+ memcg_query.nr_file_mapped = bpf_mem_cgroup_page_state(
+ memcg,
+ bpf_core_enum_value(enum node_stat_item, NR_FILE_MAPPED));
+ memcg_query.pgfault = bpf_mem_cgroup_vm_events(
+ memcg, bpf_core_enum_value(enum vm_event_item, PGFAULT));
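+
+	/*
+	 * bpf_core_enum_value() emits a CO-RE relocation: each enumerator is
+	 * resolved against the running kernel's BTF at load time rather than
+	 * baked in from the build-time vmlinux.h, so the test survives enum
+	 * reordering across kernel versions.
+	 */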
bpf_put_mem_cgroup(memcg);
diff --git a/tools/testing/selftests/bpf/progs/cgroup_storage.c b/tools/testing/selftests/bpf/progs/cgroup_storage.c
index db1e4d2d3281..59da1d95e5b9 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_storage.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_storage.c
@@ -21,4 +21,47 @@ int bpf_prog(struct __sk_buff *skb)
return (*counter & 1);
}
+/* Maps for OOB test */
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, __u32); /* 4-byte value - not 8-byte aligned */
+} cgroup_storage_oob SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32); /* 4-byte value - same as cgroup storage */
+} lru_map SEC(".maps");
+
+SEC("cgroup/sock_create")
+int trigger_oob(struct bpf_sock *sk)
+{
+ __u32 key = 0;
+ __u32 *cgroup_val;
+ __u32 value = 0x12345678;
+
+ /* Get cgroup storage value */
+ cgroup_val = bpf_get_local_storage(&cgroup_storage_oob, 0);
+ if (!cgroup_val)
+ return 0;
+
+ /* Initialize cgroup storage */
+ *cgroup_val = value;
+
+ /* This triggers the OOB read:
+ * bpf_map_update_elem() -> htab_map_update_elem() ->
+ * pcpu_init_value() -> copy_map_value_long() ->
+ * bpf_obj_memcpy(..., long_memcpy=true) ->
+ * bpf_long_memcpy(dst, src, round_up(4, 8))
+ *
+ * The copy size is rounded up to 8 bytes, but cgroup_val
+ * points to a 4-byte buffer, causing a 4-byte OOB read.
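+	 *
+	 * Concretely: round_up(4, 8) == ((4 + 8 - 1) & ~(8 - 1)) == 8, so
+	 * 8 bytes are copied out of a 4-byte object.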
+ */
+ bpf_map_update_elem(&lru_map, &key, cgroup_val, BPF_ANY);
+
+ return 1;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/clone_attach_btf_id.c b/tools/testing/selftests/bpf/progs/clone_attach_btf_id.c
new file mode 100644
index 000000000000..0ffa3ec3e1a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/clone_attach_btf_id.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(fentry_handler, int a)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c
index 27a632dd382e..d5be6a559d6a 100644
--- a/tools/testing/selftests/bpf/progs/connect_force_port4.c
+++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c
@@ -14,6 +14,8 @@
char _license[] SEC("license") = "GPL";
+__u16 port = 0;
+
struct svc_addr {
__be32 addr;
__be16 port;
@@ -40,7 +42,7 @@ int connect4(struct bpf_sock_addr *ctx)
if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
return 0;
- /* Rewire service 1.2.3.4:60000 to backend 127.0.0.1:60123. */
+ /* Rewire service 1.2.3.4:60000 to backend 127.0.0.1:port. */
if (ctx->user_port == bpf_htons(60000)) {
orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0,
BPF_SK_STORAGE_GET_F_CREATE);
@@ -51,7 +53,7 @@ int connect4(struct bpf_sock_addr *ctx)
orig->port = ctx->user_port;
ctx->user_ip4 = bpf_htonl(0x7f000001);
- ctx->user_port = bpf_htons(60123);
+ ctx->user_port = bpf_htons(port);
}
return 1;
}
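+
+/*
+ * With the port read from a global, the harness is expected to pick a free
+ * port at runtime and publish it before attaching, e.g. via the skeleton
+ * (hypothetical name): skel->bss->port = chosen_port;
+ */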
@@ -63,7 +65,7 @@ int getsockname4(struct bpf_sock_addr *ctx)
return 1;
/* Expose local server as 1.2.3.4:60000 to client. */
- if (ctx->user_port == bpf_htons(60123)) {
+ if (ctx->user_port == bpf_htons(port)) {
ctx->user_ip4 = bpf_htonl(0x01020304);
ctx->user_port = bpf_htons(60000);
}
@@ -79,7 +81,7 @@ int getpeername4(struct bpf_sock_addr *ctx)
return 1;
/* Expose service 1.2.3.4:60000 as peer instead of backend. */
- if (ctx->user_port == bpf_htons(60123)) {
+ if (ctx->user_port == bpf_htons(port)) {
orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0);
if (orig) {
ctx->user_ip4 = orig->addr;
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c
index 19cad93e612f..a1a671b39083 100644
--- a/tools/testing/selftests/bpf/progs/connect_force_port6.c
+++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c
@@ -13,6 +13,8 @@
char _license[] SEC("license") = "GPL";
+__u16 port = 0;
+
struct svc_addr {
__be32 addr[4];
__be16 port;
@@ -39,7 +41,7 @@ int connect6(struct bpf_sock_addr *ctx)
if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
return 0;
- /* Rewire service [fc00::1]:60000 to backend [::1]:60124. */
+ /* Rewire service [fc00::1]:60000 to backend [::1]:port. */
if (ctx->user_port == bpf_htons(60000)) {
orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0,
BPF_SK_STORAGE_GET_F_CREATE);
@@ -56,7 +58,7 @@ int connect6(struct bpf_sock_addr *ctx)
ctx->user_ip6[1] = 0;
ctx->user_ip6[2] = 0;
ctx->user_ip6[3] = bpf_htonl(1);
- ctx->user_port = bpf_htons(60124);
+ ctx->user_port = bpf_htons(port);
}
return 1;
}
@@ -68,7 +70,7 @@ int getsockname6(struct bpf_sock_addr *ctx)
return 1;
/* Expose local server as [fc00::1]:60000 to client. */
- if (ctx->user_port == bpf_htons(60124)) {
+ if (ctx->user_port == bpf_htons(port)) {
ctx->user_ip6[0] = bpf_htonl(0xfc000000);
ctx->user_ip6[1] = 0;
ctx->user_ip6[2] = 0;
@@ -87,7 +89,7 @@ int getpeername6(struct bpf_sock_addr *ctx)
return 1;
/* Expose service [fc00::1]:60000 as peer instead of backend. */
- if (ctx->user_port == bpf_htons(60124)) {
+ if (ctx->user_port == bpf_htons(port)) {
orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0);
if (orig) {
ctx->user_ip6[0] = orig->addr[0];
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index 8f2ae9640886..b62773ce5219 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -1993,3 +1993,118 @@ int test_dynptr_reg_type(void *ctx)
global_call_bpf_dynptr((const struct bpf_dynptr *)current);
return 0;
}
+
+/* Overwriting a referenced dynptr is allowed if a clone still holds the ref */
+SEC("?raw_tp")
+__success
+int dynptr_overwrite_ref_with_clone(void *ctx)
+{
+ struct bpf_dynptr ptr, clone;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+
+ /* Overwrite the original - clone still holds the ref */
+ *(volatile __u8 *)&ptr = 0;
+
+ bpf_ringbuf_discard_dynptr(&clone, 0);
+
+ return 0;
+}
+
+/* Overwriting the last referenced dynptr should still be rejected */
+SEC("?raw_tp")
+__failure __msg("cannot overwrite referenced dynptr")
+int dynptr_overwrite_ref_last_clone(void *ctx)
+{
+ struct bpf_dynptr ptr, clone;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+
+ /* Overwrite the original - clone still holds the ref, OK */
+ *(volatile __u8 *)&ptr = 0;
+
+ /* Overwrite the last holder - this should fail */
+ *(volatile __u8 *)&clone = 0;
+
+ return 0;
+}
+
+/* Overwriting a clone should be allowed if the original still holds the ref */
+SEC("?raw_tp")
+__success
+int dynptr_overwrite_clone_with_original(void *ctx)
+{
+ struct bpf_dynptr ptr, clone;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+
+ /* Overwrite the clone - original still holds the ref */
+ *(volatile __u8 *)&clone = 0;
+
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+/* Data slices from the destroyed dynptr should be invalidated */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int dynptr_overwrite_ref_invalidate_slice(void *ctx)
+{
+ struct bpf_dynptr ptr, clone;
+ int *data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ data = bpf_dynptr_data(&ptr, 0, sizeof(val));
+ if (!data)
+ return 0;
+
+ bpf_dynptr_clone(&ptr, &clone);
+
+ /* Overwrite the original - clone holds the ref */
+ *(volatile __u8 *)&ptr = 0;
+
+ /* data was from the original dynptr, should be invalid now */
+ *data = 123;
+
+ return 0;
+}
+
+/*
+ * Data slices from a dynptr clone should remain valid after
+ * overwriting the original dynptr
+ */
+SEC("?raw_tp")
+__success
+int dynptr_overwrite_ref_clone_slice_valid(void *ctx)
+{
+ struct bpf_dynptr ptr, clone;
+ int *data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+
+ data = bpf_dynptr_data(&clone, 0, sizeof(val));
+ if (!data) {
+ bpf_ringbuf_discard_dynptr(&clone, 0);
+ return 0;
+ }
+
+ /* Overwrite the original - clone holds the ref */
+ *(volatile __u8 *)&ptr = 0;
+
+ /* data is from the clone, should still be valid */
+ *data = 123;
+
+ bpf_ringbuf_discard_dynptr(&clone, 0);
+
+ return 0;
+}
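+
+/*
+ * Taken together, the cases above pin down the rule: clones share the
+ * original dynptr's reference; overwriting one holder is allowed while
+ * another holder remains; overwriting the last holder is rejected; and a
+ * slice is invalidated together with the specific dynptr it was derived
+ * from, not with its siblings.
+ */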
diff --git a/tools/testing/selftests/bpf/progs/empty_skb.c b/tools/testing/selftests/bpf/progs/empty_skb.c
index 4b0cd6753251..44326f5cc8bb 100644
--- a/tools/testing/selftests/bpf/progs/empty_skb.c
+++ b/tools/testing/selftests/bpf/progs/empty_skb.c
@@ -35,3 +35,10 @@ int tc_redirect_egress(struct __sk_buff *skb)
ret = bpf_clone_redirect(skb, ifindex, 0);
return 0;
}
+
+SEC("tc")
+int tc_adjust_room(struct __sk_buff *skb)
+{
+ ret = bpf_skb_adjust_room(skb, 4, BPF_ADJ_ROOM_NET, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/exceptions.c b/tools/testing/selftests/bpf/progs/exceptions.c
index f09cd14d8e04..4206f59d7b86 100644
--- a/tools/testing/selftests/bpf/progs/exceptions.c
+++ b/tools/testing/selftests/bpf/progs/exceptions.c
@@ -109,6 +109,20 @@ int exception_tail_call(struct __sk_buff *ctx) {
return ret + 8;
}
+__weak
+void throw_11(void)
+{
+ bpf_throw(11);
+}
+
+SEC("tc")
+int exception_throw_from_void_global(struct __sk_buff *ctx)
+{
+ throw_11();
+
+ return 0;
+}
+
__noinline int exception_ext_global(struct __sk_buff *ctx)
{
volatile int ret = 0;
diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c
index 858af5988a38..e4abf4172fca 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_assert.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c
@@ -59,7 +59,7 @@ check_assert(s64, >=, ge_neg, INT_MIN);
SEC("?tc")
__log_level(2) __failure
-__msg(": R0=0 R1=ctx() R2=scalar(smin=0xffffffff80000002,smax=smax32=0x7ffffffd,smin32=0x80000002) R10=fp0")
+__msg(": R1=ctx() R2=scalar(smin=0xffffffff80000002,smax=smax32=0x7ffffffd,smin32=0x80000002) R10=fp0")
int check_assert_range_s64(struct __sk_buff *ctx)
{
struct bpf_sock *sk = ctx->sk;
@@ -86,7 +86,7 @@ int check_assert_range_u64(struct __sk_buff *ctx)
SEC("?tc")
__log_level(2) __failure
-__msg(": R0=0 R1=ctx() R2=4096 R10=fp0")
+__msg(": R1=ctx() R2=4096 R10=fp0")
int check_assert_single_range_s64(struct __sk_buff *ctx)
{
struct bpf_sock *sk = ctx->sk;
@@ -114,7 +114,7 @@ int check_assert_single_range_u64(struct __sk_buff *ctx)
SEC("?tc")
__log_level(2) __failure
-__msg(": R1=pkt(off=64,r=64) R2=pkt_end() R6=pkt(r=64) R10=fp0")
+__msg(": R6=pkt(r=64) R10=fp0")
int check_assert_generic(struct __sk_buff *ctx)
{
u8 *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c
index 9ea1353488d7..051e2b6f2694 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_fail.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c
@@ -34,11 +34,15 @@ struct {
private(A) struct bpf_spin_lock lock;
private(A) struct bpf_rb_root rbtree __contains(foo, node);
-__noinline void *exception_cb_bad_ret_type(u64 cookie)
+__noinline void *exception_cb_bad_ret_type1(u64 cookie)
{
return NULL;
}
+__noinline void exception_cb_bad_ret_type2(u64 cookie)
+{
+}
+
__noinline int exception_cb_bad_arg_0(void)
{
return 0;
@@ -55,8 +59,8 @@ __noinline int exception_cb_ok_arg_small(int a)
}
SEC("?tc")
-__exception_cb(exception_cb_bad_ret_type)
-__failure __msg("Global function exception_cb_bad_ret_type() doesn't return scalar.")
+__exception_cb(exception_cb_bad_ret_type1)
+__failure __msg("Global function exception_cb_bad_ret_type1() return value not void or scalar.")
int reject_exception_cb_type_1(struct __sk_buff *ctx)
{
bpf_throw(0);
@@ -90,6 +94,15 @@ int reject_exception_cb_type_4(struct __sk_buff *ctx)
return 0;
}
+SEC("?tc")
+__exception_cb(exception_cb_bad_ret_type2)
+__failure __msg("exception cb cannot return void")
+int reject_exception_cb_type_5(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
__noinline
static int timer_cb(void *map, int *key, struct bpf_timer *timer)
{
@@ -353,6 +366,21 @@ int reject_exception_throw_cb_diff(struct __sk_buff *ctx)
return 0;
}
+__weak
+void foo(void)
+{
+ bpf_throw(1);
+}
+
+SEC("?fentry/bpf_check")
+__failure __msg("At program exit the register R1 has smin=1 smax=1 should")
+int reject_out_of_range_global_throw(struct __sk_buff *skb)
+{
+ foo();
+
+ return 0;
+}
+
__noinline static int always_throws(void)
{
bpf_throw(0);
diff --git a/tools/testing/selftests/bpf/progs/freplace_int_with_void.c b/tools/testing/selftests/bpf/progs/freplace_int_with_void.c
new file mode 100644
index 000000000000..cbb8f8ff2581
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_int_with_void.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("freplace/global_func2")
+void test_freplace_int_with_void(struct __sk_buff *skb)
+{
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_void.c b/tools/testing/selftests/bpf/progs/freplace_void.c
new file mode 100644
index 000000000000..68b114f477fe
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_void.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("freplace/foo")
+void test_freplace_void(struct __sk_buff *skb)
+{
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/get_func_args_fsession_test.c b/tools/testing/selftests/bpf/progs/get_func_args_fsession_test.c
new file mode 100644
index 000000000000..bb597f24b659
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/get_func_args_fsession_test.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_result = 0;
+
+SEC("fsession/bpf_fentry_test1")
+int BPF_PROG(test1)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, z = 0, ret = 0;
+ __s64 err;
+
+ test1_result = cnt == 1;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+ test1_result &= err == 0 && ((int) a == 1);
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 1, &z);
+ test1_result &= err == -EINVAL;
+
+ if (bpf_session_is_return(ctx)) {
+ err = bpf_get_func_ret(ctx, &ret);
+ test1_result &= err == 0 && ret == 2;
+ } else {
+ err = bpf_get_func_ret(ctx, &ret);
+ test1_result &= err == 0 && ret == 0;
+ }
+
+ return 0;
+}
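+
+/*
+ * An fsession program attaches once and fires on both entry and exit of
+ * the target; bpf_session_is_return() tells the two apart, which is why a
+ * single program can check both the entry (ret == 0) and exit (ret == 2)
+ * expectations above.
+ */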
diff --git a/tools/testing/selftests/bpf/progs/get_func_args_test.c b/tools/testing/selftests/bpf/progs/get_func_args_test.c
index 075a1180ec26..1bf47f64d096 100644
--- a/tools/testing/selftests/bpf/progs/get_func_args_test.c
+++ b/tools/testing/selftests/bpf/progs/get_func_args_test.c
@@ -165,41 +165,3 @@ int BPF_PROG(tp_test2)
return 0;
}
-
-__u64 test7_result = 0;
-#if defined(bpf_target_x86) || defined(bpf_target_arm64) || defined(bpf_target_riscv)
-SEC("fsession/bpf_fentry_test1")
-int BPF_PROG(test7)
-{
- __u64 cnt = bpf_get_func_arg_cnt(ctx);
- __u64 a = 0, z = 0, ret = 0;
- __s64 err;
-
- test7_result = cnt == 1;
-
- /* valid arguments */
- err = bpf_get_func_arg(ctx, 0, &a);
- test7_result &= err == 0 && ((int) a == 1);
-
- /* not valid argument */
- err = bpf_get_func_arg(ctx, 1, &z);
- test7_result &= err == -EINVAL;
-
- if (bpf_session_is_return(ctx)) {
- err = bpf_get_func_ret(ctx, &ret);
- test7_result &= err == 0 && ret == 2;
- } else {
- err = bpf_get_func_ret(ctx, &ret);
- test7_result &= err == 0 && ret == 0;
- }
-
- return 0;
-}
-#else
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(test7)
-{
- test7_result = 1;
- return 0;
-}
-#endif
diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_fsession_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_fsession_test.c
new file mode 100644
index 000000000000..bbeea0d512e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/get_func_ip_fsession_test.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_entry_result = 0;
+__u64 test1_exit_result = 0;
+
+SEC("fsession/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ if (bpf_session_is_return(ctx))
+ test1_exit_result = (const void *) addr == &bpf_fentry_test1;
+ else
+ test1_entry_result = (const void *) addr == &bpf_fentry_test1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
index 45eaa54d1ac7..2011cacdeb18 100644
--- a/tools/testing/selftests/bpf/progs/get_func_ip_test.c
+++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
@@ -103,26 +103,3 @@ int BPF_URETPROBE(test8, int ret)
test8_result = (const void *) addr == (const void *) uprobe_trigger;
return 0;
}
-
-__u64 test9_entry_result = 0;
-__u64 test9_exit_result = 0;
-#if defined(bpf_target_x86) || defined(bpf_target_arm64) || defined(bpf_target_riscv)
-SEC("fsession/bpf_fentry_test1")
-int BPF_PROG(test9, int a)
-{
- __u64 addr = bpf_get_func_ip(ctx);
-
- if (bpf_session_is_return(ctx))
- test9_exit_result = (const void *) addr == &bpf_fentry_test1;
- else
- test9_entry_result = (const void *) addr == &bpf_fentry_test1;
- return 0;
-}
-#else
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(test9, int a)
-{
- test9_entry_result = test9_exit_result = 1;
- return 0;
-}
-#endif
diff --git a/tools/testing/selftests/bpf/progs/htab_reuse.c b/tools/testing/selftests/bpf/progs/htab_reuse.c
index 7f7368cb3095..1c7fa7ee45ee 100644
--- a/tools/testing/selftests/bpf/progs/htab_reuse.c
+++ b/tools/testing/selftests/bpf/progs/htab_reuse.c
@@ -17,3 +17,19 @@ struct {
__type(value, struct htab_val);
__uint(map_flags, BPF_F_NO_PREALLOC);
} htab SEC(".maps");
+
+#define HTAB_NDATA 256
+
+struct htab_val_large {
+ struct bpf_spin_lock lock;
+ __u32 seq;
+ __u64 data[HTAB_NDATA];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 8);
+ __type(key, unsigned int);
+ __type(value, struct htab_val_large);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} htab_lock_consistency SEC(".maps");
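+
+/*
+ * Illustrative access pattern for the map above (the actual consistency
+ * checks live in the prog_tests harness; names below are hypothetical):
+ *
+ *	struct htab_val_large *v;
+ *
+ *	v = bpf_map_lookup_elem(&htab_lock_consistency, &key);
+ *	if (v) {
+ *		bpf_spin_lock(&v->lock);
+ *		v->seq++;
+ *		bpf_spin_unlock(&v->lock);
+ *	}
+ */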
diff --git a/tools/testing/selftests/bpf/progs/irq.c b/tools/testing/selftests/bpf/progs/irq.c
index 74d912b22de9..e11e82d98904 100644
--- a/tools/testing/selftests/bpf/progs/irq.c
+++ b/tools/testing/selftests/bpf/progs/irq.c
@@ -490,7 +490,7 @@ int irq_non_sleepable_global_subprog(void *ctx)
}
SEC("?syscall")
-__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+__failure __msg("sleepable global function")
int irq_sleepable_helper_global_subprog(void *ctx)
{
unsigned long flags;
@@ -502,7 +502,7 @@ int irq_sleepable_helper_global_subprog(void *ctx)
}
SEC("?syscall")
-__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+__failure __msg("sleepable global function")
int irq_sleepable_global_subprog_indirect(void *ctx)
{
unsigned long flags;
diff --git a/tools/testing/selftests/bpf/progs/iter_buf_null_fail.c b/tools/testing/selftests/bpf/progs/iter_buf_null_fail.c
new file mode 100644
index 000000000000..3daad40515e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iter_buf_null_fail.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Qi Tang */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Verify that the verifier rejects direct access to nullable PTR_TO_BUF. */
+SEC("iter/bpf_map_elem")
+__failure __msg("invalid mem access")
+int iter_buf_null_deref(struct bpf_iter__bpf_map_elem *ctx)
+{
+ /*
+ * ctx->key is PTR_TO_BUF | PTR_MAYBE_NULL | MEM_RDONLY.
+ * Direct access without null check must be rejected.
+ */
+ volatile __u32 v = *(__u32 *)ctx->key;
+
+ (void)v;
+ return 0;
+}
+
+/* Verify that access after a null check is still accepted. */
+SEC("iter/bpf_map_elem")
+__success
+int iter_buf_null_check_ok(struct bpf_iter__bpf_map_elem *ctx)
+{
+ __u32 *key = ctx->key;
+
+ if (!key)
+ return 0;
+
+ volatile __u32 v = *key;
+
+ (void)v;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 7f27b517d5d5..86b74e3579d9 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -1651,7 +1651,7 @@ int clean_live_states(const void *ctx)
SEC("?raw_tp")
__flag(BPF_F_TEST_STATE_FREQ)
-__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__failure __msg("misaligned stack access off -31+0 size 8")
__naked int absent_mark_in_the_middle_state(void)
{
/* This is equivalent to C program below.
@@ -1726,7 +1726,7 @@ static int noop(void)
SEC("?raw_tp")
__flag(BPF_F_TEST_STATE_FREQ)
-__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__failure __msg("misaligned stack access off -31+0 size 8")
__naked int absent_mark_in_the_middle_state2(void)
{
/* This is equivalent to C program below.
@@ -1802,7 +1802,7 @@ __naked int absent_mark_in_the_middle_state2(void)
SEC("?raw_tp")
__flag(BPF_F_TEST_STATE_FREQ)
-__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__failure __msg("misaligned stack access off -31+0 size 8")
__naked int absent_mark_in_the_middle_state3(void)
{
/*
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
index 8b86113a0126..5edc51564f71 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -2,9 +2,107 @@
/* Copyright (c) 2021 Facebook */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
#include "../test_kmods/bpf_testmod_kfunc.h"
SEC("tc")
+int kfunc_call_test5(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+ int ret;
+ u32 val32;
+ u16 val16;
+ u8 val8;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ /*
+ * Test with constant values to verify zero-extension.
+ * ISA-dependent BPF asm:
+ * With ALU32: w1 = 0xFF; w2 = 0xFFFF; w3 = 0xFFFFffff
+ * Without ALU32: r1 = 0xFF; r2 = 0xFFFF; r3 = 0xFFFFffff
+ * Both zero-extend to 64-bit before the kfunc call.
+ */
+ ret = bpf_kfunc_call_test5(0xFF, 0xFFFF, 0xFFFFffffULL);
+ if (ret)
+ return ret;
+
+ val32 = bpf_get_prandom_u32();
+ val16 = val32 & 0xFFFF;
+ val8 = val32 & 0xFF;
+ ret = bpf_kfunc_call_test5(val8, val16, val32);
+ if (ret)
+ return ret;
+
+ /*
+ * Test multiplication with different operand sizes:
+ *
+ * val8 * 0xFF:
+ * - Both operands promote to int (32-bit signed)
+ * - Result: 32-bit multiplication, truncated to u8, then zero-extended
+ *
+ * val16 * 0xFFFF:
+ * - Both operands promote to int (32-bit signed)
+ * - Result: 32-bit multiplication, truncated to u16, then zero-extended
+ *
+ * val32 * 0xFFFFffffULL:
+ * - val32 (u32) promotes to unsigned long long (due to ULL suffix)
+ * - Result: 64-bit unsigned multiplication, truncated to u32, then zero-extended
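+	 *
+	 * Concretely, with val8 == 3: 3 * 0xFF == 0x2FD; the u8 parameter
+	 * keeps only 0xFD, which is then zero-extended to 64 bits.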
+ */
+ ret = bpf_kfunc_call_test5(val8 * 0xFF, val16 * 0xFFFF, val32 * 0xFFFFffffULL);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+/*
+ * Assembly version testing the multiplication edge case explicitly.
+ * This ensures consistent testing across different ISA versions.
+ */
+SEC("tc")
+__naked int kfunc_call_test5_asm(void)
+{
+ asm volatile (
+ /* Get a random u32 value */
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;" /* Save val32 in r6 */
+
+ /* Prepare first argument: val8 * 0xFF */
+ "r1 = r6;"
+ "r1 &= 0xFF;" /* val8 = val32 & 0xFF */
+ "r7 = 0xFF;"
+ "r1 *= r7;" /* 64-bit mult: r1 = r1 * r7 */
+
+ /* Prepare second argument: val16 * 0xFFFF */
+ "r2 = r6;"
+ "r2 &= 0xFFFF;" /* val16 = val32 & 0xFFFF */
+ "r7 = 0xFFFF;"
+ "r2 *= r7;" /* 64-bit mult: r2 = r2 * r7 */
+
+ /* Prepare third argument: val32 * 0xFFFFffff */
+ "r3 = r6;" /* val32 */
+ "r7 = 0xFFFFffff;"
+ "r3 *= r7;" /* 64-bit mult: r3 = r3 * r7 */
+
+ /* Call kfunc with multiplication results */
+ "call bpf_kfunc_call_test5;"
+
+ /* Check return value */
+ "if r0 != 0 goto exit_%=;"
+ "r0 = 0;"
+ "exit_%=: exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("tc")
int kfunc_call_test4(struct __sk_buff *skb)
{
struct bpf_sock *sk = skb->sk;
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session.c
index bd8b7fb7061e..d52a65b40bbf 100644
--- a/tools/testing/selftests/bpf/progs/kprobe_multi_session.c
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session.c
@@ -76,3 +76,13 @@ int test_kprobe(struct pt_regs *ctx)
{
return session_check(ctx);
}
+
+/*
+ * Exact function name (no wildcards) - exercises the fast syms[] path
+ * in bpf_program__attach_kprobe_multi_opts() which bypasses kallsyms parsing.
+ */
+SEC("kprobe.session/bpf_fentry_test1")
+int test_kprobe_syms(struct pt_regs *ctx)
+{
+ return session_check(ctx);
+}
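+
+/*
+ * Roughly equivalent attach call (hedged sketch; the harness may set more
+ * options):
+ *
+ *	const char *syms[] = { "bpf_fentry_test1" };
+ *	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
+ *		.syms = syms,
+ *		.cnt = 1,
+ *		.session = true,
+ *	);
+ *	bpf_program__attach_kprobe_multi_opts(prog, NULL, &opts);
+ */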
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_sleepable.c b/tools/testing/selftests/bpf/progs/kprobe_multi_sleepable.c
new file mode 100644
index 000000000000..932e1d9c72e2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_sleepable.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+void *user_ptr = 0;
+
+SEC("kprobe.multi")
+int handle_kprobe_multi_sleepable(struct pt_regs *ctx)
+{
+ int a, err;
+
+ err = bpf_copy_from_user(&a, sizeof(a), user_ptr);
+ barrier_var(a);
+ return err;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(fentry)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c b/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c
index f77aef0474d3..adbf52afe490 100644
--- a/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c
+++ b/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c
@@ -19,4 +19,23 @@ int kprobe_multi_write_ctx(struct pt_regs *ctx)
ctx->ax = 0;
return 0;
}
+
+SEC("?kprobe")
+int kprobe_dummy(struct pt_regs *regs)
+{
+ return 0;
+}
+
+SEC("?freplace")
+int freplace_kprobe(struct pt_regs *regs)
+{
+ regs->di = 0;
+ return 0;
+}
+
+SEC("?fentry/bpf_fentry_test1")
+int BPF_PROG(fentry)
+{
+ return 0;
+}
#endif
diff --git a/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c b/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c
index 2414ac20b6d5..ca5943166057 100644
--- a/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c
+++ b/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c
@@ -25,14 +25,14 @@ __naked int kptr_xchg_inline(void)
"if r0 == 0 goto 1f;"
"r1 = r0;"
"r2 = 0;"
- "call %[bpf_obj_drop_impl];"
+ "call %[bpf_obj_drop];"
"1:"
"r0 = 0;"
"exit;"
:
: __imm_addr(ptr),
__imm(bpf_kptr_xchg),
- __imm(bpf_obj_drop_impl)
+ __imm(bpf_obj_drop)
: __clobber_all
);
}
diff --git a/tools/testing/selftests/bpf/progs/lsm_bdev.c b/tools/testing/selftests/bpf/progs/lsm_bdev.c
new file mode 100644
index 000000000000..45554e6db605
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm_bdev.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Christian Brauner <brauner@kernel.org> */
+
+/*
+ * BPF LSM block device integrity tracker for dm-verity.
+ *
+ * Tracks block devices in a hashmap keyed by bd_dev. When dm-verity
+ * calls security_bdev_setintegrity() during verity_preresume(), the
+ * setintegrity hook records the roothash and signature-validity data.
+ * The free hook cleans up when the device goes away. The alloc hook
+ * counts allocations for test validation.
+ *
+ * The sleepable hooks exercise bpf_copy_from_user() to verify that
+ * the sleepable classification actually permits sleepable helpers.
+ */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct verity_info {
+ __u8 has_roothash; /* LSM_INT_DMVERITY_ROOTHASH seen */
+ __u8 sig_valid; /* LSM_INT_DMVERITY_SIG_VALID value (non-NULL = valid) */
+ __u32 setintegrity_cnt; /* total setintegrity calls for this dev */
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 64);
+ __type(key, __u32); /* dev_t from bdev->bd_dev */
+ __type(value, struct verity_info);
+} verity_devices SEC(".maps");
+
+/* Global counters exposed to userspace via skeleton bss. */
+int alloc_count;
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm.s/bdev_setintegrity")
+int BPF_PROG(bdev_setintegrity, struct block_device *bdev,
+ enum lsm_integrity_type type, const void *value, size_t size)
+{
+ struct verity_info zero = {};
+ struct verity_info *info;
+ __u32 dev;
+ char buf;
+
+ /*
+ * Exercise a sleepable helper to confirm the verifier
+ * allows it in this sleepable hook.
+ */
+ (void)bpf_copy_from_user(&buf, sizeof(buf), NULL);
+
+ dev = bdev->bd_dev;
+
+ info = bpf_map_lookup_elem(&verity_devices, &dev);
+ if (!info) {
+ bpf_map_update_elem(&verity_devices, &dev, &zero, BPF_NOEXIST);
+ info = bpf_map_lookup_elem(&verity_devices, &dev);
+ if (!info)
+ return 0;
+ }
+
+ if (type == LSM_INT_DMVERITY_ROOTHASH)
+ info->has_roothash = 1;
+ else if (type == LSM_INT_DMVERITY_SIG_VALID)
+ info->sig_valid = (value != NULL);
+
+ __sync_fetch_and_add(&info->setintegrity_cnt, 1);
+
+ return 0;
+}
+
+SEC("lsm/bdev_free_security")
+void BPF_PROG(bdev_free_security, struct block_device *bdev)
+{
+ __u32 dev = bdev->bd_dev;
+
+ bpf_map_delete_elem(&verity_devices, &dev);
+}
+
+SEC("lsm.s/bdev_alloc_security")
+int BPF_PROG(bdev_alloc_security, struct block_device *bdev)
+{
+ char buf;
+
+ /*
+ * Exercise a sleepable helper to confirm the verifier
+ * allows it in this sleepable hook.
+ */
+ (void)bpf_copy_from_user(&buf, sizeof(buf), NULL);
+
+ __sync_fetch_and_add(&alloc_count, 1);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/lwt_misc.c b/tools/testing/selftests/bpf/progs/lwt_misc.c
new file mode 100644
index 000000000000..b392317088d2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lwt_misc.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("lwt_xmit")
+__success __retval(0)
+int test_missing_dst(struct __sk_buff *skb)
+{
+ struct iphdr iph;
+
+ __builtin_memset(&iph, 0, sizeof(struct iphdr));
+ iph.ihl = 5;
+ iph.version = 4;
+
+ bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &iph, sizeof(struct iphdr));
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index efaf622c28dd..373c8d17ea55 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -647,8 +647,14 @@ static inline int check_devmap_hash(void)
return 1;
}
+struct bpf_ringbuf {
+ unsigned long consumer_pos;
+ unsigned long producer_pos;
+} __attribute__((preserve_access_index));
+
struct bpf_ringbuf_map {
struct bpf_map map;
+ struct bpf_ringbuf *rb;
} __attribute__((preserve_access_index));
struct {
@@ -659,9 +665,20 @@ static inline int check_ringbuf(void)
{
struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf;
struct bpf_map *map = (struct bpf_map *)&m_ringbuf;
+ struct bpf_ringbuf *rb;
+ void *ptr;
VERIFY(check(&ringbuf->map, map, 0, 0, page_size));
+ ptr = bpf_ringbuf_reserve(&m_ringbuf, 128, 0);
+ VERIFY(ptr);
+
+ bpf_ringbuf_discard(ptr, 0);
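+
+	/*
+	 * bpf_ringbuf_reserve() already advanced producer_pos by the record
+	 * plus its header; discard only sets the discard bit in the record
+	 * header and does not rewind the position, hence the check below.
+	 */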
+ rb = ringbuf->rb;
+ VERIFY(rb);
+ VERIFY(rb->consumer_pos == 0);
+ VERIFY(rb->producer_pos == 128 + BPF_RINGBUF_HDR_SZ);
+
return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
index 3b984b6ae7c0..5b4453747c23 100644
--- a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
+++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
@@ -8,7 +8,7 @@
SEC("tp_btf/sys_enter")
__success
__log_level(2)
-__msg("r8 = *(u64 *)(r7 +0) ; R7=ptr_nameidata(off={{[0-9]+}}) R8=rdonly_untrusted_mem(sz=0)")
+__msg("r8 = *(u64 *)(r7 +0) ; R7=ptr_nameidata(imm={{[0-9]+}}) R8=rdonly_untrusted_mem(sz=0)")
__msg("r9 = *(u8 *)(r8 +0) ; R8=rdonly_untrusted_mem(sz=0) R9=scalar")
int btf_id_to_ptr_mem(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/modify_return.c b/tools/testing/selftests/bpf/progs/modify_return.c
index 3376d4849f58..68fabd2efe8d 100644
--- a/tools/testing/selftests/bpf/progs/modify_return.c
+++ b/tools/testing/selftests/bpf/progs/modify_return.c
@@ -12,11 +12,14 @@ char _license[] SEC("license") = "GPL";
static int sequence = 0;
__s32 input_retval = 0;
+__u32 test_pid = 0;
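+/* expected to be set by the harness (e.g. to its getpid()) so unrelated
+ * callers of the traced functions don't advance the sequence counters */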
__u64 fentry_result = 0;
SEC("fentry/bpf_modify_return_test")
int BPF_PROG(fentry_test, int a, __u64 b)
{
+ if (bpf_get_current_pid_tgid() >> 32 != test_pid)
+ return 0;
sequence++;
fentry_result = (sequence == 1);
return 0;
@@ -26,6 +29,8 @@ __u64 fmod_ret_result = 0;
SEC("fmod_ret/bpf_modify_return_test")
int BPF_PROG(fmod_ret_test, int a, int *b, int ret)
{
+ if (bpf_get_current_pid_tgid() >> 32 != test_pid)
+ return ret;
sequence++;
/* This is the first fmod_ret program, the ret passed should be 0 */
fmod_ret_result = (sequence == 2 && ret == 0);
@@ -36,6 +41,8 @@ __u64 fexit_result = 0;
SEC("fexit/bpf_modify_return_test")
int BPF_PROG(fexit_test, int a, __u64 b, int ret)
{
+ if (bpf_get_current_pid_tgid() >> 32 != test_pid)
+ return 0;
sequence++;
/* If the input_reval is non-zero a successful modification should have
* occurred.
@@ -55,6 +62,8 @@ SEC("fentry/bpf_modify_return_test2")
int BPF_PROG(fentry_test2, int a, int *b, short c, int d, void *e, char f,
int g)
{
+ if (bpf_get_current_pid_tgid() >> 32 != test_pid)
+ return 0;
sequence2++;
fentry_result2 = (sequence2 == 1);
return 0;
@@ -65,6 +74,8 @@ SEC("fmod_ret/bpf_modify_return_test2")
int BPF_PROG(fmod_ret_test2, int a, int *b, short c, int d, void *e, char f,
int g, int ret)
{
+ if (bpf_get_current_pid_tgid() >> 32 != test_pid)
+ return ret;
sequence2++;
/* This is the first fmod_ret program, the ret passed should be 0 */
fmod_ret_result2 = (sequence2 == 2 && ret == 0);
@@ -76,6 +87,8 @@ SEC("fexit/bpf_modify_return_test2")
int BPF_PROG(fexit_test2, int a, int *b, short c, int d, void *e, char f,
int g, int ret)
{
+ if (bpf_get_current_pid_tgid() >> 32 != test_pid)
+ return 0;
sequence2++;
 	/* If input_retval is non-zero a successful modification should have
* occurred.
diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c
index f2b8eb2ff76f..81813c724fa9 100644
--- a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c
@@ -110,7 +110,7 @@ int BPF_PROG(test_array_map_3)
}
SEC("?fentry.s/bpf_fentry_test1")
-__failure __msg("arg#0 expected for bpf_percpu_obj_drop_impl()")
+__failure __msg("arg#0 expected for bpf_percpu_obj_drop()")
int BPF_PROG(test_array_map_4)
{
struct val_t __percpu_kptr *p;
@@ -124,7 +124,7 @@ int BPF_PROG(test_array_map_4)
}
SEC("?fentry.s/bpf_fentry_test1")
-__failure __msg("arg#0 expected for bpf_obj_drop_impl()")
+__failure __msg("arg#0 expected for bpf_obj_drop()")
int BPF_PROG(test_array_map_5)
{
struct val_t *p;
diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c
index 7d04254e61f1..6d5fce7e6ffc 100644
--- a/tools/testing/selftests/bpf/progs/preempt_lock.c
+++ b/tools/testing/selftests/bpf/progs/preempt_lock.c
@@ -177,7 +177,7 @@ global_subprog_calling_sleepable_global(int i)
}
SEC("?syscall")
-__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+__failure __msg("sleepable global function")
int preempt_global_sleepable_helper_subprog(struct __sk_buff *ctx)
{
preempt_disable();
@@ -188,7 +188,7 @@ int preempt_global_sleepable_helper_subprog(struct __sk_buff *ctx)
}
SEC("?syscall")
-__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+__failure __msg("sleepable global function")
int preempt_global_sleepable_kfunc_subprog(struct __sk_buff *ctx)
{
preempt_disable();
@@ -199,7 +199,7 @@ int preempt_global_sleepable_kfunc_subprog(struct __sk_buff *ctx)
}
SEC("?syscall")
-__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+__failure __msg("sleepable global function")
int preempt_global_sleepable_subprog_indirect(struct __sk_buff *ctx)
{
preempt_disable();
diff --git a/tools/testing/selftests/bpf/progs/rbtree_search_kptr.c b/tools/testing/selftests/bpf/progs/rbtree_search_kptr.c
new file mode 100644
index 000000000000..610aae45e2dc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_search_kptr.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 KylinSoft Corporation. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+#define NR_NODES 16
+
+struct node_data {
+ int data;
+};
+
+struct tree_node {
+ struct bpf_rb_node node;
+ u64 key;
+ struct node_data __kptr * node_data;
+};
+
+struct tree_node_ref {
+ struct bpf_refcount ref;
+ struct bpf_rb_node node;
+ u64 key;
+ struct node_data __kptr * node_data;
+};
+
+#define private(name) SEC(".data." #name) __hidden __aligned(8)
+
+private(A) struct bpf_rb_root root __contains(tree_node, node);
+private(A) struct bpf_spin_lock lock;
+
+private(B) struct bpf_rb_root root_r __contains(tree_node_ref, node);
+private(B) struct bpf_spin_lock lock_r;
+
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct tree_node *node_a, *node_b;
+
+ node_a = container_of(a, struct tree_node, node);
+ node_b = container_of(b, struct tree_node, node);
+
+ return node_a->key < node_b->key;
+}
+
+SEC("syscall")
+__retval(0)
+long rbtree_search_kptr(void *ctx)
+{
+ struct tree_node *tnode;
+ struct bpf_rb_node *rb_n;
+ struct node_data __kptr * node_data;
+ int lookup_key = NR_NODES / 2;
+ int lookup_data = NR_NODES / 2;
+ int i, data, ret = 0;
+
+ for (i = 0; i < NR_NODES && can_loop; i++) {
+ tnode = bpf_obj_new(typeof(*tnode));
+ if (!tnode)
+ return __LINE__;
+
+ node_data = bpf_obj_new(typeof(*node_data));
+ if (!node_data) {
+ bpf_obj_drop(tnode);
+ return __LINE__;
+ }
+
+ tnode->key = i;
+ node_data->data = i;
+
+ node_data = bpf_kptr_xchg(&tnode->node_data, node_data);
+ if (node_data)
+ bpf_obj_drop(node_data);
+
+ bpf_spin_lock(&lock);
+ bpf_rbtree_add(&root, &tnode->node, less);
+ bpf_spin_unlock(&lock);
+ }
+
+ bpf_spin_lock(&lock);
+ rb_n = bpf_rbtree_root(&root);
+ while (rb_n && can_loop) {
+ tnode = container_of(rb_n, struct tree_node, node);
+ node_data = bpf_kptr_xchg(&tnode->node_data, NULL);
+ if (!node_data) {
+ ret = __LINE__;
+ goto fail;
+ }
+
+ data = node_data->data;
+ node_data = bpf_kptr_xchg(&tnode->node_data, node_data);
+ if (node_data) {
+ bpf_spin_unlock(&lock);
+ bpf_obj_drop(node_data);
+ return __LINE__;
+ }
+
+ if (lookup_key == tnode->key) {
+ if (data == lookup_data)
+ break;
+
+ ret = __LINE__;
+ goto fail;
+ }
+
+ if (lookup_key < tnode->key)
+ rb_n = bpf_rbtree_left(&root, rb_n);
+ else
+ rb_n = bpf_rbtree_right(&root, rb_n);
+ }
+ bpf_spin_unlock(&lock);
+
+ while (can_loop) {
+ bpf_spin_lock(&lock);
+ rb_n = bpf_rbtree_first(&root);
+ if (!rb_n) {
+ bpf_spin_unlock(&lock);
+ return 0;
+ }
+
+ rb_n = bpf_rbtree_remove(&root, rb_n);
+ if (!rb_n) {
+ ret = __LINE__;
+ goto fail;
+ }
+ bpf_spin_unlock(&lock);
+
+ tnode = container_of(rb_n, struct tree_node, node);
+
+ node_data = bpf_kptr_xchg(&tnode->node_data, NULL);
+ if (node_data)
+ bpf_obj_drop(node_data);
+
+ bpf_obj_drop(tnode);
+ }
+
+ return 0;
+fail:
+ bpf_spin_unlock(&lock);
+ return ret;
+}
+
+static bool less_r(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct tree_node_ref *node_a, *node_b;
+
+ node_a = container_of(a, struct tree_node_ref, node);
+ node_b = container_of(b, struct tree_node_ref, node);
+
+ return node_a->key < node_b->key;
+}
+
+SEC("syscall")
+__retval(0)
+long rbtree_search_kptr_ref(void *ctx)
+{
+ struct tree_node_ref *tnode_r, *tnode_m;
+ struct bpf_rb_node *rb_n;
+ struct node_data __kptr * node_data;
+ int lookup_key = NR_NODES / 2;
+ int lookup_data = NR_NODES / 2;
+ int i, data, ret = 0;
+
+ for (i = 0; i < NR_NODES && can_loop; i++) {
+ tnode_r = bpf_obj_new(typeof(*tnode_r));
+ if (!tnode_r)
+ return __LINE__;
+
+ node_data = bpf_obj_new(typeof(*node_data));
+ if (!node_data) {
+ bpf_obj_drop(tnode_r);
+ return __LINE__;
+ }
+
+ tnode_r->key = i;
+ node_data->data = i;
+
+ node_data = bpf_kptr_xchg(&tnode_r->node_data, node_data);
+ if (node_data)
+ bpf_obj_drop(node_data);
+
+ /* Unused reference */
+ tnode_m = bpf_refcount_acquire(tnode_r);
+ if (!tnode_m)
+ return __LINE__;
+
+ bpf_spin_lock(&lock_r);
+ bpf_rbtree_add(&root_r, &tnode_r->node, less_r);
+ bpf_spin_unlock(&lock_r);
+
+ bpf_obj_drop(tnode_m);
+ }
+
+ bpf_spin_lock(&lock_r);
+ rb_n = bpf_rbtree_root(&root_r);
+ while (rb_n && can_loop) {
+ tnode_r = container_of(rb_n, struct tree_node_ref, node);
+ node_data = bpf_kptr_xchg(&tnode_r->node_data, NULL);
+ if (!node_data) {
+ ret = __LINE__;
+ goto fail;
+ }
+
+ data = node_data->data;
+ node_data = bpf_kptr_xchg(&tnode_r->node_data, node_data);
+ if (node_data) {
+ bpf_spin_unlock(&lock_r);
+ bpf_obj_drop(node_data);
+ return __LINE__;
+ }
+
+ if (lookup_key == tnode_r->key) {
+ if (data == lookup_data)
+ break;
+
+ ret = __LINE__;
+ goto fail;
+ }
+
+ if (lookup_key < tnode_r->key)
+ rb_n = bpf_rbtree_left(&root_r, rb_n);
+ else
+ rb_n = bpf_rbtree_right(&root_r, rb_n);
+ }
+ bpf_spin_unlock(&lock_r);
+
+ while (can_loop) {
+ bpf_spin_lock(&lock_r);
+ rb_n = bpf_rbtree_first(&root_r);
+ if (!rb_n) {
+ bpf_spin_unlock(&lock_r);
+ return 0;
+ }
+
+ rb_n = bpf_rbtree_remove(&root_r, rb_n);
+ if (!rb_n) {
+ ret = __LINE__;
+ goto fail;
+ }
+ bpf_spin_unlock(&lock_r);
+
+ tnode_r = container_of(rb_n, struct tree_node_ref, node);
+
+ node_data = bpf_kptr_xchg(&tnode_r->node_data, NULL);
+ if (node_data)
+ bpf_obj_drop(node_data);
+
+ bpf_obj_drop(tnode_r);
+ }
+
+ return 0;
+fail:
+ bpf_spin_unlock(&lock_r);
+ return ret;
+}
+
+SEC("syscall")
+__failure __msg("R1 type=scalar expected=map_value, ptr_, ptr_")
+long non_own_ref_kptr_xchg_no_lock(void *ctx)
+{
+ struct tree_node *tnode;
+ struct bpf_rb_node *rb_n;
+ struct node_data __kptr * node_data;
+ int data;
+
+ bpf_spin_lock(&lock);
+ rb_n = bpf_rbtree_first(&root);
+ if (!rb_n) {
+ bpf_spin_unlock(&lock);
+ return __LINE__;
+ }
+ bpf_spin_unlock(&lock);
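+
+	/*
+	 * After the unlock above, rb_n is no longer a usable non-owning
+	 * reference: the verifier invalidates it and downgrades the value to
+	 * a scalar, which is what the expected "R1 type=scalar" message
+	 * catches at the kptr_xchg call below.
+	 */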
+
+ tnode = container_of(rb_n, struct tree_node, node);
+ node_data = bpf_kptr_xchg(&tnode->node_data, NULL);
+ if (!node_data)
+ return __LINE__;
+
+ data = node_data->data;
+ if (data < 0)
+ return __LINE__;
+
+ node_data = bpf_kptr_xchg(&tnode->node_data, node_data);
+ if (node_data)
+ return __LINE__;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
index 1aca85d86aeb..c847398837cc 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
@@ -500,7 +500,7 @@ long rbtree_wrong_owner_remove_fail_a2(void *ctx)
return 0;
}
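+/*
+ * SYS_PREFIX comes from bpf_misc.h and expands to the arch-specific
+ * syscall wrapper prefix (e.g. "__x64_" on x86_64), so the new section
+ * names a real per-arch syscall symbol instead of the testmod hook.
+ */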
-SEC("?fentry.s/bpf_testmod_test_read")
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
__success
int BPF_PROG(rbtree_sleepable_rcu,
struct file *file, struct kobject *kobj,
@@ -534,7 +534,7 @@ err_out:
return 0;
}
-SEC("?fentry.s/bpf_testmod_test_read")
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
__success
int BPF_PROG(rbtree_sleepable_rcu_no_explicit_rcu_lock,
struct file *file, struct kobject *kobj,
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
index 836c8ab7b908..b2808bfcec29 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
@@ -93,7 +93,7 @@ long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
return 0;
}
-SEC("?fentry.s/bpf_testmod_test_read")
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
__failure __msg("function calls are not allowed while holding a lock")
int BPF_PROG(rbtree_fail_sleepable_lock_across_rcu,
struct file *file, struct kobject *kobj,
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_multi_args.c b/tools/testing/selftests/bpf/progs/struct_ops_multi_args.c
new file mode 100644
index 000000000000..c62be15757f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_multi_args.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Varun R Mallya */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} prog_array SEC(".maps");
+
+SEC("struct_ops/test_refcounted_multi")
+__failure __msg("program with __ref argument cannot tail call")
+int test_refcounted_multi(unsigned long long *ctx)
+{
+ /* ctx[2] is used because the refcounted variable is the third argument */
+ struct task_struct *refcounted_task = (struct task_struct *)ctx[2];
+
+ bpf_task_release(refcounted_task);
+ bpf_tail_call(ctx, &prog_array, 0);
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_ref_acquire = {
+ .test_refcounted_multi = (void *)test_refcounted_multi,
+};
diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c
index f60bcd7b8d4b..204f19c30a3e 100644
--- a/tools/testing/selftests/bpf/progs/tailcall3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall3.c
@@ -5,7 +5,7 @@
struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
- __uint(max_entries, 1);
+ __uint(max_entries, 2);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
@@ -23,6 +23,9 @@ int classifier_0(struct __sk_buff *skb)
SEC("tc")
int entry(struct __sk_buff *skb)
{
+	/* Empty slot: the tail call sees prog == NULL and must fall through */
+ bpf_tail_call_static(skb, &jmp_table, 1);
+
bpf_tail_call_static(skb, &jmp_table, 0);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/task_local_data.bpf.h b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
index fed53d63a7e5..1f396711f487 100644
--- a/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
+++ b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#ifndef __TASK_LOCAL_DATA_BPF_H
#define __TASK_LOCAL_DATA_BPF_H
@@ -87,7 +87,7 @@ struct tld_meta_u {
struct tld_data_u {
__u64 start; /* offset of tld_data_u->data in a page */
- char data[__PAGE_SIZE - sizeof(__u64)];
+ char data[__PAGE_SIZE - sizeof(__u64)] __attribute__((aligned(8)));
};
struct tld_map_value {
diff --git a/tools/testing/selftests/bpf/progs/test_access_variable_array.c b/tools/testing/selftests/bpf/progs/test_access_variable_array.c
deleted file mode 100644
index 326b7d1f496a..000000000000
--- a/tools/testing/selftests/bpf/progs/test_access_variable_array.c
+++ /dev/null
@@ -1,19 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2023 Bytedance */
-
-#include "vmlinux.h"
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
-
-unsigned long span = 0;
-
-SEC("fentry/sched_balance_rq")
-int BPF_PROG(fentry_fentry, int this_cpu, struct rq *this_rq,
- struct sched_domain *sd)
-{
- span = sd->span[0];
-
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_global_func3.c b/tools/testing/selftests/bpf/progs/test_global_func3.c
index 142b682d3c2f..974fd8c19561 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func3.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func3.c
@@ -5,56 +5,56 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-__attribute__ ((noinline))
+static __attribute__ ((noinline))
int f1(struct __sk_buff *skb)
{
return skb->len;
}
-__attribute__ ((noinline))
+static __attribute__ ((noinline))
int f2(int val, struct __sk_buff *skb)
{
return f1(skb) + val;
}
-__attribute__ ((noinline))
+static __attribute__ ((noinline))
int f3(int val, struct __sk_buff *skb, int var)
{
return f2(var, skb) + val;
}
-__attribute__ ((noinline))
+static __attribute__ ((noinline))
int f4(struct __sk_buff *skb)
{
return f3(1, skb, 2);
}
-__attribute__ ((noinline))
+static __attribute__ ((noinline))
int f5(struct __sk_buff *skb)
{
return f4(skb);
}
-__attribute__ ((noinline))
+static __attribute__ ((noinline))
int f6(struct __sk_buff *skb)
{
return f5(skb);
}
-__attribute__ ((noinline))
+static __attribute__ ((noinline))
int f7(struct __sk_buff *skb)
{
return f6(skb);
}
-__attribute__ ((noinline))
+static __attribute__ ((noinline))
int f8(struct __sk_buff *skb)
{
return f7(skb);
}
SEC("tc")
-__failure __msg("the call stack of 8 frames")
+__failure __msg("the call stack of 9 frames")
int global_func3(struct __sk_buff *skb)
{
return f8(skb);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func7.c b/tools/testing/selftests/bpf/progs/test_global_func7.c
index f182febfde3c..9e59625c1c92 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func7.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func7.c
@@ -12,7 +12,7 @@ void foo(struct __sk_buff *skb)
}
SEC("tc")
-__failure __msg("foo() doesn't return scalar")
+__success
int global_func7(struct __sk_buff *skb)
{
foo(skb);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func_deep_stack.c b/tools/testing/selftests/bpf/progs/test_global_func_deep_stack.c
new file mode 100644
index 000000000000..1b634b543b62
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func_deep_stack.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/*
+ * Macro tricks to tersely define long non-recursive call chains. Computation
+ * is added to the functions to prevent tail-call optimization from reducing
+ * the stack size to 0.
+ */
+
+#define CAT(a, b) a ## b
+#define XCAT(a, b) CAT(a, b)
+
+#define F_0 \
+__attribute__((noinline)) \
+int f0(unsigned long a) \
+{ \
+ volatile long b = a + 16; \
+ if (a == 0) \
+ return 0; \
+ return b; \
+}
+
+#define FN(n, prev) \
+__attribute__((noinline)) \
+int XCAT(f, n)(unsigned long a) \
+{ \
+ volatile long b = XCAT(f, prev)(a - 1); \
+ if (!b) \
+ return 0; \
+ return b + 1; \
+}
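+
+/*
+ * For illustration, FN(1, 0) expands to:
+ *
+ *	__attribute__((noinline))
+ *	int f1(unsigned long a)
+ *	{
+ *		volatile long b = f0(a - 1);
+ *		if (!b)
+ *			return 0;
+ *		return b + 1;
+ *	}
+ */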
+
+/* Call chain 33 levels deep. */
+#define F_1 F_0 FN(1, 0)
+#define F_2 F_1 FN(2, 1)
+#define F_3 F_2 FN(3, 2)
+#define F_4 F_3 FN(4, 3)
+#define F_5 F_4 FN(5, 4)
+#define F_6 F_5 FN(6, 5)
+#define F_7 F_6 FN(7, 6)
+#define F_8 F_7 FN(8, 7)
+#define F_9 F_8 FN(9, 8)
+#define F_10 F_9 FN(10, 9)
+#define F_11 F_10 FN(11, 10)
+#define F_12 F_11 FN(12, 11)
+#define F_13 F_12 FN(13, 12)
+#define F_14 F_13 FN(14, 13)
+#define F_15 F_14 FN(15, 14)
+#define F_16 F_15 FN(16, 15)
+#define F_17 F_16 FN(17, 16)
+#define F_18 F_17 FN(18, 17)
+#define F_19 F_18 FN(19, 18)
+#define F_20 F_19 FN(20, 19)
+#define F_21 F_20 FN(21, 20)
+#define F_22 F_21 FN(22, 21)
+#define F_23 F_22 FN(23, 22)
+#define F_24 F_23 FN(24, 23)
+#define F_25 F_24 FN(25, 24)
+#define F_26 F_25 FN(26, 25)
+#define F_27 F_26 FN(27, 26)
+#define F_28 F_27 FN(28, 27)
+#define F_29 F_28 FN(29, 28)
+#define F_30 F_29 FN(30, 29)
+#define F_31 F_30 FN(31, 30)
+#define F_32 F_31 FN(32, 31)
+
+#define CAT2(a, b) a ## b
+#define XCAT2(a, b) CAT2(a, b)
+
+#define F(n) XCAT2(F_, n)
+
+F(32)
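+
+/* F(32) emits the definitions of f0 through f32, the full 33-function chain. */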
+
+/* Ensure that even 32 levels deep, the function verifies. */
+SEC("syscall")
+__success
+int global_func_deep_stack_success(struct __sk_buff *skb)
+{
+ return f31(55);
+}
+
+/*
+ * Check we actually honor stack limits (33 * 16 = 528 > 512 = MAX_STACK_DEPTH).
+ * The stack depth is 16 because the verifier calls round_up_stack_depth() on
+ * the size.
+ */
+SEC("syscall")
+__failure __msg("combined stack size of 34 calls")
+int global_func_deep_stack_fail(struct __sk_buff *skb)
+{
+ return f32(123);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index 03d7f89787a1..5609e388fb58 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -7,23 +7,21 @@
#include <bpf/bpf_core_read.h>
#include "../test_kmods/bpf_testmod.h"
-__u32 raw_tp_read_sz = 0;
+__u32 sz = 0;
-SEC("raw_tp/bpf_testmod_test_read")
+SEC("?raw_tp/bpf_testmod_test_read")
int BPF_PROG(handle_raw_tp,
struct task_struct *task, struct bpf_testmod_test_read_ctx *read_ctx)
{
- raw_tp_read_sz = BPF_CORE_READ(read_ctx, len);
+ sz = BPF_CORE_READ(read_ctx, len);
return 0;
}
-__u32 raw_tp_bare_write_sz = 0;
-
-SEC("raw_tp/bpf_testmod_test_write_bare_tp")
+SEC("?raw_tp/bpf_testmod_test_write_bare_tp")
int BPF_PROG(handle_raw_tp_bare,
struct task_struct *task, struct bpf_testmod_test_write_ctx *write_ctx)
{
- raw_tp_bare_write_sz = BPF_CORE_READ(write_ctx, len);
+ sz = BPF_CORE_READ(write_ctx, len);
return 0;
}
@@ -31,7 +29,7 @@ int raw_tp_writable_bare_in_val = 0;
int raw_tp_writable_bare_early_ret = 0;
int raw_tp_writable_bare_out_val = 0;
-SEC("raw_tp.w/bpf_testmod_test_writable_bare_tp")
+SEC("?raw_tp.w/bpf_testmod_test_writable_bare_tp")
int BPF_PROG(handle_raw_tp_writable_bare,
struct bpf_testmod_test_writable_ctx *writable)
{
@@ -41,76 +39,65 @@ int BPF_PROG(handle_raw_tp_writable_bare,
return 0;
}
-__u32 tp_btf_read_sz = 0;
-
-SEC("tp_btf/bpf_testmod_test_read")
+SEC("?tp_btf/bpf_testmod_test_read")
int BPF_PROG(handle_tp_btf,
struct task_struct *task, struct bpf_testmod_test_read_ctx *read_ctx)
{
- tp_btf_read_sz = read_ctx->len;
+ sz = read_ctx->len;
return 0;
}
-__u32 fentry_read_sz = 0;
-
-SEC("fentry/bpf_testmod_test_read")
+SEC("?fentry/bpf_testmod_test_read")
int BPF_PROG(handle_fentry,
struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
{
- fentry_read_sz = len;
+ sz = len;
return 0;
}
-__u32 fentry_manual_read_sz = 0;
-
-SEC("fentry")
+SEC("?fentry")
int BPF_PROG(handle_fentry_manual,
struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
{
- fentry_manual_read_sz = len;
+ sz = len;
return 0;
}
-__u32 fentry_explicit_read_sz = 0;
-
-SEC("fentry/bpf_testmod:bpf_testmod_test_read")
+SEC("?fentry/bpf_testmod:bpf_testmod_test_read")
int BPF_PROG(handle_fentry_explicit,
struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
{
- fentry_explicit_read_sz = len;
+ sz = len;
return 0;
}
-__u32 fentry_explicit_manual_read_sz = 0;
-
-SEC("fentry")
+SEC("?fentry")
int BPF_PROG(handle_fentry_explicit_manual,
struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
{
- fentry_explicit_manual_read_sz = len;
+ sz = len;
return 0;
}
-__u32 fexit_read_sz = 0;
-int fexit_ret = 0;
+int retval = 0;
-SEC("fexit/bpf_testmod_test_read")
+SEC("?fexit/bpf_testmod_test_read")
int BPF_PROG(handle_fexit,
struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len,
int ret)
{
- fexit_read_sz = len;
- fexit_ret = ret;
+ sz = len;
+ retval = ret;
return 0;
}
-SEC("fexit/bpf_testmod_return_ptr")
+SEC("?fexit/bpf_testmod_return_ptr")
int BPF_PROG(handle_fexit_ret, int arg, struct file *ret)
{
long buf = 0;
@@ -122,18 +109,16 @@ int BPF_PROG(handle_fexit_ret, int arg, struct file *ret)
return 0;
}
-__u32 fmod_ret_read_sz = 0;
-
-SEC("fmod_ret/bpf_testmod_test_read")
+SEC("?fmod_ret/bpf_testmod_test_read")
int BPF_PROG(handle_fmod_ret,
struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
{
- fmod_ret_read_sz = len;
+ sz = len;
return 0; /* don't override the exit code */
}
-SEC("kprobe.multi/bpf_testmod_test_read")
+SEC("?kprobe.multi/bpf_testmod_test_read")
int BPF_PROG(kprobe_multi)
{
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c
index a8e501af9604..4bc86c7654b1 100644
--- a/tools/testing/selftests/bpf/progs/test_probe_user.c
+++ b/tools/testing/selftests/bpf/progs/test_probe_user.c
@@ -5,13 +5,22 @@
#include <bpf/bpf_core_read.h>
#include "bpf_misc.h"
-static struct sockaddr_in old;
+struct test_pro_bss {
+ struct sockaddr_in old;
+ __u32 test_pid;
+};
+
+struct test_pro_bss bss;
static int handle_sys_connect_common(struct sockaddr_in *uservaddr)
{
struct sockaddr_in new;
+ __u32 cur = bpf_get_current_pid_tgid() >> 32;
+
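+	/* Skip connects from other tasks when the test sets bss.test_pid. */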
+ if (bss.test_pid && cur != bss.test_pid)
+ return 0;
- bpf_probe_read_user(&old, sizeof(old), uservaddr);
+ bpf_probe_read_user(&bss.old, sizeof(bss.old), uservaddr);
__builtin_memset(&new, 0xab, sizeof(new));
bpf_probe_write_user(uservaddr, &new, sizeof(new));
diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
index 7765720da7d5..02f52806b1b2 100644
--- a/tools/testing/selftests/bpf/progs/test_trampoline_count.c
+++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
@@ -3,20 +3,20 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-SEC("fentry/bpf_modify_return_test")
-int BPF_PROG(fentry_test, int a, int *b)
+SEC("fentry/bpf_testmod_trampoline_count_test")
+int BPF_PROG(fentry_test)
{
return 0;
}
-SEC("fmod_ret/bpf_modify_return_test")
-int BPF_PROG(fmod_ret_test, int a, int *b, int ret)
+SEC("fmod_ret/bpf_testmod_trampoline_count_test")
+int BPF_PROG(fmod_ret_test, int ret)
{
return 0;
}
-SEC("fexit/bpf_modify_return_test")
-int BPF_PROG(fexit_test, int a, int *b, int ret)
+SEC("fexit/bpf_testmod_trampoline_count_test")
+int BPF_PROG(fexit_test, int ret)
{
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c
index a78c87537b07..f00cb52874e0 100644
--- a/tools/testing/selftests/bpf/progs/test_usdt.c
+++ b/tools/testing/selftests/bpf/progs/test_usdt.c
@@ -138,4 +138,16 @@ int usdt_sib(struct pt_regs *ctx)
return 0;
}
+#ifdef __TARGET_ARCH_x86
+int executed;
+unsigned long expected_ip;
+
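+/* Count the probe as executed only when it fires at the expected IP. */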
+SEC("usdt")
+int usdt_executed(struct pt_regs *ctx)
+{
+ if (expected_ip == ctx->ip)
+ executed++;
+ return 0;
+}
+#endif
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 4ea0422d1042..3225b4aee8ff 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -1,10 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Facebook
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <asm/unistd.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
+#include "bpf/usdt.bpf.h"
char _license[] SEC("license") = "GPL";
@@ -180,3 +181,10 @@ int bench_trigger_rawtp(void *ctx)
handle(ctx);
return 0;
}
+
+SEC("?usdt")
+int bench_trigger_usdt(void *ctx)
+{
+ inc_counter();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/uninit_stack.c b/tools/testing/selftests/bpf/progs/uninit_stack.c
index 046a204c8fc6..5db02323c89c 100644
--- a/tools/testing/selftests/bpf/progs/uninit_stack.c
+++ b/tools/testing/selftests/bpf/progs/uninit_stack.c
@@ -76,6 +76,7 @@ __naked int helper_uninit_to_misc(void *ctx)
* thus showing the stack state, matched by __msg(). \
*/ \
call %[dummy]; \
+ r1 = *(u64*)(r10 - 104); \
r0 = 0; \
exit; \
"
diff --git a/tools/testing/selftests/bpf/progs/verifier_align.c b/tools/testing/selftests/bpf/progs/verifier_align.c
new file mode 100644
index 000000000000..3e52686515ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_align.c
@@ -0,0 +1,581 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+/* Converted from tools/testing/selftests/bpf/prog_tests/align.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* Four tests of known constants. These aren't staggeringly
+ * interesting since we track exact values now.
+ */
+
+SEC("tc")
+__success __log_level(2)
+__flag(BPF_F_ANY_ALIGNMENT)
+__msg("0: R1=ctx() R10=fp0")
+__msg("0: {{.*}} R3=2")
+__msg("1: {{.*}} R3=4")
+__msg("2: {{.*}} R3=8")
+__msg("3: {{.*}} R3=16")
+__msg("4: {{.*}} R3=32")
+__naked void mov(void)
+{
+ asm volatile (" \
+ r3 = 2; \
+ r3 = 4; \
+ r3 = 8; \
+ r3 = 16; \
+ r3 = 32; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__success __log_level(2)
+__flag(BPF_F_ANY_ALIGNMENT)
+__msg("0: R1=ctx() R10=fp0")
+__msg("0: {{.*}}R3=1")
+__msg("1: {{.*}}R3=2")
+__msg("2: {{.*}}R3=4")
+__msg("3: {{.*}}R3=8")
+__msg("4: {{.*}}R3=16")
+__msg("5: {{.*}}R3=1")
+__msg("6: {{.*}}R4=32")
+__msg("7: {{.*}}R4=16")
+__msg("8: {{.*}}R4=8")
+__msg("9: {{.*}}R4=4")
+__msg("10: {{.*}}R4=2")
+__naked void shift(void)
+{
+ asm volatile (" \
+ r3 = 1; \
+ r3 <<= 1; \
+ r3 <<= 1; \
+ r3 <<= 1; \
+ r3 <<= 1; \
+ r3 >>= 4; \
+ r4 = 32; \
+ r4 >>= 1; \
+ r4 >>= 1; \
+ r4 >>= 1; \
+ r4 >>= 1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__success __log_level(2)
+__flag(BPF_F_ANY_ALIGNMENT)
+__msg("0: R1=ctx() R10=fp0")
+__msg("0: {{.*}}R3=4")
+__msg("1: {{.*}}R3=8")
+__msg("2: {{.*}}R3=10")
+__msg("3: {{.*}}R4=8")
+__msg("4: {{.*}}R4=12")
+__msg("5: {{.*}}R4=14")
+__naked void addsub(void)
+{
+ asm volatile (" \
+ r3 = 4; \
+ r3 += 4; \
+ r3 += 2; \
+ r4 = 8; \
+ r4 += 4; \
+ r4 += 2; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__success __log_level(2)
+__flag(BPF_F_ANY_ALIGNMENT)
+__msg("0: R1=ctx() R10=fp0")
+__msg("0: {{.*}}R3=7")
+__msg("1: {{.*}}R3=7")
+__msg("2: {{.*}}R3=14")
+__msg("3: {{.*}}R3=56")
+__naked void mul(void)
+{
+ asm volatile (" \
+ r3 = 7; \
+ r3 *= 1; \
+ r3 *= 2; \
+ r3 *= 4; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+/* Tests using unknown values */
+
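+/* Load skb->data into r2 and skb->data_end into r3. */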
+#define PREP_PKT_POINTERS \
+ "r2 = *(u32*)(r1 + %[__sk_buff_data]);" \
+ "r3 = *(u32*)(r1 + %[__sk_buff_data_end]);"
+
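+/*
+ * Bounds-check that at least 8 bytes of packet data are readable, then load
+ * a single unknown byte into DST_REG. The verifier tracks the result as a
+ * scalar in [0, 255], i.e. var_off=(0x0; 0xff).
+ */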
+#define __LOAD_UNKNOWN(DST_REG, LBL) \
+ "r2 = *(u32*)(r1 + %[__sk_buff_data]);" \
+ "r3 = *(u32*)(r1 + %[__sk_buff_data_end]);" \
+ "r0 = r2;" \
+ "r0 += 8;" \
+ "if r3 >= r0 goto " LBL ";" \
+ "exit;" \
+LBL ":" \
+ DST_REG " = *(u8*)(r2 + 0);"
+
+#define LOAD_UNKNOWN(DST_REG) __LOAD_UNKNOWN(DST_REG, "l99_%=")
+
+SEC("tc")
+__success __log_level(2)
+__flag(BPF_F_ANY_ALIGNMENT)
+__msg("6: {{.*}} R2=pkt(r=8)")
+__msg("6: {{.*}} R3={{[^)]*}}var_off=(0x0; 0xff)")
+__msg("7: {{.*}} R3={{[^)]*}}var_off=(0x0; 0x1fe)")
+__msg("8: {{.*}} R3={{[^)]*}}var_off=(0x0; 0x3fc)")
+__msg("9: {{.*}} R3={{[^)]*}}var_off=(0x0; 0x7f8)")
+__msg("10: {{.*}} R3={{[^)]*}}var_off=(0x0; 0xff0)")
+__msg("12: {{.*}} R3=pkt_end()")
+__msg("17: {{.*}} R4={{[^)]*}}var_off=(0x0; 0xff)")
+__msg("18: {{.*}} R4={{[^)]*}}var_off=(0x0; 0x1fe0)")
+__msg("19: {{.*}} R4={{[^)]*}}var_off=(0x0; 0xff0)")
+__msg("20: {{.*}} R4={{[^)]*}}var_off=(0x0; 0x7f8)")
+__msg("21: {{.*}} R4={{[^)]*}}var_off=(0x0; 0x3fc)")
+__msg("22: {{.*}} R4={{[^)]*}}var_off=(0x0; 0x1fe)")
+__naked void unknown_shift(void)
+{
+ asm volatile (" \
+ " __LOAD_UNKNOWN("r3", "l99_%=") " \
+ r3 <<= 1; \
+ r3 <<= 1; \
+ r3 <<= 1; \
+ r3 <<= 1; \
+ " __LOAD_UNKNOWN("r4", "l98_%=") " \
+ r4 <<= 5; \
+ r4 >>= 1; \
+ r4 >>= 1; \
+ r4 >>= 1; \
+ r4 >>= 1; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__success __log_level(2)
+__flag(BPF_F_ANY_ALIGNMENT)
+__msg("6: {{.*}} R3={{[^)]*}}var_off=(0x0; 0xff)")
+__msg("7: {{.*}} R4={{[^)]*}}var_off=(0x0; 0xff)")
+__msg("8: {{.*}} R4={{[^)]*}}var_off=(0x0; 0xff)")
+__msg("9: {{.*}} R4={{[^)]*}}var_off=(0x0; 0xff)")
+__msg("10: {{.*}} R4={{[^)]*}}var_off=(0x0; 0x1fe)")
+__msg("11: {{.*}} R4={{[^)]*}}var_off=(0x0; 0xff)")
+__msg("12: {{.*}} R4={{[^)]*}}var_off=(0x0; 0x3fc)")
+__msg("13: {{.*}} R4={{[^)]*}}var_off=(0x0; 0xff)")
+__msg("14: {{.*}} R4={{[^)]*}}var_off=(0x0; 0x7f8)")
+__msg("15: {{.*}} R4={{[^)]*}}var_off=(0x0; 0xff0)")
+__naked void unknown_mul(void)
+{
+ asm volatile (" \
+ " LOAD_UNKNOWN("r3") " \
+ r4 = r3; \
+ r4 *= 1; \
+ r4 = r3; \
+ r4 *= 2; \
+ r4 = r3; \
+ r4 *= 4; \
+ r4 = r3; \
+ r4 *= 8; \
+ r4 *= 2; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__success __log_level(2)
+__msg("2: {{.*}} R5=pkt(r=0)")
+__msg("4: {{.*}} R5=pkt(r=0,imm=14)")
+__msg("5: {{.*}} R4=pkt(r=0,imm=14)")
+__msg("9: {{.*}} R5=pkt(r=18,imm=14)")
+__msg("10: {{.*}} R4={{[^)]*}}var_off=(0x0; 0xff){{.*}} R5=pkt(r=18,imm=14)")
+__msg("13: {{.*}} R4={{[^)]*}}var_off=(0x0; 0xffff)")
+__msg("14: {{.*}} R4={{[^)]*}}var_off=(0x0; 0xffff)")
+__naked void packet_const_offset(void)
+{
+ asm volatile (" \
+ " PREP_PKT_POINTERS " \
+ r5 = r2; \
+ r0 = 0; \
+ /* Skip over ethernet header. */ \
+ r5 += 14; \
+ r4 = r5; \
+ r4 += 4; \
+ if r3 >= r4 goto l0_%=; \
+ exit; \
+l0_%=: r4 = *(u8*)(r5 + 0); \
+ r4 = *(u8*)(r5 + 1); \
+ r4 = *(u8*)(r5 + 2); \
+ r4 = *(u8*)(r5 + 3); \
+ r4 = *(u16*)(r5 + 0); \
+ r4 = *(u16*)(r5 + 2); \
+ r4 = *(u32*)(r5 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__success __log_level(2)
+__flag(BPF_F_ANY_ALIGNMENT)
+/* Calculated offset in R6 has unknown value, but known
+ * alignment of 4.
+ */
+__msg("6: {{.*}} R2=pkt(r=8)")
+__msg("7: {{.*}} R6={{[^)]*}}var_off=(0x0; 0x3fc)")
+/* Offset is added to packet pointer R5, resulting in
+ * known fixed offset, and variable offset from R6.
+ */
+__msg("11: {{.*}} R5=pkt(id=1,{{[^)]*}},var_off=(0x2; 0x7fc)")
+/* At the time the word size load is performed from R5,
+ * its total offset is NET_IP_ALIGN + reg->off (0) +
+ * reg->aux_off (14) which is 16. Then the variable
+ * offset is considered using reg->aux_off_align which
+ * is 4 and meets the load's requirements.
+ */
+__msg("15: {{.*}} R5={{[^)]*}}var_off=(0x2; 0x7fc)")
+/* Variable offset is added to R5 packet pointer,
+ * resulting in auxiliary alignment of 4. To keep the
+ * BPF verifier's precision-backtracking logging from
+ * interfering, we also have a no-op R4 = R5
+ * instruction to validate the R5 state, and we check
+ * that R4 is what it should be in that case.
+ */
+__msg("18: {{.*}} R4={{[^)]*}}var_off=(0x0; 0x3fc){{.*}} R5={{[^)]*}}var_off=(0x0; 0x3fc)")
+/* Constant offset is added to R5, resulting in
+ * reg->off of 14.
+ */
+__msg("19: {{.*}} R5=pkt(id=2,{{[^)]*}}var_off=(0x2; 0x7fc)")
+/* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off
+ * (14) which is 16. Then the variable offset is 4-byte
+ * aligned, so the total offset is 4-byte aligned and
+ * meets the load's requirements.
+ */
+__msg("24: {{.*}} R5={{[^)]*}}var_off=(0x2; 0x7fc)")
+/* Constant offset is added to R5 packet pointer,
+ * resulting in reg->off value of 14.
+ */
+__msg("26: {{.*}} R5=pkt(r=8,imm=14)")
+/* Variable offset is added to R5, resulting in a
+ * variable offset of (4n). See comment for insn #18
+ * for R4 = R5 trick.
+ */
+__msg("28: {{.*}} R4={{[^)]*}}var_off=(0x2; 0x7fc){{.*}} R5={{[^)]*}}var_off=(0x2; 0x7fc)")
+/* Constant is added to R5 again, setting reg->off to 18. */
+__msg("29: {{.*}} R5=pkt(id=3,{{[^)]*}}var_off=(0x2; 0x7fc)")
+/* And once more we add a variable; resulting var_off
+ * is still (4n), fixed offset is not changed.
+ * Also, we create a new reg->id.
+ */
+__msg("31: {{.*}} R4={{[^)]*}}var_off=(0x2; 0xffc){{.*}} R5={{[^)]*}}var_off=(0x2; 0xffc)")
+/* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (18)
+ * which is 20. Then the variable offset is (4n), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+__msg("35: {{.*}} R5={{[^)]*}}var_off=(0x2; 0xffc)")
+__naked void packet_variable_offset(void)
+{
+ asm volatile (" \
+ " LOAD_UNKNOWN("r6") " \
+ r6 <<= 2; \
+ /* First, add a constant to the R5 packet pointer,\
+ * then a variable with a known alignment. \
+ */ \
+ r5 = r2; \
+ r5 += 14; \
+ r5 += r6; \
+ r4 = r5; \
+ r4 += 4; \
+ if r3 >= r4 goto l0_%=; \
+ exit; \
+l0_%=: r4 = *(u32*)(r5 + 0); \
+ /* Now, test in the other direction. Adding first\
+ * the variable offset to R5, then the constant.\
+ */ \
+ r5 = r2; \
+ r5 += r6; \
+ r4 = r5; \
+ r5 += 14; \
+ r4 = r5; \
+ r4 += 4; \
+ if r3 >= r4 goto l1_%=; \
+ exit; \
+l1_%=: r4 = *(u32*)(r5 + 0); \
+ /* Test multiple accumulations of unknown values\
+ * into a packet pointer. \
+ */ \
+ r5 = r2; \
+ r5 += 14; \
+ r5 += r6; \
+ r4 = r5; \
+ r5 += 4; \
+ r5 += r6; \
+ r4 = r5; \
+ r4 += 4; \
+ if r3 >= r4 goto l2_%=; \
+ exit; \
+l2_%=: r4 = *(u32*)(r5 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__success __log_level(2)
+__flag(BPF_F_ANY_ALIGNMENT)
+/* Calculated offset in R6 has unknown value, but known
+ * alignment of 4.
+ */
+__msg("6: {{.*}} R2=pkt(r=8)")
+__msg("7: {{.*}} R6={{[^)]*}}var_off=(0x0; 0x3fc)")
+/* Adding 14 makes R6 be (4n+2) */
+__msg("8: {{.*}} R6={{[^)]*}}var_off=(0x2; 0x7fc)")
+/* Packet pointer has (4n+2) offset */
+__msg("11: {{.*}} R5={{[^)]*}}var_off=(0x2; 0x7fc)")
+__msg("12: {{.*}} R4={{[^)]*}}var_off=(0x2; 0x7fc)")
+/* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+ * which is 2. Then the variable offset is (4n+2), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+__msg("15: {{.*}} R5={{[^)]*}}var_off=(0x2; 0x7fc)")
+/* Newly read value in R6 was shifted left by 2, so has
+ * known alignment of 4.
+ */
+__msg("17: {{.*}} R6={{[^)]*}}var_off=(0x0; 0x3fc)")
+/* Added (4n) to packet pointer's (4n+2) var_off, giving
+ * another (4n+2).
+ */
+__msg("19: {{.*}} R5={{[^)]*}}var_off=(0x2; 0xffc)")
+__msg("20: {{.*}} R4={{[^)]*}}var_off=(0x2; 0xffc)")
+/* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+ * which is 2. Then the variable offset is (4n+2), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+__msg("23: {{.*}} R5={{[^)]*}}var_off=(0x2; 0xffc)")
+__naked void packet_variable_offset_2(void)
+{
+ asm volatile (" \
+ /* Create an unknown offset, (4n+2)-aligned */ \
+ " LOAD_UNKNOWN("r6") " \
+ r6 <<= 2; \
+ r6 += 14; \
+ /* Add it to the packet pointer */ \
+ r5 = r2; \
+ r5 += r6; \
+ /* Check bounds and perform a read */ \
+ r4 = r5; \
+ r4 += 4; \
+ if r3 >= r4 goto l0_%=; \
+ exit; \
+l0_%=: r6 = *(u32*)(r5 + 0); \
+ /* Make a (4n) offset from the value we just read */\
+ r6 &= 0xff; \
+ r6 <<= 2; \
+ /* Add it to the packet pointer */ \
+ r5 += r6; \
+ /* Check bounds and perform a read */ \
+ r4 = r5; \
+ r4 += 4; \
+ if r3 >= r4 goto l1_%=; \
+ exit; \
+l1_%=: r6 = *(u32*)(r5 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__failure __log_level(2)
+__msg("3: {{.*}} R5=pkt_end()")
+/* (ptr - ptr) << 2 == unknown, (4n) */
+__msg("5: {{.*}} R5={{[^)]*}}var_off=(0x0; 0xfffffffffffffffc)")
+/* (4n) + 14 == (4n+2). We blow our bounds, because
+ * the add could overflow.
+ */
+__msg("6: {{.*}} R5={{[^)]*}}var_off=(0x2; 0xfffffffffffffffc)")
+/* Checked s>=0 */
+__msg("9: {{.*}} R5={{[^)]*}}var_off=(0x2; 0x7ffffffffffffffc)")
+/* packet pointer + nonnegative (4n+2) */
+__msg("11: {{.*}} R4={{[^)]*}}var_off=(0x2; 0x7ffffffffffffffc){{.*}} R6={{[^)]*}}var_off=(0x2; 0x7ffffffffffffffc)")
+__msg("12: (07) r4 += 4")
+/* packet smax bound overflow */
+__msg("pkt pointer offset -9223372036854775808 is not allowed")
+__naked void dubious_pointer_arithmetic(void)
+{
+ asm volatile (" \
+ " PREP_PKT_POINTERS " \
+ r0 = 0; \
+ /* (ptr - ptr) << 2 */ \
+ r5 = r3; \
+ r5 -= r2; \
+ r5 <<= 2; \
+ /* We have a (4n) value. Let's make a packet offset\
+ * out of it. First add 14, to make it a (4n+2)\
+ */ \
+ r5 += 14; \
+ /* Then make sure it's nonnegative */ \
+ if r5 s>= 0 goto l0_%=; \
+ exit; \
+l0_%=: /* Add it to packet pointer */ \
+ r6 = r2; \
+ r6 += r5; \
+ /* Check bounds and perform a read */ \
+ r4 = r6; \
+ r4 += 4; \
+ if r3 >= r4 goto l1_%=; \
+ exit; \
+l1_%=: r4 = *(u32*)(r6 + 0); \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__success __log_level(2)
+__flag(BPF_F_ANY_ALIGNMENT)
+/* Calculated offset in R6 has unknown value, but known
+ * alignment of 4.
+ */
+__msg("6: {{.*}} R2=pkt(r=8)")
+__msg("8: {{.*}} R6={{[^)]*}}var_off=(0x0; 0x3fc)")
+/* Adding 14 makes R6 be (4n+2) */
+__msg("9: {{.*}} R6={{[^)]*}}var_off=(0x2; 0x7fc)")
+/* New unknown value in R7 is (4n) */
+__msg("10: {{.*}} R7={{[^)]*}}var_off=(0x0; 0x3fc)")
+/* Subtracting it from R6 blows our unsigned bounds */
+__msg("11: {{.*}} R6={{[^)]*}}var_off=(0x2; 0xfffffffffffffffc)")
+/* Checked s>=0 */
+__msg("14: {{.*}} R6={{[^)]*}}var_off=(0x2; 0x7fc)")
+/* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+ * which is 2. Then the variable offset is (4n+2), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+__msg("20: {{.*}} R5={{[^)]*}}var_off=(0x2; 0x7fc)")
+__naked void variable_subtraction(void)
+{
+ asm volatile (" \
+ /* Create an unknown offset, (4n+2)-aligned */ \
+ " LOAD_UNKNOWN("r6") " \
+ r7 = r6; \
+ r6 <<= 2; \
+ r6 += 14; \
+ /* Create another unknown, (4n)-aligned, and subtract\
+ * it from the first one \
+ */ \
+ r7 <<= 2; \
+ r6 -= r7; \
+ /* Bounds-check the result */ \
+ if r6 s>= 0 goto l0_%=; \
+ exit; \
+l0_%=: /* Add it to the packet pointer */ \
+ r5 = r2; \
+ r5 += r6; \
+ /* Check bounds and perform a read */ \
+ r4 = r5; \
+ r4 += 4; \
+ if r3 >= r4 goto l1_%=; \
+ exit; \
+l1_%=: r6 = *(u32*)(r5 + 0); \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__success __log_level(2)
+__flag(BPF_F_ANY_ALIGNMENT)
+/* Calculated offset in R6 has unknown value, but known
+ * alignment of 4.
+ */
+__msg("6: {{.*}} R2=pkt(r=8)")
+__msg("9: {{.*}} R6={{[^)]*}}var_off=(0x0; 0x3c)")
+/* Adding 14 makes R6 be (4n+2) */
+__msg("10: {{.*}} R6={{[^)]*}}var_off=(0x2; 0x7c)")
+/* Subtracting from packet pointer overflows ubounds */
+__msg("13: R5={{[^)]*}}var_off=(0xffffffffffffff82; 0x7c)")
+/* New unknown value in R7 is (4n), >= 76 */
+__msg("14: {{.*}} R7={{[^)]*}}var_off=(0x0; 0x7fc)")
+/* Adding it to packet pointer gives nice bounds again */
+__msg("16: {{.*}} R5={{[^)]*}}var_off=(0x2; 0x7fc)")
+/* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+ * which is 2. Then the variable offset is (4n+2), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+__msg("20: {{.*}} R5={{[^)]*}}var_off=(0x2; 0x7fc)")
+__naked void pointer_variable_subtraction(void)
+{
+ asm volatile (" \
+ /* Create an unknown offset, (4n+2)-aligned and bounded\
+ * to [14,74] \
+ */ \
+ " LOAD_UNKNOWN("r6") " \
+ r7 = r6; \
+ r6 &= 0xf; \
+ r6 <<= 2; \
+ r6 += 14; \
+ /* Subtract it from the packet pointer */ \
+ r5 = r2; \
+ r5 -= r6; \
+ /* Create another unknown, (4n)-aligned and >= 74.\
+ * That in fact means >= 76, since 74 mod 4 == 2\
+ */ \
+ r7 <<= 2; \
+ r7 += 76; \
+ /* Add it to the packet pointer */ \
+ r5 += r7; \
+ /* Check bounds and perform a read */ \
+ r4 = r5; \
+ r4 += 4; \
+ if r3 >= r4 goto l0_%=; \
+ exit; \
+l0_%=: r6 = *(u32*)(r5 + 0); \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
index c4b8daac4388..62e282f4448a 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -477,4 +477,134 @@ int arena_kfuncs_under_bpf_lock(void *ctx)
return 0;
}
+
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+
+/*
+ * Test that scalar += PTR_TO_ARENA correctly upgrades the
+ * destination register to a PTR_TO_ARENA.
+ */
+SEC("syscall")
+__success __retval(0)
+int scalar_add_arena_ptr(void *ctx)
+{
+ int __arena *scalar, *arena_ptr;
+
+ volatile char __arena *base = arena_base(&arena);
+
+ asm volatile (
+ "%[arena_ptr] = 8192;"
+ "%[arena_ptr] = addr_space_cast(%[arena_ptr], 0x0, 0x1);"
+ "%[scalar] = 12;"
+ "%[scalar] += %[arena_ptr];"
+ : [scalar] "=r"(scalar),
+ [arena_ptr] "=&r"(arena_ptr)
+ : "r"(base)
+ :
+ );
+ return 0;
+}
+
+/*
+ * Tests that PTR_TO_ARENA + PTR_TO_ARENA is allowed.
+ */
+SEC("syscall")
+__success __retval(0)
+int arena_ptr_add_arena_ptr(void *ctx)
+{
+ int __arena *arena_ptr2, *arena_ptr1;
+
+ /* Needed for the verifier to link the arena to the subprog. */
+ volatile char __arena *base = arena_base(&arena);
+
+ asm volatile (
+ "%[arena_ptr1] = 8192;"
+ "%[arena_ptr1] = addr_space_cast(%[arena_ptr1], 0x0, 0x1);"
+ "%[arena_ptr2] = 4096;"
+ "%[arena_ptr2] = addr_space_cast(%[arena_ptr2], 0x0, 0x1);"
+ "%[arena_ptr2] += %[arena_ptr1];"
+ : [arena_ptr2] "=r"(arena_ptr2),
+ [arena_ptr1] "=&r"(arena_ptr1)
+ : "r"(base)
+ :
+ );
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int scalar_xor_arena_ptr(void *ctx)
+{
+ int __arena *scalar, *arena_ptr;
+
+ volatile char __arena *base = arena_base(&arena);
+
+ asm volatile (
+ "%[arena_ptr] = 8192;"
+ "%[arena_ptr] = addr_space_cast(%[arena_ptr], 0x0, 0x1);"
+ "%[scalar] = 12;"
+ "%[scalar] ^= %[arena_ptr];"
+ : [scalar] "=r"(scalar),
+ [arena_ptr] "=&r"(arena_ptr)
+ : "r"(base)
+ :
+ );
+ return 0;
+}
+
+/*
+ * Tests that PTR_TO_ARENA and non-arena pointers can be added.
+ */
+SEC("syscall")
+__success __retval(0)
+int arena_ptr_add_to_non_arena_ptr(void *ctx)
+{
+ register int __arena *arena_ptr asm("r3");
+ register void *dst asm("r4");
+
+ volatile char __arena *base = arena_base(&arena);
+
+ asm volatile (
+ "%[arena_ptr] = 8192;"
+ "%[arena_ptr] = addr_space_cast(%[arena_ptr], 0x0, 0x1);"
+ "%[dst] = %[ctx];"
+ "%[dst] += %[arena_ptr];"
+ : [arena_ptr] "=&r"(arena_ptr),
+ [dst] "=&r"(dst)
+ : [ctx] "r"(ctx), "r"(base)
+ :
+ );
+
+ (void)ctx;
+
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int non_arena_ptr_add_to_arena_ptr(void *ctx)
+{
+ register int __arena *arena_ptr asm("r3");
+ register void *src asm("r4");
+
+ volatile char __arena *base = arena_base(&arena);
+
+ asm volatile (
+ "%[arena_ptr] = 8192;"
+ "%[arena_ptr] = addr_space_cast(%[arena_ptr], 0x0, 0x1);"
+ "%[src] = %[ctx];"
+ "%[arena_ptr] += %[src];"
+ : [arena_ptr] "=&r"(arena_ptr),
+ [src] "=&r"(src)
+ : [ctx] "r"(ctx), "r"(base)
+ :
+ );
+
+ (void)ctx;
+
+ return 0;
+}
+
+#endif
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c b/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
index 39aff82549c9..6bf95550a024 100644
--- a/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
+++ b/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
@@ -31,7 +31,7 @@ static int timer_cb(void *map, int *key, struct bpf_timer *timer)
}
SEC("fentry/bpf_fentry_test1")
-__failure __msg("helper call might sleep in a non-sleepable prog")
+__failure __msg("sleepable helper bpf_copy_from_user#{{[0-9]+}} in non-sleepable prog")
int timer_non_sleepable_prog(void *ctx)
{
struct timer_elem *val;
@@ -47,7 +47,7 @@ int timer_non_sleepable_prog(void *ctx)
}
SEC("lsm.s/file_open")
-__failure __msg("helper call might sleep in a non-sleepable prog")
+__failure __msg("sleepable helper bpf_copy_from_user#{{[0-9]+}} in non-sleepable prog")
int timer_sleepable_prog(void *ctx)
{
struct timer_elem *val;
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index 79a328276805..c1ae013dee29 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -202,7 +202,7 @@ l0_%=: /* exit */ \
SEC("tc")
__description("bounds check based on reg_off + var_off + insn_off. test1")
-__failure __msg("value_size=8 off=1073741825")
+__failure __msg("map_value pointer offset 1073741822 is not allowed")
__naked void var_off_insn_off_test1(void)
{
asm volatile (" \
@@ -1066,7 +1066,6 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("bound check with JMP_JSLT for crossing 64-bit signed boundary")
__success __retval(0)
-__flag(BPF_F_TEST_REG_INVARIANTS)
__naked void crossing_64_bit_signed_boundary_2(void)
{
asm volatile (" \
@@ -1148,7 +1147,6 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("bound check with JMP32_JSLT for crossing 32-bit signed boundary")
__success __retval(0)
-__flag(BPF_F_TEST_REG_INVARIANTS)
__naked void crossing_32_bit_signed_boundary_2(void)
{
asm volatile (" \
@@ -1536,7 +1534,7 @@ __naked void sub32_partial_overflow(void)
SEC("socket")
__description("dead branch on jset, does not result in invariants violation error")
__success __log_level(2)
-__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__retval(0)
__naked void jset_range_analysis(void)
{
asm volatile (" \
@@ -1572,7 +1570,7 @@ l0_%=: r0 = 0; \
*/
SEC("socket")
__description("bounds deduction cross sign boundary, negative overlap")
-__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS)
+__success __log_level(2)
__msg("7: (1f) r0 -= r6 {{.*}} R0=scalar(smin=smin32=-655,smax=smax32=-146,umin=0xfffffffffffffd71,umax=0xffffffffffffff6e,umin32=0xfffffd71,umax32=0xffffff6e,var_off=(0xfffffffffffffc00; 0x3ff))")
__retval(0)
__naked void bounds_deduct_negative_overlap(void)
@@ -1616,7 +1614,7 @@ l0_%=: r0 = 0; \
*/
SEC("socket")
__description("bounds deduction cross sign boundary, positive overlap")
-__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS)
+__success __log_level(2)
__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f))")
__retval(0)
__naked void bounds_deduct_positive_overlap(void)
@@ -1649,7 +1647,7 @@ l0_%=: r0 = 0; \
*/
SEC("socket")
__description("bounds deduction cross sign boundary, two overlaps")
-__failure __flag(BPF_F_TEST_REG_INVARIANTS)
+__failure
__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)")
__msg("frame pointer is read only")
__naked void bounds_deduct_two_overlaps(void)
@@ -1713,7 +1711,7 @@ SEC("socket")
__description("conditional jump on same register, branch taken")
__not_msg("20: (b7) r0 = 1 {{.*}} R0=1")
__success __log_level(2)
-__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__retval(0)
__naked void condition_jump_on_same_register(void *ctx)
{
asm volatile(" \
@@ -1748,7 +1746,7 @@ SEC("socket")
__description("jset on same register, constant value branch taken")
__not_msg("7: (b7) r0 = 1 {{.*}} R0=1")
__success __log_level(2)
-__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__retval(0)
__naked void jset_on_same_register_1(void *ctx)
{
asm volatile(" \
@@ -1770,7 +1768,7 @@ SEC("socket")
__description("jset on same register, scalar value branch taken")
__not_msg("12: (b7) r0 = 1 {{.*}} R0=1")
__success __log_level(2)
-__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__retval(0)
__naked void jset_on_same_register_2(void *ctx)
{
asm volatile(" \
@@ -1800,7 +1798,6 @@ __description("jset on same register, scalar value unknown branch 1")
__msg("3: (b7) r0 = 0 {{.*}} R0=0")
__msg("5: (b7) r0 = 1 {{.*}} R0=1")
__success __log_level(2)
-__flag(BPF_F_TEST_REG_INVARIANTS)
__naked void jset_on_same_register_3(void *ctx)
{
asm volatile(" \
@@ -1822,7 +1819,6 @@ __description("jset on same register, scalar value unknown branch 2")
__msg("4: (b7) r0 = 0 {{.*}} R0=0")
__msg("6: (b7) r0 = 1 {{.*}} R0=1")
__success __log_level(2)
-__flag(BPF_F_TEST_REG_INVARIANTS)
__naked void jset_on_same_register_4(void *ctx)
{
asm volatile(" \
@@ -1845,7 +1841,6 @@ __description("jset on same register, scalar value unknown branch 3")
__msg("4: (b7) r0 = 0 {{.*}} R0=0")
__msg("6: (b7) r0 = 1 {{.*}} R0=1")
__success __log_level(2)
-__flag(BPF_F_TEST_REG_INVARIANTS)
__naked void jset_on_same_register_5(void *ctx)
{
asm volatile(" \
@@ -1877,7 +1872,6 @@ SEC("socket")
__description("bounds refinement with single-value tnum on umax")
__msg("3: (15) if r0 == 0xe0 {{.*}} R0=240")
__success __log_level(2)
-__flag(BPF_F_TEST_REG_INVARIANTS)
__naked void bounds_refinement_tnum_umax(void *ctx)
{
asm volatile(" \
@@ -1907,7 +1901,6 @@ SEC("socket")
__description("bounds refinement with single-value tnum on umin")
__msg("3: (15) if r0 == 0xf0 {{.*}} R0=224")
__success __log_level(2)
-__flag(BPF_F_TEST_REG_INVARIANTS)
__naked void bounds_refinement_tnum_umin(void *ctx)
{
asm volatile(" \
@@ -2002,7 +1995,6 @@ __naked void bounds_refinement_multiple_overlaps(void *ctx)
SEC("socket")
__success
-__flag(BPF_F_TEST_REG_INVARIANTS)
__naked void signed_unsigned_intersection32_case1(void *ctx)
{
asm volatile(" \
@@ -2020,7 +2012,6 @@ __naked void signed_unsigned_intersection32_case1(void *ctx)
SEC("socket")
__success
-__flag(BPF_F_TEST_REG_INVARIANTS)
__naked void signed_unsigned_intersection32_case2(void *ctx)
{
asm volatile(" \
@@ -2037,6 +2028,40 @@ __naked void signed_unsigned_intersection32_case2(void *ctx)
: __clobber_all);
}
+/*
+ * After instruction 3, the u64 and s64 ranges look as follows:
+ * 0 umin=2 umax=0xff..ff00..03 U64_MAX
+ * | [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * |xx] [xxxxxxxxxxxxxxxxxxxxxxxxxxxx|
+ * 0 smax=2 smin=0x800..02 -1
+ *
+ * The two ranges can't be refined because they overlap in two places. Once we
+ * add an upper bound to the u64 range at instruction 4, the refinement can
+ * happen. This test validates that the refinement does happen and is not
+ * overwritten by the less-precise 32-bit ranges.
+ */
+SEC("socket")
+__description("bounds refinement: 64bits ranges not overwritten by 32bits ranges")
+__msg("3: (65) if r0 s> 0x2 {{.*}} R0=scalar(smin=0x8000000000000002,smax=2,umin=smin32=umin32=2,umax=0xffffffff00000003,smax32=umax32=3")
+__msg("4: (25) if r0 > 0x13 {{.*}} R0=2")
+__success __log_level(2)
+__naked void refinement_32bounds_not_overwriting_64bounds(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ if w0 < 2 goto +5; \
+ if w0 > 3 goto +4; \
+ if r0 s> 2 goto +3; \
+ if r0 > 19 goto +2; \
+ if r0 == 2 goto +1; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
SEC("socket")
__description("maybe_fork_scalars: OR with constant rejects OOB")
__failure __msg("invalid access to map value")
@@ -2131,4 +2156,32 @@ l0_%=: r0 = 0; \
: __clobber_all);
}
+/*
+ * The last jump can be detected as always taken because the intersection of the
+ * R5 and R7 32-bit tnums produces a constant that isn't within R7's s32 bounds.
+ */
+SEC("socket")
+__description("dead branch: tnums give impossible constant if equal")
+__success
+__naked void tnums_equal_impossible_constant(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ r5 = r0; \
+ /* Set r5's var_off32 to (0; 0xfffffffc) */ \
+ r5 &= 0xfffffffffffffffc; \
+ r7 = r0; \
+ /* Set r7's var_off32 to (0x0; 0x1) */ \
+ r7 &= 0x1; \
+ /* Now, s32=[-43; -42], var_off32=(0xffffffd4; 0x3) */ \
+ r7 += -43; \
+ /* On fallthrough, var_off32=-44, not in s32 */ \
+ if w5 != w7 goto +1; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c
index 5ebf7d9bcc55..7856dad3d1f3 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ctx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c
@@ -4,6 +4,10 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+static const char ctx_strncmp_target[] = "ctx";
+static const char ctx_snprintf_fmt[] = "";
SEC("tc")
__description("context stores via BPF_ATOMIC")
@@ -69,7 +73,6 @@ __naked void ctx_pointer_to_helper_1(void)
SEC("socket")
__description("pass modified ctx pointer to helper, 2")
__failure __msg("negative offset ctx ptr R1 off=-612 disallowed")
-__failure_unpriv __msg_unpriv("negative offset ctx ptr R1 off=-612 disallowed")
__naked void ctx_pointer_to_helper_2(void)
{
asm volatile (" \
@@ -292,4 +295,568 @@ padding_access("cgroup/post_bind4", bpf_sock, dst_port, 2);
__failure __msg("invalid bpf_context access")
padding_access("sk_reuseport", sk_reuseport_md, hash, 4);
+SEC("?syscall")
+__description("syscall: write to ctx with fixed offset")
+__success
+int syscall_ctx_fixed_off_write(void *ctx)
+{
+ char *p = ctx;
+
+ *(__u32 *)p = 0;
+ *(__u32 *)(p + 4) = 0;
+ return 0;
+}
+
+SEC("?syscall")
+__description("syscall: read ctx with fixed offset")
+__success
+int syscall_ctx_fixed_off_read(void *ctx)
+{
+ char *p = ctx;
+ volatile __u32 val;
+
+ val = *(__u32 *)(p + 4);
+ (void)val;
+ return 0;
+}
+
+SEC("?syscall")
+__description("syscall: unaligned read ctx with fixed offset")
+__success
+int syscall_ctx_unaligned_fixed_off_read(void *ctx)
+{
+ char *p = ctx;
+ volatile __u32 val;
+
+ val = *(__u32 *)(p + 2);
+ (void)val;
+ return 0;
+}
+
+SEC("?syscall")
+__description("syscall: unaligned write ctx with fixed offset")
+__success
+int syscall_ctx_unaligned_fixed_off_write(void *ctx)
+{
+ char *p = ctx;
+
+ *(__u32 *)(p + 2) = 0;
+ return 0;
+}
+
+SEC("?syscall")
+__description("syscall: read ctx with variable offset")
+__success
+int syscall_ctx_var_off_read(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+ volatile __u32 val;
+
+ off &= 0xfc;
+ p += off;
+ val = *(__u32 *)p;
+ (void)val;
+ return 0;
+}
+
+SEC("?syscall")
+__description("syscall: write ctx with variable offset")
+__success
+int syscall_ctx_var_off_write(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+
+ off &= 0xfc;
+ p += off;
+ *(__u32 *)p = 0;
+ return 0;
+}
+
+SEC("?syscall")
+__description("syscall: unaligned read ctx with variable offset")
+__success
+int syscall_ctx_unaligned_var_off_read(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+ volatile __u32 val;
+
+ off &= 0xfc;
+ off += 2;
+ p += off;
+ val = *(__u32 *)p;
+ (void)val;
+ return 0;
+}
+
+SEC("?syscall")
+__description("syscall: unaligned write ctx with variable offset")
+__success
+int syscall_ctx_unaligned_var_off_write(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+
+ off &= 0xfc;
+ off += 2;
+ p += off;
+ *(__u32 *)p = 0;
+ return 0;
+}
+
+SEC("?syscall")
+__description("syscall: reject ctx access past U16_MAX with fixed offset")
+__failure __msg("outside of the allowed memory range")
+int syscall_ctx_u16_max_fixed_off(void *ctx)
+{
+ char *p = ctx;
+ volatile __u32 val;
+
+ p += 65535;
+ val = *(__u32 *)p;
+ (void)val;
+ return 0;
+}
+
+SEC("?syscall")
+__description("syscall: reject ctx access past U16_MAX with variable offset")
+__failure __msg("outside of the allowed memory range")
+int syscall_ctx_u16_max_var_off(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+ volatile __u32 val;
+
+ off &= 0xffff;
+ off += 1;
+ p += off;
+ val = *(__u32 *)p;
+ (void)val;
+ return 0;
+}
+
+SEC("?syscall")
+__description("syscall: reject negative variable offset ctx access")
+__failure __msg("min value is negative")
+int syscall_ctx_neg_var_off(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+
+ off &= 4;
+ p -= off;
+ return *(__u32 *)p;
+}
+
+SEC("?syscall")
+__description("syscall: reject unbounded variable offset ctx access")
+__failure __msg("unbounded memory access")
+int syscall_ctx_unbounded_var_off(void *ctx)
+{
+ __u64 off = (__u32)bpf_get_prandom_u32();
+ char *p = ctx;
+
+ off <<= 2;
+ p += off;
+ return *(__u32 *)p;
+}
+
+SEC("?syscall")
+__description("syscall: helper read ctx with fixed offset")
+__success
+int syscall_ctx_helper_fixed_off_read(void *ctx)
+{
+ char *p = ctx;
+
+ p += 4;
+ return bpf_strncmp(p, 4, ctx_strncmp_target);
+}
+
+SEC("?syscall")
+__description("syscall: helper write ctx with fixed offset")
+__success
+int syscall_ctx_helper_fixed_off_write(void *ctx)
+{
+ char *p = ctx;
+
+ p += 4;
+ return bpf_probe_read_kernel(p, 4, 0);
+}
+
+SEC("?syscall")
+__description("syscall: helper unaligned read ctx with fixed offset")
+__success
+int syscall_ctx_helper_unaligned_fixed_off_read(void *ctx)
+{
+ char *p = ctx;
+
+ p += 2;
+ return bpf_strncmp(p, 4, ctx_strncmp_target);
+}
+
+SEC("?syscall")
+__description("syscall: helper unaligned write ctx with fixed offset")
+__success
+int syscall_ctx_helper_unaligned_fixed_off_write(void *ctx)
+{
+ char *p = ctx;
+
+ p += 2;
+ return bpf_probe_read_kernel(p, 4, 0);
+}
+
+SEC("?syscall")
+__description("syscall: helper read ctx with variable offset")
+__success
+int syscall_ctx_helper_var_off_read(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+
+ off &= 0xfc;
+ p += off;
+ return bpf_strncmp(p, 4, ctx_strncmp_target);
+}
+
+SEC("?syscall")
+__description("syscall: helper write ctx with variable offset")
+__success
+int syscall_ctx_helper_var_off_write(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+
+ off &= 0xfc;
+ p += off;
+ return bpf_probe_read_kernel(p, 4, 0);
+}
+
+SEC("?syscall")
+__description("syscall: helper unaligned read ctx with variable offset")
+__success
+int syscall_ctx_helper_unaligned_var_off_read(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+
+ off &= 0xfc;
+ off += 2;
+ p += off;
+ return bpf_strncmp(p, 4, ctx_strncmp_target);
+}
+
+SEC("?syscall")
+__description("syscall: helper unaligned write ctx with variable offset")
+__success
+int syscall_ctx_helper_unaligned_var_off_write(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+
+ off &= 0xfc;
+ off += 2;
+ p += off;
+ return bpf_probe_read_kernel(p, 4, 0);
+}
+
+SEC("?syscall")
+__description("syscall: reject helper read ctx past U16_MAX with fixed offset")
+__failure __msg("outside of the allowed memory range")
+int syscall_ctx_helper_u16_max_fixed_off_read(void *ctx)
+{
+ char *p = ctx;
+
+ p += 65535;
+ return bpf_strncmp(p, 4, ctx_strncmp_target);
+}
+
+SEC("?syscall")
+__description("syscall: reject helper write ctx past U16_MAX with fixed offset")
+__failure __msg("outside of the allowed memory range")
+int syscall_ctx_helper_u16_max_fixed_off_write(void *ctx)
+{
+ char *p = ctx;
+
+ p += 65535;
+ return bpf_probe_read_kernel(p, 4, 0);
+}
+
+SEC("?syscall")
+__description("syscall: reject helper read ctx past U16_MAX with variable offset")
+__failure __msg("outside of the allowed memory range")
+int syscall_ctx_helper_u16_max_var_off_read(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+
+ off &= 0xffff;
+ off += 1;
+ p += off;
+ return bpf_strncmp(p, 4, ctx_strncmp_target);
+}
+
+SEC("?syscall")
+__description("syscall: reject helper write ctx past U16_MAX with variable offset")
+__failure __msg("outside of the allowed memory range")
+int syscall_ctx_helper_u16_max_var_off_write(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+
+ off &= 0xffff;
+ off += 1;
+ p += off;
+ return bpf_probe_read_kernel(p, 4, 0);
+}
+
+SEC("?syscall")
+__description("syscall: helper read zero-sized ctx access")
+__success
+int syscall_ctx_helper_zero_sized_read(void *ctx)
+{
+ return bpf_snprintf(0, 0, ctx_snprintf_fmt, ctx, 0);
+}
+
+SEC("?syscall")
+__description("syscall: helper write zero-sized ctx access")
+__success
+int syscall_ctx_helper_zero_sized_write(void *ctx)
+{
+ return bpf_probe_read_kernel(ctx, 0, 0);
+}
+
+SEC("?syscall")
+__description("syscall: kfunc access ctx with fixed offset")
+__success
+int syscall_ctx_kfunc_fixed_off(void *ctx)
+{
+ char *p = ctx;
+
+ p += 4;
+ bpf_kfunc_call_test_mem_len_pass1(p, 4);
+ return 0;
+}
+
+SEC("?syscall")
+__description("syscall: kfunc access ctx with variable offset")
+__success
+int syscall_ctx_kfunc_var_off(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+
+ off &= 0xfc;
+ p += off;
+ bpf_kfunc_call_test_mem_len_pass1(p, 4);
+ return 0;
+}
+
+SEC("?syscall")
+__description("syscall: kfunc unaligned access ctx with fixed offset")
+__success
+int syscall_ctx_kfunc_unaligned_fixed_off(void *ctx)
+{
+ char *p = ctx;
+
+ p += 2;
+ bpf_kfunc_call_test_mem_len_pass1(p, 4);
+ return 0;
+}
+
+SEC("?syscall")
+__description("syscall: kfunc unaligned access ctx with variable offset")
+__success
+int syscall_ctx_kfunc_unaligned_var_off(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+
+ off &= 0xfc;
+ off += 2;
+ p += off;
+ bpf_kfunc_call_test_mem_len_pass1(p, 4);
+ return 0;
+}
+
+SEC("?syscall")
+__description("syscall: reject kfunc ctx access past U16_MAX with fixed offset")
+__failure __msg("outside of the allowed memory range")
+int syscall_ctx_kfunc_u16_max_fixed_off(void *ctx)
+{
+ char *p = ctx;
+
+ p += 65535;
+ bpf_kfunc_call_test_mem_len_pass1(p, 4);
+ return 0;
+}
+
+SEC("?syscall")
+__description("syscall: reject kfunc ctx access past U16_MAX with variable offset")
+__failure __msg("outside of the allowed memory range")
+int syscall_ctx_kfunc_u16_max_var_off(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+
+ off &= 0xffff;
+ off += 1;
+ p += off;
+ bpf_kfunc_call_test_mem_len_pass1(p, 4);
+ return 0;
+}
+
+SEC("?syscall")
+__description("syscall: kfunc access zero-sized ctx")
+__success
+int syscall_ctx_kfunc_zero_sized(void *ctx)
+{
+ bpf_kfunc_call_test_mem_len_pass1(ctx, 0);
+ return 0;
+}
+
+/*
+ * For non-syscall program types without convert_ctx_access, direct ctx
+ * dereference is still allowed after adding a fixed offset, while variable
+ * and negative direct accesses are rejected.
+ *
+ * Passing ctx as a helper or kfunc memory argument is only permitted for
+ * syscall programs, so the helper and kfunc cases below validate rejection
+ * for non-syscall ctx pointers at fixed, variable, and zero-sized accesses.
+ */
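+/*
+ * type is the ELF section prefix, name the function-name suffix, off a
+ * fixed offset that is valid to dereference for the given ctx, and load_t
+ * the scalar type used for the direct loads.
+ */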
+#define no_rewrite_ctx_access(type, name, off, load_t) \
+ SEC("?" type) \
+ __description(type ": read ctx at fixed offset") \
+ __success \
+ int no_rewrite_##name##_fixed(void *ctx) \
+ { \
+ char *p = ctx; \
+ volatile load_t val; \
+ \
+ val = *(load_t *)(p + off); \
+ (void)val; \
+ return 0; \
+ } \
+ SEC("?" type) \
+ __description(type ": reject variable offset ctx access") \
+ __failure __msg("variable ctx access var_off=") \
+ int no_rewrite_##name##_var(void *ctx) \
+ { \
+ __u64 off_var = bpf_get_prandom_u32(); \
+ char *p = ctx; \
+ \
+ off_var &= 4; \
+ p += off_var; \
+ return *(load_t *)p; \
+ } \
+ SEC("?" type) \
+ __description(type ": reject negative offset ctx access") \
+ __failure __msg("invalid bpf_context access") \
+ int no_rewrite_##name##_neg(void *ctx) \
+ { \
+ char *p = ctx; \
+ \
+ p -= 612; \
+ return *(load_t *)p; \
+ } \
+ SEC("?" type) \
+ __description(type ": reject helper read ctx at fixed offset") \
+ __failure __msg("dereference of modified ctx ptr") \
+ int no_rewrite_##name##_helper_read_fixed(void *ctx) \
+ { \
+ char *p = ctx; \
+ \
+ p += off; \
+ return bpf_strncmp(p, 4, ctx_strncmp_target); \
+ } \
+ SEC("?" type) \
+ __description(type ": reject helper write ctx at fixed offset") \
+ __failure __msg("dereference of modified ctx ptr") \
+ int no_rewrite_##name##_helper_write_fixed(void *ctx) \
+ { \
+ char *p = ctx; \
+ \
+ p += off; \
+ return bpf_probe_read_kernel(p, 4, 0); \
+ } \
+ SEC("?" type) \
+ __description(type ": reject helper read ctx with variable offset") \
+ __failure __msg("variable ctx access var_off=") \
+ int no_rewrite_##name##_helper_read_var(void *ctx) \
+ { \
+ __u64 off_var = bpf_get_prandom_u32(); \
+ char *p = ctx; \
+ \
+ off_var &= 4; \
+ p += off_var; \
+ return bpf_strncmp(p, 4, ctx_strncmp_target); \
+ } \
+ SEC("?" type) \
+ __description(type ": reject helper write ctx with variable offset") \
+ __failure __msg("variable ctx access var_off=") \
+ int no_rewrite_##name##_helper_write_var(void *ctx) \
+ { \
+ __u64 off_var = bpf_get_prandom_u32(); \
+ char *p = ctx; \
+ \
+ off_var &= 4; \
+ p += off_var; \
+ return bpf_probe_read_kernel(p, 4, 0); \
+ } \
+ SEC("?" type) \
+ __description(type ": reject helper read zero-sized ctx access") \
+ __failure __msg("R4 type=ctx expected=fp") \
+ int no_rewrite_##name##_helper_read_zero(void *ctx) \
+ { \
+ return bpf_snprintf(0, 0, ctx_snprintf_fmt, ctx, 0); \
+ } \
+ SEC("?" type) \
+ __description(type ": reject helper write zero-sized ctx access") \
+ __failure __msg("R1 type=ctx expected=fp") \
+ int no_rewrite_##name##_helper_write_zero(void *ctx) \
+ { \
+ return bpf_probe_read_kernel(ctx, 0, 0); \
+ } \
+ SEC("?" type) \
+ __description(type ": reject kfunc ctx at fixed offset") \
+ __failure __msg("dereference of modified ctx ptr") \
+ int no_rewrite_##name##_kfunc_fixed(void *ctx) \
+ { \
+ char *p = ctx; \
+ \
+ p += off; \
+ bpf_kfunc_call_test_mem_len_pass1(p, 4); \
+ return 0; \
+ } \
+ SEC("?" type) \
+ __description(type ": reject kfunc ctx with variable offset") \
+ __failure __msg("variable ctx access var_off=") \
+ int no_rewrite_##name##_kfunc_var(void *ctx) \
+ { \
+ __u64 off_var = bpf_get_prandom_u32(); \
+ char *p = ctx; \
+ \
+ off_var &= 4; \
+ p += off_var; \
+ bpf_kfunc_call_test_mem_len_pass1(p, 4); \
+ return 0; \
+ } \
+ SEC("?" type) \
+ __description(type ": reject kfunc zero-sized ctx access") \
+ __failure __msg("R1 type=ctx expected=fp") \
+ int no_rewrite_##name##_kfunc_zero(void *ctx) \
+ { \
+ bpf_kfunc_call_test_mem_len_pass1(ctx, 0); \
+ return 0; \
+ }
+
+no_rewrite_ctx_access("kprobe", kprobe, 8, u64);
+no_rewrite_ctx_access("tracepoint", tp, 8, u64);
+no_rewrite_ctx_access("raw_tp", raw_tp, 8, u64);
+no_rewrite_ctx_access("raw_tracepoint.w", raw_tp_w, 8, u64);
+no_rewrite_ctx_access("fentry/bpf_modify_return_test", fentry, 8, u64);
+no_rewrite_ctx_access("cgroup/dev", cgroup_dev, 4, u32);
+no_rewrite_ctx_access("netfilter", netfilter, offsetof(struct bpf_nf_ctx, skb), u64);
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx_ptr_param.c b/tools/testing/selftests/bpf/progs/verifier_ctx_ptr_param.c
new file mode 100644
index 000000000000..d5cc8fc01fe6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ctx_ptr_param.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Verifier tests for single- and multi-level pointer parameter handling
+ * Copyright (c) 2026 CrowdStrike, Inc.
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
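+/*
+ * Each test below reads the first trampoline argument and checks via the
+ * verifier log that it is tracked as a plain scalar() rather than a
+ * trusted pointer.
+ */
+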
+SEC("fentry/bpf_fentry_test_ppvoid")
+__description("fentry/void**: void ** inferred as scalar")
+__success __retval(0)
+__log_level(2)
+__msg("R1=ctx() R2=scalar()")
+__naked void fentry_ppvoid_as_scalar(void)
+{
+ asm volatile (" \
+ r2 = *(u64 *)(r1 + 0); \
+ r0 = 0; \
+ exit; \
+ " ::: __clobber_all);
+}
+
+SEC("fentry/bpf_fentry_test_pppvoid")
+__description("fentry/void***: void *** inferred as scalar")
+__success __retval(0)
+__log_level(2)
+__msg("R1=ctx() R2=scalar()")
+__naked void fentry_pppvoid_as_scalar(void)
+{
+ asm volatile (" \
+ r2 = *(u64 *)(r1 + 0); \
+ r0 = 0; \
+ exit; \
+ " ::: __clobber_all);
+}
+
+SEC("fentry/bpf_fentry_test_ppfile")
+__description("fentry/struct file**: struct file ** inferred as scalar")
+__success __retval(0)
+__log_level(2)
+__msg("R1=ctx() R2=scalar()")
+__naked void fentry_ppfile_as_scalar(void)
+{
+ asm volatile (" \
+ r2 = *(u64 *)(r1 + 0); \
+ r0 = 0; \
+ exit; \
+ " ::: __clobber_all);
+}
+
+SEC("fexit/bpf_fexit_test_ret_ppfile")
+__description("fexit/return struct file**: returned struct file ** inferred as scalar")
+__success __retval(0)
+__log_level(2)
+__msg("R1=ctx() R2=scalar()")
+__naked void fexit_ppfile_as_scalar(void)
+{
+ asm volatile (" \
+ r2 = *(u64 *)(r1 + 0); \
+ r0 = 0; \
+ exit; \
+ " ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
index 911caa8fd1b7..915a9707298b 100644
--- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
@@ -412,7 +412,7 @@ l0_%=: r0 = 0; \
SEC("tc")
__description("direct packet access: test17 (pruning, alignment)")
-__failure __msg("misaligned packet access off 2+0+15+-4 size 4")
+__failure __msg("misaligned packet access off 2+15+-4 size 4")
__flag(BPF_F_STRICT_ALIGNMENT)
__naked void packet_access_test17_pruning_alignment(void)
{
@@ -569,7 +569,7 @@ l0_%=: r0 = 0; \
SEC("tc")
__description("direct packet access: test23 (x += pkt_ptr, 4)")
-__failure __msg("invalid access to packet, off=0 size=8, R5(id=3,off=0,r=0)")
+__failure __msg("invalid access to packet, off=31 size=8, R5(id=3,off=31,r=0)")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void test23_x_pkt_ptr_4(void)
{
@@ -859,4 +859,65 @@ l0_%=: r0 = 1; \
: __clobber_all);
}
+SEC("tc")
+__description("direct packet access: pkt_range cleared after sub with known scalar")
+__failure __msg("invalid access to packet")
+__naked void pkt_range_clear_after_sub(void)
+{
+ asm volatile (" \
+ r9 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r8 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r9 += 256; \
+ if r9 >= r8 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: /* r9 has AT_PKT_END (pkt + 256 >= pkt_end) */ \
+ r9 -= 256; \
+ /* \
+ * AT_PKT_END must not survive the arithmetic. \
+ * is_pkt_ptr_branch_taken must validate both \
+ * branches when visiting the next condition. \
+ */ \
+ if r9 < r8 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u8*)(r9 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: pkt_range cleared after add with known scalar")
+__failure __msg("invalid access to packet")
+__naked void pkt_range_clear_after_add(void)
+{
+ asm volatile (" \
+ r9 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r8 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r9 += 256; \
+ if r9 >= r8 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: /* r9 has AT_PKT_END (pkt + 256 >= pkt_end) */ \
+ r9 += -256; \
+ /* \
+ * Same as sub, but goes through BPF_ADD path. \
+ * AT_PKT_END must not survive the arithmetic. \
+ */ \
+ if r9 < r8 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u8*)(r9 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_div_mod_bounds.c b/tools/testing/selftests/bpf/progs/verifier_div_mod_bounds.c
index 4672af0b3268..e814a054d69a 100644
--- a/tools/testing/selftests/bpf/progs/verifier_div_mod_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_div_mod_bounds.c
@@ -36,7 +36,7 @@ l0_%=: r0 = *(u64 *)(r1 + 0); \
SEC("socket")
__description("UDIV32, zero divisor")
__success __retval(0) __log_level(2)
-__msg("w1 /= w2 {{.*}}; R1=0 R2=0")
+__msg("w1 /= w2 {{.*}}; R1=0")
__naked void udiv32_zero_divisor(void)
{
asm volatile (" \
@@ -81,7 +81,7 @@ l0_%=: r0 = *(u64 *)(r1 + 0); \
SEC("socket")
__description("UDIV64, zero divisor")
__success __retval(0) __log_level(2)
-__msg("r1 /= r2 {{.*}}; R1=0 R2=0")
+__msg("r1 /= r2 {{.*}}; R1=0")
__naked void udiv64_zero_divisor(void)
{
asm volatile (" \
@@ -242,7 +242,7 @@ l1_%=: r0 = *(u64 *)(r1 + 0); \
SEC("socket")
__description("SDIV32, zero divisor")
__success __retval(0) __log_level(2)
-__msg("w1 s/= w2 {{.*}}; R1=0 R2=0")
+__msg("w1 s/= w2 {{.*}}; R1=0")
__naked void sdiv32_zero_divisor(void)
{
asm volatile (" \
@@ -275,6 +275,7 @@ __naked void sdiv32_overflow_1(void)
w2 += 10; \
if w1 s> w2 goto l0_%=; \
w1 s/= -1; \
+ r2 = r1; \
l0_%=: r0 = 0; \
exit; \
" :
@@ -443,7 +444,7 @@ l1_%=: r0 = *(u64 *)(r1 + 0); \
SEC("socket")
__description("SDIV64, zero divisor")
__success __retval(0) __log_level(2)
-__msg("r1 s/= r2 {{.*}}; R1=0 R2=0")
+__msg("r1 s/= r2 {{.*}}; R1=0")
__naked void sdiv64_zero_divisor(void)
{
asm volatile (" \
@@ -476,6 +477,7 @@ __naked void sdiv64_overflow_1(void)
r2 += 10; \
if r1 s> r2 goto l0_%=; \
r1 s/= -1; \
+ r2 = r1; \
l0_%=: r0 = 0; \
exit; \
" :
@@ -553,7 +555,7 @@ l0_%=: r0 = *(u64 *)(r1 + 0); \
SEC("socket")
__description("UMOD32, zero divisor")
__success __retval(0) __log_level(2)
-__msg("w1 %= w2 {{.*}}; R1=scalar(smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=9,var_off=(0x1; 0x8)) R2=0")
+__msg("w1 %= w2 {{.*}}; R1=scalar(smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=9,var_off=(0x1; 0x8))")
__naked void umod32_zero_divisor(void)
{
asm volatile (" \
@@ -624,7 +626,7 @@ l0_%=: r0 = *(u64 *)(r1 + 0); \
SEC("socket")
__description("UMOD64, zero divisor")
__success __retval(0) __log_level(2)
-__msg("r1 %= r2 {{.*}}; R1=scalar(smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=9,var_off=(0x1; 0x8)) R2=0")
+__msg("r1 %= r2 {{.*}}; R1=scalar(smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=9,var_off=(0x1; 0x8))")
__naked void umod64_zero_divisor(void)
{
asm volatile (" \
@@ -833,7 +835,7 @@ l1_%=: r0 = *(u64 *)(r1 + 0); \
SEC("socket")
__description("SMOD32, zero divisor")
__success __retval(0) __log_level(2)
-__msg("w1 s%= w2 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-8,smax32=10,var_off=(0x0; 0xffffffff)) R2=0")
+__msg("w1 s%= w2 {{.*}}; R1=scalar(smin=0,smax=umax=0xffffffff,smin32=-8,smax32=10,var_off=(0x0; 0xffffffff))")
__naked void smod32_zero_divisor(void)
{
asm volatile (" \
@@ -1084,7 +1086,7 @@ l1_%=: r0 = *(u64 *)(r1 + 0); \
SEC("socket")
__description("SMOD64, zero divisor")
__success __retval(0) __log_level(2)
-__msg("r1 s%= r2 {{.*}}; R1=scalar(smin=smin32=-8,smax=smax32=10) R2=0")
+__msg("r1 s%= r2 {{.*}}; R1=scalar(smin=smin32=-8,smax=smax32=10)")
__naked void smod64_zero_divisor(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
index 20904cd2baa2..1e08aff7532e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
+++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
@@ -134,7 +134,6 @@ __noinline __weak int subprog_user_anon_mem(user_struct_t *t)
SEC("?tracepoint")
__failure __log_level(2)
-__msg("invalid bpf_context access")
__msg("Caller passes invalid args into func#1 ('subprog_user_anon_mem')")
int anon_user_mem_invalid(void *ctx)
{
@@ -358,6 +357,100 @@ int arg_tag_ctx_syscall(void *ctx)
return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx) + tp_whatever(ctx);
}
+__weak int syscall_array_bpf_for(void *ctx __arg_ctx)
+{
+ int *arr = ctx;
+ int i;
+
+ bpf_for(i, 0, 100)
+ arr[i] *= i;
+
+ return 0;
+}
+
+SEC("?syscall")
+__success __log_level(2)
+int arg_tag_ctx_syscall_bpf_for(void *ctx)
+{
+ return syscall_array_bpf_for(ctx);
+}
+
+SEC("syscall")
+__auxiliary
+int syscall_tailcall_target(void *ctx)
+{
+ return syscall_array_bpf_for(ctx);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __array(values, int (void *));
+} syscall_prog_array SEC(".maps") = {
+ .values = {
+ [0] = (void *)&syscall_tailcall_target,
+ },
+};
+
+SEC("?syscall")
+__success __log_level(2)
+int arg_tag_ctx_syscall_tailcall(void *ctx)
+{
+ bpf_tail_call(ctx, &syscall_prog_array, 0);
+ return 0;
+}
+
+SEC("?syscall")
+__failure __log_level(2)
+__msg("dereference of modified ctx ptr R1 off=8 disallowed")
+int arg_tag_ctx_syscall_tailcall_fixed_off_bad(void *ctx)
+{
+ char *p = ctx;
+
+ p += 8;
+ bpf_tail_call(p, &syscall_prog_array, 0);
+ return 0;
+}
+
+SEC("?syscall")
+__failure __log_level(2)
+__msg("variable ctx access var_off=(0x0; 0x4) disallowed")
+int arg_tag_ctx_syscall_tailcall_var_off_bad(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+
+ off &= 4;
+ p += off;
+ bpf_tail_call(p, &syscall_prog_array, 0);
+ return 0;
+}
+
+SEC("?syscall")
+__failure __log_level(2)
+__msg("dereference of modified ctx ptr R1 off=8 disallowed")
+int arg_tag_ctx_syscall_fixed_off_bad(void *ctx)
+{
+ char *p = ctx;
+
+ p += 8;
+ return subprog_ctx_tag(p);
+}
+
+SEC("?syscall")
+__failure __log_level(2)
+__msg("variable ctx access var_off=(0x0; 0x4) disallowed")
+int arg_tag_ctx_syscall_var_off_bad(void *ctx)
+{
+ __u64 off = bpf_get_prandom_u32();
+ char *p = ctx;
+
+ off &= 4;
+ p += off;
+ return subprog_ctx_tag(p);
+}
+
__weak int subprog_dynptr(struct bpf_dynptr *dptr)
{
long *d, t, buf[1] = {};
@@ -388,4 +481,23 @@ int arg_tag_dynptr(struct xdp_md *ctx)
return subprog_dynptr(&dptr);
}
+__weak
+void foo(void)
+{
+}
+
+SEC("?tc")
+__failure __msg("R0 !read_ok")
+int return_from_void_global(struct __sk_buff *skb)
+{
+ foo();
+
+ asm volatile(
+ "r1 = r0;"
+ :::
+ );
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotox.c b/tools/testing/selftests/bpf/progs/verifier_gotox.c
index 0f43b56ec2bc..f88aa4cdb279 100644
--- a/tools/testing/selftests/bpf/progs/verifier_gotox.c
+++ b/tools/testing/selftests/bpf/progs/verifier_gotox.c
@@ -131,7 +131,7 @@ DEFINE_INVALID_SIZE_PROG(u16, __failure __msg("Invalid read of 2 bytes from insn
DEFINE_INVALID_SIZE_PROG(u8, __failure __msg("Invalid read of 1 bytes from insn_array"))
SEC("socket")
-__failure __msg("misaligned value access off 0+1+0 size 8")
+__failure __msg("misaligned value access off 1+0 size 8")
__naked void jump_table_misaligned_access(void)
{
asm volatile (" \
@@ -187,7 +187,7 @@ jt0_%=: \
}
SEC("socket")
-__failure __msg("invalid access to map value, value_size=16 off=-24 size=8")
+__failure __msg("R0 min value is negative")
__naked void jump_table_invalid_mem_acceess_neg(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c
index 74f5f9cd153d..71cee3f58324 100644
--- a/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c
@@ -360,7 +360,7 @@ l0_%=: r0 = 0; \
SEC("tc")
__description("helper access to packet: test15, cls helper fail sub")
-__failure __msg("invalid access to packet")
+__failure __msg("R1 min value is negative")
__naked void test15_cls_helper_fail_sub(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c
index 886498b5e6f3..6d2a38597c34 100644
--- a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c
@@ -1100,7 +1100,7 @@ l0_%=: exit; \
SEC("tracepoint")
__description("map helper access to adjusted map (via const imm): out-of-bound 2")
-__failure __msg("invalid access to map value, value_size=16 off=-4 size=8")
+__failure __msg("R2 min value is negative")
__naked void imm_out_of_bound_2(void)
{
asm volatile (" \
@@ -1176,7 +1176,7 @@ l0_%=: exit; \
SEC("tracepoint")
__description("map helper access to adjusted map (via const reg): out-of-bound 2")
-__failure __msg("invalid access to map value, value_size=16 off=-4 size=8")
+__failure __msg("R2 min value is negative")
__naked void reg_out_of_bound_2(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
index 59e34d558654..6627f44faf4b 100644
--- a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
+++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
@@ -65,7 +65,7 @@ __naked void ptr_to_long_half_uninitialized(void)
SEC("cgroup/sysctl")
__description("arg pointer to long misaligned")
-__failure __msg("misaligned stack access off 0+-20+0 size 8")
+__failure __msg("misaligned stack access off -20+0 size 8")
__naked void arg_ptr_to_long_misaligned(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_jeq_infer_not_null.c b/tools/testing/selftests/bpf/progs/verifier_jeq_infer_not_null.c
index bf16b00502f2..3d1e8de4390c 100644
--- a/tools/testing/selftests/bpf/progs/verifier_jeq_infer_not_null.c
+++ b/tools/testing/selftests/bpf/progs/verifier_jeq_infer_not_null.c
@@ -210,4 +210,58 @@ l0_%=: /* return 0; */ \
: __clobber_all);
}
+/* Verify that a pointer is detected as non-null when compared
+ * against a register holding the value 0. JEQ test case.
+ */
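+/* On the fall-through of "if r0 == r1" with r1 known to be zero, the
+ * verifier marks r0 (map_value_or_null) as non-null, which is what
+ * permits the dereference matched by the first __msg below.
+ */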
+SEC("xdp")
+__success __log_level(2)
+/* to make sure the branch is not falsely predicted */
+__msg("r0 = *(u32 *)(r0 +0)")
+__msg("from 7 to 9")
+__naked void jeq_reg_reg_null_check(void)
+{
+ asm volatile (" \
+ *(u32*)(r10 - 8) = 0; \
+ r1 = %[map_xskmap] ll; \
+ r2 = r10; \
+ r2 += -8; \
+ call %[bpf_map_lookup_elem]; \
+ r1 = 0; \
+ if r0 == r1 goto 1f; \
+ r0 = *(u32*)(r0 +0); \
+1: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_xskmap)
+ : __clobber_all);
+}
+
+/* Same as above but for JNE.
+ */
+SEC("xdp")
+__success __log_level(2)
+/* to make sure the branch is not falsely predicted */
+__msg("r0 = *(u32 *)(r0 +0)")
+__msg("from 7 to 9")
+__naked void jne_reg_reg_null_check(void)
+{
+ asm volatile (" \
+ *(u32*)(r10 - 8) = 0; \
+ r1 = %[map_xskmap] ll; \
+ r2 = r10; \
+ r2 += -8; \
+ call %[bpf_map_lookup_elem]; \
+ r1 = 0; \
+ if r0 != r1 goto 1f; \
+ goto 2f; \
+1: r0 = *(u32*)(r0 +0); \
+2: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_xskmap)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ld_ind.c b/tools/testing/selftests/bpf/progs/verifier_ld_ind.c
index c925ba9a2e74..09e81b99eecb 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ld_ind.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ld_ind.c
@@ -107,4 +107,146 @@ __naked void ind_check_calling_conv_r7(void)
: __clobber_all);
}
+/*
+ * ld_{abs,ind} subprog that always sets r0=1 on the success path.
+ * bpf_gen_ld_abs() emits a hidden exit with r0=0 when the load helper
+ * fails. The verifier must model this failure return so that callers
+ * account for r0=0 as a possible return value.
+ */
+__naked __noinline __used
+static int ldabs_subprog(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ ".8byte %[ld_abs];"
+ "r0 = 1;"
+ "exit;"
+ :
+ : __imm_insn(ld_abs, BPF_LD_ABS(BPF_W, 0))
+ : __clobber_all);
+}
+
+__naked __noinline __used
+static int ldind_subprog(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "r7 = 0;"
+ ".8byte %[ld_ind];"
+ "r0 = 1;"
+ "exit;"
+ :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_7, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_abs: subprog early exit on ld_abs failure")
+__failure __msg("R9 !read_ok")
+__naked void ld_abs_subprog_early_exit(void)
+{
+ asm volatile (
+ "call ldabs_subprog;"
+ "if r0 != 0 goto l_exit_%=;"
+ "r0 = r9;"
+ "l_exit_%=:"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: subprog early exit on ld_ind failure")
+__failure __msg("R9 !read_ok")
+__naked void ld_ind_subprog_early_exit(void)
+{
+ asm volatile (
+ "call ldind_subprog;"
+ "if r0 != 0 goto l_exit_%=;"
+ "r0 = r9;"
+ "l_exit_%=:"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__description("ld_abs: subprog with both paths safe")
+__success
+__naked void ld_abs_subprog_both_paths_safe(void)
+{
+ asm volatile (
+ "call ldabs_subprog;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: subprog with both paths safe")
+__success
+__naked void ld_ind_subprog_both_paths_safe(void)
+{
+ asm volatile (
+ "call ldind_subprog;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/*
+ * ld_{abs,ind} in subprogs require a scalar (int) return type in BTF.
+ * A subprog with a void return type must be rejected.
+ */
+__naked __noinline __used
+static void ldabs_void_subprog(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ ".8byte %[ld_abs];"
+ "r0 = 1;"
+ "exit;"
+ :
+ : __imm_insn(ld_abs, BPF_LD_ABS(BPF_W, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_abs: reject void return subprog")
+__failure __msg("LD_ABS is only allowed in functions that return 'int'")
+__naked void ld_abs_void_subprog_reject(void)
+{
+ asm volatile (
+ "call ldabs_void_subprog;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+__naked __noinline __used
+static void ldind_void_subprog(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "r7 = 0;"
+ ".8byte %[ld_ind];"
+ "r0 = 1;"
+ "exit;"
+ :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_7, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: reject void return subprog")
+__failure __msg("LD_ABS is only allowed in functions that return 'int'")
+__naked void ld_ind_void_subprog_reject(void)
+{
+ asm volatile (
+ "call ldind_void_subprog;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c
index f4f8a055af8a..d571fbfc86a3 100644
--- a/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c
+++ b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c
@@ -535,4 +535,179 @@ int spurious_precision_marks(void *ctx)
return 0;
}
+/*
+ * Test that r += r (self-add, src_reg == dst_reg) clears the scalar ID
+ * so that sync_linked_regs() does not propagate an incorrect delta.
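+ * With src_reg == dst_reg the operation doubles the register rather
+ * than adding a constant, so any id/delta link derived from it would
+ * be wrong.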
+ */
+SEC("socket")
+__failure
+__msg("div by zero")
+__naked void scalars_self_add_clears_id(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; /* r6 unknown, id A */ \
+ r7 = r6; /* r7 linked to r6, id A */ \
+ call %[bpf_get_prandom_u32]; \
+ r8 = r0; /* r8 unknown, id B */ \
+ r9 = r8; /* r9 linked to r8, id B */ \
+ if r7 != 1 goto l_exit_%=; \
+ /* r7 == 1; sync propagates: r6 = 1 (known, id A) */ \
+ r6 += r6; /* r6 = 2; should clear id */ \
+ if r7 == r9 goto l_exit_%=; \
+ /* Bug: r6 synced to r7(1)+delta(2)=3; Fix: r6 = 2 */ \
+ if r6 == 3 goto l_exit_%=; \
+ r0 /= 0; \
+l_exit_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* Same as above but with alu32 such that w6 += w6 also clears id. */
+SEC("socket")
+__failure
+__msg("div by zero")
+__naked void scalars_self_add_alu32_clears_id(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w6 = w0; \
+ w7 = w6; \
+ call %[bpf_get_prandom_u32]; \
+ w8 = w0; \
+ w9 = w8; \
+ if w7 != 1 goto l_exit_%=; \
+ w6 += w6; \
+ if w7 == w9 goto l_exit_%=; \
+ if w6 == 3 goto l_exit_%=; \
+ r0 /= 0; \
+l_exit_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * Test that stale delta from a cleared BPF_ADD_CONST does not leak
+ * through assign_scalar_id_before_mov() into a new id, causing
+ * sync_linked_regs() to compute an incorrect offset.
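+ *
+ * Roughly, the sync computes a linked register's value as
+ * known(rY) + (delta(rX) - delta(rY)); a stale delta(r9) = 5 yields
+ * r8 = 10 + (3 - 5) = 8 instead of the correct 10 + (3 - 0) = 13.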
+ */
+SEC("socket")
+__failure
+__msg("div by zero")
+__naked void scalars_stale_delta_from_cleared_id(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; /* r6 unknown, gets id A */ \
+ r6 += 5; /* id A|ADD_CONST, delta 5 */ \
+ r6 ^= 0; /* id cleared; delta stays 5 */ \
+ r8 = r6; /* new id B, stale delta 5 */ \
+ r8 += 3; /* id B|ADD_CONST, delta 3 */ \
+ r9 = r6; /* id B, stale delta 5 */ \
+ if r9 != 10 goto l_exit_%=; \
+ /* Bug: r8 = 10+(3-5) = 8; Fix: r8 = 10+(3-0) = 13 */ \
+ if r8 == 8 goto l_exit_%=; \
+ r0 /= 0; \
+l_exit_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* Same as above but with alu32. */
+SEC("socket")
+__failure
+__msg("div by zero")
+__naked void scalars_stale_delta_from_cleared_id_alu32(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w6 = w0; \
+ w6 += 5; \
+ w6 ^= 0; \
+ w8 = w6; \
+ w8 += 3; \
+ w9 = w6; \
+ if w9 != 10 goto l_exit_%=; \
+ if w8 == 8 goto l_exit_%=; \
+ r0 /= 0; \
+l_exit_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * Test that regsafe() verifies base_id consistency for BPF_ADD_CONST
+ * linked scalars during state pruning.
+ *
+ * The false branch (explored first) links R3 to R2 via ADD_CONST.
+ * The true branch (runtime path) links R3 to R4 (unrelated base_id).
+ * At the merge point, pruning must fail because the linkage topology
+ * differs.
+ */
+SEC("socket")
+__description("linked scalars: add_const base_id must be consistent for pruning")
+__failure __msg("invalid variable-offset")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void add_const_base_id_pruning(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 16) = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ r6 &= 1; \
+ if r6 >= 1 goto l_true_%=; \
+ \
+ /* False branch (explored first, old state) */ \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ r2 &= 0xff; /* R2 = scalar(id=A) [0,255] */ \
+ r3 = r2; /* R3 linked to R2 (id=A) */ \
+ r3 += 10; /* R3 id=A|ADD_CONST, delta=10 */\
+ r6 = 0; \
+ goto l_merge_%=; \
+ \
+l_true_%=: \
+ /* True branch (runtime path, cur state) */ \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ r2 &= 0xff; /* R2 = scalar [0,255], id=0 */ \
+ r4 = r0; \
+ r4 &= 0xff; /* R4 = scalar [0,255], id=0 */ \
+ r3 = r4; /* R3 linked to R4 (new id=C) */\
+ r3 += 10; /* R3 id=C|ADD_CONST, delta=10 */\
+ r6 = 0; \
+ \
+l_merge_%=: \
+ /* At merge, old R3 linked to R2, cur R3 linked to R4. */\
+ /* Pruning must fail: base_ids A vs C inconsistent. */ \
+ if r2 >= 6 goto l_exit_%=; \
+ /* sync_linked_regs: R2<6 => R3<16 in old state. */ \
+ /* Without fix: R3 in [10,15] from incorrect pruning. */\
+ /* With fix: R3 in [10,265], not synced from R2. */ \
+ r3 -= 10; /* [0,5] vs [0,255] */ \
+ r9 = r10; \
+ r9 += -16; \
+ r9 += r3; /* fp-16+[0,5] vs fp-16+[0,255] */\
+ *(u8*)(r9 + 0) = r6; /* within 16B vs past fp */ \
+l_exit_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_live_stack.c b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
index 2de105057bbc..b7a9fa10e84d 100644
--- a/tools/testing/selftests/bpf/progs/verifier_live_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
@@ -3,8 +3,10 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
#include "bpf_misc.h"
+char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 1);
@@ -12,14 +14,20 @@ struct {
__type(value, long long);
} map SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} array_map_8b SEC(".maps");
+
+const char snprintf_u64_fmt[] = "%llu";
+
SEC("socket")
__log_level(2)
-__msg("(0) frame 0 insn 2 +written -8")
-__msg("(0) frame 0 insn 1 +live -24")
-__msg("(0) frame 0 insn 1 +written -8")
-__msg("(0) frame 0 insn 0 +live -8,-24")
-__msg("(0) frame 0 insn 0 +written -8")
-__msg("(0) live stack update done in 2 iterations")
+__msg("0: (79) r1 = *(u64 *)(r10 -8) ; use: fp0-8")
+__msg("1: (79) r2 = *(u64 *)(r10 -24) ; use: fp0-24")
+__msg("2: (7b) *(u64 *)(r10 -8) = r1 ; def: fp0-8")
__naked void simple_read_simple_write(void)
{
asm volatile (
@@ -33,12 +41,8 @@ __naked void simple_read_simple_write(void)
SEC("socket")
__log_level(2)
-__msg("(0) frame 0 insn 1 +live -8")
-__not_msg("(0) frame 0 insn 1 +written")
-__msg("(0) live stack update done in 2 iterations")
-__msg("(0) frame 0 insn 1 +live -16")
-__msg("(0) frame 0 insn 1 +written -32")
-__msg("(0) live stack update done in 2 iterations")
+__msg("2: (79) r0 = *(u64 *)(r10 -8) ; use: fp0-8")
+__msg("6: (79) r0 = *(u64 *)(r10 -16) ; use: fp0-16")
__naked void read_write_join(void)
{
asm volatile (
@@ -58,13 +62,9 @@ __naked void read_write_join(void)
SEC("socket")
__log_level(2)
-__msg("2: (25) if r0 > 0x2a goto pc+1")
-__msg("7: (95) exit")
-__msg("(0) frame 0 insn 2 +written -16")
-__msg("(0) live stack update done in 2 iterations")
-__msg("7: (95) exit")
-__not_msg("(0) frame 0 insn 2")
-__msg("(0) live stack update done in 1 iterations")
+__msg("stack use/def subprog#0 must_write_not_same_slot (d0,cs0):")
+__msg("6: (7b) *(u64 *)(r2 +0) = r0{{$}}")
+__msg("Live regs before insn:")
__naked void must_write_not_same_slot(void)
{
asm volatile (
@@ -83,10 +83,8 @@ __naked void must_write_not_same_slot(void)
SEC("socket")
__log_level(2)
-__msg("(0) frame 0 insn 0 +written -8,-16")
-__msg("(0) live stack update done in 2 iterations")
-__msg("(0) frame 0 insn 0 +written -8")
-__msg("(0) live stack update done in 2 iterations")
+__msg("0: (7a) *(u64 *)(r10 -8) = 0 ; def: fp0-8")
+__msg("5: (85) call bpf_map_lookup_elem#1 ; use: fp0-8h")
__naked void must_write_not_same_type(void)
{
asm volatile (
@@ -110,10 +108,11 @@ __naked void must_write_not_same_type(void)
SEC("socket")
__log_level(2)
-__msg("(2,4) frame 0 insn 4 +written -8")
-__msg("(2,4) live stack update done in 2 iterations")
-__msg("(0) frame 0 insn 2 +written -8")
-__msg("(0) live stack update done in 2 iterations")
+/* Callee writes fp[0]-8: stack_use at call site has slots 0,1 live */
+__msg("stack use/def subprog#0 caller_stack_write (d0,cs0):")
+__msg("2: (85) call pc+1{{$}}")
+__msg("stack use/def subprog#1 write_first_param (d1,cs2):")
+__msg("4: (7a) *(u64 *)(r1 +0) = 7 ; def: fp0-8")
__naked void caller_stack_write(void)
{
asm volatile (
@@ -135,23 +134,15 @@ static __used __naked void write_first_param(void)
SEC("socket")
__log_level(2)
-/* caller_stack_read() function */
-__msg("2: .12345.... (85) call pc+4")
-__msg("5: .12345.... (85) call pc+1")
-__msg("6: 0......... (95) exit")
-/* read_first_param() function */
-__msg("7: .1........ (79) r0 = *(u64 *)(r1 +0)")
-__msg("8: 0......... (95) exit")
-/* update for callsite at (2) */
-__msg("(2,7) frame 0 insn 7 +live -8")
-__msg("(2,7) live stack update done in 2 iterations")
-__msg("(0) frame 0 insn 2 +live -8")
-__msg("(0) live stack update done in 2 iterations")
-/* update for callsite at (5) */
-__msg("(5,7) frame 0 insn 7 +live -16")
-__msg("(5,7) live stack update done in 2 iterations")
-__msg("(0) frame 0 insn 5 +live -16")
-__msg("(0) live stack update done in 2 iterations")
+__msg("stack use/def subprog#0 caller_stack_read (d0,cs0):")
+__msg("2: (85) call pc+{{.*}} ; use: fp0-8{{$}}")
+__msg("5: (85) call pc+{{.*}} ; use: fp0-16{{$}}")
+__msg("stack use/def subprog#1 read_first_param (d1,cs2):")
+__msg("7: (79) r0 = *(u64 *)(r1 +0) ; use: fp0-8{{$}}")
+__msg("8: (95) exit")
+__msg("stack use/def subprog#1 read_first_param (d1,cs5):")
+__msg("7: (79) r0 = *(u64 *)(r1 +0) ; use: fp0-16{{$}}")
+__msg("8: (95) exit")
__naked void caller_stack_read(void)
{
asm volatile (
@@ -174,20 +165,48 @@ static __used __naked void read_first_param(void)
}
SEC("socket")
+__success
+__naked void arg_track_join_convergence(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "r2 = 2;"
+ "call arg_track_join_convergence_subprog;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void arg_track_join_convergence_subprog(void)
+{
+ asm volatile (
+ "if r1 == 0 goto 1f;"
+ "r0 = r1;"
+ "goto 2f;"
+"1:"
+ "r0 = r2;"
+"2:"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
__flag(BPF_F_TEST_STATE_FREQ)
__log_level(2)
-/* read_first_param2() function */
-__msg(" 9: .1........ (79) r0 = *(u64 *)(r1 +0)")
-__msg("10: .......... (b7) r0 = 0")
-__msg("11: 0......... (05) goto pc+0")
-__msg("12: 0......... (95) exit")
+/* fp0-8 consumed at insn 9, dead by insn 11. stack_def at insn 4 kills slots 0,1. */
+__msg("4: (7b) *(u64 *)(r10 -8) = r0 ; def: fp0-8")
+/* stack_use at call site: callee reads fp0-8, slots 0,1 live */
+__msg("7: (85) call pc+{{.*}} ; use: fp0-8")
+/* read_first_param2: no caller stack live inside callee after first read */
+__msg("9: (79) r0 = *(u64 *)(r1 +0) ; use: fp0-8")
+__msg("10: (b7) r0 = 0{{$}}")
+__msg("11: (05) goto pc+0{{$}}")
+__msg("12: (95) exit")
/*
- * The purpose of the test is to check that checkpoint in
- * read_first_param2() stops path traversal. This will only happen if
- * verifier understands that fp[0]-8 at insn (12) is not alive.
+ * Checkpoint at goto +0 fires because fp0-8 is dead → state pruning.
*/
__msg("12: safe")
-__msg("processed 20 insns")
__naked void caller_stack_pruning(void)
{
asm volatile (
@@ -342,3 +361,2289 @@ static __used __naked unsigned long write_tail_call(void)
__imm_addr(map_array)
: __clobber_all);
}
+
+/* Test precise subprog stack access analysis.
+ * Caller passes fp-32 (SPI 3) to callee that only accesses arg+0 and arg+8
+ * (SPIs 3 and 2). Slots 0 and 1 should NOT be live at the call site.
+ *
+ * Insn layout:
+ * 0: *(u64*)(r10 - 8) = 0 write SPI 0
+ * 1: *(u64*)(r10 - 16) = 0 write SPI 1
+ * 2: *(u64*)(r10 - 24) = 0 write SPI 2
+ * 3: *(u64*)(r10 - 32) = 0 write SPI 3
+ * 4: r1 = r10
+ * 5: r1 += -32
+ * 6: call precise_read_two passes fp-32 (SPI 3)
+ * 7: r0 = 0
+ * 8: exit
+ *
+ * At insn 6 only SPIs 2,3 should be live (slots 4-7, 0xf0).
+ * SPIs 0,1 are written but never read → dead.
+ */
+SEC("socket")
+__log_level(2)
+__msg("6: (85) call pc+{{.*}} ; use: fp0-24 fp0-32{{$}}")
+__naked void subprog_precise_stack_access(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 8) = 0;"
+ "*(u64 *)(r10 - 16) = 0;"
+ "*(u64 *)(r10 - 24) = 0;"
+ "*(u64 *)(r10 - 32) = 0;"
+ "r1 = r10;"
+ "r1 += -32;"
+ "call precise_read_two;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Callee reads only at arg+0 (SPI 3) and arg+8 (SPI 2) */
+static __used __naked void precise_read_two(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r1 + 0);"
+ "r2 = *(u64 *)(r1 + 8);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Test that multi-level subprog calls (callee passes arg-derived ptr
+ * to another BPF subprog) are analyzed precisely.
+ *
+ * Caller passes fp-32 (SPI 3). The callee forwards it to inner_callee.
+ * inner_callee only reads at offset 0 from the pointer.
+ * The analysis recurses into forward_to_inner -> inner_callee and
+ * determines only SPI 3 is accessed (slots 6-7, 0xc0), not all of SPIs 0-3.
+ *
+ * Insn layout:
+ * 0: *(u64*)(r10 - 8) = 0 write SPI 0
+ * 1: *(u64*)(r10 - 16) = 0 write SPI 1
+ * 2: *(u64*)(r10 - 24) = 0 write SPI 2
+ * 3: *(u64*)(r10 - 32) = 0 write SPI 3
+ * 4: r1 = r10
+ * 5: r1 += -32
+ * 6: call forward_to_inner passes fp-32 (SPI 3)
+ * 7: r0 = 0
+ * 8: exit
+ */
+SEC("socket")
+__log_level(2)
+__msg("6: (85) call pc+{{.*}} ; use: fp0-32{{$}}")
+__naked void subprog_multilevel_conservative(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 8) = 0;"
+ "*(u64 *)(r10 - 16) = 0;"
+ "*(u64 *)(r10 - 24) = 0;"
+ "*(u64 *)(r10 - 32) = 0;"
+ "r1 = r10;"
+ "r1 += -32;"
+ "call forward_to_inner;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Forwards arg to another subprog */
+static __used __naked void forward_to_inner(void)
+{
+ asm volatile (
+ "call inner_callee;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void inner_callee(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r1 + 0);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Test multi-frame precision loss: callee consumes caller stack early,
+ * but static liveness keeps it live at pruning points inside callee.
+ *
+ * Caller stores map_ptr or scalar(42) at fp-8, then calls
+ * consume_and_call_inner. The callee reads fp0-8 at entry (consuming
+ * the slot), then calls do_nothing2. After do_nothing2 returns (a
+ * pruning point), fp-8 should be dead -- the read already happened.
+ * But because the call instruction's stack_use includes SPI 0, the
+ * static live_stack_before at insn 7 is 0x1, keeping fp-8 live inside
+ * the callee and preventing state pruning between the two paths.
+ *
+ * Insn layout:
+ * 0: call bpf_get_prandom_u32
+ * 1: if r0 == 42 goto pc+2 -> insn 4
+ * 2: r0 = map ll (ldimm64 part1)
+ * 3: (ldimm64 part2)
+ * 4: *(u64*)(r10 - 8) = r0 fp-8 = map_ptr OR scalar(42)
+ * 5: r1 = r10
+ * 6: r1 += -8
+ * 7: call consume_and_call_inner
+ * 8: r0 = 0
+ * 9: exit
+ *
+ * At insn 7, live_stack_before = 0x3 (slots 0-1 live due to stack_use).
+ * At insn 8, live_stack_before = 0x0 (SPI 0 dead, caller doesn't need it).
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__log_level(2)
+__success
+__msg(" 7: (85) call pc+{{.*}} ; use: fp0-8")
+__msg(" 8: {{.*}} (b7)")
+__naked void callee_consumed_caller_stack(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 42 goto 1f;"
+ "r0 = %[map] ll;"
+"1:"
+ "*(u64 *)(r10 - 8) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call consume_and_call_inner;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+static __used __naked void consume_and_call_inner(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r1 + 0);" /* read fp[0]-8 into caller-saved r0 */
+ "call do_nothing2;" /* inner call clobbers r0 */
+ "r0 = 0;"
+ "goto +0;" /* checkpoint */
+ "r0 = 0;"
+ "goto +0;" /* checkpoint */
+ "r0 = 0;"
+ "goto +0;" /* checkpoint */
+ "r0 = 0;"
+ "goto +0;" /* checkpoint */
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void do_nothing2(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r0 = 0;"
+ "r0 = 0;"
+ "r0 = 0;"
+ "r0 = 0;"
+ "r0 = 0;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/*
+ * Reproducer for unsound pruning when clean_verifier_state() promotes
+ * live STACK_ZERO bytes to STACK_MISC.
+ *
+ * Program shape:
+ * - Build key at fp-4:
+ * - path A keeps key byte as STACK_ZERO;
+ * - path B writes unknown byte making it STACK_MISC.
+ * - Branches merge at a prune point before map_lookup.
+ * - map_lookup on ARRAY map is value-sensitive to constant zero key:
+ * - path A: const key 0 => PTR_TO_MAP_VALUE (non-NULL);
+ * - path B: non-const key => PTR_TO_MAP_VALUE_OR_NULL.
+ * - Dereference lookup result without null check.
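+ * (The array lookup is value-sensitive because the verifier elides
+ * the NULL flag from the lookup result when the key is a known
+ * constant below max_entries.)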
+ *
+ * Note this behavior won't trigger at fp-8, since the verifier
+ * tracks a 32-bit scalar spill there differently, as spilled_ptr.
+ *
+ * Correct verifier behavior: reject (path B unsafe).
+ * With blanket STACK_ZERO->STACK_MISC promotion on live slots, cached path A
+ * state can be generalized and incorrectly prune path B, making program load.
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+__naked void stack_zero_to_misc_unsound_array_lookup(void)
+{
+ asm volatile (
+ /* key at fp-4: all bytes STACK_ZERO */
+ "*(u32 *)(r10 - 4) = 0;"
+ "call %[bpf_get_prandom_u32];"
+ /* fall-through (path A) explored first */
+ "if r0 != 0 goto l_nonconst%=;"
+ /* path A: keep key constant zero */
+ "goto l_lookup%=;"
+"l_nonconst%=:"
+ /* path B: key byte turns to STACK_MISC, key no longer const */
+ "*(u8 *)(r10 - 4) = r0;"
+"l_lookup%=:"
+ /* value-sensitive lookup */
+ "r2 = r10;"
+ "r2 += -4;"
+ "r1 = %[array_map_8b] ll;"
+ "call %[bpf_map_lookup_elem];"
+ /* unsafe when lookup result is map_value_or_null */
+ "r0 = *(u64 *)(r0 + 0);"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(array_map_8b)
+ : __clobber_all);
+}
+
+/*
+ * Subprog variant of stack_zero_to_misc_unsound_array_lookup.
+ *
+ * Check unsound pruning when a callee modifies the caller's
+ * stack through a pointer argument.
+ *
+ * Program shape:
+ * main:
+ * *(u32)(fp - 4) = 0 key = 0 (all bytes STACK_ZERO)
+ * r1 = fp - 4
+ * call maybe_clobber_key may overwrite key[0] with scalar
+ * <-- prune point: two states meet here -->
+ * r2 = fp - 4
+ * r1 = array_map_8b
+ * call bpf_map_lookup_elem value-sensitive on const-zero key
+ * r0 = *(u64)(r0 + 0) deref without null check
+ * exit
+ *
+ * maybe_clobber_key(r1):
+ * r6 = r1 save &key
+ * call bpf_get_prandom_u32
+ * if r0 == 0 goto skip path A: key stays STACK_ZERO
+ * *(u8)(r6 + 0) = r0 path B: key[0] becomes STACK_MISC
+ * skip:
+ * r0 = 0
+ * exit
+ *
+ * Path A: const-zero key => array lookup => PTR_TO_MAP_VALUE => deref OK.
+ * Path B: non-const key => array lookup => PTR_TO_MAP_VALUE_OR_NULL => UNSAFE.
+ *
+ * If the cleaner collapses STACK_ZERO -> STACK_MISC for the live key
+ * slot, path A's cached state matches path B, pruning the unsafe path.
+ *
+ * Correct verifier behavior: reject.
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+__naked void subprog_stack_zero_to_misc_unsound(void)
+{
+ asm volatile (
+ /* key at fp-4: all bytes STACK_ZERO */
+ "*(u32 *)(r10 - 4) = 0;"
+ /* subprog may clobber key[0] with a scalar byte */
+ "r1 = r10;"
+ "r1 += -4;"
+ "call maybe_clobber_key;"
+ /* value-sensitive array lookup */
+ "r2 = r10;"
+ "r2 += -4;"
+ "r1 = %[array_map_8b] ll;"
+ "call %[bpf_map_lookup_elem];"
+ /* unsafe when result is map_value_or_null (path B) */
+ "r0 = *(u64 *)(r0 + 0);"
+ "exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(array_map_8b)
+ : __clobber_all);
+}
+
+static __used __naked void maybe_clobber_key(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "call %[bpf_get_prandom_u32];"
+ /* path A (r0==0): key stays STACK_ZERO, explored first */
+ "if r0 == 0 goto 1f;"
+ /* path B (r0!=0): overwrite key[0] with scalar */
+ "*(u8 *)(r6 + 0) = r0;"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * Demonstrate that subprog arg spill/reload breaks arg tracking,
+ * inflating caller stack liveness and preventing state pruning.
+ *
+ * modifier2(fp-24) has two paths: one writes a scalar to *(r1+8)
+ * = caller fp-16, the other leaves it as zero. After modifier2
+ * returns, fp-16 is never read again — it is dead.
+ *
+ * spill_reload_reader2(fp-24) only reads caller fp-8 via
+ * *(r1+16), but it spills r1 across a helper call. This
+ * breaks compute_subprog_arg_access(): the reload from callee
+ * stack cannot be connected back to arg1, so arg1 access goes
+ * "all (conservative)". At the call site (r1 = fp-24, slot 5)
+ * apply_callee_stack_access() marks slots 0..5 as stack_use —
+ * pulling fp-16 (slots 2-3) into live_stack_before even though
+ * the reader never touches it.
+ *
+ * Result: at modifier2's return point two states with different
+ * fp-16 values cannot be pruned.
+ *
+ * With correct (or old dynamic) liveness fp-16 is dead at that
+ * point and the states prune → "6: safe" appears in the log.
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__log_level(2)
+__success
+__msg("6: safe")
+__naked void spill_reload_inflates_stack_liveness(void)
+{
+ asm volatile (
+ /* struct at fp-24: { ctx; ptr; tail; } */
+ "*(u64 *)(r10 - 24) = r1;" /* fp-24 = ctx */
+ "*(u64 *)(r10 - 16) = r1;" /* fp-16 = ctx (STACK_SPILL ptr) */
+ "*(u64 *)(r10 - 8) = 0;" /* fp-8 = tail */
+ /* modifier2 writes different values to fp-16 on two paths */
+ "r1 = r10;"
+ "r1 += -24;"
+ "call modifier2;"
+ /* insn 6: prune point — two states with different fp-16
+ * path A: fp-16 = STACK_MISC (scalar overwrote pointer)
+ * path B: fp-16 = STACK_SPILL (original ctx pointer)
+ * STACK_MISC does NOT subsume STACK_SPILL(ptr),
+ * so pruning fails unless fp-16 is cleaned (dead).
+ */
+ "r1 = r10;"
+ "r1 += -24;"
+ "call spill_reload_reader2;" /* reads fp-8 via *(r1+16) */
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Two paths: one writes a scalar to *(r1+8) = caller fp-16,
+ * the other leaves it unchanged. Both return 0 via separate
+ * exits to prevent pruning inside the subprog at the merge.
+ */
+static __used __naked void modifier2(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 0 goto 1f;"
+ "*(u64 *)(r6 + 8) = r0;" /* fp-16 = random */
+ "r0 = 0;"
+ "exit;" /* path A exit */
+ "1:"
+ "r0 = 0;"
+ "exit;" /* path B exit */
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* Receives r1 = caller fp-24. Only reads *(r1+16) = fp-8.
+ * Spills r1 across a helper call → arg tracking goes conservative →
+ * slots 0..5 all appear used instead of just slot 1 (fp-8).
+ */
+static __used __naked void spill_reload_reader2(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 8) = r1;" /* spill arg1 */
+ "call %[bpf_get_prandom_u32];" /* clobbers r1-r5 */
+ "r1 = *(u64 *)(r10 - 8);" /* reload arg1 */
+ "r0 = *(u64 *)(r1 + 16);" /* read caller fp-8 */
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* BTF FUNC records are not generated for kfuncs referenced
+ * from inline assembly. These records are necessary for
+ * libbpf to link the program. The function below is a hack
+ * to ensure that BTF FUNC records are generated.
+ */
+void __kfunc_btf_root(void)
+{
+ bpf_iter_num_new(0, 0, 0);
+ bpf_iter_num_next(0);
+ bpf_iter_num_destroy(0);
+}
+
+/* Test that open-coded iterator kfunc arguments get precise stack
+ * liveness tracking. struct bpf_iter_num is 8 bytes (1 SPI).
+ *
+ * Insn layout:
+ * 0: *(u64*)(r10 - 8) = 0 write SPI 0 (dead)
+ * 1: *(u64*)(r10 - 16) = 0 write SPI 1 (dead)
+ * 2: r1 = r10
+ * 3: r1 += -24 iter state at fp-24 (SPI 2)
+ * 4: r2 = 0
+ * 5: r3 = 10
+ * 6: call bpf_iter_num_new defines SPI 2 (KF_ITER_NEW) → 0x0
+ * 7-8: r1 = fp-24
+ * 9: call bpf_iter_num_next uses SPI 2 → 0x30
+ * 10: if r0 == 0 goto 2f
+ * 11: goto 1b
+ * 12-13: r1 = fp-24
+ * 14: call bpf_iter_num_destroy uses SPI 2 → 0x30
+ * 15: r0 = 0
+ * 16: exit
+ *
+ * At insn 6, SPI 2 is defined (KF_ITER_NEW initializes, doesn't read),
+ * so it kills liveness from successors. live_stack_before = 0x0.
+ * At insns 9 and 14, SPI 2 is used (iter_next/destroy read the state),
+ * so live_stack_before = 0x30.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg(" 6: (85) call bpf_iter_num_new{{.*}} ; def: fp0-24{{$}}")
+__msg(" 9: (85) call bpf_iter_num_next{{.*}} ; use: fp0-24{{$}}")
+__msg("14: (85) call bpf_iter_num_destroy{{.*}} ; use: fp0-24{{$}}")
+__naked void kfunc_iter_stack_liveness(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 8) = 0;" /* SPI 0 - dead */
+ "*(u64 *)(r10 - 16) = 0;" /* SPI 1 - dead */
+ "r1 = r10;"
+ "r1 += -24;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+"1:"
+ "r1 = r10;"
+ "r1 += -24;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto 2f;"
+ "goto 1b;"
+"2:"
+ "r1 = r10;"
+ "r1 += -24;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all);
+}
+
+/*
+ * Test for soundness bug in static stack liveness analysis.
+ *
+ * The static pre-pass tracks FP-derived register offsets to determine
+ * which stack slots are accessed. When a PTR_TO_STACK is spilled to
+ * the stack and later reloaded, the reload (BPF_LDX) kills FP-derived
+ * tracking, making subsequent accesses through the reloaded pointer
+ * invisible to the static analysis.
+ *
+ * This causes the analysis to incorrectly mark SPI 0 as dead at the
+ * merge point. clean_verifier_state() zeros it in the cached state,
+ * and stacksafe() accepts the new state against STACK_INVALID,
+ * enabling incorrect pruning.
+ *
+ * Path A (verified first): stores PTR_TO_MAP_VALUE in SPI 0
+ * Path B (verified second): stores scalar 42 in SPI 0
+ * After merge: reads SPI 0 through spilled/reloaded PTR_TO_STACK
+ * and dereferences the result as a pointer.
+ *
+ * Correct behavior: reject (path B dereferences a scalar)
+ * Bug behavior: accept (path B is incorrectly pruned)
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'scalar'")
+__naked void spill_ptr_liveness_type_confusion(void)
+{
+ asm volatile (
+ /* Map lookup to get PTR_TO_MAP_VALUE */
+ "r1 = %[map] ll;"
+ "*(u32 *)(r10 - 32) = 0;"
+ "r2 = r10;"
+ "r2 += -32;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto l_exit%=;"
+ /* r6 = PTR_TO_MAP_VALUE (callee-saved) */
+ "r6 = r0;"
+ /* Branch: fall-through (path A) verified first */
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 0 goto l_scalar%=;"
+ /* Path A: store map value ptr at SPI 0 */
+ "*(u64 *)(r10 - 8) = r6;"
+ "goto l_merge%=;"
+"l_scalar%=:"
+ /* Path B: store scalar at SPI 0 */
+ "r1 = 42;"
+ "*(u64 *)(r10 - 8) = r1;"
+"l_merge%=:"
+ /*
+ * Spill PTR_TO_STACK{off=-8} to SPI 1, then reload.
+ * Reload kills FP-derived tracking, hiding the
+ * subsequent SPI 0 access from the static analysis.
+ */
+ "r1 = r10;"
+ "r1 += -8;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "goto +0;" /* checkpoint */
+ "goto +0;" /* checkpoint */
+ "goto +0;" /* checkpoint */
+ "r1 = *(u64 *)(r10 - 16);"
+ /* Read SPI 0 through reloaded pointer */
+ "r0 = *(u64 *)(r1 + 0);"
+ /* Dereference: safe for map value (path A),
+ * unsafe for scalar (path B).
+ */
+ "r0 = *(u64 *)(r0 + 0);"
+ "exit;"
+"l_exit%=:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_get_prandom_u32),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+/* === Tests for 4-byte stack slot liveness granularity === */
+
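+/* In the tests below, "slot k" is the 4-byte unit spanning
+ * fp-(4k+4)..fp-(4k+1): fp-8 covers slots 0 and 1, fp-24 ends at
+ * slot 5. An "h" suffix in the use/def log annotations (e.g.
+ * "fp0-8h") marks a half-SPI, 4-byte access.
+ */
+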
+/* Test that a 4-byte aligned write is stack_def and kills liveness.
+ *
+ * 0: *(u64 *)(r10 - 8) = 0 def slots 0,1 (full SPI 0)
+ * 1: *(u32 *)(r10 - 8) = 0 def slot 1 (4-byte write kills slot 1)
+ * 2: r0 = *(u64 *)(r10 - 8) use slots 0,1
+ * 3: r0 = 0
+ * 4: exit
+ *
+ * At insn 1, the 4-byte write defines slot 1. Slot 0 still flows
+ * backward from insn 2's read: live_stack_before = 0x1.
+ */
+SEC("socket")
+__log_level(2)
+__msg("1: (62) *(u32 *)(r10 -8) = 0 ; def: fp0-8h")
+__naked void four_byte_write_kills_slot(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 8) = 0;"
+ "*(u32 *)(r10 - 8) = 0;"
+ "r0 = *(u64 *)(r10 - 8);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Test that a write to the upper half of an SPI is dead when only
+ * the lower half is read. This was impossible at SPI granularity
+ * where any read of the SPI kept the entire SPI live.
+ *
+ * 0: *(u32 *)(r10 - 8) = 0 def slot 1 (DEAD: never read)
+ * 1: *(u32 *)(r10 - 4) = 0 def slot 0
+ * 2: r0 = *(u32 *)(r10 - 4) use slot 0 only
+ * 3: r0 = 0
+ * 4: exit
+ *
+ * At insn 0, nothing is live (0x0). Previously at SPI granularity,
+ * the read at insn 2 would mark the full SPI 0 as live and the
+ * 4-byte writes wouldn't count as def, so insn 0 would have had
+ * SPI 0 live (0x1).
+ */
+SEC("socket")
+__log_level(2)
+__msg("0: (62) *(u32 *)(r10 -8) = 0 ; def: fp0-8h")
+__msg("2: (61) r0 = *(u32 *)(r10 -4) ; use: fp0-4h")
+__naked void dead_half_spi_write(void)
+{
+ asm volatile (
+ "*(u32 *)(r10 - 8) = 0;"
+ "*(u32 *)(r10 - 4) = 0;"
+ "r0 = *(u32 *)(r10 - 4);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Test that a 4-byte read from the upper half of SPI 0 makes only
+ * slot 1 live (0x2), not the full SPI (0x3).
+ *
+ * 0: *(u64 *)(r10 - 8) = 0 def slots 0,1
+ * 1: r0 = *(u32 *)(r10 - 8) use slot 1 only (upper half)
+ * 2: r0 = 0
+ * 3: exit
+ *
+ * At insn 1, live_stack_before = 0x2 (slot 1 only).
+ */
+SEC("socket")
+__log_level(2)
+__msg("1: (61) r0 = *(u32 *)(r10 -8) ; use: fp0-8h")
+__naked void four_byte_read_upper_half(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 8) = 0;"
+ "r0 = *(u32 *)(r10 - 8);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Test that a 2-byte write does NOT count as stack_def.
+ * Sub-4-byte writes don't fully cover a 4-byte slot,
+ * so liveness passes through.
+ *
+ * 0: *(u64 *)(r10 - 8) = 0 def slots 0,1
+ * 1: *(u16 *)(r10 - 4) = 0 NOT stack_def (2 < 4 bytes)
+ * 2: r0 = *(u32 *)(r10 - 4) use slot 0
+ * 3: r0 = 0
+ * 4: exit
+ *
+ * At insn 1, slot 0 still live (0x1) because 2-byte write
+ * didn't kill it.
+ */
+SEC("socket")
+__log_level(2)
+__msg("0: (7a) *(u64 *)(r10 -8) = 0 ; def: fp0-8")
+__msg("1: (6a) *(u16 *)(r10 -4) = 0{{$}}")
+__msg("2: (61) r0 = *(u32 *)(r10 -4) ; use: fp0-4h")
+__naked void two_byte_write_no_kill(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 8) = 0;"
+ "*(u16 *)(r10 - 4) = 0;"
+ "r0 = *(u32 *)(r10 - 4);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Test that a 1-byte write does NOT count as stack_def.
+ *
+ * 0: *(u64 *)(r10 - 8) = 0 def slots 0,1
+ * 1: *(u8 *)(r10 - 4) = 0 NOT stack_def (1 < 4 bytes)
+ * 2: r0 = *(u32 *)(r10 - 4) use slot 0
+ * 3: r0 = 0
+ * 4: exit
+ *
+ * At insn 1, slot 0 still live (0x1).
+ */
+SEC("socket")
+__log_level(2)
+__msg("0: (7a) *(u64 *)(r10 -8) = 0 ; def: fp0-8")
+__msg("1: (72) *(u8 *)(r10 -4) = 0")
+__msg("2: (61) r0 = *(u32 *)(r10 -4) ; use: fp0-4h")
+__naked void one_byte_write_no_kill(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 8) = 0;"
+ "*(u8 *)(r10 - 4) = 0;"
+ "r0 = *(u32 *)(r10 - 4);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Test stack access beyond fp-256 exercising the second bitmask word.
+ * fp-264 is SPI 32, slots 64-65, which are bits 0-1 of live_stack[1].
+ *
+ * 0: *(u64 *)(r10 - 264) = 0 def slots 64,65
+ * 1: r0 = *(u64 *)(r10 - 264) use slots 64,65
+ * 2: r0 = 0
+ * 3: exit
+ *
+ * At insn 1, live_stack high word has bits 0,1 set: 0x3:0x0.
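+ * (Arithmetic: the deepest byte, fp-264, lands in slot 264/4 - 1 = 65,
+ * paired with slot 64; word = slot / 64 = 1, bits = slot % 64.)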
+ */
+SEC("socket")
+__log_level(2)
+__msg("1: (79) r0 = *(u64 *)(r10 -264) ; use: fp0-264")
+__naked void high_stack_second_bitmask_word(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 264) = 0;"
+ "r0 = *(u64 *)(r10 - 264);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Test that two separate 4-byte writes to each half of an SPI
+ * together kill liveness for the full SPI.
+ *
+ * 0: *(u32 *)(r10 - 8) = 0 def slot 1 (upper half)
+ * 1: *(u32 *)(r10 - 4) = 0 def slot 0 (lower half)
+ * 2: r0 = *(u64 *)(r10 - 8) use slots 0,1
+ * 3: r0 = 0
+ * 4: exit
+ *
+ * At insn 0: live_stack_before = 0x0 (both slots killed by insns 0,1).
+ * At insn 1: live_stack_before = 0x2 (slot 1 still live, slot 0 killed here).
+ */
+SEC("socket")
+__log_level(2)
+__msg("0: (62) *(u32 *)(r10 -8) = 0 ; def: fp0-8h")
+__msg("1: (62) *(u32 *)(r10 -4) = 0 ; def: fp0-4h")
+__naked void two_four_byte_writes_kill_full_spi(void)
+{
+ asm volatile (
+ "*(u32 *)(r10 - 8) = 0;"
+ "*(u32 *)(r10 - 4) = 0;"
+ "r0 = *(u64 *)(r10 - 8);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Test that 4-byte writes on both branches kill a slot at the
+ * join point. Previously at SPI granularity, a 4-byte write was
+ * not stack_def, so liveness would flow backward through the
+ * branch that only had a 4-byte write.
+ *
+ * 0: call bpf_get_prandom_u32
+ * 1: if r0 != 0 goto 1f
+ * 2: *(u64 *)(r10 - 8) = 0 path A: def slots 0,1
+ * 3: goto 2f
+ * 1:4: *(u32 *)(r10 - 4) = 0 path B: def slot 0
+ * 2:5: r0 = *(u32 *)(r10 - 4) use slot 0
+ * 6: r0 = 0
+ * 7: exit
+ *
+ * Both paths define slot 0 before the read. At insn 1 (branch),
+ * live_stack_before = 0x0 because slot 0 is killed on both paths.
+ */
+SEC("socket")
+__log_level(2)
+__msg("1: (55) if r0 != 0x0 goto pc+2")
+__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; def: fp0-8")
+__msg("3: (05) goto pc+1")
+__msg("4: (62) *(u32 *)(r10 -4) = 0 ; def: fp0-4h")
+__msg("5: (61) r0 = *(u32 *)(r10 -4) ; use: fp0-4h")
+__naked void both_branches_kill_slot(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 0 goto 1f;"
+ "*(u64 *)(r10 - 8) = 0;"
+ "goto 2f;"
+"1:"
+ "*(u32 *)(r10 - 4) = 0;"
+"2:"
+ "r0 = *(u32 *)(r10 - 4);"
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* Soundness: cleaning the dead upper half of an SPI must not
+ * affect the live lower half's type information for pruning.
+ *
+ * Both halves of SPI 0 are written separately. Only the lower
+ * half (slot 0) is used as a 4-byte map key. The upper half
+ * (slot 1) is dead and cleaned to STACK_INVALID.
+ *
+ * Path A: key stays 0 (STACK_ZERO) → non-null array lookup
+ * Path B: key byte turns STACK_MISC → may-null array lookup
+ * Deref without null check: safe for A, unsafe for B.
+ *
+ * If half-SPI cleaning incorrectly corrupted the live half's
+ * type info, path A's cached state could generalize and unsoundly
+ * prune path B.
+ *
+ * Expected: reject (path B unsafe).
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+__naked void half_spi_clean_preserves_stack_zero(void)
+{
+ asm volatile (
+ "*(u32 *)(r10 - 4) = 0;" /* slot 0: STACK_ZERO */
+ "*(u32 *)(r10 - 8) = 0;" /* slot 1: STACK_ZERO (dead) */
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 0 goto l_nonconst%=;"
+ "goto l_lookup%=;"
+"l_nonconst%=:"
+ "*(u8 *)(r10 - 4) = r0;" /* slot 0: STACK_MISC */
+"l_lookup%=:"
+ "r2 = r10;"
+ "r2 += -4;"
+ "r1 = %[array_map_8b] ll;"
+ "call %[bpf_map_lookup_elem];"
+ "r0 = *(u64 *)(r0 + 0);" /* unsafe if null */
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(array_map_8b)
+ : __clobber_all);
+}
+
+/*
+ * Model of scx_lavd's pick_idle_cpu_at_cpdom iat block:
+ * conditional block with helper call and temporary stack spill,
+ * spill dead after merge.
+ *
+ * Path A (fall-through): spill r6 to fp-8 across helper call
+ * Path B (branch taken): skip the block entirely
+ * At merge (insn 6): fp-8 is dead (never read after merge)
+ *
+ * Static liveness marks fp-8 dead at merge. clean_verifier_state()
+ * converts path A's STACK_SPILL to STACK_INVALID. Path B has
+ * STACK_INVALID. stacksafe() matches -> path B pruned -> "6: safe".
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__success
+__log_level(2)
+__msg("6: safe")
+__naked void dead_spill_at_merge_enables_pruning(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r6 = 7;"
+ "if r0 != 0 goto l_skip%=;"
+ /* conditional block: spill, call, reload */
+ "*(u64 *)(r10 - 8) = r6;"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = *(u64 *)(r10 - 8);"
+"l_skip%=:"
+ /* fp-8 dead. Path B pruned here -> "6: safe" */
+ "r0 = r6;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * FP-offset tracking loses precision on second ADD, killing all liveness.
+ *
+ * fp_off_insn_xfer() handles "FP itself + negative imm" precisely
+ * (e.g. r6 = r10; r6 += -24 -> slot 5). But any subsequent ADD/SUB
+ * on a register that already has non-zero spis falls through to
+ * spis_set_all(), because the code only handles the FP-itself case.
+ *
+ * A write through this imprecise register enters the non-zero-spis
+ * branch of set_indirect_stack_access(), which OR's the all-ones
+ * mask into stack_def. The backward liveness equation
+ *
+ * stack_in = (stack_out & ~stack_def) | stack_use
+ *
+ * sees ~ALL = 0, killing ALL slot liveness at that instruction.
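+ *
+ * Worked example: with only fp-8 live (stack_out = 0x1), an all-ones
+ * stack_def and empty stack_use give
+ * stack_in = (0x1 & ~ALL) | 0x0 = 0x0, so fp-8 wrongly looks dead.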
+ *
+ * At the merge pruning point, live_stack_before is empty.
+ * clean_verifier_state() marks fp-8 as STACK_INVALID.
+ * stacksafe() skips STACK_INVALID slots (the early "continue"), so pruning
+ * succeeds regardless of the current state's fp-8 value.
+ * Path B is pruned, its null deref is never explored.
+ *
+ * Correct behavior: reject (path B dereferences NULL).
+ * Bug behavior: accept (path B pruned away).
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R1 invalid mem access 'scalar'")
+__naked void fp_add_loses_precision_kills_liveness(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 0 goto l_pathB%=;"
+
+ /* Path A (fall-through, explored first): fp-8 = 0 */
+ "r1 = 0;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "goto l_merge%=;"
+
+"l_pathB%=:"
+ /* Path B (explored second): fp-8 = 42 */
+ "r1 = 42;"
+ "*(u64 *)(r10 - 8) = r1;"
+
+"l_merge%=:"
+ /*
+ * Create imprecise FP-derived register.
+ * r6 = r10 - 24 gets precise slot 5.
+ * r6 += 8 hits the else branch (spis non-zero, delta > 0)
+ * and sets spis to ALL. r6 is actually r10-16.
+ */
+ "r6 = r10;"
+ "r6 += -24;"
+ "r6 += 8;"
+
+ /*
+ * Write through imprecise r6. Actually writes to fp-16
+ * (does NOT touch fp-8), but liveness marks ALL slots
+ * as stack_def, killing fp-8's liveness.
+ */
+ "r7 = 0;"
+ "*(u64 *)(r6 + 0) = r7;"
+
+ /* Read fp-8: liveness says dead, but value is needed. */
+ "r2 = *(u64 *)(r10 - 8);"
+ "if r2 == 42 goto l_danger%=;"
+
+ /* r2 != 42 (path A: r2 == 0): safe exit */
+ "r0 = 0;"
+ "exit;"
+
+"l_danger%=:"
+ /* Only reachable from path B (r2 == 42): null deref */
+ "r1 = 0;"
+ "r0 = *(u64 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R1 invalid mem access 'scalar'")
+__naked void fp_spill_loses_precision_kills_liveness(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 0 goto l_pathB%=;"
+
+ "r1 = 0;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "goto l_merge%=;"
+
+"l_pathB%=:"
+ "r1 = 42;"
+ "*(u64 *)(r10 - 8) = r1;"
+
+"l_merge%=:"
+ "r6 = r10;"
+ "r6 += -64;"
+ "*(u64 *)(r10 - 160) = r6;"
+ "r6 = *(u64 *)(r10 - 160);"
+
+ "r7 = 0;"
+ "*(u64 *)(r6 + 0) = r7;"
+
+ "r2 = *(u64 *)(r10 - 8);"
+ "if r2 == 42 goto l_danger%=;"
+
+ "r0 = *(u64 *)(r10 - 56);"
+ "exit;"
+
+"l_danger%=:"
+ "r1 = 0;"
+ "r0 = *(u64 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* === Tests for frame-based AT_FP tracking === */
+
+/*
+ * Test 1: conditional_stx_in_subprog
+ * Subprog conditionally writes caller's slot.
+ * Verify slot stays live (backward pass handles conditional def via CFG).
+ *
+ * Main writes fp-8=42, calls cond_writer(fp-8), reads fp-8.
+ * cond_writer only writes on one path → parent_def only on that path.
+ * The backward parent_live correctly keeps fp-8 live at entry
+ * (conditional write doesn't kill liveness at the join).
+ */
+SEC("socket")
+__log_level(2)
+/* fp-8 live at call (callee conditionally writes → slot not killed) */
+__msg("1: (7b) *(u64 *)(r10 -8) = r1 ; def: fp0-8")
+__msg("4: (85) call pc+2{{$}}")
+__msg("5: (79) r0 = *(u64 *)(r10 -8) ; use: fp0-8")
+__naked void conditional_stx_in_subprog(void)
+{
+ asm volatile (
+ "r1 = 42;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call cond_writer;"
+ "r0 = *(u64 *)(r10 - 8);"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Conditionally writes to *(r1+0) */
+static __used __naked void cond_writer(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 0 goto 1f;"
+ "*(u64 *)(r6 + 0) = r0;"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
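+/*
+ * Test 2: multiple_callsites_different_offsets
+ * Same callee called from two sites with different FP offsets.
+ * Each call site records a use only for the slot it passes
+ * (fp-16 at the first call, fp-32 at the second).
+ */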
+SEC("socket")
+__log_level(2)
+__msg("4: (85) call pc+{{.*}} ; use: fp0-16")
+__msg("7: (85) call pc+{{.*}} ; use: fp0-32")
+__naked void multiple_callsites_different_offsets(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 16) = 0;"
+ "*(u64 *)(r10 - 32) = 0;"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call read_first_param;"
+ "r1 = r10;"
+ "r1 += -32;"
+ "call read_first_param;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/*
+ * Test 3: nested_fp_passthrough
+ * main→A→B, main's FP forwarded to B. B accesses main's stack.
+ * Verify liveness propagates through.
+ *
+ * Main passes fp-32 to outer_forwarder, which passes it to inner_reader.
+ * inner_reader reads at arg+0 (= main's fp-32).
+ * parent_live propagates transitively: inner→outer→main.
+ */
+SEC("socket")
+__log_level(2)
+/* At call to outer_forwarder: main's fp-32 (slots 6,7) should be live */
+__msg("6: (85) call pc+{{.*}} ; use: fp0-32")
+__naked void nested_fp_passthrough(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 8) = 0;"
+ "*(u64 *)(r10 - 16) = 0;"
+ "*(u64 *)(r10 - 24) = 0;"
+ "*(u64 *)(r10 - 32) = 0;"
+ "r1 = r10;"
+ "r1 += -32;"
+ "call outer_forwarder;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Forwards arg to inner_reader */
+static __used __naked void outer_forwarder(void)
+{
+ asm volatile (
+ "call inner_reader;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
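+/* Reads at arg+0 (= main's fp-32) */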
+static __used __naked void inner_reader(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r1 + 0);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/*
+ * Test 4: callee_must_write_before_read
+ * Callee unconditionally writes parent slot before reading.
+ * Verify slot is NOT live at call site (parent_def kills it).
+ */
+SEC("socket")
+__log_level(2)
+/* fp-8 NOT live at call: callee writes before reading (parent_def kills it) */
+__msg("2: .12345.... (85) call pc+")
+__naked void callee_must_write_before_read(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "call write_then_read;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Unconditionally writes *(r1+0), then reads it back */
+static __used __naked void write_then_read(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "r7 = 99;"
+ "*(u64 *)(r6 + 0) = r7;"
+ "r0 = *(u64 *)(r6 + 0);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/*
+ * Test 5: return_site_liveness_bleeding
+ * Main calls subprog twice. Slot used after one call but not the other.
+ * Context-insensitive: slot conservatively live at both.
+ *
+ * After first call: read fp-8.
+ * After second call: don't read fp-8.
+ * Since parent_live is per-subprog (not per call-site),
+ * fp-8 is live at both call sites.
+ */
+SEC("socket")
+__log_level(2)
+/* Both calls have fp-8 live due to context-insensitive parent_live */
+__msg("3: (85) call pc+{{.*}} ; use: fp0-8")
+__msg("7: (85) call pc+{{.*}} ; use: fp0-8")
+__naked void return_site_liveness_bleeding(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 8) = 0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call read_first_param;"
+ "r0 = *(u64 *)(r10 - 8);"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call read_first_param;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
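+/*
+ * bpf_loop callback conditionally reads one slot beyond ctx:
+ * ctx = &fp-8, the callback reads ctx-8 (= caller fp-16), so fp-16
+ * must stay live at the bpf_loop call.
+ */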
+SEC("socket")
+__log_level(2)
+__msg("9: (85) call bpf_loop#181 ; use: fp0-16")
+__naked void callback_conditional_read_beyond_ctx(void)
+{
+ asm volatile (
+ "r1 = 42;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "r1 = 2;"
+ "r2 = cb_cond_read ll;"
+ "r3 = r10;"
+ "r3 += -8;"
+ "r4 = 0;"
+ "call %[bpf_loop];"
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_loop)
+ : __clobber_all);
+}
+
+/* Callback conditionally reads *(ctx - 8) = caller fp-16 */
+static __used __naked void cb_cond_read(void)
+{
+ asm volatile (
+ "r6 = r2;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 0 goto 1f;"
+ "r0 = *(u64 *)(r6 - 8);"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("14: (7b) *(u64 *)(r6 -8) = r7 ; def: fp0-16")
+__msg("15: (79) r0 = *(u64 *)(r6 -8) ; use: fp0-16")
+__naked void callback_write_before_read_kills(void)
+{
+ asm volatile (
+ "r1 = 42;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "r1 = 2;"
+ "r2 = cb_write_read ll;"
+ "r3 = r10;"
+ "r3 += -8;"
+ "r4 = 0;"
+ "call %[bpf_loop];"
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_loop)
+ : __clobber_all);
+}
+
+/* Callback unconditionally writes *(ctx-8), then reads it back.
+ * The write (parent_def) kills liveness, so the slot is not live at entry.
+ */
+static __used __naked void cb_write_read(void)
+{
+ asm volatile (
+ "r6 = r2;"
+ "r7 = 99;"
+ "*(u64 *)(r6 - 8) = r7;"
+ "r0 = *(u64 *)(r6 - 8);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/*
+ * bpf_loop callback conditionally writes fp-16, then unconditionally
+ * reads it. The conditional write does NOT kill liveness, so fp-16
+ * stays live at the bpf_loop call.
+ */
+SEC("socket")
+__log_level(2)
+__msg("9: (85) call bpf_loop#181 ; use: fp0-16")
+__naked void callback_conditional_write_preserves(void)
+{
+ asm volatile (
+ "r1 = 42;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "r1 = 2;"
+ "r2 = cb_cond_write_read ll;"
+ "r3 = r10;"
+ "r3 += -8;"
+ "r4 = 0;"
+ "call %[bpf_loop];"
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_loop)
+ : __clobber_all);
+}
+
+static __used __naked void cb_cond_write_read(void)
+{
+ asm volatile (
+ "r6 = r2;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 0 goto 1f;"
+ "*(u64 *)(r6 - 8) = r0;"
+ "1:"
+ "r0 = *(u64 *)(r6 - 8);"
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * Two bpf_loop calls with the same callback but different ctx pointers.
+ *
+ * First call: ctx=fp-8, second call: ctx=fp-24.
+ */
+SEC("socket")
+__log_level(2)
+__msg(" 8: (85) call bpf_loop{{.*}} ; use: fp0-8")
+__msg("15: (85) call bpf_loop{{.*}} ; use: fp0-24")
+__naked void callback_two_calls_different_ctx(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 8) = 0;"
+ "*(u64 *)(r10 - 24) = 0;"
+ "r1 = 1;"
+ "r2 = cb_read_ctx ll;"
+ "r3 = r10;"
+ "r3 += -8;"
+ "r4 = 0;"
+ "call %[bpf_loop];"
+ "r1 = 1;"
+ "r2 = cb_read_ctx ll;"
+ "r3 = r10;"
+ "r3 += -24;"
+ "r4 = 0;"
+ "call %[bpf_loop];"
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_loop)
+ : __clobber_all);
+}
+
+/* Callback reads at ctx+0 unconditionally */
+static __used __naked void cb_read_ctx(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r2 + 0);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/*
+ * Reproducer for unsound pruning in refined_caller_live_stack().
+ *
+ * Three-level call chain: main → mid_fwd → grandchild_deref.
+ * Main passes &fp-8 to mid_fwd, which forwards R1 to grandchild_deref.
+ * grandchild_deref reads main's fp-8 through the forwarded pointer
+ * and dereferences the result.
+ *
+ * refined_caller_live_stack() performs a spurious callee_offset++
+ * when mid_fwd (frame 1) is mid-call. This drops the transitive
+ * parent_live
+ * contribution at mid_fwd's call instruction — the only place
+ * where grandchild_deref's read of main's fp-8 is recorded.
+ * As a result, main's fp-8 is cleaned to STACK_INVALID at the
+ * pruning point inside grandchild_deref, and path B is
+ * incorrectly pruned against path A.
+ *
+ * Path A: main stores PTR_TO_MAP_VALUE at fp-8
+ * Path B: main stores scalar 42 at fp-8
+ *
+ * Correct behavior: reject (path B dereferences scalar)
+ * Bug behavior: accept (path B pruned against cleaned path A)
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'scalar'")
+__naked void transitive_parent_stack_read_unsound(void)
+{
+ asm volatile (
+ /* Map lookup to get PTR_TO_MAP_VALUE */
+ "r1 = %[map] ll;"
+ "*(u32 *)(r10 - 32) = 0;"
+ "r2 = r10;"
+ "r2 += -32;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto l_exit%=;"
+ "r6 = r0;"
+ /* Branch: path A (fall-through) explored first */
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 0 goto l_scalar%=;"
+ /* Path A: fp-8 = PTR_TO_MAP_VALUE */
+ "*(u64 *)(r10 - 8) = r6;"
+ "goto l_merge%=;"
+"l_scalar%=:"
+ /* Path B: fp-8 = scalar 42 */
+ "r1 = 42;"
+ "*(u64 *)(r10 - 8) = r1;"
+"l_merge%=:"
+ /* Pass &fp-8 to mid_fwd → grandchild_deref */
+ "r1 = r10;"
+ "r1 += -8;"
+ "call mid_fwd;"
+ "r0 = 0;"
+ "exit;"
+"l_exit%=:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_get_prandom_u32),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+/* Forwards R1 (ptr to main's fp-8) to grandchild_deref */
+static __used __naked void mid_fwd(void)
+{
+ asm volatile (
+ "call grandchild_deref;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Reads main's fp-8 through forwarded pointer, dereferences result */
+static __used __naked void grandchild_deref(void)
+{
+ asm volatile (
+ "goto +0;" /* checkpoint */
+ "goto +0;" /* checkpoint */
+ /* read main's fp-8: map_ptr (path A) or scalar (path B) */
+ "r0 = *(u64 *)(r1 + 0);"
+ /* dereference: safe for map_ptr, unsafe for scalar */
+ "r0 = *(u64 *)(r0 + 0);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
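+/*
+ * Middle frame spills &own fp-16 at fp-8 and the caller pointer at
+ * fp-16, then passes two FP-derived args (&own fp-8 and the caller
+ * pointer) across a no-op call before reloading and dereferencing.
+ * The tracking and use/def logs must keep frame attribution straight
+ * (fp1-* vs fp0-*).
+ */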
+SEC("socket")
+__log_level(2)
+__success
+__msg("14: (79) r1 = *(u64 *)(r10 -8) // r6=fp0-8 r7=fp1-16 fp-8=fp1-16 fp-16=fp0-8")
+__msg("15: (79) r0 = *(u64 *)(r1 +0) // r1=fp1-16 r6=fp0-8 r7=fp1-16 fp-8=fp1-16 fp-16=fp0-8")
+__msg("stack use/def subprog#1 mid_two_fp_threshold (d1,cs2):")
+__msg("14: (79) r1 = *(u64 *)(r10 -8) ; use: fp1-8")
+__msg("15: (79) r0 = *(u64 *)(r1 +0) ; use: fp1-16")
+__naked void two_fp_clear_stack_threshold(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "call mid_two_fp_threshold;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void mid_two_fp_threshold(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "r7 = r10;"
+ "r7 += -16;"
+ "*(u64 *)(r10 - 8) = r7;"
+ "*(u64 *)(r10 - 16) = r6;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = r6;"
+ "call inner_nop_fptest;"
+ "r1 = *(u64 *)(r10 - 8);"
+ "r0 = *(u64 *)(r1 + 0);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
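+/* No-op leaf: only provides a call boundary inside the mid frame */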
+static __used __naked void inner_nop_fptest(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
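+/* Same shape, but only one FP-derived arg (&own fp-8) crosses the
+ * no-op call.
+ */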
+SEC("socket")
+__log_level(2)
+__success
+__msg("13: (79) r1 = *(u64 *)(r10 -8) // r6=fp0-8 r7=fp1-16 fp-8=fp1-16 fp-16=fp0-8")
+__msg("14: (79) r0 = *(u64 *)(r1 +0) // r1=fp1-16 r6=fp0-8 r7=fp1-16 fp-8=fp1-16 fp-16=fp0-8")
+__msg("stack use/def subprog#1 mid_one_fp_threshold (d1,cs2):")
+__msg("13: (79) r1 = *(u64 *)(r10 -8) ; use: fp1-8")
+__msg("14: (79) r0 = *(u64 *)(r1 +0) ; use: fp1-16")
+__naked void one_fp_clear_stack_threshold(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "call mid_one_fp_threshold;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void mid_one_fp_threshold(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "r7 = r10;"
+ "r7 += -16;"
+ "*(u64 *)(r10 - 8) = r7;"
+ "*(u64 *)(r10 - 16) = r6;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call inner_nop_fptest;"
+ "r1 = *(u64 *)(r10 - 8);"
+ "r0 = *(u64 *)(r1 + 0);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/*
+ * Reproducer for unsound pruning when a subprog forwards a parent
+ * stack pointer (AT_PARENT) to a helper with a memory argument.
+ *
+ * set_call_stack_access_at() previously only tracked AT_CURRENT args,
+ * skipping AT_PARENT entirely. This meant helper reads through parent
+ * stack pointers did not set parent_use, letting the slot appear dead
+ * at pruning checkpoints inside the subprog.
+ *
+ * Program shape:
+ * main:
+ * *(u32)(fp-4) = 0 key = STACK_ZERO (const 0)
+ * call bpf_get_prandom_u32
+ * if r0 != 0 goto clobber path A (fall-through) first
+ * goto merge
+ * clobber:
+ * *(u8)(fp-4) = r0 path B: key[0] = STACK_MISC
+ * merge:
+ * r1 = fp - 4
+ * call fwd_parent_key_to_helper
+ * r0 = 0
+ * exit
+ *
+ * fwd_parent_key_to_helper(r1 = &caller_fp-4):
+ * goto +0 checkpoint
+ * r2 = r1 R2 = AT_PARENT ptr to caller fp-4
+ * r1 = array_map_8b ll R1 = array map
+ * call bpf_map_lookup_elem reads key_size(4) from parent fp-4
+ * r0 = *(u64 *)(r0 + 0) deref without null check
+ * r0 = 0
+ * exit
+ *
+ * Path A: STACK_ZERO key = const 0 -> array lookup -> PTR_TO_MAP_VALUE
+ * (non-NULL for in-bounds const key) -> deref OK.
+ * Path B: STACK_MISC key = unknown -> array lookup ->
+ * PTR_TO_MAP_VALUE_OR_NULL -> deref UNSAFE.
+ *
+ * Bug: AT_PARENT R2 arg to bpf_map_lookup_elem skipped -> parent_use
+ * not set -> fp-4 cleaned at checkpoint -> STACK_ZERO collapses
+ * to STACK_INVALID -> path B pruned -> deref never checked.
+ *
+ * Correct verifier behavior: reject (path B deref of map_value_or_null).
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+__naked void helper_parent_stack_read_unsound(void)
+{
+ asm volatile (
+ /* key at fp-4: all bytes STACK_ZERO */
+ "*(u32 *)(r10 - 4) = 0;"
+ "call %[bpf_get_prandom_u32];"
+ /* fall-through (path A) explored first */
+ "if r0 != 0 goto l_clobber%=;"
+ /* path A: key stays constant zero */
+ "goto l_merge%=;"
+"l_clobber%=:"
+ /* path B: key[0] becomes STACK_MISC, key no longer const */
+ "*(u8 *)(r10 - 4) = r0;"
+"l_merge%=:"
+ "r1 = r10;"
+ "r1 += -4;"
+ "call fwd_parent_key_to_helper;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * Subprog forwards parent stack pointer to bpf_map_lookup_elem as key
+ * on an array map, then dereferences the result without a null check.
+ * R1 = &parent_fp-4 (AT_PARENT in this frame).
+ *
+ * The helper reads key_size(4) bytes from parent stack. The deref of
+ * R0 reads the map value, NOT parent stack, so record_insn_mem_accesses
+ * does not set parent_use for it. The ONLY parent stack access is
+ * through the helper's R2 arg.
+ */
+static __used __naked void fwd_parent_key_to_helper(void)
+{
+ asm volatile (
+ "goto +0;" /* checkpoint */
+ "r2 = r1;" /* R2 = parent ptr (AT_PARENT) */
+ "r1 = %[array_map_8b] ll;" /* R1 = array map */
+ "call %[bpf_map_lookup_elem];" /* reads 4 bytes from parent fp-4 */
+ /* deref without null check: safe for PTR_TO_MAP_VALUE,
+ * unsafe for PTR_TO_MAP_VALUE_OR_NULL
+ */
+ "r0 = *(u64 *)(r0 + 0);"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(array_map_8b)
+ : __clobber_all);
+}
+
+/*
+ * Regression test: later helper args must still be scanned after a
+ * whole-stack fallback triggered by an earlier local arg. The first
+ * bpf_snprintf() arg is a local
+ * frame-derived pointer with offset-imprecise tracking (`fp1 ?`), which
+ * conservatively marks the whole local stack live. The fourth arg still
+ * forwards &parent_fp-8 and must contribute nonlocal_use[0]=0:3.
+ */
+SEC("socket")
+__log_level(2)
+__success
+__msg("call bpf_snprintf{{.*}} ; use: fp1-8..-512 fp0-8")
+__naked void helper_arg_fallback_keeps_scanning(void)
+{
+ asm volatile (
+ "r1 = 42;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call helper_snprintf_parent_after_local_fallback;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void helper_snprintf_parent_after_local_fallback(void)
+{
+ asm volatile (
+ "r6 = r1;" /* save &parent_fp-8 */
+ "call %[bpf_get_prandom_u32];"
+ "r0 &= 8;"
+ "r1 = r10;"
+ "r1 += -16;"
+ "r1 += r0;" /* local fp, offset-imprecise */
+ "r2 = 8;"
+ "r3 = %[snprintf_u64_fmt] ll;"
+ "r4 = r6;" /* later arg: parent fp-8 */
+ "r5 = 8;"
+ "call %[bpf_snprintf];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_snprintf),
+ __imm_addr(snprintf_u64_fmt)
+ : __clobber_all);
+}
+
+/*
+ * Test that propagate_callee_ancestor() correctly chains ancestor
+ * liveness across sequential calls within a single frame.
+ *
+ * main → mid_seq_touch → {nop_callee, deref_ancestor}
+ *
+ * mid_seq_touch receives two pointers: R1 = &main_fp-8 (forwarded to
+ * deref_ancestor) and R2 = &main_fp-16 (read directly by mid_seq_touch).
+ * The direct read of fp-16 forces ensure_anc_arrays() to allocate
+ * ancestor_live[0] for mid_seq_touch, so refined_caller_live_stack()
+ * uses the refined path (not the conservative fallback).
+ *
+ * mid_seq_touch calls nop_callee first (no-op, creates a pruning point),
+ * then calls deref_ancestor which reads main's fp-8 and dereferences it.
+ *
+ * propagate_callee_ancestor() propagates deref_ancestor's entry
+ * ancestor_live[0] into mid_seq_touch's anc_use[0] at the call-to-deref
+ * instruction. mid_seq_touch's backward pass flows this backward so
+ * ancestor_live[0] includes fp-8 at the pruning point between the calls.
+ *
+ * Without propagation, mid_seq_touch's ancestor_live[0] only has fp-16
+ * (from the direct read) — fp-8 is missing. refined_caller_live_stack()
+ * Term 1 says fp-8 is dead, the verifier cleans it, and path B
+ * (scalar 42) is incorrectly pruned against path A (MAP_VALUE).
+ *
+ * Path A: main stores PTR_TO_MAP_VALUE at fp-8 → deref succeeds
+ * Path B: main stores scalar 42 at fp-8 → deref must fail
+ *
+ * Correct: reject (path B dereferences scalar)
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'scalar'")
+__naked void propagate_callee_ancestor_chain(void)
+{
+ asm volatile (
+ /* Map lookup to get PTR_TO_MAP_VALUE */
+ "r1 = %[map] ll;"
+ "*(u32 *)(r10 - 32) = 0;"
+ "r2 = r10;"
+ "r2 += -32;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto l_exit%=;"
+ "r6 = r0;"
+ /* Branch: path A (fall-through) explored first */
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 0 goto l_scalar%=;"
+ /* Path A: fp-8 = PTR_TO_MAP_VALUE */
+ "*(u64 *)(r10 - 8) = r6;"
+ "goto l_merge%=;"
+"l_scalar%=:"
+ /* Path B: fp-8 = scalar 42 */
+ "r1 = 42;"
+ "*(u64 *)(r10 - 8) = r1;"
+"l_merge%=:"
+ /* fp-16 = dummy value (mid_seq_touch reads it directly) */
+ "r1 = 99;"
+ "*(u64 *)(r10 - 16) = r1;"
+ /* R1 = &fp-8 (for deref_ancestor), R2 = &fp-16 (for mid_seq_touch) */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call mid_seq_touch;"
+ "r0 = 0;"
+ "exit;"
+"l_exit%=:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_get_prandom_u32),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+/*
+ * R1 = &main_fp-8 (forwarded to deref_ancestor)
+ * R2 = &main_fp-16 (read directly here → allocates ancestor_live[0])
+ *
+ * Reads main's fp-16 to force ancestor_live[0] allocation, then
+ * calls nop_callee (pruning point), then deref_ancestor.
+ */
+static __used __naked void mid_seq_touch(void)
+{
+ asm volatile (
+ "r6 = r1;" /* save &main_fp-8 in callee-saved */
+ "r0 = *(u64 *)(r2 + 0);" /* read main's fp-16: triggers anc_use[0] */
+ "call nop_callee;" /* no-op, creates pruning point after */
+ "r1 = r6;" /* restore ptr to &main_fp-8 */
+ "call deref_ancestor;" /* reads main's fp-8, dereferences */
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void nop_callee(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Reads main's fp-8 through forwarded pointer, dereferences result */
+static __used __naked void deref_ancestor(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r1 + 0);" /* read main's fp-8 */
+ "r0 = *(u64 *)(r0 + 0);" /* deref: safe for map_ptr, unsafe for scalar */
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/*
+ * Test: callee loads an fp-derived pointer from caller's stack, then
+ * reads through it to access another caller stack slot.
+ *
+ * main stores PTR_TO_MAP_VALUE at fp-24, stores &fp-24 (an fp-derived
+ * pointer) at fp-8, passes &fp-8 through mid_fwd_spilled_ptr to
+ * load_ptr_deref_grandchild. The leaf loads the pointer from main's
+ * fp-8, then reads main's fp-24 through the loaded pointer.
+ *
+ * fill_from_stack() in arg_track_xfer() only handles local-frame
+ * FP-derived loads (src_is_local_fp check requires frame == depth).
+ * When a callee loads from a parent-frame pointer (frame < depth),
+ * the loaded value gets ARG_NONE instead of being recognized as
+ * fp-derived. Subsequent reads through that loaded pointer are
+ * invisible to liveness — nonlocal_use is never set for fp-24.
+ *
+ * clean_live_states() cleans the current state at every prune point.
+ * Because liveness misses fp-24, refined_caller_live_stack() tells
+ * __clean_func_state() that fp-24 is dead, which destroys the
+ * PTR_TO_MAP_VALUE spill before the grandchild can read it.
+ * The grandchild then reads STACK_INVALID → scalar, and the deref
+ * is rejected with "R0 invalid mem access 'scalar'" — even though
+ * fp-24 is genuinely live and holds a valid map pointer.
+ *
+ * This is a false positive: a valid program incorrectly rejected.
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__success
+__naked void spilled_fp_cross_frame_deref(void)
+{
+ asm volatile (
+ /* Map lookup to get PTR_TO_MAP_VALUE */
+ "r1 = %[map] ll;"
+ "*(u32 *)(r10 - 32) = 0;"
+ "r2 = r10;"
+ "r2 += -32;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto l_exit%=;"
+ /* fp-24 = PTR_TO_MAP_VALUE */
+ "*(u64 *)(r10 - 24) = r0;"
+ /* Store pointer to fp-24 at fp-8 */
+ "r1 = r10;"
+ "r1 += -24;"
+ "*(u64 *)(r10 - 8) = r1;"
+ /* R1 = &fp-8: pointer to the spilled ptr */
+ "r1 = r10;"
+ "r1 += -8;"
+ "call mid_fwd_spilled_ptr;"
+ "r0 = 0;"
+ "exit;"
+"l_exit%=:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+/* Forwards R1 (ptr to main's fp-8, which holds &main_fp-24) to leaf */
+static __used __naked void mid_fwd_spilled_ptr(void)
+{
+ asm volatile (
+ "call load_ptr_deref_grandchild;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/*
+ * R1 = &main_fp-8 (where main stored ptr to fp-24)
+ * Loads the ptr from main's fp-8, reads main's fp-24 through it,
+ * then dereferences the result.
+ */
+static __used __naked void load_ptr_deref_grandchild(void)
+{
+ asm volatile (
+ /* Load ptr from main's fp-8 → r2 = &main_fp-24 */
+ "r2 = *(u64 *)(r1 + 0);"
+ /* Read main's fp-24 through loaded ptr */
+ "r0 = *(u64 *)(r2 + 0);"
+ /* Dereference: safe for map_ptr */
+ "r0 = *(u64 *)(r0 + 0);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/*
+ * Exercise merge_nonlocal_live().
+ *
+ * merge_shared_mid is analyzed twice (once from each wrapper), so the
+ * callsite within merge_shared_mid that calls merge_leaf_read gets its
+ * nonlocal_live info merged twice via merge_nonlocal_live().
+ */
+SEC("socket")
+__log_level(2)
+__success
+__msg("14: (85) call pc+2 r1: fp0-16")
+__msg("17: (79) r0 = *(u64 *)(r1 +0) // r1=fp0-16")
+__msg("14: (85) call pc+2 r1: fp0-8")
+__msg("17: (79) r0 = *(u64 *)(r1 +0) // r1=fp0-8")
+__msg("5: (85) call pc+{{.*}} ; use: fp0-8 fp0-16")
+__naked void test_merge_nonlocal_live(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call merge_wrapper_a;"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call merge_wrapper_b;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
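+/* Both wrappers forward r1 unchanged to merge_shared_mid */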
+static __used __naked void merge_wrapper_a(void)
+{
+ asm volatile (
+ "call merge_shared_mid;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void merge_wrapper_b(void)
+{
+ asm volatile (
+ "call merge_shared_mid;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void merge_shared_mid(void)
+{
+ asm volatile (
+ "call merge_leaf_read;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
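+/* Leaf: reads through the forwarded pointer in r1 */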
+static __used __naked void merge_leaf_read(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r1 + 0);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Same bpf_loop instruction calls different callbacks depending on branch. */
+SEC("socket")
+__log_level(2)
+__success
+__msg("call bpf_loop#181 ; use: fp2-8..-512 fp1-8..-512 fp0-8..-512")
+__naked void bpf_loop_two_callbacks(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call dyn_wrapper_a;"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call dyn_wrapper_b;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void dyn_wrapper_a(void)
+{
+ asm volatile (
+ "call mid_dynamic_cb;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void dyn_wrapper_b(void)
+{
+ asm volatile (
+ "call mid_dynamic_cb;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
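+/* Branches on a random value to pick dyn_cb_a or dyn_cb_b, then
+ * passes the parent FP-derived pointer as the bpf_loop ctx.
+ */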
+static __used __naked void mid_dynamic_cb(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 0 goto 1f;"
+ "r2 = dyn_cb_a ll;"
+ "goto 2f;"
+ "1:"
+ "r2 = dyn_cb_b ll;"
+ "2:"
+ "r1 = 1;"
+ "r3 = r6;" /* ctx = fp-derived ptr from parent */
+ "r4 = 0;"
+ "call %[bpf_loop];"
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32),
+ __imm(bpf_loop)
+ : __clobber_all);
+}
+
+/* Callback A/B: read parent stack through ctx */
+static __used __naked void dyn_cb_a(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r2 + 0);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void dyn_cb_b(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r2 + 0);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/*
+ * Path A: r0 = map_lookup result (non-FP, ARG_NONE for stack tracking)
+ * Path B: r0 = fp-8 (FP-derived, frame=0, off=-8)
+ * At the join: r0 is not guaranteed to be a frame pointer.
+ */
+SEC("socket")
+__log_level(2)
+__msg("10: (79) r0 = *(u64 *)(r10 -8) // r0=fp0-8|fp0+0")
+__naked void stack_or_non_stack_write(void)
+{
+ asm volatile (
+ /* initial write to fp-8 */
+ "*(u64 *)(r10 - 8) = 0;"
+ /* map lookup to get a non-FP pointer */
+ "r2 = r10;"
+ "r2 += -4;"
+ "r1 = %[map] ll;"
+ "call %[bpf_map_lookup_elem];"
+ /* r0 = map_value (ARG_NONE) */
+ "if r0 != 0 goto 1f;"
+ /* path B: r0 = fp-8 */
+ "r0 = r10;"
+ "r0 += -8;"
+"1:"
+ /* join: the write is not a def for fp[0]-8 */
+ "*(u64 *)(r0 + 0) = 7;"
+ /* read fp-8: should be non-poisoned */
+ "r0 = *(u64 *)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+__msg("subprog#2 write_first_read_second:")
+__msg("17: (7a) *(u64 *)(r1 +0) = 42{{$}}")
+__msg("18: (79) r0 = *(u64 *)(r2 +0) // r1=fp0-8 r2=fp0-16{{$}}")
+__msg("stack use/def subprog#2 write_first_read_second (d2,cs15):")
+__msg("17: (7a) *(u64 *)(r1 +0) = 42{{$}}")
+__msg("18: (79) r0 = *(u64 *)(r2 +0) ; use: fp0-8 fp0-16")
+__naked void shared_instance_must_write_overwrite(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ /* Call 1: write_first_read_second(&fp[-8], &fp[-16]) */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call forwarding_rw;"
+ /* Call 2: write_first_read_second(&fp[-16], &fp[-8]) */
+ "r1 = r10;"
+ "r1 += -16;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call forwarding_rw;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
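+/* Forwards r1/r2 unchanged to write_first_read_second */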
+static __used __naked void forwarding_rw(void)
+{
+ asm volatile (
+ "call write_first_read_second;"
+ "exit;"
+ ::: __clobber_all);
+}
+
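+/* Writes *(r1+0), then reads *(r2+0) */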
+static __used __naked void write_first_read_second(void)
+{
+ asm volatile (
+ "*(u64 *)(r1 + 0) = 42;"
+ "r0 = *(u64 *)(r2 + 0);"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/*
+ * Shared must_write when (callsite, depth) instance is reused.
+ * Main calls fwd_to_stale_wr at two sites. fwd_to_stale_wr calls
+ * stale_wr_leaf at a single internal callsite. Both calls share
+ * stale_wr_leaf's (callsite, depth) instance.
+ *
+ * Call 1: stale_wr_leaf(map_value, fp-8) writes map, reads fp-8.
+ * Call 2: stale_wr_leaf(fp-8, fp-8) writes fp-8, reads fp-8.
+ *
+ * The analysis can't presume that stale_wr_leaf() always writes fp-8;
+ * it must conservatively join the must_write masks computed for both calls.
+ */
+SEC("socket")
+__success
+__naked void stale_must_write_cross_callsite(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 8) = 0;"
+ /* Call 1: map_value write, fp-8 read (processed second in PO) */
+ "*(u32 *)(r10 - 16) = 0;"
+ "r1 = %[map] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto 1f;"
+ "r1 = r0;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call fwd_to_stale_wr;"
+ /* Call 2: fp-8 write, fp-8 read (processed first in PO) */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = r1;"
+ "call fwd_to_stale_wr;"
+"1:"
+ "r0 = 0;"
+ "exit;"
+ :: __imm_addr(map),
+ __imm(bpf_map_lookup_elem)
+ : __clobber_all);
+}
+
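+/* Forwards r1 (write target) and r2 (read source) to stale_wr_leaf */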
+static __used __naked void fwd_to_stale_wr(void)
+{
+ asm volatile (
+ "call stale_wr_leaf;"
+ "exit;"
+ ::: __clobber_all);
+}
+
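+/* Writes *(r1+0), then reads *(r2+0) */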
+static __used __naked void stale_wr_leaf(void)
+{
+ asm volatile (
+ "*(u64 *)(r1 + 0) = 42;"
+ "r0 = *(u64 *)(r2 + 0);"
+ "exit;"
+ ::: __clobber_all);
+}
+
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("socket")
+__log_level(2)
+__success
+__msg("*(u64 *)(r0 +0) = 42 ; def: fp0-16")
+__naked void load_acquire_dont_clear_dst(void)
+{
+ asm volatile (
+ "r0 = r10;"
+ "r0 += -16;"
+ "*(u64 *)(r0 + 0) = r0;" /* fp[-16] == &fp[-16] */
+ ".8byte %[load_acquire_insn];" /* load_acquire is a special case for BPF_STX, */
+ "r0 = *(u64 *)(r10 - 16);" /* it shouldn't clear tracking info for */
+ "*(u64 *)(r0 + 0) = 42;" /* dst register, r0 in this case. */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_0, 0))
+ : __clobber_all);
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+SEC("socket")
+__success
+__naked void imprecise_fill_loses_cross_frame(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 8) = 0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call imprecise_fill_cross_frame;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void imprecise_fill_cross_frame(void)
+{
+ asm volatile (
+ /* spill &caller_fp-8 to callee's fp-8 */
+ "*(u64 *)(r10 - 8) = r1;"
+ /* imprecise FP pointer in r1 */
+ "r1 = r10;"
+ "r2 = -8;"
+ "r1 += r2;"
+ /* load from imprecise offset. fill_from_stack returns
+ * ARG_IMPRECISE{mask=BIT(1)}, losing frame 0
+ */
+ "r1 = *(u64 *)(r1 + 0);"
+ /* read caller's fp-8 through loaded pointer, should mark fp0-8 live */
+ "r0 = *(u64 *)(r1 + 0);"
+ "r0 = 0;"
+ "exit;"
+	::: __clobber_all);
+}
+
+/* Test that spill_to_stack with multi-offset dst (sz=8) joins instead
+ * of overwriting. r1 has offsets [-8, -16]. Both slots hold FP-derived
+ * pointers. Writing through r1 should join *val with existing values,
+ * not destroy them.
+ *
+ * fp-8 = &fp-24
+ * fp-16 = &fp-32
+ * r1 = fp-8 or fp-16 (two offsets from branch)
+ * *(u64 *)(r1 + 0) = &fp-24 -- writes to one slot, other untouched
+ * r0 = *(u64 *)(r10 - 16) -- fill from fp-16
+ * r0 = *(u64 *)(r0 + 0) -- deref: should produce use
+ */
+SEC("socket")
+__log_level(2)
+__success
+__msg("20: (79) r0 = *(u64 *)(r10 -16)")
+__msg("21: (79) r0 = *(u64 *)(r0 +0) ; use: fp0-24 fp0-32")
+__naked void spill_join_with_multi_off(void)
+{
+ asm volatile (
+ /* fp-8 = &fp-24, fp-16 = &fp-32 (different pointers) */
+ "*(u64 *)(r10 - 24) = 0;"
+ "*(u64 *)(r10 - 32) = 0;"
+ "r1 = r10;"
+ "r1 += -24;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r1 = r10;"
+ "r1 += -32;"
+ "*(u64 *)(r10 - 16) = r1;"
+ /* create r1 with two candidate offsets: fp-8 or fp-16 */
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 0 goto 1f;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "goto 2f;"
+"1:"
+ "r1 = r10;"
+ "r1 += -16;"
+"2:"
+ /* write &fp-24 through multi-offset r1: hits one slot, other untouched */
+ "r2 = r10;"
+ "r2 += -24;"
+ "*(u64 *)(r1 + 0) = r2;"
+ /* read back *fp-8 and *fp-16 */
+ "r0 = *(u64 *)(r10 - 8);"
+ "r0 = *(u64 *)(r0 + 0);"
+ "r0 = *(u64 *)(r10 - 16);"
+ "r0 = *(u64 *)(r0 + 0);"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* Test that spill_to_stack with imprecise dst (off_cnt == 0, sz=8)
+ * joins instead of overwriting. Use "r2 = -8; r1 += r2" to make
+ * arg tracking lose offset precision while the main verifier keeps
+ * r1 as PTR_TO_STACK with fixed offset. Both slots hold FP-derived
+ * pointers. Writing through r1 should join *val with existing
+ * values, not destroy them.
+ *
+ * fp-8 = &fp-24
+ * fp-16 = &fp-32
+ * r1 = fp-8 (imprecise to arg tracking)
+ * *(u64 *)(r1 + 0) = &fp-24 -- since r1 is imprecise, this adds &fp-24
+ * to the set of possible values for all slots,
+ * hence the values at fp-16 become [fp-24, fp-32]
+ * r0 = *(u64 *)(r10 - 16)
+ * r0 = *(u64 *)(r0 + 0) -- deref: should produce use of fp-24 or fp-32
+ */
+SEC("socket")
+__log_level(2)
+__success
+__msg("15: (79) r0 = *(u64 *)(r0 +0) ; use: fp0-24 fp0-32")
+__naked void spill_join_with_imprecise_off(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 24) = 0;"
+ "*(u64 *)(r10 - 32) = 0;"
+ "r1 = r10;"
+ "r1 += -24;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r1 = r10;"
+ "r1 += -32;"
+ "*(u64 *)(r10 - 16) = r1;"
+ /* r1 = fp-8 but arg tracking sees off_cnt == 0 */
+ "r1 = r10;"
+ "r2 = -8;"
+ "r1 += r2;"
+ /* write through imprecise r1 */
+ "r3 = r10;"
+ "r3 += -24;"
+ "*(u64 *)(r1 + 0) = r3;"
+ /* read back fp-16: at_stack should still track &fp-32 */
+ "r0 = *(u64 *)(r10 - 16);"
+ /* deref: should produce use for fp-32 */
+ "r0 = *(u64 *)(r0 + 0);"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_liveness_exp.c b/tools/testing/selftests/bpf/progs/verifier_liveness_exp.c
new file mode 100644
index 000000000000..b058de623200
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_liveness_exp.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/*
+ * Exponential complexity in analyze_subprog() liveness analysis.
+ *
+ * analyze_subprog() recurses into each call site that passes FP-derived
+ * arguments, creating a unique func_instance per (callsite, depth).
+ * There is no memoization for callees reached with equivalent entry args.
+ * Even if memoization were added, it can be defeated by passing a distinct
+ * FP offset at each call site. arg_track keys on (frame, off[]), so
+ * r1=fp-8, r1=fp-16, ... r1=fp-400 produce 50 unique cache keys per level.
+ *
+ * This test chains 8 subprograms (the MAX_CALL_FRAMES limit). Each
+ * intermediate function calls the next one 50 times, each time with a
+ * different FP-relative offset in r1.
+ *
+ * Without complexity limits in analyze_subprog(), the resulting
+ * 50^7 ~ 7.8 * 10^11 recursive analyze_subprog() calls would cause a
+ * CPU soft lockup or OOM.
+ *
+ * The BPF program itself is ~1100 instructions and perfectly valid.
+ */
+
+char _license[] SEC("license") = "GPL";
+
+/* Call fn with r1 = r10 + off (a unique FP-derived arg per call site) */
+#define C(fn, off) "r1 = r10;" \
+ "r1 += -" #off ";" \
+ "call " #fn ";"
+
+/* 50 calls, each with a distinct FP offset: -8, -16, ... -400 */
+#define CALLS_50(fn) \
+ C(fn, 8) C(fn, 16) C(fn, 24) C(fn, 32) C(fn, 40) \
+ C(fn, 48) C(fn, 56) C(fn, 64) C(fn, 72) C(fn, 80) \
+ C(fn, 88) C(fn, 96) C(fn, 104) C(fn, 112) C(fn, 120) \
+ C(fn, 128) C(fn, 136) C(fn, 144) C(fn, 152) C(fn, 160) \
+ C(fn, 168) C(fn, 176) C(fn, 184) C(fn, 192) C(fn, 200) \
+ C(fn, 208) C(fn, 216) C(fn, 224) C(fn, 232) C(fn, 240) \
+ C(fn, 248) C(fn, 256) C(fn, 264) C(fn, 272) C(fn, 280) \
+ C(fn, 288) C(fn, 296) C(fn, 304) C(fn, 312) C(fn, 320) \
+ C(fn, 328) C(fn, 336) C(fn, 344) C(fn, 352) C(fn, 360) \
+ C(fn, 368) C(fn, 376) C(fn, 384) C(fn, 392) C(fn, 400)
+
+/* Leaf: depth 7, no further calls */
+__naked __noinline __used
+static unsigned long exp_sub7(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* depth 6 -> calls exp_sub7 x50 with distinct offsets */
+__naked __noinline __used
+static unsigned long exp_sub6(void)
+{
+ asm volatile (
+ CALLS_50(exp_sub7)
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* depth 5 -> calls exp_sub6 x50 */
+__naked __noinline __used
+static unsigned long exp_sub5(void)
+{
+ asm volatile (
+ CALLS_50(exp_sub6)
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* depth 4 -> calls exp_sub5 x50 */
+__naked __noinline __used
+static unsigned long exp_sub4(void)
+{
+ asm volatile (
+ CALLS_50(exp_sub5)
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* depth 3 -> calls exp_sub4 x50 */
+__naked __noinline __used
+static unsigned long exp_sub3(void)
+{
+ asm volatile (
+ CALLS_50(exp_sub4)
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* depth 2 -> calls exp_sub3 x50 */
+__naked __noinline __used
+static unsigned long exp_sub2(void)
+{
+ asm volatile (
+ CALLS_50(exp_sub3)
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* depth 1 -> calls exp_sub2 x50 */
+__naked __noinline __used
+static unsigned long exp_sub1(void)
+{
+ asm volatile (
+ CALLS_50(exp_sub2)
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/*
+ * Entry: depth 0. Calls exp_sub1 50 times, each with a distinct
+ * FP offset in r1. Every call site produces a unique arg_track,
+ * defeating any memoization keyed on entry args.
+ */
+SEC("?raw_tp")
+__failure __log_level(2)
+__msg("liveness analysis exceeded complexity limit")
+__naked int liveness_exponential_complexity(void)
+{
+ asm volatile (
+ CALLS_50(exp_sub1)
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c
index fbdde80e7b90..d248ce877f14 100644
--- a/tools/testing/selftests/bpf/progs/verifier_loops1.c
+++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c
@@ -138,8 +138,7 @@ l0_%=: exit; \
SEC("tracepoint")
__description("bounded recursion")
__failure
-/* verifier limitation in detecting max stack depth */
-__msg("the call stack of 8 frames is too deep !")
+__msg("recursive call from")
__naked void bounded_recursion(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_meta_access.c b/tools/testing/selftests/bpf/progs/verifier_meta_access.c
index d81722fb5f19..62235f032ffe 100644
--- a/tools/testing/selftests/bpf/progs/verifier_meta_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_meta_access.c
@@ -27,7 +27,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("meta access, test2")
-__failure __msg("invalid access to packet, off=-8")
+__failure __msg("R0 min value is negative")
__naked void meta_access_test2(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
index 1ecd34ebde19..646e8ef82051 100644
--- a/tools/testing/selftests/bpf/progs/verifier_private_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
@@ -170,11 +170,11 @@ __jited(" mrs x10, TPIDR_EL{{[0-1]}}")
__jited(" add x27, x27, x10")
__jited(" add x25, x27, {{.*}}")
__jited(" bl 0x{{.*}}")
-__jited(" add x7, x0, #0x0")
+__jited(" mov x7, x0")
__jited(" mov x0, #0x2a")
__jited(" str x0, [x27]")
__jited(" bl 0x{{.*}}")
-__jited(" add x7, x0, #0x0")
+__jited(" mov x7, x0")
__jited(" mov x7, #0x0")
__jited(" ldp x25, x27, [sp], {{.*}}")
__naked void private_stack_callback(void)
@@ -220,7 +220,7 @@ __jited(" mov x0, #0x2a")
__jited(" str x0, [x27]")
__jited(" mov x0, #0x0")
__jited(" bl 0x{{.*}}")
-__jited(" add x7, x0, #0x0")
+__jited(" mov x7, x0")
__jited(" ldp x27, x28, [sp], #0x10")
int private_stack_exception_main_prog(void)
{
@@ -258,7 +258,7 @@ __jited(" add x25, x27, {{.*}}")
__jited(" mov x0, #0x2a")
__jited(" str x0, [x27]")
__jited(" bl 0x{{.*}}")
-__jited(" add x7, x0, #0x0")
+__jited(" mov x7, x0")
__jited(" ldp x27, x28, [sp], #0x10")
int private_stack_exception_sub_prog(void)
{
diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
index 58c7704d61cd..70ae14d6084f 100644
--- a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
+++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
@@ -264,13 +264,13 @@ void precision_many_frames__bar(void)
*/
SEC("socket")
__success __log_level(2)
-__msg("11: (0f) r2 += r1")
+__msg("12: (0f) r2 += r1")
/* foo frame */
-__msg("frame1: regs=r1 stack= before 10: (bf) r2 = r10")
-__msg("frame1: regs=r1 stack= before 9: (25) if r1 > 0x7 goto pc+0")
-__msg("frame1: regs=r1 stack=-8,-16 before 8: (7b) *(u64 *)(r10 -16) = r1")
-__msg("frame1: regs=r1 stack=-8 before 7: (7b) *(u64 *)(r10 -8) = r1")
-__msg("frame1: regs=r1 stack= before 4: (85) call pc+2")
+__msg("frame1: regs=r1 stack= before 11: (bf) r2 = r10")
+__msg("frame1: regs=r1 stack= before 10: (25) if r1 > 0x7 goto pc+0")
+__msg("frame1: regs=r1 stack=-8,-16 before 9: (7b) *(u64 *)(r10 -16) = r1")
+__msg("frame1: regs=r1 stack=-8 before 8: (7b) *(u64 *)(r10 -8) = r1")
+__msg("frame1: regs=r1 stack= before 4: (85) call pc+3")
/* main frame */
__msg("frame0: regs=r1 stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r1")
__msg("frame0: regs=r1 stack= before 2: (bf) r1 = r0")
@@ -286,6 +286,7 @@ __naked void precision_stack(void)
"r1 = r0;"
"*(u64*)(r10 - 8) = r1;"
"call precision_stack__foo;"
+ "r0 = *(u64*)(r10 - 8);"
"r0 = 0;"
"exit;"
:
@@ -309,6 +310,8 @@ void precision_stack__foo(void)
*/
"r2 = r10;"
"r2 += r1;"
+ "r0 = *(u64*)(r10 - 8);"
+ "r0 = *(u64*)(r10 - 16);"
"exit"
::: __clobber_all);
}
@@ -592,10 +595,10 @@ __naked void check_ids_in_regsafe_2(void)
*/
SEC("socket")
__success __log_level(2)
-__msg("11: (1d) if r3 == r4 goto pc+0")
+__msg("14: (1d) if r3 == r4 goto pc+0")
__msg("frame 0: propagating r3,r4")
-__msg("11: safe")
-__msg("processed 15 insns")
+__msg("14: safe")
+__msg("processed 18 insns")
__flag(BPF_F_TEST_STATE_FREQ)
__naked void no_scalar_id_for_const(void)
{
@@ -605,6 +608,7 @@ __naked void no_scalar_id_for_const(void)
"if r0 > 7 goto l0_%=;"
/* possibly generate same scalar ids for r3 and r4 */
"r1 = 0;"
+ "r1 ^= r1;" /* prevent bpf_prune_dead_branches from folding the branch */
"r1 = r1;"
"r3 = r1;"
"r4 = r1;"
@@ -612,7 +616,9 @@ __naked void no_scalar_id_for_const(void)
"l0_%=:"
/* possibly generate different scalar ids for r3 and r4 */
"r1 = 0;"
+ "r1 ^= r1;"
"r2 = 0;"
+ "r2 ^= r2;"
"r3 = r1;"
"r4 = r2;"
"l1_%=:"
@@ -628,10 +634,10 @@ __naked void no_scalar_id_for_const(void)
/* Same as no_scalar_id_for_const() but for 32-bit values */
SEC("socket")
__success __log_level(2)
-__msg("11: (1e) if w3 == w4 goto pc+0")
+__msg("14: (1e) if w3 == w4 goto pc+0")
__msg("frame 0: propagating r3,r4")
-__msg("11: safe")
-__msg("processed 15 insns")
+__msg("14: safe")
+__msg("processed 18 insns")
__flag(BPF_F_TEST_STATE_FREQ)
__naked void no_scalar_id_for_const32(void)
{
@@ -641,6 +647,7 @@ __naked void no_scalar_id_for_const32(void)
"if r0 > 7 goto l0_%=;"
/* possibly generate same scalar ids for r3 and r4 */
"w1 = 0;"
+ "w1 ^= w1;" /* prevent bpf_prune_dead_branches from folding the branch */
"w1 = w1;"
"w3 = w1;"
"w4 = w1;"
@@ -648,11 +655,13 @@ __naked void no_scalar_id_for_const32(void)
"l0_%=:"
/* possibly generate different scalar ids for r3 and r4 */
"w1 = 0;"
+ "w1 ^= w1;"
"w2 = 0;"
+ "w2 ^= w2;"
"w3 = w1;"
"w4 = w2;"
"l1_%=:"
- /* predictable jump, marks r1 and r2 precise */
+ /* predictable jump, marks r3 and r4 precise */
"if w3 == w4 goto +0;"
"r0 = 0;"
"exit;"
@@ -796,9 +805,9 @@ __success __log_level(2)
/* The exit instruction should be reachable from two states,
* use two matches and "processed .. insns" to ensure this.
*/
-__msg("15: (95) exit")
-__msg("15: (95) exit")
-__msg("processed 20 insns")
+__msg("16: (95) exit")
+__msg("16: (95) exit")
+__msg("processed 22 insns")
__flag(BPF_F_TEST_STATE_FREQ)
__naked void two_old_ids_one_cur_id(void)
{
@@ -829,6 +838,11 @@ __naked void two_old_ids_one_cur_id(void)
"r2 = r10;"
"r2 += r6;"
"r2 += r7;"
+ /*
+	 * keep r8 and r9 live, otherwise r6->id and r7->id
+	 * become singular and get reset to zero before the "if r6 > r7" check
+ */
+ "r9 += r8;"
"exit;"
:
: __imm(bpf_ktime_get_ns)
diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
index 7a13dbd794b2..6bc721accbae 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@@ -650,13 +650,13 @@ __msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 8: (79) r2 = *(u64 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6")
/* note, fp-8 is precise, fp-16 is not yet precise, we'll get there */
-__msg("mark_precise: frame0: parent state regs= stack=-8: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=1")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
__msg("mark_precise: frame0: regs= stack=-8 before 3: (7a) *(u64 *)(r10 -8) = 1")
-__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
+__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,imm=1) R2=1")
/* validate load from fp-16, which was initialized using BPF_STX_MEM */
__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2=1 R10=fp0 fp-16=1")
__msg("13: (0f) r1 += r2")
@@ -668,12 +668,12 @@ __msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2")
__msg("mark_precise: frame0: regs= stack=-16 before 8: (79) r2 = *(u64 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6")
/* now both fp-8 and fp-16 are precise, very good */
-__msg("mark_precise: frame0: parent state regs= stack=-16: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=P1")
+__msg("mark_precise: frame0: parent state regs= stack=-16: R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=P1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-16 before 5: (7b) *(u64 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
-__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
+__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,imm=1) R2=1")
__naked void stack_load_preserves_const_precision(void)
{
asm volatile (
@@ -726,13 +726,13 @@ __msg("9: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 8: (61) r2 = *(u32 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6")
-__msg("mark_precise: frame0: parent state regs= stack=-8: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????1")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-8 before 5: (63) *(u32 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
__msg("mark_precise: frame0: regs= stack=-8 before 3: (62) *(u32 *)(r10 -8) = 1")
-__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
+__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,imm=1) R2=1")
/* validate load from fp-16, which was initialized using BPF_STX_MEM */
__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2=1 R10=fp0 fp-16=????1")
__msg("13: (0f) r1 += r2")
@@ -743,12 +743,12 @@ __msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2
__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2")
__msg("mark_precise: frame0: regs= stack=-16 before 8: (61) r2 = *(u32 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6")
-__msg("mark_precise: frame0: parent state regs= stack=-16: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????P1")
+__msg("mark_precise: frame0: parent state regs= stack=-16: R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????P1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-16 before 5: (63) *(u32 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
-__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
+__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,imm=1) R2=1")
__naked void stack_load_preserves_const_precision_subreg(void)
{
asm volatile (
@@ -780,6 +780,8 @@ __naked void stack_load_preserves_const_precision_subreg(void)
"r1 += r2;"
"*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+ "r2 = *(u64 *)(r10 -8);" /* keep slots alive */
+ "r2 = *(u64 *)(r10 -16);"
"r0 = 0;"
"exit;"
:
@@ -1279,4 +1281,82 @@ __naked void stack_noperfmon_spill_32bit_onto_64bit_slot(void)
: __clobber_all);
}
+/*
+ * stacksafe(): check if 32-bit scalar spill in old state is considered
+ * equivalent to STACK_MISC in cur state.
+ * 32-bit scalar spill creates slot[0-3] = STACK_MISC, slot[4-7] = STACK_SPILL.
+ * Without 32-bit spill support in stacksafe(), the STACK_SPILL vs STACK_MISC
+ * mismatch at slot[4] causes pruning to fail.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("8: (79) r1 = *(u64 *)(r10 -8)")
+__msg("8: safe")
+__msg("processed 11 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_imprecise_scalar32_vs_cur_stack_misc(void)
+{
+ asm volatile(
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure 32-bit scalar spill at fp-8 */
+ "r0 = 42;"
+ "*(u32*)(r10 - 8) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure STACK_MISC at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u16*)(r10 - 8) = r0;"
+ "*(u16*)(r10 - 6) = r0;"
+"2:"
+ /* read fp-8, should be considered safe on second visit */
+ "r1 = *(u64*)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__success
+__naked void var_off_write_over_scalar_spill(void)
+{
+ asm volatile (
+ /* Get an unknown value bounded to {0, 4} */
+ "call %[bpf_ktime_get_ns];"
+ "r6 = r0;"
+ "r6 &= 4;"
+
+ /* Spill a scalar to fp-16 */
+ "r7 = 0xdeadbeef00000000 ll;"
+ "*(u64 *)(r10 - 16) = r7;"
+
+ /*
+ * Variable-offset 4-byte write covering [fp-12, fp-4).
+ * This touches stype[3..0] of the spill slot at fp-16 but
+ * leaves stype[7..4] as STACK_SPILL. check_stack_write_var_off()
+ * must scrub the entire slot when setting spilled_ptr to NOT_INIT,
+ * otherwise a subsequent sub-register fill sees a non-scalar
+ * spilled_ptr and is rejected.
+ */
+ "r8 = r10;"
+ "r8 += r6;"
+ "r8 += -12;"
+ "r9 = 0;"
+ "*(u32 *)(r8 + 0) = r9;"
+
+ /*
+ * 4-byte read from fp-16. Without the fix this fails with
+ * "invalid size of register fill" because is_spilled_reg()
+ * sees STACK_SPILL while spilled_ptr.type == NOT_INIT.
+ */
+ "r0 = *(u32 *)(r10 - 16);"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
index 24aabc6083fd..8e8cf8232255 100644
--- a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
+++ b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
@@ -37,7 +37,7 @@ __naked void ptr_to_stack_store_load(void)
SEC("socket")
__description("PTR_TO_STACK store/load - bad alignment on off")
-__failure __msg("misaligned stack access off 0+-8+2 size 8")
+__failure __msg("misaligned stack access off -8+2 size 8")
__failure_unpriv
__naked void load_bad_alignment_on_off(void)
{
@@ -53,7 +53,7 @@ __naked void load_bad_alignment_on_off(void)
SEC("socket")
__description("PTR_TO_STACK store/load - bad alignment on reg")
-__failure __msg("misaligned stack access off 0+-10+8 size 8")
+__failure __msg("misaligned stack access off -10+8 size 8")
__failure_unpriv
__naked void load_bad_alignment_on_reg(void)
{
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
index 61886ed554de..d21d32f6a676 100644
--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
@@ -282,7 +282,7 @@ __msg("mark_precise: frame0: regs=r0,r6 stack= before 10: (bf) r6 = r0")
__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop")
/* State entering callback body popped from states stack */
__msg("from 9 to 17: frame1:")
-__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: frame1: R10=fp0 cb")
__msg("17: (b7) r0 = 0")
__msg("18: (95) exit")
__msg("returning from callee:")
@@ -411,7 +411,7 @@ __msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1")
__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
/* State entering callback body popped from states stack */
__msg("from 9 to 15: frame1:")
-__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("15: frame1: R10=fp0 cb")
__msg("15: (b7) r0 = 0")
__msg("16: (95) exit")
__msg("returning from callee:")
@@ -567,7 +567,7 @@ __msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6
__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
/* State entering callback body popped from states stack */
__msg("from 10 to 17: frame1:")
-__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: frame1: R10=fp0 cb")
__msg("17: (b7) r0 = 0")
__msg("18: (95) exit")
__msg("returning from callee:")
@@ -681,7 +681,7 @@ __msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8")
__msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4")
__msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)")
-__msg("mark_precise: frame0: parent state regs= stack=-8: R0=2 R6=1 R8=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8=P1")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R8=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8=P1")
__msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
__msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2")
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_topo.c b/tools/testing/selftests/bpf/progs/verifier_subprog_topo.c
new file mode 100644
index 000000000000..e2b9d14bbc3d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_topo.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* linear chain main -> A -> B */
+__naked __noinline __used
+static unsigned long linear_b(void)
+{
+ asm volatile (
+ "r0 = 42;"
+ "exit;"
+ );
+}
+
+__naked __noinline __used
+static unsigned long linear_a(void)
+{
+ asm volatile (
+ "call linear_b;"
+ "exit;"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("topo_order[0] = linear_b")
+__msg("topo_order[1] = linear_a")
+__msg("topo_order[2] = topo_linear")
+__naked int topo_linear(void)
+{
+ asm volatile (
+ "call linear_a;"
+ "exit;"
+ );
+}
+
+/* diamond main -> A, main -> B, A -> C, B -> C */
+__naked __noinline __used
+static unsigned long diamond_c(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "exit;"
+ );
+}
+
+__naked __noinline __used
+static unsigned long diamond_b(void)
+{
+ asm volatile (
+ "call diamond_c;"
+ "exit;"
+ );
+}
+
+__naked __noinline __used
+static unsigned long diamond_a(void)
+{
+ asm volatile (
+ "call diamond_c;"
+ "exit;"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("topo_order[0] = diamond_c")
+__msg("topo_order[3] = topo_diamond")
+__naked int topo_diamond(void)
+{
+ asm volatile (
+ "call diamond_a;"
+ "call diamond_b;"
+ "exit;"
+ );
+}
+
+/* main -> global_a (global) -> static_leaf (static, leaf) */
+__naked __noinline __used
+static unsigned long static_leaf(void)
+{
+ asm volatile (
+ "r0 = 7;"
+ "exit;"
+ );
+}
+
+__noinline __used
+int global_a(int x)
+{
+ return static_leaf();
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("topo_order[0] = static_leaf")
+__msg("topo_order[1] = global_a")
+__msg("topo_order[2] = topo_mixed")
+__naked int topo_mixed(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "call global_a;"
+ "exit;"
+ );
+}
+
+/*
+ * shared static callee from global and main:
+ * main -> shared_leaf (static)
+ * main -> global_b (global) -> shared_leaf (static)
+ */
+__naked __noinline __used
+static unsigned long shared_leaf(void)
+{
+ asm volatile (
+ "r0 = 99;"
+ "exit;"
+ );
+}
+
+__noinline __used
+int global_b(int x)
+{
+ return shared_leaf();
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("topo_order[0] = shared_leaf")
+__msg("topo_order[1] = global_b")
+__msg("topo_order[2] = topo_shared")
+__naked int topo_shared(void)
+{
+ asm volatile (
+ "call shared_leaf;"
+ "r1 = 0;"
+ "call global_b;"
+ "exit;"
+ );
+}
+
+/* duplicate calls to the same subprog */
+__naked __noinline __used
+static unsigned long dup_leaf(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("topo_order[0] = dup_leaf")
+__msg("topo_order[1] = topo_dup_calls")
+__naked int topo_dup_calls(void)
+{
+ asm volatile (
+ "call dup_leaf;"
+ "call dup_leaf;"
+ "exit;"
+ );
+}
+
+/* main calls bpf_loop() with loop_cb as the callback */
+static int loop_cb(int idx, void *ctx)
+{
+ return 0;
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("topo_order[0] = loop_cb")
+__msg("topo_order[1] = topo_loop_cb")
+int topo_loop_cb(void)
+{
+ bpf_loop(1, loop_cb, NULL, 0);
+ return 0;
+}
+
+/*
+ * bpf_loop callback calling another subprog
+ * main -> bpf_loop(callback=loop_cb2) -> loop_cb2 -> loop_cb2_leaf
+ */
+__naked __noinline __used
+static unsigned long loop_cb2_leaf(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ );
+}
+
+static int loop_cb2(int idx, void *ctx)
+{
+ return loop_cb2_leaf();
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("topo_order[0] = loop_cb2_leaf")
+__msg("topo_order[1] = loop_cb2")
+__msg("topo_order[2] = topo_loop_cb_chain")
+int topo_loop_cb_chain(void)
+{
+ bpf_loop(1, loop_cb2, NULL, 0);
+ return 0;
+}
+
+/* no calls (single subprog) */
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("topo_order[0] = topo_no_calls")
+__naked int topo_no_calls(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ );
+}
+
+char _license[] SEC("license") = "GPL";
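The topo_order[] expectations above all assume callees are emitted before their callers, i.e. a DFS postorder over the call graph. A minimal user-space sketch of that ordering follows; the adjacency-matrix representation and names are illustrative, not the verifier's actual data structures.

/*
 * Illustrative sketch (not the verifier's implementation): a DFS
 * postorder over the call graph emits every callee before its
 * callers, which is the order the topo_order[] messages above expect.
 */
#include <stdio.h>

#define MAX_SUBPROGS 16

struct callgraph {
	int nr;					/* number of subprogs */
	int edge[MAX_SUBPROGS][MAX_SUBPROGS];	/* edge[i][j]: i calls j */
};

static void dfs(const struct callgraph *g, int n, int *seen, int *order, int *cnt)
{
	int j;

	if (seen[n])
		return;
	seen[n] = 1;
	for (j = 0; j < g->nr; j++)
		if (g->edge[n][j])
			dfs(g, j, seen, order, cnt);
	order[(*cnt)++] = n;	/* postorder: emitted after all callees */
}

int main(void)
{
	/* the diamond test above: 0 = main, 1 = A, 2 = B, 3 = C */
	struct callgraph g = { .nr = 4 };
	int seen[MAX_SUBPROGS] = {0}, order[MAX_SUBPROGS], cnt = 0, i;

	g.edge[0][1] = g.edge[0][2] = 1;
	g.edge[1][3] = g.edge[2][3] = 1;
	dfs(&g, 0, seen, order, &cnt);
	for (i = 0; i < cnt; i++)
		printf("topo_order[%d] = %d\n", i, order[i]);
	/* prints 3 (C) first and 0 (main) last, as topo_diamond expects */
	return 0;
}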
diff --git a/tools/testing/selftests/bpf/progs/verifier_subreg.c b/tools/testing/selftests/bpf/progs/verifier_subreg.c
index be328100ba53..31832a306f91 100644
--- a/tools/testing/selftests/bpf/progs/verifier_subreg.c
+++ b/tools/testing/selftests/bpf/progs/verifier_subreg.c
@@ -823,4 +823,169 @@ __naked void arsh_63_or(void)
: __clobber_all);
}
+SEC("socket")
+__success __retval(42)
+__naked void arsh32_imm1_value(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r1 = -2147483648; \
+ w1 s>>= 1; /* r1 = 0xC0000000 */ \
+ r2 = 0xC0000000 ll; \
+ if r1 == r2 goto l0_%=; \
+ r0 /= 0; /* unreachable */ \
+l0_%=: exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __retval(1)
+__naked void lsh32_reg0_zero_extend_check(void)
+{
+ asm volatile (" \
+ r6 = 1; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w1 = 0; \
+ w0 <<= w1; /* reg shift by 0 */ \
+ r0 >>= 32; /* must be 0 */ \
+ if r0 == 0 goto l0_%=; \
+ r6 /= 0; /* unreachable */ \
+l0_%=: r0 = r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __retval(1)
+__naked void rsh32_reg0_zero_extend_check(void)
+{
+ asm volatile (" \
+ r6 = 1; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w1 = 0; \
+ w0 >>= w1; /* reg rsh by 0 */ \
+ r0 >>= 32; /* must be 0 */ \
+ if r0 == 0 goto l0_%=; \
+ r6 /= 0; /* unreachable */ \
+l0_%=: r0 = r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __retval(1)
+__naked void arsh32_reg0_zero_extend_check(void)
+{
+ asm volatile (" \
+ r6 = 1; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w1 = 0; \
+ w0 s>>= w1; /* reg arsh by 0 */ \
+ r0 >>= 32; /* must be 0 */ \
+ if r0 == 0 goto l0_%=; \
+ r6 /= 0; /* unreachable */ \
+l0_%=: r0 = r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __retval(42)
+__naked void lsh32_imm31_value(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r1 = 1; \
+ w1 <<= 31; /* r1 = 0x80000000 */ \
+ r2 = 0x80000000 ll; \
+ if r1 == r2 goto l0_%=; \
+ r0 /= 0; /* unreachable */ \
+l0_%=: exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __retval(42)
+__naked void rsh32_imm31_value(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r1 = -2147483648; /* 0x80000000 */ \
+ w1 >>= 31; /* r1 = 1 */ \
+ if r1 == 1 goto l0_%=; \
+ r0 /= 0; /* unreachable */ \
+l0_%=: exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __retval(42)
+__naked void arsh32_imm31_value(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r1 = -2147483648; /* 0x80000000 */ \
+ w1 s>>= 31; /* r1 = 0xFFFFFFFF */ \
+ r2 = 0xFFFFFFFF ll; \
+ if r1 == r2 goto l0_%=; \
+ r0 /= 0; /* unreachable */ \
+l0_%=: exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __retval(1)
+__naked void lsh32_unknown_precise_bounds(void)
+{
+ asm volatile (" \
+ r6 = 1; \
+ call %[bpf_get_prandom_u32]; \
+ w0 &= 3; /* u32: [0, 3] */ \
+ w0 <<= 1; /* u32: [0, 6] */ \
+ if w0 < 7 goto l0_%=; \
+ r6 /= 0; /* unreachable */ \
+l0_%=: r0 = r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __retval(1)
+__naked void rsh32_unknown_bounds(void)
+{
+ asm volatile (" \
+ r6 = 1; \
+ call %[bpf_get_prandom_u32]; \
+ w0 >>= 28; /* u32: [0, 15] */ \
+ if w0 < 16 goto l0_%=; \
+ r6 /= 0; /* unreachable */ \
+l0_%=: r0 = r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
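The subreg tests added above all exercise the same BPF rule: an ALU32 (wN) operation writes a 32-bit result into the low half of the destination register and zeroes the upper half, even for a shift by 0. A rough C model of that semantic (hypothetical helpers, not kernel code; assumes >> on negative signed values is an arithmetic shift, as on gcc/clang):

#include <assert.h>
#include <stdint.h>

/* Rough model of BPF ALU32 shifts: compute in 32 bits, then
 * zero-extend into the 64-bit destination register.
 */
static uint64_t alu32_lsh(uint64_t dst, uint32_t shift)
{
	uint32_t lo = (uint32_t)dst << (shift & 31);

	return (uint64_t)lo;			/* upper 32 bits cleared */
}

static uint64_t alu32_arsh(uint64_t dst, uint32_t shift)
{
	int32_t lo = (int32_t)dst >> (shift & 31);

	return (uint64_t)(uint32_t)lo;		/* still zero-extended */
}

int main(void)
{
	/* a shift by 0 must still clear the upper half, as in
	 * lsh32_reg0_zero_extend_check above
	 */
	assert(alu32_lsh(0x1000000005ULL, 0) == 5);
	/* w1 s>>= 1 on 0x80000000 gives 0xC0000000, zero-extended,
	 * as in arsh32_imm1_value above
	 */
	assert(alu32_arsh(0x80000000ULL, 1) == 0xC0000000ULL);
	return 0;
}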
diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
index 8ee1243e62a8..c16f8382cf17 100644
--- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
@@ -584,7 +584,7 @@ __naked void alu32_mov_u32_const(void)
{
asm volatile (" \
w7 = 0; \
- w7 &= 1; \
+ w7 ^= w7; \
w0 = w7; \
if r0 == 0 goto l0_%=; \
r0 = *(u64*)(r7 + 0); \
@@ -894,7 +894,9 @@ __naked void unpriv_spectre_v1_and_v4_simple(void)
{
asm volatile (" \
r8 = 0; \
+ r8 ^= r8; \
r9 = 0; \
+ r9 ^= r9; \
r0 = r10; \
r1 = 0; \
r2 = r10; \
@@ -932,7 +934,9 @@ __naked void unpriv_ldimm64_spectre_v1_and_v4_simple(void)
{
asm volatile (" \
r8 = 0; \
+ r8 ^= r8; \
r9 = 0; \
+ r9 ^= r9; \
r0 = r10; \
r1 = 0; \
r2 = r10; \
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
index af7938ce56cb..b3b701b44550 100644
--- a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
@@ -346,7 +346,7 @@ l2_%=: r0 = 1; \
SEC("socket")
__description("map access: value_ptr -= known scalar from different maps")
__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__msg_unpriv("R0 min value is negative")
__retval(1)
__naked void known_scalar_from_different_maps(void)
{
@@ -683,9 +683,7 @@ l0_%=: r0 = 1; \
SEC("socket")
__description("map access: value_ptr -= known scalar, lower oob arith, test 1")
-__failure __msg("R0 min value is outside of the allowed memory range")
-__failure_unpriv
-__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__failure __msg("R0 min value is negative")
__naked void lower_oob_arith_test_1(void)
{
asm volatile (" \
@@ -840,7 +838,7 @@ l0_%=: r0 = 1; \
SEC("socket")
__description("map access: value_ptr += known scalar, 3")
-__failure __msg("invalid access to map value")
+__failure __msg("R0 min value is negative")
__failure_unpriv
__naked void value_ptr_known_scalar_3(void)
{
@@ -1207,7 +1205,7 @@ l0_%=: r0 = 1; \
SEC("socket")
__description("map access: value_ptr -= known scalar")
-__failure __msg("R0 min value is outside of the allowed memory range")
+__failure __msg("R0 min value is negative")
__failure_unpriv
__naked void access_value_ptr_known_scalar(void)
{
diff --git a/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c
index df2dfd1b15d1..0b86d95a4133 100644
--- a/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c
@@ -69,7 +69,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_data' > pkt_end, bad access 1")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void pkt_end_bad_access_1_1(void)
{
@@ -131,7 +131,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_data' > pkt_end, corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void corner_case_1_bad_access_1(void)
{
@@ -173,7 +173,7 @@ l1_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_end > pkt_data', corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void corner_case_1_bad_access_2(void)
{
@@ -279,7 +279,7 @@ l1_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_data' < pkt_end, corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void corner_case_1_bad_access_3(void)
{
@@ -384,7 +384,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_end < pkt_data', bad access 1")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void pkt_data_bad_access_1_1(void)
{
@@ -446,7 +446,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_end < pkt_data', corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void corner_case_1_bad_access_4(void)
{
@@ -487,7 +487,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_data' >= pkt_end, corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void corner_case_1_bad_access_5(void)
{
@@ -590,7 +590,7 @@ l1_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_end >= pkt_data', bad access 1")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void pkt_data_bad_access_1_2(void)
{
@@ -654,7 +654,7 @@ l1_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_end >= pkt_data', corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void corner_case_1_bad_access_6(void)
{
@@ -697,7 +697,7 @@ l1_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_data' <= pkt_end, bad access 1")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void pkt_end_bad_access_1_2(void)
{
@@ -761,7 +761,7 @@ l1_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_data' <= pkt_end, corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void corner_case_1_bad_access_7(void)
{
@@ -803,7 +803,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_end <= pkt_data', corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void corner_case_1_bad_access_8(void)
{
@@ -905,7 +905,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_meta' > pkt_data, bad access 1")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void pkt_data_bad_access_1_3(void)
{
@@ -926,7 +926,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_meta' > pkt_data, bad access 2")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void pkt_data_bad_access_2_5(void)
{
@@ -967,7 +967,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_meta' > pkt_data, corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void corner_case_1_bad_access_9(void)
{
@@ -1009,7 +1009,7 @@ l1_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_data > pkt_meta', corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void corner_case_1_bad_access_10(void)
{
@@ -1031,7 +1031,7 @@ l1_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_data > pkt_meta', bad access 2")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void pkt_meta_bad_access_2_1(void)
{
@@ -1115,7 +1115,7 @@ l1_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_meta' < pkt_data, corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void corner_case_1_bad_access_11(void)
{
@@ -1137,7 +1137,7 @@ l1_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_meta' < pkt_data, bad access 2")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void pkt_data_bad_access_2_6(void)
{
@@ -1220,7 +1220,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_data < pkt_meta', bad access 1")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void pkt_meta_bad_access_1_1(void)
{
@@ -1241,7 +1241,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_data < pkt_meta', bad access 2")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void pkt_meta_bad_access_2_2(void)
{
@@ -1282,7 +1282,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_data < pkt_meta', corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void corner_case_1_bad_access_12(void)
{
@@ -1323,7 +1323,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_meta' >= pkt_data, corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void corner_case_1_bad_access_13(void)
{
@@ -1344,7 +1344,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_meta' >= pkt_data, bad access 2")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void pkt_data_bad_access_2_7(void)
{
@@ -1426,7 +1426,7 @@ l1_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_data >= pkt_meta', bad access 1")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void pkt_meta_bad_access_1_2(void)
{
@@ -1448,7 +1448,7 @@ l1_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_data >= pkt_meta', bad access 2")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void pkt_meta_bad_access_2_3(void)
{
@@ -1490,7 +1490,7 @@ l1_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_data >= pkt_meta', corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void corner_case_1_bad_access_14(void)
{
@@ -1533,7 +1533,7 @@ l1_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_meta' <= pkt_data, bad access 1")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void pkt_data_bad_access_1_4(void)
{
@@ -1555,7 +1555,7 @@ l1_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_meta' <= pkt_data, bad access 2")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void pkt_data_bad_access_2_8(void)
{
@@ -1597,7 +1597,7 @@ l1_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_meta' <= pkt_data, corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void corner_case_1_bad_access_15(void)
{
@@ -1639,7 +1639,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_data <= pkt_meta', corner case -1, bad access")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void corner_case_1_bad_access_16(void)
{
@@ -1660,7 +1660,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("XDP pkt read, pkt_data <= pkt_meta', bad access 2")
-__failure __msg("R1 offset is outside of the packet")
+__failure __msg("R1 {{min|max}} value is outside of the allowed memory range")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void pkt_meta_bad_access_2_4(void)
{
diff --git a/tools/testing/selftests/bpf/test_bpftool.py b/tools/testing/selftests/bpf/test_bpftool.py
deleted file mode 100644
index 1c2408ee1f5d..000000000000
--- a/tools/testing/selftests/bpf/test_bpftool.py
+++ /dev/null
@@ -1,174 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# Copyright (c) 2020 SUSE LLC.
-
-import collections
-import functools
-import json
-import os
-import socket
-import subprocess
-import unittest
-
-
-# Add the source tree of bpftool and /usr/local/sbin to PATH
-cur_dir = os.path.dirname(os.path.realpath(__file__))
-bpftool_dir = os.path.abspath(os.path.join(cur_dir, "..", "..", "..", "..",
- "tools", "bpf", "bpftool"))
-os.environ["PATH"] = bpftool_dir + ":/usr/local/sbin:" + os.environ["PATH"]
-
-
-class IfaceNotFoundError(Exception):
- pass
-
-
-class UnprivilegedUserError(Exception):
- pass
-
-
-def _bpftool(args, json=True):
- _args = ["bpftool"]
- if json:
- _args.append("-j")
- _args.extend(args)
-
- return subprocess.check_output(_args)
-
-
-def bpftool(args):
- return _bpftool(args, json=False).decode("utf-8")
-
-
-def bpftool_json(args):
- res = _bpftool(args)
- return json.loads(res)
-
-
-def get_default_iface():
- for iface in socket.if_nameindex():
- if iface[1] != "lo":
- return iface[1]
- raise IfaceNotFoundError("Could not find any network interface to probe")
-
-
-def default_iface(f):
- @functools.wraps(f)
- def wrapper(*args, **kwargs):
- iface = get_default_iface()
- return f(*args, iface, **kwargs)
- return wrapper
-
-DMESG_EMITTING_HELPERS = [
- "bpf_probe_write_user",
- "bpf_trace_printk",
- "bpf_trace_vprintk",
- ]
-
-class TestBpftool(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- if os.getuid() != 0:
- raise UnprivilegedUserError(
- "This test suite needs root privileges")
-
- @default_iface
- def test_feature_dev_json(self, iface):
- unexpected_helpers = DMESG_EMITTING_HELPERS
- expected_keys = [
- "syscall_config",
- "program_types",
- "map_types",
- "helpers",
- "misc",
- ]
-
- res = bpftool_json(["feature", "probe", "dev", iface])
- # Check if the result has all expected keys.
- self.assertCountEqual(res.keys(), expected_keys)
- # Check if unexpected helpers are not included in helpers probes
- # result.
- for helpers in res["helpers"].values():
- for unexpected_helper in unexpected_helpers:
- self.assertNotIn(unexpected_helper, helpers)
-
- def test_feature_kernel(self):
- test_cases = [
- bpftool_json(["feature", "probe", "kernel"]),
- bpftool_json(["feature", "probe"]),
- bpftool_json(["feature"]),
- ]
- unexpected_helpers = DMESG_EMITTING_HELPERS
- expected_keys = [
- "syscall_config",
- "system_config",
- "program_types",
- "map_types",
- "helpers",
- "misc",
- ]
-
- for tc in test_cases:
- # Check if the result has all expected keys.
- self.assertCountEqual(tc.keys(), expected_keys)
- # Check if unexpected helpers are not included in helpers probes
- # result.
- for helpers in tc["helpers"].values():
- for unexpected_helper in unexpected_helpers:
- self.assertNotIn(unexpected_helper, helpers)
-
- def test_feature_kernel_full(self):
- test_cases = [
- bpftool_json(["feature", "probe", "kernel", "full"]),
- bpftool_json(["feature", "probe", "full"]),
- ]
- expected_helpers = DMESG_EMITTING_HELPERS
-
- for tc in test_cases:
- # Check if expected helpers are included at least once in any
- # helpers list for any program type. Unfortunately we cannot assume
- # that they will be included in all program types or a specific
- # subset of programs. It depends on the kernel version and
- # configuration.
- found_helpers = False
-
- for helpers in tc["helpers"].values():
- if all(expected_helper in helpers
- for expected_helper in expected_helpers):
- found_helpers = True
- break
-
- self.assertTrue(found_helpers)
-
- def test_feature_kernel_full_vs_not_full(self):
- full_res = bpftool_json(["feature", "probe", "full"])
- not_full_res = bpftool_json(["feature", "probe"])
- not_full_set = set()
- full_set = set()
-
- for helpers in full_res["helpers"].values():
- for helper in helpers:
- full_set.add(helper)
-
- for helpers in not_full_res["helpers"].values():
- for helper in helpers:
- not_full_set.add(helper)
-
- self.assertCountEqual(full_set - not_full_set,
- set(DMESG_EMITTING_HELPERS))
- self.assertCountEqual(not_full_set - full_set, set())
-
- def test_feature_macros(self):
- expected_patterns = [
- r"/\*\*\* System call availability \*\*\*/",
- r"#define HAVE_BPF_SYSCALL",
- r"/\*\*\* eBPF program types \*\*\*/",
- r"#define HAVE.*PROG_TYPE",
- r"/\*\*\* eBPF map types \*\*\*/",
- r"#define HAVE.*MAP_TYPE",
- r"/\*\*\* eBPF helper functions \*\*\*/",
- r"#define HAVE.*HELPER",
- r"/\*\*\* eBPF misc features \*\*\*/",
- ]
-
- res = bpftool(["feature", "probe", "macros"])
- for pattern in expected_patterns:
- self.assertRegex(res, pattern)
diff --git a/tools/testing/selftests/bpf/test_bpftool.sh b/tools/testing/selftests/bpf/test_bpftool.sh
deleted file mode 100755
index 718f59692ccb..000000000000
--- a/tools/testing/selftests/bpf/test_bpftool.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-# Copyright (c) 2020 SUSE LLC.
-
-# 'make -C tools/testing/selftests/bpf install' will install to SCRIPT_DIR
-SCRIPT_DIR=$(dirname $(realpath $0))
-
-# 'make -C tools/testing/selftests/bpf' will install to BPFTOOL_INSTALL_PATH
-BPFTOOL_INSTALL_PATH="$SCRIPT_DIR"/tools/sbin
-export PATH=$SCRIPT_DIR:$BPFTOOL_INSTALL_PATH:$PATH
-python3 -m unittest -v test_bpftool.TestBpftool
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index e62c6b78657f..d876314a4d67 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -470,6 +470,11 @@ noinline void bpf_testmod_stacktrace_test_1(void)
int bpf_testmod_fentry_ok;
+noinline int bpf_testmod_trampoline_count_test(void)
+{
+ return 0;
+}
+
noinline ssize_t
bpf_testmod_test_read(struct file *file, struct kobject *kobj,
const struct bin_attribute *bin_attr,
@@ -548,6 +553,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
21, 22, 23, 24, 25, 26) != 231)
goto out;
+ bpf_testmod_trampoline_count_test();
+
bpf_testmod_stacktrace_test_1();
bpf_testmod_fentry_ok = 1;
@@ -716,6 +723,7 @@ BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_value)
BTF_ID_FLAGS(func, bpf_kfunc_common_test)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1)
BTF_ID_FLAGS(func, bpf_kfunc_dynptr_test)
BTF_ID_FLAGS(func, bpf_kfunc_nested_acquire_nonzero_offset_test, KF_ACQUIRE)
BTF_ID_FLAGS(func, bpf_kfunc_nested_acquire_zero_offset_test, KF_ACQUIRE)
@@ -760,12 +768,63 @@ __bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk)
__bpf_kfunc long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d)
{
- /* Provoke the compiler to assume that the caller has sign-extended a,
+ /*
+	 * Make val volatile to prevent compiler optimizations.
+ * Verify that negative signed values remain negative after
+ * sign-extension (JIT must sign-extend, not zero-extend).
+ */
+ volatile long val;
+
+	/* val will be positive if the JIT zero-extends instead of sign-extending */
+ val = a;
+ if (val >= 0)
+ return 1;
+
+ val = b;
+ if (val >= 0)
+ return 2;
+
+ val = c;
+ if (val >= 0)
+ return 3;
+
+ /*
+ * Provoke the compiler to assume that the caller has sign-extended a,
* b and c on platforms where this is required (e.g. s390x).
*/
return (long)a + (long)b + (long)c + d;
}
+__bpf_kfunc int bpf_kfunc_call_test5(u8 a, u16 b, u32 c)
+{
+ /*
+	 * Make val volatile to keep the compiler from optimizing away the
+	 * checks below. In C, assigning u8/u16/u32 to long zero-extends.
+ */
+ volatile long val = a;
+
+ /* Check zero-extension */
+ if (val != (unsigned long)a)
+ return 1;
+ /* Check no sign-extension */
+ if (val < 0)
+ return 2;
+
+ val = b;
+ if (val != (unsigned long)b)
+ return 3;
+ if (val < 0)
+ return 4;
+
+ val = c;
+ if (val != (unsigned long)c)
+ return 5;
+ if (val < 0)
+ return 6;
+
+ return 0;
+}
+
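A BPF-side caller for the new kfunc could look like the sketch below (program name, section, and include path are illustrative, not part of this patch); a non-zero return value identifies which argument width lost its zero-extension:

// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include "../test_kmods/bpf_testmod_kfunc.h"

SEC("tc")
int kfunc_zext_check(struct __sk_buff *skb)
{
	/* high bit set in each width stresses zero- vs sign-extension;
	 * 0 on success, 1..6 names the failing check in test5 above
	 */
	return bpf_kfunc_call_test5(0x80, 0x8000, 0x80000000);
}

char _license[] SEC("license") = "GPL";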
static struct prog_test_ref_kfunc prog_test_struct = {
.a = 42,
.b = 108,
@@ -1228,7 +1287,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test2)
BTF_ID_FLAGS(func, bpf_kfunc_call_test3)
BTF_ID_FLAGS(func, bpf_kfunc_call_test4)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test5)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL)
@@ -1359,6 +1418,12 @@ static int bpf_testmod_ops__test_refcounted(int dummy,
return 0;
}
+static int bpf_testmod_ops__test_refcounted_multi(int dummy, struct task_struct *task__nullable,
+ struct task_struct *task__ref)
+{
+ return 0;
+}
+
static struct task_struct *
bpf_testmod_ops__test_return_ref_kptr(int dummy, struct task_struct *task__ref,
struct cgroup *cgrp)
@@ -1371,6 +1436,7 @@ static struct bpf_testmod_ops __bpf_testmod_ops = {
.test_2 = bpf_testmod_test_2,
.test_maybe_null = bpf_testmod_ops__test_maybe_null,
.test_refcounted = bpf_testmod_ops__test_refcounted,
+ .test_refcounted_multi = bpf_testmod_ops__test_refcounted_multi,
.test_return_ref_kptr = bpf_testmod_ops__test_return_ref_kptr,
};
@@ -1843,6 +1909,16 @@ struct bpf_struct_ops testmod_multi_st_ops = {
extern int bpf_fentry_test1(int a);
+BTF_KFUNCS_START(bpf_testmod_trampoline_count_ids)
+BTF_ID_FLAGS(func, bpf_testmod_trampoline_count_test)
+BTF_KFUNCS_END(bpf_testmod_trampoline_count_ids)
+
+static const struct
+btf_kfunc_id_set bpf_testmod_trampoline_count_fmodret_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_testmod_trampoline_count_ids,
+};
+
static int bpf_testmod_init(void)
{
const struct btf_id_dtor_kfunc bpf_testmod_dtors[] = {
@@ -1859,6 +1935,7 @@ static int bpf_testmod_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_testmod_kfunc_set);
+ ret = ret ?: register_btf_fmodret_id_set(&bpf_testmod_trampoline_count_fmodret_set);
ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops);
ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2);
ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops3, bpf_testmod_ops3);
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
index f6e492f9d042..863fd10f1619 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
@@ -39,6 +39,9 @@ struct bpf_testmod_ops {
int (*unsupported_ops)(void);
/* Used to test ref_acquired arguments. */
int (*test_refcounted)(int dummy, struct task_struct *task);
+	/* Used to test checking of __ref arguments when it is not the first argument. */
+ int (*test_refcounted_multi)(int dummy, struct task_struct *task,
+ struct task_struct *task2);
/* Used to test returning referenced kptr. */
struct task_struct *(*test_return_ref_kptr)(int dummy, struct task_struct *task,
struct cgroup *cgrp);
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
index b393bf771131..aa0b8d41e71b 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
@@ -110,6 +110,7 @@ __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym;
long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym;
+int bpf_kfunc_call_test5(__u8 a, __u16 b, __u32 c) __ksym;
void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym;
void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym;
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 338c035c3688..c4c34cae6102 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
#include <linux/capability.h>
+#include <linux/err.h>
#include <stdlib.h>
#include <test_progs.h>
#include <bpf/btf.h>
@@ -11,39 +12,15 @@
#include "cap_helpers.h"
#include "jit_disasm_helpers.h"
-#define str_has_pfx(str, pfx) \
- (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0)
+static inline const char *str_has_pfx(const char *str, const char *pfx)
+{
+ size_t len = strlen(pfx);
+
+ return strncmp(str, pfx, len) == 0 ? str + len : NULL;
+}
#define TEST_LOADER_LOG_BUF_SZ 2097152
-#define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure"
-#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success"
-#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg="
-#define TEST_TAG_EXPECT_NOT_MSG_PFX "comment:test_expect_not_msg="
-#define TEST_TAG_EXPECT_XLATED_PFX "comment:test_expect_xlated="
-#define TEST_TAG_EXPECT_FAILURE_UNPRIV "comment:test_expect_failure_unpriv"
-#define TEST_TAG_EXPECT_SUCCESS_UNPRIV "comment:test_expect_success_unpriv"
-#define TEST_TAG_EXPECT_MSG_PFX_UNPRIV "comment:test_expect_msg_unpriv="
-#define TEST_TAG_EXPECT_NOT_MSG_PFX_UNPRIV "comment:test_expect_not_msg_unpriv="
-#define TEST_TAG_EXPECT_XLATED_PFX_UNPRIV "comment:test_expect_xlated_unpriv="
-#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level="
-#define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags="
-#define TEST_TAG_DESCRIPTION_PFX "comment:test_description="
-#define TEST_TAG_RETVAL_PFX "comment:test_retval="
-#define TEST_TAG_RETVAL_PFX_UNPRIV "comment:test_retval_unpriv="
-#define TEST_TAG_AUXILIARY "comment:test_auxiliary"
-#define TEST_TAG_AUXILIARY_UNPRIV "comment:test_auxiliary_unpriv"
-#define TEST_BTF_PATH "comment:test_btf_path="
-#define TEST_TAG_ARCH "comment:test_arch="
-#define TEST_TAG_JITED_PFX "comment:test_jited="
-#define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv="
-#define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv="
-#define TEST_TAG_LOAD_MODE_PFX "comment:load_mode="
-#define TEST_TAG_EXPECT_STDERR_PFX "comment:test_expect_stderr="
-#define TEST_TAG_EXPECT_STDERR_PFX_UNPRIV "comment:test_expect_stderr_unpriv="
-#define TEST_TAG_EXPECT_STDOUT_PFX "comment:test_expect_stdout="
-#define TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV "comment:test_expect_stdout_unpriv="
-#define TEST_TAG_LINEAR_SIZE "comment:test_linear_size="
/* Warning: duplicated in bpf_misc.h */
#define POINTER_VALUE 0xbadcafe
@@ -69,6 +46,7 @@ enum load_mode {
struct test_subspec {
char *name;
+ char *description;
bool expect_failure;
struct expected_msgs expect_msgs;
struct expected_msgs expect_xlated;
@@ -142,9 +120,13 @@ static void free_test_spec(struct test_spec *spec)
free_msgs(&spec->priv.stdout);
free(spec->priv.name);
+ free(spec->priv.description);
free(spec->unpriv.name);
+ free(spec->unpriv.description);
spec->priv.name = NULL;
+ spec->priv.description = NULL;
spec->unpriv.name = NULL;
+ spec->unpriv.description = NULL;
}
/* Compiles regular expression matching pattern.
@@ -161,21 +143,21 @@ static void free_test_spec(struct test_spec *spec)
static int compile_regex(const char *pattern, regex_t *regex)
{
char err_buf[256], buf[256] = {}, *ptr, *buf_end;
- const char *original_pattern = pattern;
+ const char *original_pattern = pattern, *next;
bool in_regex = false;
int err;
buf_end = buf + sizeof(buf);
ptr = buf;
while (*pattern && ptr < buf_end - 2) {
- if (!in_regex && str_has_pfx(pattern, "{{")) {
+ if (!in_regex && (next = str_has_pfx(pattern, "{{"))) {
in_regex = true;
- pattern += 2;
+ pattern = next;
continue;
}
- if (in_regex && str_has_pfx(pattern, "}}")) {
+ if (in_regex && (next = str_has_pfx(pattern, "}}"))) {
in_regex = false;
- pattern += 2;
+ pattern = next;
continue;
}
if (in_regex) {
@@ -343,33 +325,49 @@ static void update_flags(int *flags, int flag, bool clear)
*flags |= flag;
}
-/* Matches a string of form '<pfx>[^=]=.*' and returns it's suffix.
- * Used to parse btf_decl_tag values.
- * Such values require unique prefix because compiler does not add
- * same __attribute__((btf_decl_tag(...))) twice.
- * Test suite uses two-component tags for such cases:
- *
- * <pfx> __COUNTER__ '='
- *
- * For example, two consecutive __msg tags '__msg("foo") __msg("foo")'
- * would be encoded as:
- *
- * [18] DECL_TAG 'comment:test_expect_msg=0=foo' type_id=15 component_idx=-1
- * [19] DECL_TAG 'comment:test_expect_msg=1=foo' type_id=15 component_idx=-1
- *
- * And the purpose of this function is to extract 'foo' from the above.
- */
-static const char *skip_dynamic_pfx(const char *s, const char *pfx)
+static const char *skip_decl_tag_pfx(const char *s)
{
- const char *msg;
+ int n = 0;
- if (strncmp(s, pfx, strlen(pfx)) != 0)
+ if (sscanf(s, "comment:%*d:%n", &n) < 0 || !n)
return NULL;
- msg = s + strlen(pfx);
- msg = strchr(msg, '=');
- if (!msg)
- return NULL;
- return msg + 1;
+ return s + n;
+}
+
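The "%n" conversion in skip_decl_tag_pfx() stores the number of characters sscanf() has consumed, so a successful match of "comment:<number>:" leaves n pointing just past the prefix, while n stays 0 on a mismatch. A standalone illustration, with a hypothetical tag string:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	const char *tag = "comment:42:test_retval=7";
	int n = 0;

	/* %*d matches and discards the counter; %n records how many
	 * characters were consumed, i.e. the length of "comment:42:"
	 */
	sscanf(tag, "comment:%*d:%n", &n);
	assert(n == 11);
	printf("payload: %s\n", tag + n);	/* test_retval=7 */
	return 0;
}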
+static int compare_decl_tags(const void *a, const void *b)
+{
+ return strverscmp(*(const char **)a, *(const char **)b);
+}
+
+/*
+ * Compilers don't guarantee the order in which BTF decl tags are generated,
+ * but the order matters for test tags like __msg.
+ * Each test tag carries the prefix "comment:" __COUNTER__ ":", so sorting
+ * the tags with strverscmp() restores their order in the original C code.
+ */
+static const char **collect_decl_tags(struct btf *btf, int id, int *cnt)
+{
+ const char **tmp, **tags = NULL;
+ const struct btf_type *t;
+ int i;
+
+ *cnt = 0;
+ for (i = 1; i < btf__type_cnt(btf); i++) {
+ t = btf__type_by_id(btf, i);
+ if (!btf_is_decl_tag(t) || t->type != id || btf_decl_tag(t)->component_idx != -1)
+ continue;
+ tmp = realloc(tags, (*cnt + 1) * sizeof(*tags));
+ if (!tmp) {
+ free(tags);
+ return ERR_PTR(-ENOMEM);
+ }
+ tags = tmp;
+ tags[(*cnt)++] = btf__str_by_offset(btf, t->name_off);
+ }
+
+ if (*cnt)
+ qsort(tags, *cnt, sizeof(*tags), compare_decl_tags);
+ return tags;
}
enum arch {
@@ -415,7 +413,9 @@ static int parse_test_spec(struct test_loader *tester,
bool stdout_on_next_line = true;
bool unpriv_stdout_on_next_line = true;
bool collect_jit = false;
- int func_id, i, err = 0;
+ const char **tags = NULL;
+ int func_id, i, nr_tags;
+ int err = 0;
u32 arch_mask = 0;
u32 load_mask = 0;
struct btf *btf;
@@ -438,63 +438,61 @@ static int parse_test_spec(struct test_loader *tester,
return -EINVAL;
}
- for (i = 1; i < btf__type_cnt(btf); i++) {
+ tags = collect_decl_tags(btf, func_id, &nr_tags);
+ if (IS_ERR(tags))
+ return PTR_ERR(tags);
+
+ for (i = 0; i < nr_tags; i++) {
const char *s, *val, *msg;
- const struct btf_type *t;
bool clear;
int flags;
- t = btf__type_by_id(btf, i);
- if (!btf_is_decl_tag(t))
- continue;
-
- if (t->type != func_id || btf_decl_tag(t)->component_idx != -1)
+ s = skip_decl_tag_pfx(tags[i]);
+ if (!s)
continue;
-
- s = btf__str_by_offset(btf, t->name_off);
- if (str_has_pfx(s, TEST_TAG_DESCRIPTION_PFX)) {
- description = s + sizeof(TEST_TAG_DESCRIPTION_PFX) - 1;
- } else if (strcmp(s, TEST_TAG_EXPECT_FAILURE) == 0) {
+ if ((val = str_has_pfx(s, "test_description="))) {
+ description = val;
+ } else if (strcmp(s, "test_expect_failure") == 0) {
spec->priv.expect_failure = true;
spec->mode_mask |= PRIV;
- } else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS) == 0) {
+ } else if (strcmp(s, "test_expect_success") == 0) {
spec->priv.expect_failure = false;
spec->mode_mask |= PRIV;
- } else if (strcmp(s, TEST_TAG_EXPECT_FAILURE_UNPRIV) == 0) {
+ } else if (strcmp(s, "test_expect_failure_unpriv") == 0) {
spec->unpriv.expect_failure = true;
spec->mode_mask |= UNPRIV;
has_unpriv_result = true;
- } else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS_UNPRIV) == 0) {
+ } else if (strcmp(s, "test_expect_success_unpriv") == 0) {
spec->unpriv.expect_failure = false;
spec->mode_mask |= UNPRIV;
has_unpriv_result = true;
- } else if (strcmp(s, TEST_TAG_AUXILIARY) == 0) {
+ } else if (strcmp(s, "test_auxiliary") == 0) {
spec->auxiliary = true;
spec->mode_mask |= PRIV;
- } else if (strcmp(s, TEST_TAG_AUXILIARY_UNPRIV) == 0) {
+ } else if (strcmp(s, "test_auxiliary_unpriv") == 0) {
spec->auxiliary = true;
spec->mode_mask |= UNPRIV;
- } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_MSG_PFX))) {
+ } else if ((msg = str_has_pfx(s, "test_expect_msg="))) {
err = push_msg(msg, false, &spec->priv.expect_msgs);
if (err)
goto cleanup;
spec->mode_mask |= PRIV;
- } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_NOT_MSG_PFX))) {
+ } else if ((msg = str_has_pfx(s, "test_expect_not_msg="))) {
err = push_msg(msg, true, &spec->priv.expect_msgs);
if (err)
goto cleanup;
spec->mode_mask |= PRIV;
- } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_MSG_PFX_UNPRIV))) {
+ } else if ((msg = str_has_pfx(s, "test_expect_msg_unpriv="))) {
err = push_msg(msg, false, &spec->unpriv.expect_msgs);
if (err)
goto cleanup;
spec->mode_mask |= UNPRIV;
- } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_NOT_MSG_PFX_UNPRIV))) {
+ } else if ((msg = str_has_pfx(s, "test_expect_not_msg_unpriv="))) {
err = push_msg(msg, true, &spec->unpriv.expect_msgs);
if (err)
goto cleanup;
spec->mode_mask |= UNPRIV;
- } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_JITED_PFX))) {
+ } else if ((msg = str_has_pfx(s, "test_jited="))) {
if (arch_mask == 0) {
PRINT_FAIL("__jited used before __arch_*");
goto cleanup;
@@ -506,7 +504,7 @@ static int parse_test_spec(struct test_loader *tester,
goto cleanup;
spec->mode_mask |= PRIV;
}
- } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_JITED_PFX_UNPRIV))) {
+ } else if ((msg = str_has_pfx(s, "test_jited_unpriv="))) {
if (arch_mask == 0) {
PRINT_FAIL("__unpriv_jited used before __arch_*");
goto cleanup;
@@ -518,41 +516,36 @@ static int parse_test_spec(struct test_loader *tester,
goto cleanup;
spec->mode_mask |= UNPRIV;
}
- } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_XLATED_PFX))) {
+ } else if ((msg = str_has_pfx(s, "test_expect_xlated="))) {
err = push_disasm_msg(msg, &xlated_on_next_line,
&spec->priv.expect_xlated);
if (err)
goto cleanup;
spec->mode_mask |= PRIV;
- } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_XLATED_PFX_UNPRIV))) {
+ } else if ((msg = str_has_pfx(s, "test_expect_xlated_unpriv="))) {
err = push_disasm_msg(msg, &unpriv_xlated_on_next_line,
&spec->unpriv.expect_xlated);
if (err)
goto cleanup;
spec->mode_mask |= UNPRIV;
- } else if (str_has_pfx(s, TEST_TAG_RETVAL_PFX)) {
- val = s + sizeof(TEST_TAG_RETVAL_PFX) - 1;
+ } else if ((val = str_has_pfx(s, "test_retval="))) {
err = parse_retval(val, &spec->priv.retval, "__retval");
if (err)
goto cleanup;
spec->priv.execute = true;
spec->mode_mask |= PRIV;
- } else if (str_has_pfx(s, TEST_TAG_RETVAL_PFX_UNPRIV)) {
- val = s + sizeof(TEST_TAG_RETVAL_PFX_UNPRIV) - 1;
+ } else if ((val = str_has_pfx(s, "test_retval_unpriv="))) {
err = parse_retval(val, &spec->unpriv.retval, "__retval_unpriv");
if (err)
goto cleanup;
spec->mode_mask |= UNPRIV;
spec->unpriv.execute = true;
has_unpriv_retval = true;
- } else if (str_has_pfx(s, TEST_TAG_LOG_LEVEL_PFX)) {
- val = s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1;
+ } else if ((val = str_has_pfx(s, "test_log_level="))) {
err = parse_int(val, &spec->log_level, "test log level");
if (err)
goto cleanup;
- } else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) {
- val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1;
-
+ } else if ((val = str_has_pfx(s, "test_prog_flags="))) {
clear = val[0] == '!';
if (clear)
val++;
@@ -577,8 +570,7 @@ static int parse_test_spec(struct test_loader *tester,
goto cleanup;
update_flags(&spec->prog_flags, flags, clear);
}
- } else if (str_has_pfx(s, TEST_TAG_ARCH)) {
- val = s + sizeof(TEST_TAG_ARCH) - 1;
+ } else if ((val = str_has_pfx(s, "test_arch="))) {
if (strcmp(val, "X86_64") == 0) {
arch = ARCH_X86_64;
} else if (strcmp(val, "ARM64") == 0) {
@@ -596,16 +588,14 @@ static int parse_test_spec(struct test_loader *tester,
collect_jit = get_current_arch() == arch;
unpriv_jit_on_next_line = true;
jit_on_next_line = true;
- } else if (str_has_pfx(s, TEST_BTF_PATH)) {
- spec->btf_custom_path = s + sizeof(TEST_BTF_PATH) - 1;
- } else if (str_has_pfx(s, TEST_TAG_CAPS_UNPRIV)) {
- val = s + sizeof(TEST_TAG_CAPS_UNPRIV) - 1;
+ } else if ((val = str_has_pfx(s, "test_btf_path="))) {
+ spec->btf_custom_path = val;
+ } else if ((val = str_has_pfx(s, "test_caps_unpriv="))) {
err = parse_caps(val, &spec->unpriv.caps, "test caps");
if (err)
goto cleanup;
spec->mode_mask |= UNPRIV;
- } else if (str_has_pfx(s, TEST_TAG_LOAD_MODE_PFX)) {
- val = s + sizeof(TEST_TAG_LOAD_MODE_PFX) - 1;
+ } else if ((val = str_has_pfx(s, "load_mode="))) {
if (strcmp(val, "jited") == 0) {
load_mask = JITED;
} else if (strcmp(val, "no_jited") == 0) {
@@ -615,32 +605,31 @@ static int parse_test_spec(struct test_loader *tester,
err = -EINVAL;
goto cleanup;
}
- } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX))) {
+ } else if ((msg = str_has_pfx(s, "test_expect_stderr="))) {
err = push_disasm_msg(msg, &stderr_on_next_line,
&spec->priv.stderr);
if (err)
goto cleanup;
- } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX_UNPRIV))) {
+ } else if ((msg = str_has_pfx(s, "test_expect_stderr_unpriv="))) {
err = push_disasm_msg(msg, &unpriv_stderr_on_next_line,
&spec->unpriv.stderr);
if (err)
goto cleanup;
- } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX))) {
+ } else if ((msg = str_has_pfx(s, "test_expect_stdout="))) {
err = push_disasm_msg(msg, &stdout_on_next_line,
&spec->priv.stdout);
if (err)
goto cleanup;
- } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV))) {
+ } else if ((msg = str_has_pfx(s, "test_expect_stdout_unpriv="))) {
err = push_disasm_msg(msg, &unpriv_stdout_on_next_line,
&spec->unpriv.stdout);
if (err)
goto cleanup;
- } else if (str_has_pfx(s, TEST_TAG_LINEAR_SIZE)) {
+ } else if ((val = str_has_pfx(s, "test_linear_size="))) {
switch (bpf_program__type(prog)) {
case BPF_PROG_TYPE_SCHED_ACT:
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_CGROUP_SKB:
- val = s + sizeof(TEST_TAG_LINEAR_SIZE) - 1;
err = parse_int(val, &spec->linear_sz, "test linear size");
if (err)
goto cleanup;
@@ -659,33 +648,56 @@ static int parse_test_spec(struct test_loader *tester,
if (spec->mode_mask == 0)
spec->mode_mask = PRIV;
- if (!description)
- description = spec->prog_name;
-
if (spec->mode_mask & PRIV) {
- spec->priv.name = strdup(description);
+ spec->priv.name = strdup(spec->prog_name);
if (!spec->priv.name) {
PRINT_FAIL("failed to allocate memory for priv.name\n");
err = -ENOMEM;
goto cleanup;
}
+
+ if (description) {
+ spec->priv.description = strdup(description);
+ if (!spec->priv.description) {
+ PRINT_FAIL("failed to allocate memory for priv.description\n");
+ err = -ENOMEM;
+ goto cleanup;
+ }
+ }
}
if (spec->mode_mask & UNPRIV) {
- int descr_len = strlen(description);
+ int name_len = strlen(spec->prog_name);
const char *suffix = " @unpriv";
+ int suffix_len = strlen(suffix);
char *name;
- name = malloc(descr_len + strlen(suffix) + 1);
+ name = malloc(name_len + suffix_len + 1);
if (!name) {
PRINT_FAIL("failed to allocate memory for unpriv.name\n");
err = -ENOMEM;
goto cleanup;
}
- strcpy(name, description);
- strcpy(&name[descr_len], suffix);
+ strcpy(name, spec->prog_name);
+ strcpy(&name[name_len], suffix);
spec->unpriv.name = name;
+
+ if (description) {
+ int descr_len = strlen(description);
+ char *descr;
+
+ descr = malloc(descr_len + suffix_len + 1);
+ if (!descr) {
+ PRINT_FAIL("failed to allocate memory for unpriv.description\n");
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ strcpy(descr, description);
+ strcpy(&descr[descr_len], suffix);
+ spec->unpriv.description = descr;
+ }
}
if (spec->mode_mask & (PRIV | UNPRIV)) {
@@ -711,9 +723,11 @@ static int parse_test_spec(struct test_loader *tester,
spec->valid = true;
+ free(tags);
return 0;
cleanup:
+ free(tags);
free_test_spec(spec);
return err;
}
@@ -1148,7 +1162,7 @@ void run_subtest(struct test_loader *tester,
int links_cnt = 0;
bool should_load;
- if (!test__start_subtest(subspec->name))
+ if (!test__start_subtest_with_desc(subspec->name, subspec->description))
return;
if ((get_current_arch() & spec->arch_mask) == 0) {
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 0929f4a7bda4..7fe16b5131b1 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -308,16 +308,34 @@ static bool match_subtest(struct test_filter_set *filter,
return false;
}
+static bool match_subtest_desc(struct test_filter_set *filter,
+ const char *test_name,
+ const char *subtest_name,
+ const char *subtest_desc)
+{
+ if (match_subtest(filter, test_name, subtest_name))
+ return true;
+
+ if (!subtest_desc || !subtest_desc[0] ||
+ strcmp(subtest_name, subtest_desc) == 0)
+ return false;
+
+ return match_subtest(filter, test_name, subtest_desc);
+}
+
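With match_subtest_desc() in place, a subtest should be selectable by either its subtest name or its __description string through the existing -t test/subtest filter, e.g. (test and description shown are illustrative):

  ./test_progs -t 'verifier_value_ptr_arith/map access: value_ptr -= known scalar'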
static bool should_run_subtest(struct test_selector *sel,
struct test_selector *subtest_sel,
int subtest_num,
const char *test_name,
- const char *subtest_name)
+ const char *subtest_name,
+ const char *subtest_desc)
{
- if (match_subtest(&sel->blacklist, test_name, subtest_name))
+ if (match_subtest_desc(&sel->blacklist, test_name,
+ subtest_name, subtest_desc))
return false;
- if (match_subtest(&sel->whitelist, test_name, subtest_name))
+ if (match_subtest_desc(&sel->whitelist, test_name,
+ subtest_name, subtest_desc))
return true;
if (!sel->whitelist.cnt && !subtest_sel->num_set)
@@ -544,11 +562,12 @@ void test__end_subtest(void)
env.subtest_state = NULL;
}
-bool test__start_subtest(const char *subtest_name)
+bool test__start_subtest_with_desc(const char *subtest_name, const char *subtest_desc)
{
struct prog_test_def *test = env.test;
struct test_state *state = env.test_state;
struct subtest_state *subtest_state;
+ const char *subtest_display_name;
size_t sub_state_size = sizeof(*subtest_state);
if (env.subtest_state)
@@ -574,7 +593,9 @@ bool test__start_subtest(const char *subtest_name)
return false;
}
- subtest_state->name = strdup(subtest_name);
+ subtest_display_name = subtest_desc ? subtest_desc : subtest_name;
+
+ subtest_state->name = strdup(subtest_display_name);
if (!subtest_state->name) {
fprintf(env.stderr_saved,
"Subtest #%d: failed to copy subtest name!\n",
@@ -586,14 +607,15 @@ bool test__start_subtest(const char *subtest_name)
&env.subtest_selector,
state->subtest_num,
test->test_name,
- subtest_name)) {
+ subtest_name,
+ subtest_desc)) {
subtest_state->filtered = true;
return false;
}
- subtest_state->should_tmon = match_subtest(&env.tmon_selector.whitelist,
- test->test_name,
- subtest_name);
+ subtest_state->should_tmon = match_subtest_desc(&env.tmon_selector.whitelist,
+ test->test_name, subtest_name,
+ subtest_desc);
env.subtest_state = subtest_state;
stdio_hijack_init(&subtest_state->log_buf, &subtest_state->log_cnt);
@@ -602,6 +624,11 @@ bool test__start_subtest(const char *subtest_name)
return true;
}
+bool test__start_subtest(const char *subtest_name)
+{
+ return test__start_subtest_with_desc(subtest_name, NULL);
+}
+
void test__force_log(void)
{
env.test_state->force_log = true;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index eebfc18cdcd2..1a44467f4310 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -181,6 +181,7 @@ struct msg {
extern struct test_env env;
void test__force_log(void);
+bool test__start_subtest_with_desc(const char *name, const char *description);
bool test__start_subtest(const char *name);
void test__end_subtest(void);
void test__skip(void);
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 66af0d13751a..6fbe1e995660 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -368,7 +368,7 @@ int delete_module(const char *name, int flags)
return syscall(__NR_delete_module, name, flags);
}
-int unload_module(const char *name, bool verbose)
+int try_unload_module(const char *name, int retries, bool verbose)
{
int ret, cnt = 0;
@@ -379,7 +379,7 @@ int unload_module(const char *name, bool verbose)
ret = delete_module(name, 0);
if (!ret || errno != EAGAIN)
break;
- if (++cnt > 10000) {
+ if (++cnt > retries) {
fprintf(stdout, "Unload of %s timed out\n", name);
break;
}
@@ -400,6 +400,11 @@ int unload_module(const char *name, bool verbose)
return 0;
}
+int unload_module(const char *name, bool verbose)
+{
+ return try_unload_module(name, 10000, verbose);
+}
+
static int __load_module(const char *path, const char *param_values, bool verbose)
{
int fd;
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index eb20d3772218..2ca2356a0b58 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -40,6 +40,7 @@ int finit_module(int fd, const char *param_values, int flags);
int delete_module(const char *name, int flags);
int load_module(const char *path, bool verbose);
int load_module_params(const char *path, const char *param_values, bool verbose);
+int try_unload_module(const char *name, int retries, bool verbose);
int unload_module(const char *name, bool verbose);
static inline __u64 get_time_ns(void)
diff --git a/tools/testing/selftests/bpf/uprobe_multi.c b/tools/testing/selftests/bpf/uprobe_multi.c
index dd38dc68f635..3e58a86b8e25 100644
--- a/tools/testing/selftests/bpf/uprobe_multi.c
+++ b/tools/testing/selftests/bpf/uprobe_multi.c
@@ -100,6 +100,9 @@ int __attribute__((weak)) trigger_uprobe(bool build_id_resident)
int page_sz = sysconf(_SC_PAGESIZE);
void *addr;
+ unsigned char vec[1];
+ int poll = 0;
+
/* page-align build ID start */
addr = (void *)((uintptr_t)&build_id_start & ~(page_sz - 1));
@@ -108,9 +111,19 @@ int __attribute__((weak)) trigger_uprobe(bool build_id_resident)
* do MADV_POPULATE_READ, and then MADV_PAGEOUT, if necessary
*/
madvise(addr, page_sz, MADV_POPULATE_READ);
- if (!build_id_resident)
- madvise(addr, page_sz, MADV_PAGEOUT);
-
+ if (!build_id_resident) {
+ do {
+ madvise(addr, page_sz, MADV_PAGEOUT);
+ /* check if page has been evicted */
+ mincore(addr, page_sz, vec);
+ if (!(vec[0] & 1))
+ break;
+			/* if the page is still resident, retry MADV_POPULATE_READ/MADV_PAGEOUT */
+ madvise(addr, page_sz, MADV_POPULATE_READ);
+ poll++;
+ usleep(100);
+ } while (poll < 500);
+ }
(void)uprobe();
return 0;
diff --git a/tools/testing/selftests/bpf/uprobe_multi.ld b/tools/testing/selftests/bpf/uprobe_multi.ld
index a2e94828bc8c..2063714b2899 100644
--- a/tools/testing/selftests/bpf/uprobe_multi.ld
+++ b/tools/testing/selftests/bpf/uprobe_multi.ld
@@ -1,8 +1,8 @@
SECTIONS
{
- . = ALIGN(4096);
+ . = ALIGN(65536);
.note.gnu.build-id : { *(.note.gnu.build-id) }
- . = ALIGN(4096);
+ . = ALIGN(65536);
}
INSERT AFTER .text;
diff --git a/tools/testing/selftests/bpf/usdt.h b/tools/testing/selftests/bpf/usdt.h
index 549d1f774810..c71e21df38b3 100644
--- a/tools/testing/selftests/bpf/usdt.h
+++ b/tools/testing/selftests/bpf/usdt.h
@@ -312,6 +312,8 @@ struct usdt_sema { volatile unsigned short active; };
#ifndef USDT_NOP
#if defined(__ia64__) || defined(__s390__) || defined(__s390x__)
#define USDT_NOP nop 0
+#elif defined(__x86_64__)
+#define USDT_NOP .byte 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x0 /* nop, nop5 */
#else
#define USDT_NOP nop
#endif
diff --git a/tools/testing/selftests/bpf/usdt_1.c b/tools/testing/selftests/bpf/usdt_1.c
new file mode 100644
index 000000000000..4f06e8bcf58b
--- /dev/null
+++ b/tools/testing/selftests/bpf/usdt_1.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#if defined(__x86_64__)
+
+/*
+ * Define USDT_NOP before including usdt.h so the probe uses a
+ * single nop instruction.
+ */
+#define USDT_NOP .byte 0x90
+#include "usdt.h"
+
+__attribute__((aligned(16)))
+void usdt_1(void)
+{
+ USDT(optimized_attach, usdt_1);
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/usdt_2.c b/tools/testing/selftests/bpf/usdt_2.c
new file mode 100644
index 000000000000..789883aaca4c
--- /dev/null
+++ b/tools/testing/selftests/bpf/usdt_2.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#if defined(__x86_64__)
+
+/*
+ * Include usdt.h with the default nop,nop5 instruction combo.
+ */
+#include "usdt.h"
+
+__attribute__((aligned(16)))
+void usdt_2(void)
+{
+ USDT(optimized_attach, usdt_2);
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 9ca83dce100d..c3164b9b2be5 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -220,7 +220,7 @@
},
.result_unpriv = REJECT,
.result = REJECT,
- .errstr = "variable trusted_ptr_ access var_off=(0x0; 0x7) disallowed",
+ .errstr = "R1 must have zero offset when passed to release func or trusted arg to kfunc",
},
{
"calls: invalid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID",
@@ -455,7 +455,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "the call stack of 9 frames is too deep",
+ .errstr = "recursive call",
.result = REJECT,
},
{
@@ -812,7 +812,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "the call stack of 9 frames is too deep",
+ .errstr = "recursive call",
.result = REJECT,
},
{
@@ -824,7 +824,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "the call stack of 9 frames is too deep",
+ .errstr = "recursive call",
.result = REJECT,
},
{
@@ -1521,6 +1521,7 @@
.errstr = "R0 invalid mem access 'scalar'",
.result_unpriv = REJECT,
.errstr_unpriv = "invalid read from stack R7 off=-16 size=8",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1",
diff --git a/tools/testing/selftests/bpf/verifier/junk_insn.c b/tools/testing/selftests/bpf/verifier/junk_insn.c
index 89d690f1992a..735d3b9510cf 100644
--- a/tools/testing/selftests/bpf/verifier/junk_insn.c
+++ b/tools/testing/selftests/bpf/verifier/junk_insn.c
@@ -10,7 +10,7 @@
{
"junk insn2",
.insns = {
- BPF_RAW_INSN(1, 0, 0, 0, 0),
+ BPF_RAW_INSN(BPF_LDX | BPF_MEM | BPF_W, 0, 0, 0, 1),
BPF_EXIT_INSN(),
},
.errstr = "BPF_LDX uses reserved fields",
@@ -28,7 +28,7 @@
{
"junk insn4",
.insns = {
- BPF_RAW_INSN(-1, -1, -1, -1, -1),
+ BPF_RAW_INSN(-1, 0, 0, -1, -1),
BPF_EXIT_INSN(),
},
.errstr = "unknown opcode ff",
@@ -37,7 +37,7 @@
{
"junk insn5",
.insns = {
- BPF_RAW_INSN(0x7f, -1, -1, -1, -1),
+ BPF_RAW_INSN(0x7f, 0, 0, -1, -1),
BPF_EXIT_INSN(),
},
.errstr = "BPF_ALU uses reserved fields",
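
These junk-insn updates matter because reserved-field checks now run in a dedicated pre-pass, so each test has to put the garbage in exactly the field its errstr exercises. For reference, the selftests' BPF_RAW_INSN() macro (from tools/include/linux/filter.h) roughly expands to a struct bpf_insn with every field set explicitly:

#include <linux/bpf.h>

/* Roughly what BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) expands to. */
#define RAW_INSN(CODE, DST, SRC, OFF, IMM)	\
	((struct bpf_insn) {			\
		.code    = CODE,		\
		.dst_reg = DST,			\
		.src_reg = SRC,			\
		.off     = OFF,			\
		.imm     = IMM,			\
	})

/* "junk insn2" above now encodes a BPF_LDX | BPF_MEM | BPF_W load with a
 * non-zero imm; imm is reserved for plain BPF_LDX loads, which is what the
 * "BPF_LDX uses reserved fields" errstr expects. */
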
diff --git a/tools/testing/selftests/bpf/verifier/sleepable.c b/tools/testing/selftests/bpf/verifier/sleepable.c
index 1f0d2bdc673f..c2b7f5ebf168 100644
--- a/tools/testing/selftests/bpf/verifier/sleepable.c
+++ b/tools/testing/selftests/bpf/verifier/sleepable.c
@@ -85,7 +85,7 @@
.expected_attach_type = BPF_TRACE_RAW_TP,
.kfunc = "sched_switch",
.result = REJECT,
- .errstr = "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable",
+ .errstr = "Only fentry/fexit/fsession/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable",
.flags = BPF_F_SLEEPABLE,
.runs = -1,
},
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 75f85e0362f5..5c82950e6978 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -1236,7 +1236,7 @@ static void mask_unrelated_struct_ops_progs(struct bpf_object *obj,
}
}
-static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename)
+static void fixup_obj_maps(struct bpf_object *obj)
{
struct bpf_map *map;
@@ -1251,15 +1251,23 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch
case BPF_MAP_TYPE_INODE_STORAGE:
case BPF_MAP_TYPE_CGROUP_STORAGE:
case BPF_MAP_TYPE_CGRP_STORAGE:
- break;
case BPF_MAP_TYPE_STRUCT_OPS:
- mask_unrelated_struct_ops_progs(obj, map, prog);
break;
default:
if (bpf_map__max_entries(map) == 0)
bpf_map__set_max_entries(map, 1);
}
}
+}
+
+static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename)
+{
+ struct bpf_map *map;
+
+ bpf_object__for_each_map(map, obj) {
+ if (bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS)
+ mask_unrelated_struct_ops_progs(obj, map, prog);
+ }
/* SEC(freplace) programs can't be loaded with veristat as is,
* but we can try guessing their target program's expected type by
@@ -1608,6 +1616,7 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
const char *base_filename = basename(strdupa(filename));
const char *prog_name = bpf_program__name(prog);
long mem_peak_a, mem_peak_b, mem_peak = -1;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
char *buf;
int buf_sz, log_level;
struct verif_stats *stats;
@@ -1647,9 +1656,6 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
}
verif_log_buf[0] = '\0';
- bpf_program__set_log_buf(prog, buf, buf_sz);
- bpf_program__set_log_level(prog, log_level);
-
/* increase chances of successful BPF object loading */
fixup_obj(obj, prog, base_filename);
@@ -1658,15 +1664,22 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
if (env.force_reg_invariants)
bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
- err = bpf_object__prepare(obj);
- if (!err) {
- cgroup_err = reset_stat_cgroup();
- mem_peak_a = cgroup_memory_peak();
- err = bpf_object__load(obj);
- mem_peak_b = cgroup_memory_peak();
- if (!cgroup_err && mem_peak_a >= 0 && mem_peak_b >= 0)
- mem_peak = mem_peak_b - mem_peak_a;
+ opts.log_buf = buf;
+ opts.log_size = buf_sz;
+ opts.log_level = log_level;
+
+ cgroup_err = reset_stat_cgroup();
+ mem_peak_a = cgroup_memory_peak();
+ fd = bpf_program__clone(prog, &opts);
+ if (fd < 0) {
+ err = fd;
+ if (env.verbose)
+ fprintf(stderr, "Failed to load program %s %d\n", prog_name, err);
}
+ mem_peak_b = cgroup_memory_peak();
+ if (!cgroup_err && mem_peak_a >= 0 && mem_peak_b >= 0)
+ mem_peak = mem_peak_b - mem_peak_a;
+
env.progs_processed++;
stats->file_name = strdup(base_filename);
@@ -1678,7 +1691,6 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
stats->stats[MEMORY_PEAK] = mem_peak < 0 ? -1 : mem_peak / (1024 * 1024);
memset(&info, 0, info_len);
- fd = bpf_program__fd(prog);
if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0) {
stats->stats[JITED_SIZE] = info.jited_prog_len;
if (env.dump_mode & DUMP_JITED)
@@ -1699,7 +1711,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
if (verif_log_buf != buf)
free(buf);
-
+ if (fd > 0)
+ close(fd);
return 0;
}
@@ -2182,8 +2195,8 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i
static int process_obj(const char *filename)
{
const char *base_filename = basename(strdupa(filename));
- struct bpf_object *obj = NULL, *tobj;
- struct bpf_program *prog, *tprog, *lprog;
+ struct bpf_object *obj = NULL;
+ struct bpf_program *prog;
libbpf_print_fn_t old_libbpf_print_fn;
LIBBPF_OPTS(bpf_object_open_opts, opts);
int err = 0, prog_cnt = 0;
@@ -2222,51 +2235,24 @@ static int process_obj(const char *filename)
env.files_processed++;
bpf_object__for_each_program(prog, obj) {
+ bpf_program__set_autoload(prog, true);
prog_cnt++;
}
- if (prog_cnt == 1) {
- prog = bpf_object__next_program(obj, NULL);
- bpf_program__set_autoload(prog, true);
- err = set_global_vars(obj, env.presets, env.npresets);
- if (err) {
- fprintf(stderr, "Failed to set global variables %d\n", err);
- goto cleanup;
- }
- process_prog(filename, obj, prog);
+ fixup_obj_maps(obj);
+
+ err = set_global_vars(obj, env.presets, env.npresets);
+ if (err) {
+ fprintf(stderr, "Failed to set global variables %d\n", err);
goto cleanup;
}
- bpf_object__for_each_program(prog, obj) {
- const char *prog_name = bpf_program__name(prog);
-
- tobj = bpf_object__open_file(filename, &opts);
- if (!tobj) {
- err = -errno;
- fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
- goto cleanup;
- }
-
- err = set_global_vars(tobj, env.presets, env.npresets);
- if (err) {
- fprintf(stderr, "Failed to set global variables %d\n", err);
- goto cleanup;
- }
-
- lprog = NULL;
- bpf_object__for_each_program(tprog, tobj) {
- const char *tprog_name = bpf_program__name(tprog);
-
- if (strcmp(prog_name, tprog_name) == 0) {
- bpf_program__set_autoload(tprog, true);
- lprog = tprog;
- } else {
- bpf_program__set_autoload(tprog, false);
- }
- }
+ err = bpf_object__prepare(obj);
+ if (err && env.verbose) /* run process_prog() anyway to output per-program failures */
+ fprintf(stderr, "Failed to prepare BPF object for loading %d\n", err);
- process_prog(filename, tobj, lprog);
- bpf_object__close(tobj);
+ bpf_object__for_each_program(prog, obj) {
+ process_prog(filename, obj, prog);
}
cleanup:
@@ -3264,17 +3250,14 @@ static int handle_verif_mode(void)
create_stat_cgroup();
for (i = 0; i < env.filename_cnt; i++) {
err = process_obj(env.filenames[i]);
- if (err) {
+ if (err)
fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
- goto out;
- }
}
qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
output_prog_stats();
-out:
destroy_stat_cgroup();
return err;
}
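
Taken together, the veristat changes replace the old one-full-object-reload-per-program scheme with a single open/prepare pass followed by individual per-program loads. A condensed sketch of the new flow, using real libbpf APIs except for bpf_program__clone(), which is introduced by this series and mirrors only the usage shown in the diff above:

#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <bpf/libbpf.h>

static int verify_all_progs(const char *filename)
{
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		.log_level = 1,
	);
	struct bpf_object *obj;
	struct bpf_program *prog;
	int fd, err;

	obj = bpf_object__open_file(filename, NULL);
	if (!obj)
		return -errno;

	bpf_object__for_each_program(prog, obj)
		bpf_program__set_autoload(prog, true);

	/* one shared prepare pass replaces N full open/load cycles */
	err = bpf_object__prepare(obj);
	if (err)
		fprintf(stderr, "prepare failed: %d\n", err);

	bpf_object__for_each_program(prog, obj) {
		fd = bpf_program__clone(prog, &opts);	/* per-program load */
		if (fd >= 0)
			close(fd);
	}

	bpf_object__close(obj);
	return 0;
}
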