From cfd3bfe9507b4aa39f7e86772e60b50b799e490e Mon Sep 17 00:00:00 2001 From: Dmitrii Bundin Date: Sat, 20 Apr 2024 07:24:57 +0300 Subject: bpf: Include linux/types.h for u32 Inclusion of the header linux/btf_ids.h relies on indirect inclusion of the header linux/types.h. Including it directly on the top level helps to avoid potential problems if linux/types.h hasn't been included before. The main motivation to introduce this is to avoid similar problems that have shown up in the bpftool where GNU libc indirectly pulls linux/types.h causing compile error of the form: error: unknown type name 'u32' u32 cnt; ^~~ The bpftool compile error was fixed in 62248b22d01e ("tools/resolve_btfids: fix build with musl libc"). Signed-off-by: Dmitrii Bundin Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240420042457.3198883-1-dmitrii.bundin.a@gmail.com --- include/linux/btf_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index e24aabfe8ecc..c0e3e1426a82 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -3,6 +3,8 @@ #ifndef _LINUX_BTF_IDS_H #define _LINUX_BTF_IDS_H +#include <linux/types.h> /* for u32 */ + struct btf_id_set { u32 cnt; u32 ids[]; -- cgit v1.2.3 From 535a3692ba7245792e6f23654507865d4293c850 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Apr 2024 13:28:24 +0200 Subject: bpf: Add support for kprobe session attach Adding support to attach bpf program for entry and return probe of the same function. This is a common use case which at the moment requires to create two kprobe multi links. Adding new BPF_TRACE_KPROBE_SESSION attach type that instructs kernel to attach single link program to both entry and exit probe. It's possible to control execution of the bpf program on return probe simply by returning zero or non zero from the entry bpf program execution to execute or not the bpf program on return probe respectively. 
Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240430112830.1184228-2-jolsa@kernel.org --- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 7 ++++++- kernel/trace/bpf_trace.c | 28 ++++++++++++++++++++-------- tools/include/uapi/linux/bpf.h | 1 + 4 files changed, 28 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d94a72593ead..90706a47f6ff 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1115,6 +1115,7 @@ enum bpf_attach_type { BPF_CGROUP_UNIX_GETSOCKNAME, BPF_NETKIT_PRIMARY, BPF_NETKIT_PEER, + BPF_TRACE_KPROBE_SESSION, __MAX_BPF_ATTACH_TYPE }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f655adf42e39..13ad74ecf2cd 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4016,11 +4016,15 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI && attach_type != BPF_TRACE_KPROBE_MULTI) return -EINVAL; + if (prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION && + attach_type != BPF_TRACE_KPROBE_SESSION) + return -EINVAL; if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI && attach_type != BPF_TRACE_UPROBE_MULTI) return -EINVAL; if (attach_type != BPF_PERF_EVENT && attach_type != BPF_TRACE_KPROBE_MULTI && + attach_type != BPF_TRACE_KPROBE_SESSION && attach_type != BPF_TRACE_UPROBE_MULTI) return -EINVAL; return 0; @@ -5281,7 +5285,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) case BPF_PROG_TYPE_KPROBE: if (attr->link_create.attach_type == BPF_PERF_EVENT) ret = bpf_perf_link_attach(attr, prog); - else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI) + else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI || + attr->link_create.attach_type == BPF_TRACE_KPROBE_SESSION) ret = bpf_kprobe_multi_link_attach(attr, prog); else if (attr->link_create.attach_type == 
BPF_TRACE_UPROBE_MULTI) ret = bpf_uprobe_multi_link_attach(attr, prog); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0ba722b57af3..06a9671834b6 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1631,6 +1631,17 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } } +static bool is_kprobe_multi(const struct bpf_prog *prog) +{ + return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI || + prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION; +} + +static inline bool is_kprobe_session(const struct bpf_prog *prog) +{ + return prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION; +} + static const struct bpf_func_proto * kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1646,13 +1657,13 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_override_return_proto; #endif case BPF_FUNC_get_func_ip: - if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI) + if (is_kprobe_multi(prog)) return &bpf_get_func_ip_proto_kprobe_multi; if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI) return &bpf_get_func_ip_proto_uprobe_multi; return &bpf_get_func_ip_proto_kprobe; case BPF_FUNC_get_attach_cookie: - if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI) + if (is_kprobe_multi(prog)) return &bpf_get_attach_cookie_proto_kmulti; if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI) return &bpf_get_attach_cookie_proto_umulti; @@ -2834,10 +2845,11 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip, void *data) { struct bpf_kprobe_multi_link *link; + int err; link = container_of(fp, struct bpf_kprobe_multi_link, fp); - kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs); - return 0; + err = kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs); + return is_kprobe_session(link->link.prog) ? 
err : 0; } static void @@ -2981,7 +2993,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (sizeof(u64) != sizeof(void *)) return -EOPNOTSUPP; - if (prog->expected_attach_type != BPF_TRACE_KPROBE_MULTI) + if (!is_kprobe_multi(prog)) return -EINVAL; flags = attr->link_create.kprobe_multi.flags; @@ -3062,10 +3074,10 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (err) goto error; - if (flags & BPF_F_KPROBE_MULTI_RETURN) - link->fp.exit_handler = kprobe_multi_link_exit_handler; - else + if (!(flags & BPF_F_KPROBE_MULTI_RETURN)) link->fp.entry_handler = kprobe_multi_link_handler; + if ((flags & BPF_F_KPROBE_MULTI_RETURN) || is_kprobe_session(prog)) + link->fp.exit_handler = kprobe_multi_link_exit_handler; link->addrs = addrs; link->cookies = cookies; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d94a72593ead..90706a47f6ff 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1115,6 +1115,7 @@ enum bpf_attach_type { BPF_CGROUP_UNIX_GETSOCKNAME, BPF_NETKIT_PRIMARY, BPF_NETKIT_PEER, + BPF_TRACE_KPROBE_SESSION, __MAX_BPF_ATTACH_TYPE }; -- cgit v1.2.3 From 57bfc7605ca5b102ba336779ae9adbc5bbba1d96 Mon Sep 17 00:00:00 2001 From: Miao Xu Date: Wed, 1 May 2024 21:23:16 -0700 Subject: tcp: Add new args for cong_control in tcp_congestion_ops This patch adds two new arguments for cong_control of struct tcp_congestion_ops: - ack - flag These two arguments are inherited from the caller tcp_cong_control in tcp_input.c. One use case of them is to update cwnd and pacing rate inside cong_control based on the info they provide. For example, the flag can be used to decide if it is the right time to raise or reduce a sender's cwnd. 
Reviewed-by: Eric Dumazet Signed-off-by: Miao Xu Link: https://lore.kernel.org/r/20240502042318.801932-2-miaxu@meta.com Signed-off-by: Martin KaFai Lau --- include/net/tcp.h | 2 +- net/ipv4/bpf_tcp_ca.c | 3 ++- net/ipv4/tcp_bbr.c | 2 +- net/ipv4/tcp_input.c | 2 +- tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c | 6 +++--- 5 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index fe98fb01879b..7294da8fb780 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1172,7 +1172,7 @@ struct tcp_congestion_ops { /* call when packets are delivered to update cwnd and pacing rate, * after all the ca_state processing. (optional) */ - void (*cong_control)(struct sock *sk, const struct rate_sample *rs); + void (*cong_control)(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs); /* new value of cwnd after loss (required) */ diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 7f518ea5f4ac..6bd7f8db189a 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -307,7 +307,8 @@ static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk) return 0; } -static void bpf_tcp_ca_cong_control(struct sock *sk, const struct rate_sample *rs) +static void bpf_tcp_ca_cong_control(struct sock *sk, u32 ack, int flag, + const struct rate_sample *rs) { } diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 7e52ab24e40a..760941e55153 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1024,7 +1024,7 @@ static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) bbr_update_gains(sk); } -__bpf_kfunc static void bbr_main(struct sock *sk, const struct rate_sample *rs) +__bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) { struct bbr *bbr = inet_csk_ca(sk); u32 bw; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 53e1150f706f..23ccfc7b1d3c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3541,7 
+3541,7 @@ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked, const struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ca_ops->cong_control) { - icsk->icsk_ca_ops->cong_control(sk, rs); + icsk->icsk_ca_ops->cong_control(sk, ack, flag, rs); return; } diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c b/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c index fcfbfe0336b4..52b610357309 100644 --- a/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c +++ b/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c @@ -5,7 +5,7 @@ #include <bpf/bpf_tracing.h> extern void bbr_init(struct sock *sk) __ksym; -extern void bbr_main(struct sock *sk, const struct rate_sample *rs) __ksym; +extern void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) __ksym; extern u32 bbr_sndbuf_expand(struct sock *sk) __ksym; extern u32 bbr_undo_cwnd(struct sock *sk) __ksym; extern void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym; @@ -42,9 +42,9 @@ void BPF_PROG(in_ack_event, struct sock *sk, u32 flags) } SEC("struct_ops/cong_control") -void BPF_PROG(cong_control, struct sock *sk, const struct rate_sample *rs) +void BPF_PROG(cong_control, struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) { - bbr_main(sk, ack, flag, rs); + bbr_main(sk, ack, flag, rs); } SEC("struct_ops/cong_avoid") -- cgit v1.2.3 From 2ddec2c80b4402c293c7e6e0881cecaaf77e8cec Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 2 May 2024 15:18:52 +0000 Subject: riscv, bpf: inline bpf_get_smp_processor_id() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inline the calls to bpf_get_smp_processor_id() in the riscv bpf jit. RISCV saves the pointer to the CPU's task_struct in the TP (thread pointer) register. This makes it trivial to get the CPU's processor id. As thread_info is the first member of task_struct, we can read the processor id from TP + offsetof(struct thread_info, cpu). 
RISCV64 JIT output for `call bpf_get_smp_processor_id` ====================================================== Before After -------- ------- auipc t1,0x848c ld a5,32(tp) jalr 604(t1) mv a5,a0 Benchmark using [1] on Qemu. ./benchs/run_bench_trigger.sh glob-arr-inc arr-inc hash-inc +---------------+------------------+------------------+--------------+ | Name | Before | After | % change | |---------------+------------------+------------------+--------------| | glob-arr-inc | 1.077 ± 0.006M/s | 1.336 ± 0.010M/s | + 24.04% | | arr-inc | 1.078 ± 0.002M/s | 1.332 ± 0.015M/s | + 23.56% | | hash-inc | 0.494 ± 0.004M/s | 0.653 ± 0.001M/s | + 32.18% | +---------------+------------------+------------------+--------------+ NOTE: This benchmark includes changes from this patch and the previous patch that implemented the per-cpu insn. [1] https://github.com/anakryiko/linux/commit/8dec900975ef Signed-off-by: Puranjay Mohan Acked-by: Kumar Kartikeya Dwivedi Acked-by: Andrii Nakryiko Acked-by: Björn Töpel Link: https://lore.kernel.org/r/20240502151854.9810-3-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- arch/riscv/net/bpf_jit_comp64.c | 26 ++++++++++++++++++++++++++ include/linux/filter.h | 1 + kernel/bpf/core.c | 11 +++++++++++ kernel/bpf/verifier.c | 4 ++++ 4 files changed, 42 insertions(+) (limited to 'include') diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 1f0159963b3e..a46ec7fb4489 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -1493,6 +1493,22 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, bool fixed_addr; u64 addr; + /* Inline calls to bpf_get_smp_processor_id() + * + * RV_REG_TP holds the address of the current CPU's task_struct and thread_info is + * at offset 0 in task_struct. 
+ * Load cpu from thread_info: + * Set R0 to ((struct thread_info *)(RV_REG_TP))->cpu + * + * This replicates the implementation of raw_smp_processor_id() on RISCV + */ + if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) { + /* Load current CPU number in R0 */ + emit_ld(bpf_to_rv_reg(BPF_REG_0, ctx), offsetof(struct thread_info, cpu), + RV_REG_TP, ctx); + break; + } + mark_call(ctx); ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr, &fixed_addr); @@ -2062,3 +2078,13 @@ bool bpf_jit_supports_percpu_insn(void) { return true; } + +bool bpf_jit_inlines_helper_call(s32 imm) +{ + switch (imm) { + case BPF_FUNC_get_smp_processor_id: + return true; + default: + return false; + } +} diff --git a/include/linux/filter.h b/include/linux/filter.h index 7a27f19bf44d..3e19bb62ed1a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -993,6 +993,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); +bool bpf_jit_inlines_helper_call(s32 imm); bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_percpu_insn(void); bool bpf_jit_supports_kfunc_call(void); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 99b8b1c9a248..aa59af9f9bd9 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2941,6 +2941,17 @@ bool __weak bpf_jit_needs_zext(void) return false; } +/* Return true if the JIT inlines the call to the helper corresponding to + * the imm. + * + * The verifier will not patch the insn->imm for the call to the helper if + * this returns true. + */ +bool __weak bpf_jit_inlines_helper_call(s32 imm) +{ + return false; +} + /* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. 
*/ bool __weak bpf_jit_supports_subprog_tailcalls(void) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9e3aba08984e..1658ca4136a3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -19996,6 +19996,10 @@ static int do_misc_fixups(struct bpf_verifier_env *env) goto next_insn; } + /* Skip inlining the helper call if the JIT does it. */ + if (bpf_jit_inlines_helper_call(insn->imm)) + goto next_insn; + if (insn->imm == BPF_FUNC_get_route_realm) prog->dst_needed = 1; if (insn->imm == BPF_FUNC_get_prandom_u32) -- cgit v1.2.3