summaryrefslogtreecommitdiff
path: root/net/bpf
diff options
context:
space:
mode:
authorSebastian Andrzej Siewior <bigeasy@linutronix.de>2024-06-20 15:22:04 +0200
committerJakub Kicinski <kuba@kernel.org>2024-06-24 16:41:24 -0700
commit401cb7dae8130fd34eb84648e02ab4c506df7d5e (patch)
treef551429eb47ddd58e2bfb13aff74aa894752a3d9 /net/bpf
parent78f520b7bbe579438dfc202226b3dac5607d8c7f (diff)
downloadlwn-401cb7dae8130fd34eb84648e02ab4c506df7d5e.tar.gz
lwn-401cb7dae8130fd34eb84648e02ab4c506df7d5e.zip
net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.
The XDP redirect process is two staged: - bpf_prog_run_xdp() is invoked to run a eBPF program which inspects the packet and makes decisions. While doing that, the per-CPU variable bpf_redirect_info is used. - Afterwards xdp_do_redirect() is invoked and accesses bpf_redirect_info and it may also access other per-CPU variables like xskmap_flush_list. At the very end of the NAPI callback, xdp_do_flush() is invoked which does not access bpf_redirect_info but will touch the individual per-CPU lists. The per-CPU variables are only used in the NAPI callback hence disabling bottom halves is the only protection mechanism. Users from preemptible context (like cpu_map_kthread_run()) explicitly disable bottom halves for protections reasons. Without locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. PREEMPT_RT has forced-threaded interrupts enabled and every NAPI-callback runs in a thread. If each thread has its own data structure then locking can be avoided. Create a struct bpf_net_context which contains struct bpf_redirect_info. Define the variable on stack, use bpf_net_ctx_set() to save a pointer to it, bpf_net_ctx_clear() removes it again. The bpf_net_ctx_set() may nest. For instance a function can be used from within NET_RX_SOFTIRQ/ net_rx_action which uses bpf_net_ctx_set() and NET_TX_SOFTIRQ which does not. Therefore only the first invocations updates the pointer. Use bpf_net_ctx_get_ri() as a wrapper to retrieve the current struct bpf_redirect_info. The returned data structure is zero initialized to ensure nothing is leaked from stack. This is done on first usage of the struct. bpf_net_ctx_set() sets bpf_redirect_info::kern_flags to 0 to note that initialisation is required. First invocation of bpf_net_ctx_get_ri() will memset() the data structure and update bpf_redirect_info::kern_flags. bpf_redirect_info::nh is excluded from memset because it is only used once BPF_F_NEIGH is set which also sets the nh member. The kern_flags is moved past nh to exclude it from memset. The pointer to bpf_net_context is saved task's task_struct. Using always the bpf_net_context approach has the advantage that there is almost zero differences between PREEMPT_RT and non-PREEMPT_RT builds. Cc: Andrii Nakryiko <andrii@kernel.org> Cc: Eduard Zingerman <eddyz87@gmail.com> Cc: Hao Luo <haoluo@google.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: John Fastabend <john.fastabend@gmail.com> Cc: KP Singh <kpsingh@kernel.org> Cc: Martin KaFai Lau <martin.lau@linux.dev> Cc: Song Liu <song@kernel.org> Cc: Stanislav Fomichev <sdf@google.com> Cc: Yonghong Song <yonghong.song@linux.dev> Acked-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Jesper Dangaard Brouer <hawk@kernel.org> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Link: https://patch.msgid.link/20240620132727.660738-15-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/bpf')
-rw-r--r--net/bpf/test_run.c11
1 files changed, 10 insertions, 1 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 36ae54f57bf5..a6d7f790cdda 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -283,9 +283,10 @@ static int xdp_recv_frames(struct xdp_frame **frames, int nframes,
static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
u32 repeat)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
int err = 0, act, ret, i, nframes = 0, batch_sz;
struct xdp_frame **frames = xdp->frames;
+ struct bpf_redirect_info *ri;
struct xdp_page_head *head;
struct xdp_frame *frm;
bool redirect = false;
@@ -295,6 +296,8 @@ static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
batch_sz = min_t(u32, repeat, xdp->batch_size);
local_bh_disable();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
+ ri = bpf_net_ctx_get_ri();
xdp_set_return_frame_no_direct();
for (i = 0; i < batch_sz; i++) {
@@ -359,6 +362,7 @@ out:
}
xdp_clear_return_frame_no_direct();
+ bpf_net_ctx_clear(bpf_net_ctx);
local_bh_enable();
return err;
}
@@ -394,6 +398,7 @@ static int bpf_test_run_xdp_live(struct bpf_prog *prog, struct xdp_buff *ctx,
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
u32 *retval, u32 *time, bool xdp)
{
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct bpf_prog_array_item item = {.prog = prog};
struct bpf_run_ctx *old_ctx;
struct bpf_cg_run_ctx run_ctx;
@@ -419,10 +424,14 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
do {
run_ctx.prog_item = &item;
local_bh_disable();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
+
if (xdp)
*retval = bpf_prog_run_xdp(prog, ctx);
else
*retval = bpf_prog_run(prog, ctx);
+
+ bpf_net_ctx_clear(bpf_net_ctx);
local_bh_enable();
} while (bpf_test_timer_continue(&t, 1, repeat, &ret, time));
bpf_reset_run_ctx(old_ctx);