diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-17 09:18:14 +0100 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-17 09:18:14 +0100 |
| commit | 9c87e61e3c5797277407ba5eae4eac8a52be3fa3 (patch) | |
| tree | e3f902cb5363b5b90ab74a4b7e26fafbc15aaeaf /arch | |
| parent | b85966adbf5de0668a815c6e3527f87e0c387fb4 (diff) | |
| parent | e4287bf34f97a88c7d9322f5bde828724c073a6b (diff) | |
| download | lwn-9c87e61e3c5797277407ba5eae4eac8a52be3fa3.tar.gz lwn-9c87e61e3c5797277407ba5eae4eac8a52be3fa3.zip | |
Merge tag 'bpf-next-7.2' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf updates from Alexei Starovoitov:
"Major changes:
- Recover from BPF arena page faults using a scratch page and add
ptep_try_set() for lockless empty-slot installs on x86 and arm64.
This allows BPF kfuncs to access arena pointers directly.
The 'arena_direct_access' stable branch was created for this work
and was pulled into sched-ext and bpf-next trees (Tejun Heo, Kumar
Kartikeya Dwivedi)
- Lift old restriction and support 6+ arguments in BPF programs and
kfuncs on x86 and arm64 (Yonghong Song, Puranjay Mohan)
Other features and fixes:
- Add 24-bit BTF vlen and reclaim unused bits in the BTF UAPI to ease
addition of new BTF kinds (Alan Maguire)
- Raise the maximum BPF call chain depth from 8 to 16 frames (Alexei
Starovoitov)
- Refactor object relationship tracking in the verifier and fix a
dynptr use-after-free bug (Amery Hung)
- Harden the signed program loader and reject exclusive maps as inner
maps (Daniel Borkmann)
- Replace the verifier min/max bounds fields with a circular number
(cnum) representation and improve 32->64 bit range refinements
(Eduard Zingerman)
- Introduce the arena library and runtime (libarena) with a buddy
allocator, rbtree and SPMC queue data structures, ASAN support and
a parallel test harness. Allow subprograms to return arena pointers
and switch to a BTF type-tag based __arena annotation (Emil
Tsalapatis)
- Cache build IDs in the sleepable stackmap path and avoid faultable
build ID reads under mm locks (Ihor Solodrai)
- Introduce the tracing_multi link to attach a single BPF program to
many kernel functions at once. Allow specifying the uprobe_multi
target via FD (Jiri Olsa)
- Extend the bpf_list family of kfuncs with bpf_list_add/del(), and
bpf_list_is_first/is_last/empty() (Kaitao Cheng)
- Extend the BPF syscall with common attributes support for
prog_load, btf_load and map_create (Leon Hwang)
- Wrap rhashtable as BPF map (Mykyta Yatsenko, Herbert Xu)
- Add sleepable support for tracepoint programs and fix deadlocks in
LRU map due to NMI reentry (Mykyta Yatsenko)
- Fix OOB access in bpf_flow_keys, fix nullness analysis of inner
arrays, enforce write checks for global subprograms (Nuoqi Gui)
- Report the maximum combined stack depth and print a breakdown of
instructions processed per subprogram (Paul Chaignon)
- Add an XDP load-balancer benchmark and arm64 JIT support for stack
arguments (Puranjay Mohan)
- Add kfuncs to traverse over wakeup_sources (Samuel Wu)
- Allow sleepable BPF programs to use LPM trie maps directly (Vlad
Poenaru)
- Many more fixes and cleanups across the verifier, BTF, sockmap,
devmap, bpffs, security hooks, s390/riscv/loongarch JITs,
rqspinlock, libbpf, bpftool, selftests"
* tag 'bpf-next-7.2' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (336 commits)
selftests/bpf: Work around llvm stack overflow in crypto progs
selftests/bpf: add test for bpf_msg_pop_data() overflow
bpf, sockmap: fix integer overflow in bpf_msg_pop_data() bounds check
sockmap: Fix use-after-free in udp_bpf_recvmsg()
bpf, sockmap: keep sk_msg copy state in sync
bpf, sockmap: Fix wrong rsge offset in bpf_msg_push_data()
bpf, sockmap: reject overflowing copy + len in bpf_msg_push_data()
selftsets/bpf: Retry map update on helper_fill_hashmap()
selftests/bpf: Add test for sleepable lsm_cgroup rejection
selftests/bpf: Add test to verify the fix for bpf_setsockopt() helper
bpf: Fix bpf_get/setsockopt to tos for ipv4-mapped ipv6 socket
selftests/bpf: Avoid static LLVM linking for cross builds
selftests/bpf: Use common CFLAGS for urandom_read
selftests/bpf: Initialize operation name before use
tools/bpf: build: Append extra cflags
libbpf: Initialize CFLAGS before including Makefile.include
bpftool: Append extra host flags
bpftool: Avoid adding EXTRA_CFLAGS to HOST_CFLAGS
bpftool: Pass host flags to bootstrap libbpf
selftests/bpf: correct CONFIG_PPC64 macro name in comment
...
Diffstat (limited to 'arch')
| -rw-r--r-- | arch/arm64/include/asm/pgtable.h | 28 | ||||
| -rw-r--r-- | arch/arm64/mm/fault.c | 10 | ||||
| -rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 152 | ||||
| -rw-r--r-- | arch/arm64/net/bpf_timed_may_goto.S | 8 | ||||
| -rw-r--r-- | arch/loongarch/net/bpf_jit.c | 52 | ||||
| -rw-r--r-- | arch/powerpc/net/bpf_jit_comp.c | 54 | ||||
| -rw-r--r-- | arch/riscv/net/bpf_jit_comp64.c | 61 | ||||
| -rw-r--r-- | arch/s390/net/bpf_jit_comp.c | 81 | ||||
| -rw-r--r-- | arch/x86/include/asm/pgtable.h | 12 | ||||
| -rw-r--r-- | arch/x86/mm/fault.c | 12 | ||||
| -rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 205 |
11 files changed, 498 insertions, 177 deletions
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c9e4e00a9af2..27689c62bd25 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1830,6 +1830,34 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, return __ptep_get_and_clear(mm, addr, ptep); } +static inline bool ptep_try_set(pte_t *ptep, pte_t new_pte) +{ + pteval_t old = 0; + + if (!try_cmpxchg(&pte_val(*ptep), &old, pte_val(new_pte))) + return false; + + /* + * The store must be complete by the time this returns, but the caller + * may be in lazy MMU mode, where __set_pte_complete() would defer the + * barriers. Issue them directly. + */ + emit_pte_barriers(); + return true; +} +#define ptep_try_set ptep_try_set + +/* + * arm64 mandates break-before-make: a cleared kernel PTE must have its TLB + * invalidated before a different page is installed in its place. The broadcast + * TLBI is an instruction, not an IPI, so this is safe with interrupts disabled. + */ +static inline void flush_tlb_before_set(unsigned long addr) +{ + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); +} +#define flush_tlb_before_set flush_tlb_before_set + #define test_and_clear_young_ptes test_and_clear_young_ptes static inline bool test_and_clear_young_ptes(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, unsigned int nr) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 739800835920..85e23388f9bb 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -9,6 +9,7 @@ #include <linux/acpi.h> #include <linux/bitfield.h> +#include <linux/bpf_defs.h> #include <linux/extable.h> #include <linux/kfence.h> #include <linux/signal.h> @@ -436,9 +437,12 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr, } else if (is_pkvm_stage2_abort(esr)) { msg = "access to hypervisor-protected memory"; } else { - if (esr_fsc_is_translation_fault(esr) && - kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs)) - return; + if (esr_fsc_is_translation_fault(esr)) { + if (kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs)) + return; + if (bpf_arena_handle_page_fault(addr, esr & ESR_ELx_WNR, regs->pc)) + return; + } msg = "paging request"; } diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 0816c40fc7af..f6bcc0e1a950 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -47,7 +47,7 @@ /* Map BPF registers to A64 registers */ static const int bpf2a64[] = { /* return value from in-kernel function, and exit value from eBPF */ - [BPF_REG_0] = A64_R(7), + [BPF_REG_0] = A64_R(8), /* arguments from eBPF program to in-kernel function */ [BPF_REG_1] = A64_R(0), [BPF_REG_2] = A64_R(1), @@ -86,6 +86,7 @@ struct jit_ctx { __le32 *image; __le32 *ro_image; u32 stack_size; + u16 stack_arg_size; u64 user_vm_start; u64 arena_vm_start; bool fp_used; @@ -533,13 +534,20 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) * | | * +-----+ <= (BPF_FP - prog->aux->stack_depth) * |RSVD | padding - * current A64_SP => +-----+ <= (BPF_FP - ctx->stack_size) + * +-----+ <= (BPF_FP - ctx->stack_size) + * | | + * | ... | outgoing stack args (9+, if any) + * | | + * current A64_SP => +-----+ * | | * | ... | Function call stack * | | * +-----+ * low * + * Stack args 6-8 are passed in x5-x7, args 9+ at [SP]. + * Incoming args 9+ are at [A64_FP + 16], [A64_FP + 24], ... + * (above the saved FP/LR pair pushed in the callee prologue). */ emit_kcfi(is_main_prog ? cfi_bpf_hash : cfi_bpf_subprog_hash, ctx); @@ -613,6 +621,9 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) if (ctx->stack_size && !ctx->priv_sp_used) emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); + if (ctx->stack_arg_size) + emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_arg_size), ctx); + if (ctx->arena_vm_start) emit_a64_mov_i64(arena_vm_base, ctx->arena_vm_start, ctx); @@ -673,6 +684,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) /* Update tail_call_cnt if the slot is populated. */ emit(A64_STR64I(tcc, ptr, 0), ctx); + if (ctx->stack_arg_size) + emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_arg_size), ctx); + /* restore SP */ if (ctx->stack_size && !ctx->priv_sp_used) emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); @@ -1034,6 +1048,9 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic) const u8 r0 = bpf2a64[BPF_REG_0]; const u8 ptr = bpf2a64[TCCNT_PTR]; + if (ctx->stack_arg_size) + emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_arg_size), ctx); + /* We're done with BPF stack */ if (ctx->stack_size && !ctx->priv_sp_used) emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); @@ -1048,7 +1065,7 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic) /* Restore FP/LR registers */ emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); - /* Move the return value from bpf:r0 (aka x7) to x0 */ + /* Move the return value from bpf:r0 (aka x8) to x0 */ emit(A64_MOV(1, A64_R(0), r0), ctx); /* Authenticate lr */ @@ -1191,6 +1208,42 @@ static int add_exception_handler(const struct bpf_insn *insn, return 0; } +static const u8 stack_arg_reg[] = { A64_R(5), A64_R(6), A64_R(7) }; + +#define NR_STACK_ARG_REGS ARRAY_SIZE(stack_arg_reg) + +static void emit_stack_arg_load(u8 dst, s16 bpf_off, struct jit_ctx *ctx) +{ + int idx = bpf_off / sizeof(u64) - 1; + + if (idx < NR_STACK_ARG_REGS) + emit(A64_MOV(1, dst, stack_arg_reg[idx]), ctx); + else + emit(A64_LDR64I(dst, A64_FP, (idx - NR_STACK_ARG_REGS) * sizeof(u64) + 16), ctx); +} + +static void emit_stack_arg_store(u8 src_a64, s16 bpf_off, struct jit_ctx *ctx) +{ + int idx = -bpf_off / sizeof(u64) - 1; + + if (idx < NR_STACK_ARG_REGS) + emit(A64_MOV(1, stack_arg_reg[idx], src_a64), ctx); + else + emit(A64_STR64I(src_a64, A64_SP, (idx - NR_STACK_ARG_REGS) * sizeof(u64)), ctx); +} + +static void emit_stack_arg_store_imm(s32 imm, s16 bpf_off, const u8 tmp, struct jit_ctx *ctx) +{ + int idx = -bpf_off / sizeof(u64) - 1; + + if (idx < NR_STACK_ARG_REGS) { + emit_a64_mov_i(1, stack_arg_reg[idx], imm, ctx); + } else { + emit_a64_mov_i(1, tmp, imm, ctx); + emit(A64_STR64I(tmp, A64_SP, (idx - NR_STACK_ARG_REGS) * sizeof(u64)), ctx); + } +} + /* JITs an eBPF instruction. * Returns: * 0 - successfully JITed an 8-byte eBPF instruction. @@ -1646,6 +1699,11 @@ emit_cond_jmp: case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_DW: + if (insn->src_reg == BPF_REG_PARAMS) { + emit_stack_arg_load(dst, off, ctx); + break; + } + fallthrough; case BPF_LDX | BPF_PROBE_MEM | BPF_DW: case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_PROBE_MEM | BPF_H: @@ -1672,6 +1730,8 @@ emit_cond_jmp: if (src == fp) { src_adj = ctx->priv_sp_used ? priv_sp : A64_SP; off_adj = off + ctx->stack_size; + if (!ctx->priv_sp_used) + off_adj += ctx->stack_arg_size; } else { src_adj = src; off_adj = off; @@ -1752,6 +1812,11 @@ emit_cond_jmp: case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_B: case BPF_ST | BPF_MEM | BPF_DW: + if (insn->dst_reg == BPF_REG_PARAMS) { + emit_stack_arg_store_imm(imm, off, tmp, ctx); + break; + } + fallthrough; case BPF_ST | BPF_PROBE_MEM32 | BPF_B: case BPF_ST | BPF_PROBE_MEM32 | BPF_H: case BPF_ST | BPF_PROBE_MEM32 | BPF_W: @@ -1763,6 +1828,8 @@ emit_cond_jmp: if (dst == fp) { dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP; off_adj = off + ctx->stack_size; + if (!ctx->priv_sp_used) + off_adj += ctx->stack_arg_size; } else { dst_adj = dst; off_adj = off; @@ -1814,6 +1881,11 @@ emit_cond_jmp: case BPF_STX | BPF_MEM | BPF_H: case BPF_STX | BPF_MEM | BPF_B: case BPF_STX | BPF_MEM | BPF_DW: + if (insn->dst_reg == BPF_REG_PARAMS) { + emit_stack_arg_store(src, off, ctx); + break; + } + fallthrough; case BPF_STX | BPF_PROBE_MEM32 | BPF_B: case BPF_STX | BPF_PROBE_MEM32 | BPF_H: case BPF_STX | BPF_PROBE_MEM32 | BPF_W: @@ -1825,6 +1897,8 @@ emit_cond_jmp: if (dst == fp) { dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP; off_adj = off + ctx->stack_size; + if (!ctx->priv_sp_used) + off_adj += ctx->stack_arg_size; } else { dst_adj = dst; off_adj = off; @@ -2018,6 +2092,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr u8 *ro_image_ptr; int body_idx; int exentry_idx; + int out_cnt; if (!prog->jit_requested) return prog; @@ -2065,6 +2140,14 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena); + out_cnt = bpf_out_stack_arg_cnt(env, prog); + if (out_cnt) { + int nr_on_stack = out_cnt - NR_STACK_ARG_REGS; + + if (nr_on_stack > 0) + ctx.stack_arg_size = round_up(nr_on_stack * sizeof(u64), 16); + } + if (priv_stack_ptr) ctx.priv_sp_used = true; @@ -2229,6 +2312,11 @@ bool bpf_jit_supports_kfunc_call(void) return true; } +bool bpf_jit_supports_stack_args(void) +{ + return true; +} + void *bpf_arch_text_copy(void *dst, void *src, size_t len) { if (!aarch64_insn_copy(dst, src, len)) @@ -2247,24 +2335,24 @@ bool bpf_jit_supports_subprog_tailcalls(void) return true; } -static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, +static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_node *node, int bargs_off, int retval_off, int run_ctx_off, bool save_ret) { __le32 *branch; u64 enter_prog; u64 exit_prog; - struct bpf_prog *p = l->link.prog; + struct bpf_prog *p = node->link->prog; int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); enter_prog = (u64)bpf_trampoline_enter(p); exit_prog = (u64)bpf_trampoline_exit(p); - if (l->cookie == 0) { + if (node->cookie == 0) { /* if cookie is zero, one instruction is enough to store it */ emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx); } else { - emit_a64_mov_i64(A64_R(10), l->cookie, ctx); + emit_a64_mov_i64(A64_R(10), node->cookie, ctx); emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off), ctx); } @@ -2314,7 +2402,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, emit_call(exit_prog, ctx); } -static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, +static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_nodes *tn, int bargs_off, int retval_off, int run_ctx_off, __le32 **branches) { @@ -2324,8 +2412,8 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, * Set this to 0 to avoid confusing the program. */ emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx); - for (i = 0; i < tl->nr_links; i++) { - invoke_bpf_prog(ctx, tl->links[i], bargs_off, retval_off, + for (i = 0; i < tn->nr_nodes; i++) { + invoke_bpf_prog(ctx, tn->nodes[i], bargs_off, retval_off, run_ctx_off, true); /* if (*(u64 *)(sp + retval_off) != 0) * goto do_fexit; @@ -2456,10 +2544,10 @@ static void restore_args(struct jit_ctx *ctx, int bargs_off, int nregs) } } -static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links) +static bool is_struct_ops_tramp(const struct bpf_tramp_nodes *fentry_nodes) { - return fentry_links->nr_links == 1 && - fentry_links->links[0]->link.type == BPF_LINK_TYPE_STRUCT_OPS; + return fentry_nodes->nr_nodes == 1 && + fentry_nodes->nodes[0]->link->type == BPF_LINK_TYPE_STRUCT_OPS; } static void store_func_meta(struct jit_ctx *ctx, u64 func_meta, int func_meta_off) @@ -2480,7 +2568,7 @@ static void store_func_meta(struct jit_ctx *ctx, u64 func_meta, int func_meta_of * */ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, - struct bpf_tramp_links *tlinks, void *func_addr, + struct bpf_tramp_nodes *tnodes, void *func_addr, const struct btf_func_model *m, const struct arg_aux *a, u32 flags) @@ -2496,14 +2584,14 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, int run_ctx_off; int oargs_off; int nfuncargs; - struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; - struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; - struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY]; + struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT]; + struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN]; bool save_ret; __le32 **branches = NULL; bool is_struct_ops = is_struct_ops_tramp(fentry); int cookie_off, cookie_cnt, cookie_bargs_off; - int fsession_cnt = bpf_fsession_cnt(tlinks); + int fsession_cnt = bpf_fsession_cnt(tnodes); u64 func_meta; /* trampoline stack layout: @@ -2549,7 +2637,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, cookie_off = stack_size; /* room for session cookies */ - cookie_cnt = bpf_fsession_cookie_cnt(tlinks); + cookie_cnt = bpf_fsession_cookie_cnt(tnodes); stack_size += cookie_cnt * 8; ip_off = stack_size; @@ -2646,20 +2734,20 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, } cookie_bargs_off = (bargs_off - cookie_off) / 8; - for (i = 0; i < fentry->nr_links; i++) { - if (bpf_prog_calls_session_cookie(fentry->links[i])) { + for (i = 0; i < fentry->nr_nodes; i++) { + if (bpf_prog_calls_session_cookie(fentry->nodes[i])) { u64 meta = func_meta | (cookie_bargs_off << BPF_TRAMP_COOKIE_INDEX_SHIFT); store_func_meta(ctx, meta, func_meta_off); cookie_bargs_off--; } - invoke_bpf_prog(ctx, fentry->links[i], bargs_off, + invoke_bpf_prog(ctx, fentry->nodes[i], bargs_off, retval_off, run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET); } - if (fmod_ret->nr_links) { - branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *), + if (fmod_ret->nr_nodes) { + branches = kcalloc(fmod_ret->nr_nodes, sizeof(__le32 *), GFP_KERNEL); if (!branches) return -ENOMEM; @@ -2683,7 +2771,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, } /* update the branches saved in invoke_bpf_mod_ret with cbnz */ - for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) { + for (i = 0; i < fmod_ret->nr_nodes && ctx->image != NULL; i++) { int offset = &ctx->image[ctx->idx] - branches[i]; *branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset)); } @@ -2694,14 +2782,14 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, store_func_meta(ctx, func_meta, func_meta_off); cookie_bargs_off = (bargs_off - cookie_off) / 8; - for (i = 0; i < fexit->nr_links; i++) { - if (bpf_prog_calls_session_cookie(fexit->links[i])) { + for (i = 0; i < fexit->nr_nodes; i++) { + if (bpf_prog_calls_session_cookie(fexit->nodes[i])) { u64 meta = func_meta | (cookie_bargs_off << BPF_TRAMP_COOKIE_INDEX_SHIFT); store_func_meta(ctx, meta, func_meta_off); cookie_bargs_off--; } - invoke_bpf_prog(ctx, fexit->links[i], bargs_off, retval_off, + invoke_bpf_prog(ctx, fexit->nodes[i], bargs_off, retval_off, run_ctx_off, false); } @@ -2759,7 +2847,7 @@ bool bpf_jit_supports_fsession(void) } int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, - struct bpf_tramp_links *tlinks, void *func_addr) + struct bpf_tramp_nodes *tnodes, void *func_addr) { struct jit_ctx ctx = { .image = NULL, @@ -2773,7 +2861,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, if (ret < 0) return ret; - ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, m, &aaux, flags); + ret = prepare_trampoline(&ctx, &im, tnodes, func_addr, m, &aaux, flags); if (ret < 0) return ret; @@ -2797,7 +2885,7 @@ int arch_protect_bpf_trampoline(void *image, unsigned int size) int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, void *ro_image_end, const struct btf_func_model *m, - u32 flags, struct bpf_tramp_links *tlinks, + u32 flags, struct bpf_tramp_nodes *tnodes, void *func_addr) { u32 size = ro_image_end - ro_image; @@ -2824,7 +2912,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, ret = calc_arg_aux(m, &aaux); if (ret) goto out; - ret = prepare_trampoline(&ctx, im, tlinks, func_addr, m, &aaux, flags); + ret = prepare_trampoline(&ctx, im, tnodes, func_addr, m, &aaux, flags); if (ret > 0 && validate_code(&ctx) < 0) { ret = -EINVAL; diff --git a/arch/arm64/net/bpf_timed_may_goto.S b/arch/arm64/net/bpf_timed_may_goto.S index 894cfcd7b241..a9a802711a7f 100644 --- a/arch/arm64/net/bpf_timed_may_goto.S +++ b/arch/arm64/net/bpf_timed_may_goto.S @@ -8,8 +8,8 @@ SYM_FUNC_START(arch_bpf_timed_may_goto) stp x29, x30, [sp, #-64]! mov x29, sp - /* Save BPF registers R0 - R5 (x7, x0-x4)*/ - stp x7, x0, [sp, #16] + /* Save BPF registers R0 - R5 (x8, x0-x4)*/ + stp x8, x0, [sp, #16] stp x1, x2, [sp, #32] stp x3, x4, [sp, #48] @@ -28,8 +28,8 @@ SYM_FUNC_START(arch_bpf_timed_may_goto) /* BPF_REG_AX(x9) will be stored into count, so move return value to it. */ mov x9, x0 - /* Restore BPF registers R0 - R5 (x7, x0-x4) */ - ldp x7, x0, [sp, #16] + /* Restore BPF registers R0 - R5 (x8, x0-x4) */ + ldp x8, x0, [sp, #16] ldp x1, x2, [sp, #32] ldp x3, x4, [sp, #48] diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 24913dc7f4e8..058ffbbaad85 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1674,17 +1674,17 @@ static void restore_stk_args(struct jit_ctx *ctx, int nr_stk_args, int args_off, } } -static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, +static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_node *n, int args_off, int retval_off, int run_ctx_off, bool save_ret) { int ret; u32 *branch; - struct bpf_prog *p = l->link.prog; + struct bpf_prog *p = n->link->prog; int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); - if (l->cookie) + if (n->cookie) emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, - -run_ctx_off + cookie_off, l->cookie); + -run_ctx_off + cookie_off, n->cookie); else emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off); @@ -1737,22 +1737,22 @@ static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, return ret; } -static int invoke_bpf(struct jit_ctx *ctx, struct bpf_tramp_links *tl, +static int invoke_bpf(struct jit_ctx *ctx, struct bpf_tramp_nodes *tn, int args_off, int retval_off, int run_ctx_off, int func_meta_off, bool save_ret, u64 func_meta, int cookie_off) { int i, cur_cookie = (cookie_off - args_off) / 8; - for (i = 0; i < tl->nr_links; i++) { + for (i = 0; i < tn->nr_nodes; i++) { int err; - if (bpf_prog_calls_session_cookie(tl->links[i])) { + if (bpf_prog_calls_session_cookie(tn->nodes[i])) { u64 meta = func_meta | ((u64)cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT); emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -func_meta_off, meta); cur_cookie--; } - err = invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, run_ctx_off, save_ret); + err = invoke_bpf_prog(ctx, tn->nodes[i], args_off, retval_off, run_ctx_off, save_ret); if (err) return err; } @@ -1807,7 +1807,7 @@ static void sign_extend(struct jit_ctx *ctx, int rd, int rj, u8 size, bool sign) } static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, - const struct btf_func_model *m, struct bpf_tramp_links *tlinks, + const struct btf_func_model *m, struct bpf_tramp_nodes *tnodes, void *func_addr, u32 flags) { int i, ret, save_ret; @@ -1817,9 +1817,9 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i unsigned long long func_meta; bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT; void *orig_call = func_addr; - struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; - struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; - struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY]; + struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT]; + struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN]; u32 **branches = NULL; /* @@ -1898,7 +1898,7 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i ip_off = stack_size; } - cookie_cnt = bpf_fsession_cookie_cnt(tlinks); + cookie_cnt = bpf_fsession_cookie_cnt(tnodes); /* Room for session cookies */ stack_size += cookie_cnt * 8; @@ -1969,7 +1969,7 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i store_args(ctx, nr_arg_slots, args_off); - if (bpf_fsession_cnt(tlinks)) { + if (bpf_fsession_cnt(tnodes)) { /* clear all session cookies' value */ for (i = 0; i < cookie_cnt; i++) emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -cookie_off + 8 * i); @@ -1994,20 +1994,20 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i return ret; } - if (fentry->nr_links) { + if (fentry->nr_nodes) { ret = invoke_bpf(ctx, fentry, args_off, retval_off, run_ctx_off, func_meta_off, flags & BPF_TRAMP_F_RET_FENTRY_RET, func_meta, cookie_off); if (ret) return ret; } - if (fmod_ret->nr_links) { - branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *), GFP_KERNEL); + if (fmod_ret->nr_nodes) { + branches = kcalloc(fmod_ret->nr_nodes, sizeof(u32 *), GFP_KERNEL); if (!branches) return -ENOMEM; emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off); - for (i = 0; i < fmod_ret->nr_links; i++) { - ret = invoke_bpf_prog(ctx, fmod_ret->links[i], + for (i = 0; i < fmod_ret->nr_nodes; i++) { + ret = invoke_bpf_prog(ctx, fmod_ret->nodes[i], args_off, retval_off, run_ctx_off, true); if (ret) goto out; @@ -2035,17 +2035,17 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i emit_insn(ctx, nop); } - for (i = 0; ctx->image && i < fmod_ret->nr_links; i++) { + for (i = 0; ctx->image && i < fmod_ret->nr_nodes; i++) { int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branches[i]; *branches[i] = larch_insn_gen_bne(LOONGARCH_GPR_T1, LOONGARCH_GPR_ZERO, offset); } /* Set "is_return" flag for fsession */ func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT); - if (bpf_fsession_cnt(tlinks)) + if (bpf_fsession_cnt(tnodes)) emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -func_meta_off, func_meta); - if (fexit->nr_links) { + if (fexit->nr_nodes) { ret = invoke_bpf(ctx, fexit, args_off, retval_off, run_ctx_off, func_meta_off, false, func_meta, cookie_off); if (ret) @@ -2115,7 +2115,7 @@ out: int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, void *ro_image_end, const struct btf_func_model *m, - u32 flags, struct bpf_tramp_links *tlinks, void *func_addr) + u32 flags, struct bpf_tramp_nodes *tnodes, void *func_addr) { int ret, size; void *image, *tmp; @@ -2131,7 +2131,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, ctx.idx = 0; jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image)); - ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tlinks, func_addr, flags); + ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tnodes, func_addr, flags); if (ret < 0) goto out; @@ -2152,7 +2152,7 @@ out: } int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, - struct bpf_tramp_links *tlinks, void *func_addr) + struct bpf_tramp_nodes *tnodes, void *func_addr) { int ret; struct jit_ctx ctx; @@ -2161,7 +2161,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, ctx.image = NULL; ctx.idx = 0; - ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tlinks, func_addr, flags); + ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tnodes, func_addr, flags); return ret < 0 ? ret : ret * LOONGARCH_INSN_SIZE; } diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 53ab97ad6074..6351a187ca61 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -597,22 +597,22 @@ int arch_protect_bpf_trampoline(void *image, unsigned int size) } static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ctx, - struct bpf_tramp_link *l, int regs_off, int retval_off, + struct bpf_tramp_node *n, int regs_off, int retval_off, int run_ctx_off, bool save_ret) { - struct bpf_prog *p = l->link.prog; + struct bpf_prog *p = n->link->prog; ppc_inst_t branch_insn; u32 jmp_idx; int ret = 0; /* Save cookie */ if (IS_ENABLED(CONFIG_PPC64)) { - PPC_LI64(_R3, l->cookie); + PPC_LI64(_R3, n->cookie); EMIT(PPC_RAW_STD(_R3, _R1, run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie))); } else { - PPC_LI32(_R3, l->cookie >> 32); - PPC_LI32(_R4, l->cookie); + PPC_LI32(_R3, n->cookie >> 32); + PPC_LI32(_R4, n->cookie); EMIT(PPC_RAW_STW(_R3, _R1, run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie))); EMIT(PPC_RAW_STW(_R4, _R1, @@ -679,7 +679,7 @@ static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ct } static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context *ctx, - struct bpf_tramp_links *tl, int regs_off, int retval_off, + struct bpf_tramp_nodes *tn, int regs_off, int retval_off, int run_ctx_off, u32 *branches) { int i; @@ -690,8 +690,8 @@ static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context */ EMIT(PPC_RAW_LI(_R3, 0)); EMIT(PPC_RAW_STL(_R3, _R1, retval_off)); - for (i = 0; i < tl->nr_links; i++) { - if (invoke_bpf_prog(image, ro_image, ctx, tl->links[i], regs_off, retval_off, + for (i = 0; i < tn->nr_nodes; i++) { + if (invoke_bpf_prog(image, ro_image, ctx, tn->nodes[i], regs_off, retval_off, run_ctx_off, true)) return -EINVAL; @@ -807,18 +807,18 @@ static void bpf_trampoline_restore_args_stack(u32 *image, struct codegen_context static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image, void *rw_image_end, void *ro_image, const struct btf_func_model *m, u32 flags, - struct bpf_tramp_links *tlinks, + struct bpf_tramp_nodes *tnodes, void *func_addr) { int regs_off, func_meta_off, ip_off, run_ctx_off, retval_off; int nvr_off, alt_lr_off, r4_off = 0; - struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; - struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; - struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; + struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN]; + struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY]; + struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT]; int i, ret, nr_regs, retaddr_off, bpf_frame_size = 0; struct codegen_context codegen_ctx, *ctx; int cookie_off, cookie_cnt, cookie_ctx_off; - int fsession_cnt = bpf_fsession_cnt(tlinks); + int fsession_cnt = bpf_fsession_cnt(tnodes); u64 func_meta; u32 *image = (u32 *)rw_image; ppc_inst_t branch_insn; @@ -893,7 +893,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im /* room for session cookies */ cookie_off = bpf_frame_size; - cookie_cnt = bpf_fsession_cookie_cnt(tlinks); + cookie_cnt = bpf_fsession_cookie_cnt(tnodes); bpf_frame_size += cookie_cnt * 8; /* Room for IP address argument */ @@ -1030,21 +1030,21 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im cookie_ctx_off = (regs_off - cookie_off) / 8; - for (i = 0; i < fentry->nr_links; i++) { - if (bpf_prog_calls_session_cookie(fentry->links[i])) { + for (i = 0; i < fentry->nr_nodes; i++) { + if (bpf_prog_calls_session_cookie(fentry->nodes[i])) { u64 meta = func_meta | (cookie_ctx_off << BPF_TRAMP_COOKIE_INDEX_SHIFT); store_func_meta(image, ctx, meta, func_meta_off); cookie_ctx_off--; } - if (invoke_bpf_prog(image, ro_image, ctx, fentry->links[i], regs_off, retval_off, + if (invoke_bpf_prog(image, ro_image, ctx, fentry->nodes[i], regs_off, retval_off, run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET)) return -EINVAL; } - if (fmod_ret->nr_links) { - branches = kcalloc(fmod_ret->nr_links, sizeof(u32), GFP_KERNEL); + if (fmod_ret->nr_nodes) { + branches = kcalloc(fmod_ret->nr_nodes, sizeof(u32), GFP_KERNEL); if (!branches) return -ENOMEM; @@ -1093,7 +1093,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im } /* Update branches saved in invoke_bpf_mod_ret with address of do_fexit */ - for (i = 0; i < fmod_ret->nr_links && image; i++) { + for (i = 0; i < fmod_ret->nr_nodes && image; i++) { if (create_cond_branch(&branch_insn, &image[branches[i]], (unsigned long)&image[ctx->idx], COND_NE << 16)) { ret = -EINVAL; @@ -1110,15 +1110,15 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im cookie_ctx_off = (regs_off - cookie_off) / 8; - for (i = 0; i < fexit->nr_links; i++) { - if (bpf_prog_calls_session_cookie(fexit->links[i])) { + for (i = 0; i < fexit->nr_nodes; i++) { + if (bpf_prog_calls_session_cookie(fexit->nodes[i])) { u64 meta = func_meta | (cookie_ctx_off << BPF_TRAMP_COOKIE_INDEX_SHIFT); store_func_meta(image, ctx, meta, func_meta_off); cookie_ctx_off--; } - if (invoke_bpf_prog(image, ro_image, ctx, fexit->links[i], regs_off, retval_off, + if (invoke_bpf_prog(image, ro_image, ctx, fexit->nodes[i], regs_off, retval_off, run_ctx_off, false)) { ret = -EINVAL; goto cleanup; @@ -1185,18 +1185,18 @@ cleanup: } int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, - struct bpf_tramp_links *tlinks, void *func_addr) + struct bpf_tramp_nodes *tnodes, void *func_addr) { struct bpf_tramp_image im; int ret; - ret = __arch_prepare_bpf_trampoline(&im, NULL, NULL, NULL, m, flags, tlinks, func_addr); + ret = __arch_prepare_bpf_trampoline(&im, NULL, NULL, NULL, m, flags, tnodes, func_addr); return ret; } int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_tramp_links *tlinks, + struct bpf_tramp_nodes *tnodes, void *func_addr) { u32 size = image_end - image; @@ -1212,7 +1212,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i return -ENOMEM; ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m, - flags, tlinks, func_addr); + flags, tnodes, func_addr); if (ret < 0) goto out; diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 2f1109dbf105..c03c1de16b79 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -934,15 +934,15 @@ static void emit_store_stack_imm64(u8 reg, int stack_off, u64 imm64, emit_sd(RV_REG_FP, stack_off, reg, ctx); } -static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_off, +static int invoke_bpf_prog(struct bpf_tramp_node *node, int args_off, int retval_off, int run_ctx_off, bool save_ret, struct rv_jit_context *ctx) { int ret, branch_off; - struct bpf_prog *p = l->link.prog; + struct bpf_prog *p = node->link->prog; int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); - if (l->cookie) - emit_store_stack_imm64(RV_REG_T1, -run_ctx_off + cookie_off, l->cookie, ctx); + if (node->cookie) + emit_store_stack_imm64(RV_REG_T1, -run_ctx_off + cookie_off, node->cookie, ctx); else emit_sd(RV_REG_FP, -run_ctx_off + cookie_off, RV_REG_ZERO, ctx); @@ -996,22 +996,22 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of return ret; } -static int invoke_bpf(struct bpf_tramp_links *tl, int args_off, int retval_off, +static int invoke_bpf(struct bpf_tramp_nodes *tn, int args_off, int retval_off, int run_ctx_off, int func_meta_off, bool save_ret, u64 func_meta, int cookie_off, struct rv_jit_context *ctx) { int i, cur_cookie = (cookie_off - args_off) / 8; - for (i = 0; i < tl->nr_links; i++) { + for (i = 0; i < tn->nr_nodes; i++) { int err; - if (bpf_prog_calls_session_cookie(tl->links[i])) { + if (bpf_prog_calls_session_cookie(tn->nodes[i])) { u64 meta = func_meta | ((u64)cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT); emit_store_stack_imm64(RV_REG_T1, -func_meta_off, meta, ctx); cur_cookie--; } - err = invoke_bpf_prog(tl->links[i], args_off, retval_off, run_ctx_off, + err = invoke_bpf_prog(tn->nodes[i], args_off, retval_off, run_ctx_off, save_ret, ctx); if (err) return err; @@ -1021,7 +1021,7 @@ static int invoke_bpf(struct bpf_tramp_links *tl, int args_off, int retval_off, static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, const struct btf_func_model *m, - struct bpf_tramp_links *tlinks, + struct bpf_tramp_nodes *tnodes, void *func_addr, u32 flags, struct rv_jit_context *ctx) { @@ -1030,9 +1030,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, int stack_size = 0, nr_arg_slots = 0; int retval_off, args_off, func_meta_off, ip_off, run_ctx_off, sreg_off, stk_arg_off; int cookie_off, cookie_cnt; - struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; - struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; - struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY]; + struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT]; + struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN]; bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT; void *orig_call = func_addr; bool save_ret; @@ -1115,7 +1115,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, ip_off = stack_size; } - cookie_cnt = bpf_fsession_cookie_cnt(tlinks); + cookie_cnt = bpf_fsession_cookie_cnt(tnodes); /* room for session cookies */ stack_size += cookie_cnt * 8; cookie_off = stack_size; @@ -1172,7 +1172,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, store_args(nr_arg_slots, args_off, ctx); - if (bpf_fsession_cnt(tlinks)) { + if (bpf_fsession_cnt(tnodes)) { /* clear all session cookies' value */ for (i = 0; i < cookie_cnt; i++) emit_sd(RV_REG_FP, -cookie_off + 8 * i, RV_REG_ZERO, ctx); @@ -1187,22 +1187,22 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, return ret; } - if (fentry->nr_links) { + if (fentry->nr_nodes) { ret = invoke_bpf(fentry, args_off, retval_off, run_ctx_off, func_meta_off, flags & BPF_TRAMP_F_RET_FENTRY_RET, func_meta, cookie_off, ctx); if (ret) return ret; } - if (fmod_ret->nr_links) { - branches_off = kzalloc_objs(int, fmod_ret->nr_links); + if (fmod_ret->nr_nodes) { + branches_off = kzalloc_objs(int, fmod_ret->nr_nodes); if (!branches_off) return -ENOMEM; /* cleanup to avoid garbage return value confusion */ emit_sd(RV_REG_FP, -retval_off, RV_REG_ZERO, ctx); - for (i = 0; i < fmod_ret->nr_links; i++) { - ret = invoke_bpf_prog(fmod_ret->links[i], args_off, retval_off, + for (i = 0; i < fmod_ret->nr_nodes; i++) { + ret = invoke_bpf_prog(fmod_ret->nodes[i], args_off, retval_off, run_ctx_off, true, ctx); if (ret) goto out; @@ -1230,7 +1230,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, } /* update branches saved in invoke_bpf_mod_ret with bnez */ - for (i = 0; ctx->insns && i < fmod_ret->nr_links; i++) { + for (i = 0; ctx->insns && i < fmod_ret->nr_nodes; i++) { offset = ninsns_rvoff(ctx->ninsns - branches_off[i]); insn = rv_bne(RV_REG_T1, RV_REG_ZERO, offset >> 1); *(u32 *)(ctx->insns + branches_off[i]) = insn; @@ -1238,10 +1238,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, /* set "is_return" flag for fsession */ func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT); - if (bpf_fsession_cnt(tlinks)) + if (bpf_fsession_cnt(tnodes)) emit_store_stack_imm64(RV_REG_T1, -func_meta_off, func_meta, ctx); - if (fexit->nr_links) { + if (fexit->nr_nodes) { ret = invoke_bpf(fexit, args_off, retval_off, run_ctx_off, func_meta_off, false, func_meta, cookie_off, ctx); if (ret) @@ -1305,7 +1305,7 @@ out: } int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, - struct bpf_tramp_links *tlinks, void *func_addr) + struct bpf_tramp_nodes *tnodes, void *func_addr) { struct bpf_tramp_image im; struct rv_jit_context ctx; @@ -1314,7 +1314,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, ctx.ninsns = 0; ctx.insns = NULL; ctx.ro_insns = NULL; - ret = __arch_prepare_bpf_trampoline(&im, m, tlinks, func_addr, flags, &ctx); + ret = __arch_prepare_bpf_trampoline(&im, m, tnodes, func_addr, flags, &ctx); return ret < 0 ? ret : ninsns_rvoff(ctx.ninsns); } @@ -1331,7 +1331,7 @@ void arch_free_bpf_trampoline(void *image, unsigned int size) int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, void *ro_image_end, const struct btf_func_model *m, - u32 flags, struct bpf_tramp_links *tlinks, + u32 flags, struct bpf_tramp_nodes *tnodes, void *func_addr) { int ret; @@ -1346,7 +1346,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, ctx.ninsns = 0; ctx.insns = image; ctx.ro_insns = ro_image; - ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx); + ret = __arch_prepare_bpf_trampoline(im, m, tnodes, func_addr, flags, &ctx); if (ret < 0) goto out; @@ -1808,6 +1808,13 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, break; } + /* Implement helper call to bpf_get_current_task/_btf() inline */ + if (insn->src_reg == 0 && (insn->imm == BPF_FUNC_get_current_task || + insn->imm == BPF_FUNC_get_current_task_btf)) { + emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_TP, ctx); + break; + } + mark_call(ctx); ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr, &fixed_addr); @@ -2138,6 +2145,8 @@ bool bpf_jit_inlines_helper_call(s32 imm) { switch (imm) { case BPF_FUNC_get_smp_processor_id: + case BPF_FUNC_get_current_task: + case BPF_FUNC_get_current_task_btf: return true; default: return false; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 94128fe6be23..31749c0362ca 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -27,6 +27,7 @@ #include <asm/extable.h> #include <asm/dis.h> #include <asm/facility.h> +#include <asm/lowcore.h> #include <asm/nospec-branch.h> #include <asm/set_memory.h> #include <asm/text-patching.h> @@ -1777,6 +1778,30 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int j, ret; u64 func; + /* Implement helper call to bpf_get_smp_processor_id() inline */ + if (insn->src_reg == 0 && + insn->imm == BPF_FUNC_get_smp_processor_id) { + const u32 *cpu_nr = &get_lowcore()->cpu_nr; + + /* ly %b0, cpu_nr */ + EMIT6_DISP_LH(0xe3000000, 0x0058, BPF_REG_0, REG_0, REG_0, + (unsigned long)cpu_nr); + break; + } + + /* Implement helper call to bpf_get_current_task/_btf() inline */ + if (insn->src_reg == 0 && + (insn->imm == BPF_FUNC_get_current_task || + insn->imm == BPF_FUNC_get_current_task_btf)) { + const u64 *current_task = + &get_lowcore()->current_task; + + /* lg %b0, current_task */ + EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_0, REG_0, REG_0, + (unsigned long)current_task); + break; + } + ret = bpf_jit_get_func_addr(fp, insn, extra_pass, &func, &func_addr_fixed); if (ret < 0) @@ -2512,19 +2537,19 @@ static void emit_store_stack_imm64(struct bpf_jit *jit, int tmp_reg, int stack_o static int invoke_bpf_prog(struct bpf_tramp_jit *tjit, const struct btf_func_model *m, - struct bpf_tramp_link *tlink, bool save_ret) + struct bpf_tramp_node *node, bool save_ret) { struct bpf_jit *jit = &tjit->common; int cookie_off = tjit->run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie); - struct bpf_prog *p = tlink->link.prog; + struct bpf_prog *p = node->link->prog; int patch; /* - * run_ctx.cookie = tlink->cookie; + * run_ctx.cookie = node->cookie; */ - emit_store_stack_imm64(jit, REG_W0, cookie_off, tlink->cookie); + emit_store_stack_imm64(jit, REG_W0, cookie_off, node->cookie); /* * if ((start = __bpf_prog_enter(p, &run_ctx)) == 0) @@ -2584,20 +2609,20 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit, static int invoke_bpf(struct bpf_tramp_jit *tjit, const struct btf_func_model *m, - struct bpf_tramp_links *tl, bool save_ret, + struct bpf_tramp_nodes *tn, bool save_ret, u64 func_meta, int cookie_off) { int i, cur_cookie = (tjit->bpf_args_off - cookie_off) / sizeof(u64); struct bpf_jit *jit = &tjit->common; - for (i = 0; i < tl->nr_links; i++) { - if (bpf_prog_calls_session_cookie(tl->links[i])) { + for (i = 0; i < tn->nr_nodes; i++) { + if (bpf_prog_calls_session_cookie(tn->nodes[i])) { u64 meta = func_meta | ((u64)cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT); emit_store_stack_imm64(jit, REG_0, tjit->func_meta_off, meta); cur_cookie--; } - if (invoke_bpf_prog(tjit, m, tl->links[i], save_ret)) + if (invoke_bpf_prog(tjit, m, tn->nodes[i], save_ret)) return -EINVAL; } @@ -2626,12 +2651,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, struct bpf_tramp_jit *tjit, const struct btf_func_model *m, u32 flags, - struct bpf_tramp_links *tlinks, + struct bpf_tramp_nodes *tnodes, void *func_addr) { - struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; - struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; - struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; + struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN]; + struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY]; + struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT]; int nr_bpf_args, nr_reg_args, nr_stack_args; int cookie_cnt, cookie_off, fsession_cnt; struct bpf_jit *jit = &tjit->common; @@ -2668,8 +2693,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, return -ENOTSUPP; } - cookie_cnt = bpf_fsession_cookie_cnt(tlinks); - fsession_cnt = bpf_fsession_cnt(tlinks); + cookie_cnt = bpf_fsession_cookie_cnt(tnodes); + fsession_cnt = bpf_fsession_cnt(tnodes); /* * Calculate the stack layout. @@ -2804,7 +2829,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, func_meta, cookie_off)) return -EINVAL; - if (fmod_ret->nr_links) { + if (fmod_ret->nr_nodes) { /* * retval = 0; */ @@ -2813,8 +2838,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, _EMIT6(0xd707f000 | tjit->retval_off, 0xf000 | tjit->retval_off); - for (i = 0; i < fmod_ret->nr_links; i++) { - if (invoke_bpf_prog(tjit, m, fmod_ret->links[i], true)) + for (i = 0; i < fmod_ret->nr_nodes; i++) { + if (invoke_bpf_prog(tjit, m, fmod_ret->nodes[i], true)) return -EINVAL; /* @@ -2939,7 +2964,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, } int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, - struct bpf_tramp_links *tlinks, void *orig_call) + struct bpf_tramp_nodes *tnodes, void *orig_call) { struct bpf_tramp_image im; struct bpf_tramp_jit tjit; @@ -2948,14 +2973,14 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, memset(&tjit, 0, sizeof(tjit)); ret = __arch_prepare_bpf_trampoline(&im, &tjit, m, flags, - tlinks, orig_call); + tnodes, orig_call); return ret < 0 ? ret : tjit.common.prg; } int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, - u32 flags, struct bpf_tramp_links *tlinks, + u32 flags, struct bpf_tramp_nodes *tnodes, void *func_addr) { struct bpf_tramp_jit tjit; @@ -2964,7 +2989,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, /* Compute offsets, check whether the code fits. */ memset(&tjit, 0, sizeof(tjit)); ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags, - tlinks, func_addr); + tnodes, func_addr); if (ret < 0) return ret; @@ -2978,7 +3003,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, tjit.common.prg = 0; tjit.common.prg_buf = image; ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags, - tlinks, func_addr); + tnodes, func_addr); return ret < 0 ? ret : tjit.common.prg; } @@ -3057,3 +3082,15 @@ bool bpf_jit_supports_timed_may_goto(void) { return true; } + +bool bpf_jit_inlines_helper_call(s32 imm) +{ + switch (imm) { + case BPF_FUNC_get_smp_processor_id: + case BPF_FUNC_get_current_task: + case BPF_FUNC_get_current_task_btf: + return true; + default: + return false; + } +} diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 2187e9cfcefa..ac295ca6c92f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1284,6 +1284,18 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, } while (!try_cmpxchg((long *)&ptep->pte, (long *)&old_pte, *(long *)&new_pte)); } +/* + * Note: strictly-zero compare is narrower than pte_none(), but the gap is + * harmless: _PAGE_DIRTY and _PAGE_ACCESSED aren't set on untouched kernel PTEs. + */ +static inline bool ptep_try_set(pte_t *ptep, pte_t new_pte) +{ + pte_t old_pte = __pte(0); + + return try_cmpxchg((long *)&ptep->pte, (long *)&old_pte, *(long *)&new_pte); +} +#define ptep_try_set ptep_try_set + #define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0) #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 250942ef60bb..45b99c3b1442 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -8,6 +8,7 @@ #include <linux/sched/task_stack.h> /* task_stack_*(), ... */ #include <linux/kdebug.h> /* oops_begin/end, ... */ #include <linux/memblock.h> /* max_low_pfn */ +#include <linux/bpf_defs.h> /* bpf_arena_handle_page_fault */ #include <linux/kfence.h> /* kfence_handle_page_fault */ #include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */ #include <linux/mmiotrace.h> /* kmmio_handler, ... */ @@ -693,10 +694,13 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code, if (IS_ENABLED(CONFIG_EFI)) efi_crash_gracefully_on_page_fault(address, regs); - /* Only not-present faults should be handled by KFENCE. */ - if (!(error_code & X86_PF_PROT) && - kfence_handle_page_fault(address, error_code & X86_PF_WRITE, regs)) - return; + /* Only not-present faults should be handled by KFENCE or BPF arena. */ + if (!(error_code & X86_PF_PROT)) { + if (kfence_handle_page_fault(address, error_code & X86_PF_WRITE, regs)) + return; + if (bpf_arena_handle_page_fault(address, error_code & X86_PF_WRITE, regs->ip)) + return; + } oops: /* diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index ea9e707e8abf..054e043ffcd2 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -10,6 +10,7 @@ #include <linux/if_vlan.h> #include <linux/bitfield.h> #include <linux/bpf.h> +#include <linux/bpf_verifier.h> #include <linux/memory.h> #include <linux/sort.h> #include <asm/extable.h> @@ -390,6 +391,34 @@ static void pop_callee_regs(u8 **pprog, bool *callee_regs_used) *pprog = prog; } +/* add rsp, depth */ +static void emit_add_rsp(u8 **pprog, u16 depth) +{ + u8 *prog = *pprog; + + if (!depth) + return; + if (is_imm8(depth)) + EMIT4(0x48, 0x83, 0xC4, depth); /* add rsp, imm8 */ + else + EMIT3_off32(0x48, 0x81, 0xC4, depth); /* add rsp, imm32 */ + *pprog = prog; +} + +/* sub rsp, depth */ +static void emit_sub_rsp(u8 **pprog, u16 depth) +{ + u8 *prog = *pprog; + + if (!depth) + return; + if (is_imm8(depth)) + EMIT4(0x48, 0x83, 0xEC, depth); /* sub rsp, imm8 */ + else + EMIT3_off32(0x48, 0x81, 0xEC, depth); /* sub rsp, imm32 */ + *pprog = prog; +} + static void emit_nops(u8 **pprog, int len) { u8 *prog = *pprog; @@ -1659,21 +1688,47 @@ static int do_jit(struct bpf_verifier_env *env, struct bpf_prog *bpf_prog, int * bool seen_exit = false; u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; void __percpu *priv_frame_ptr = NULL; + u16 out_stack_arg_cnt, outgoing_rsp; u64 arena_vm_start, user_vm_start; void __percpu *priv_stack_ptr; int i, excnt = 0; int ilen, proglen = 0; u8 *ip, *prog = temp; u32 stack_depth; + int callee_saved_size; + s32 outgoing_arg_base; int err; stack_depth = bpf_prog->aux->stack_depth; + out_stack_arg_cnt = bpf_out_stack_arg_cnt(env, bpf_prog); priv_stack_ptr = bpf_prog->aux->priv_stack_ptr; if (priv_stack_ptr) { priv_frame_ptr = priv_stack_ptr + PRIV_STACK_GUARD_SZ + round_up(stack_depth, 8); stack_depth = 0; } + /* + * Follow x86-64 calling convention for both BPF-to-BPF and + * kfunc calls: + * - Arg 6 is passed in R9 register + * - Args 7+ are passed on the stack at [rsp] + * + * Incoming arg 6 is read from R9 (BPF r11+8 → MOV from R9). + * Incoming args 7+ are read from [rbp + 16], [rbp + 24], ... + * (BPF r11+16, r11+24, ... map directly with no offset change). + * + * tail_call_reachable is rejected by the verifier and priv_stack + * is disabled by the JIT when stack args exist, so R9 is always + * available. + * + * Stack layout (high to low): + * [rbp + 16 + ...] incoming stack args 7+ (from caller) + * [rbp + 8] return address + * [rbp] saved rbp + * [rbp - prog_stack] program stack + * [below] callee-saved regs + * [below] outgoing args 7+ (= rsp) + */ arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena); user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena); @@ -1700,6 +1755,44 @@ static int do_jit(struct bpf_verifier_env *env, struct bpf_prog *bpf_prog, int * push_r12(&prog); push_callee_regs(&prog, callee_regs_used); } + + /* Compute callee-saved register area size. */ + callee_saved_size = 0; + if (bpf_prog->aux->exception_boundary || arena_vm_start) + callee_saved_size += 8; /* r12 */ + if (bpf_prog->aux->exception_boundary) { + callee_saved_size += 4 * 8; /* rbx, r13, r14, r15 */ + } else { + int j; + + for (j = 0; j < 4; j++) + if (callee_regs_used[j]) + callee_saved_size += 8; + } + /* + * Base offset from rbp for translating BPF outgoing args 7+ + * to native offsets. BPF uses negative offsets from r11 + * (r11-8 for arg6, r11-16 for arg7, ...) while x86 uses + * positive offsets from rsp ([rsp+0] for arg7, [rsp+8] for + * arg8, ...). Arg 6 goes to R9 directly. + * + * The translation reverses direction: + * native_off = outgoing_arg_base - outgoing_rsp - bpf_off - 16 + * + * Note that tail_call_reachable is guaranteed to be false when + * stack args exist, so tcc pushes need not be accounted for. + */ + outgoing_arg_base = -(round_up(stack_depth, 8) + callee_saved_size); + + /* + * Allocate outgoing stack arg area for args 7+ only. + * Arg 6 goes into r9 register, not on stack. + */ + outgoing_rsp = out_stack_arg_cnt > 1 ? (out_stack_arg_cnt - 1) * 8 : 0; + if (bpf_prog->aux->exception_boundary) + bpf_prog->aux->stack_arg_sp_adjust = outgoing_rsp; + emit_sub_rsp(&prog, outgoing_rsp); + if (arena_vm_start) emit_mov_imm64(&prog, X86_REG_R12, arena_vm_start >> 32, (u32) arena_vm_start); @@ -1721,7 +1814,7 @@ static int do_jit(struct bpf_verifier_env *env, struct bpf_prog *bpf_prog, int * u8 b2 = 0, b3 = 0; u8 *start_of_ldx; s64 jmp_offset; - s16 insn_off; + s32 insn_off; u8 jmp_cond; u8 *func; int nops; @@ -2134,12 +2227,27 @@ static int do_jit(struct bpf_verifier_env *env, struct bpf_prog *bpf_prog, int * EMIT1(0xC7); goto st; case BPF_ST | BPF_MEM | BPF_DW: + if (dst_reg == BPF_REG_PARAMS && insn->off == -8) { + /* Arg 6: store immediate in r9 register */ + emit_mov_imm64(&prog, X86_REG_R9, imm32 >> 31, (u32)imm32); + break; + } EMIT2(add_1mod(0x48, dst_reg), 0xC7); -st: if (is_imm8(insn->off)) - EMIT2(add_1reg(0x40, dst_reg), insn->off); +st: insn_off = insn->off; + if (dst_reg == BPF_REG_PARAMS) { + /* + * Args 7+: reverse BPF negative offsets to + * x86 positive rsp offsets. + * BPF off=-16 → [rsp+0], off=-24 → [rsp+8], ... + */ + insn_off = outgoing_arg_base - outgoing_rsp - insn_off - 16; + dst_reg = BPF_REG_FP; + } + if (is_imm8(insn_off)) + EMIT2(add_1reg(0x40, dst_reg), insn_off); else - EMIT1_off32(add_1reg(0x80, dst_reg), insn->off); + EMIT1_off32(add_1reg(0x80, dst_reg), insn_off); EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code))); break; @@ -2149,7 +2257,17 @@ st: if (is_imm8(insn->off)) case BPF_STX | BPF_MEM | BPF_H: case BPF_STX | BPF_MEM | BPF_W: case BPF_STX | BPF_MEM | BPF_DW: - emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); + if (dst_reg == BPF_REG_PARAMS && insn->off == -8) { + /* Arg 6: store register value in r9 */ + EMIT_mov(X86_REG_R9, src_reg); + break; + } + insn_off = insn->off; + if (dst_reg == BPF_REG_PARAMS) { + insn_off = outgoing_arg_base - outgoing_rsp - insn_off - 16; + dst_reg = BPF_REG_FP; + } + emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off); break; case BPF_ST | BPF_PROBE_MEM32 | BPF_B: @@ -2248,6 +2366,19 @@ populate_extable: case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: insn_off = insn->off; + if (src_reg == BPF_REG_PARAMS) { + if (insn_off == 8) { + /* Incoming arg 6: read from r9 */ + EMIT_mov(dst_reg, X86_REG_R9); + break; + } + src_reg = BPF_REG_FP; + /* + * Incoming args 7+: native_off == bpf_off + * (r11+16 → [rbp+16], r11+24 → [rbp+24], ...) + * No offset adjustment needed. + */ + } if (BPF_MODE(insn->code) == BPF_PROBE_MEM || BPF_MODE(insn->code) == BPF_PROBE_MEMSX) { @@ -2736,6 +2867,8 @@ emit_jmp: if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog)) return -EINVAL; } + /* Deallocate outgoing args 7+ area. */ + emit_add_rsp(&prog, outgoing_rsp); if (bpf_prog->aux->exception_boundary) { pop_callee_regs(&prog, all_callee_regs_used); pop_r12(&prog); @@ -2971,15 +3104,15 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, } static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, - struct bpf_tramp_link *l, int stack_size, + struct bpf_tramp_node *node, int stack_size, int run_ctx_off, bool save_ret, void *image, void *rw_image) { u8 *prog = *pprog; u8 *jmp_insn; int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); - struct bpf_prog *p = l->link.prog; - u64 cookie = l->cookie; + struct bpf_prog *p = node->link->prog; + u64 cookie = node->cookie; /* mov rdi, cookie */ emit_mov_imm64(&prog, BPF_REG_1, (long) cookie >> 32, (u32) (long) cookie); @@ -3086,7 +3219,7 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) } static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, - struct bpf_tramp_links *tl, int stack_size, + struct bpf_tramp_nodes *tl, int stack_size, int run_ctx_off, int func_meta_off, bool save_ret, void *image, void *rw_image, u64 func_meta, int cookie_off) @@ -3094,13 +3227,13 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, int i, cur_cookie = (cookie_off - stack_size) / 8; u8 *prog = *pprog; - for (i = 0; i < tl->nr_links; i++) { - if (tl->links[i]->link.prog->call_session_cookie) { + for (i = 0; i < tl->nr_nodes; i++) { + if (tl->nodes[i]->link->prog->call_session_cookie) { emit_store_stack_imm64(&prog, BPF_REG_0, -func_meta_off, func_meta | (cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT)); cur_cookie--; } - if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, + if (invoke_bpf_prog(m, &prog, tl->nodes[i], stack_size, run_ctx_off, save_ret, image, rw_image)) return -EINVAL; } @@ -3109,7 +3242,7 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, } static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, - struct bpf_tramp_links *tl, int stack_size, + struct bpf_tramp_nodes *tl, int stack_size, int run_ctx_off, u8 **branches, void *image, void *rw_image) { @@ -3121,8 +3254,8 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, */ emit_mov_imm32(&prog, false, BPF_REG_0, 0); emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); - for (i = 0; i < tl->nr_links; i++) { - if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true, + for (i = 0; i < tl->nr_nodes; i++) { + if (invoke_bpf_prog(m, &prog, tl->nodes[i], stack_size, run_ctx_off, true, image, rw_image)) return -EINVAL; @@ -3213,14 +3346,14 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image, void *rw_image_end, void *image, const struct btf_func_model *m, u32 flags, - struct bpf_tramp_links *tlinks, + struct bpf_tramp_nodes *tnodes, void *func_addr) { int i, ret, nr_regs = m->nr_args, stack_size = 0; int regs_off, func_meta_off, ip_off, run_ctx_off, arg_stack_off, rbx_off; - struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; - struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; - struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY]; + struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT]; + struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN]; void *orig_call = func_addr; int cookie_off, cookie_cnt; u8 **branches = NULL; @@ -3292,7 +3425,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im ip_off = stack_size; - cookie_cnt = bpf_fsession_cookie_cnt(tlinks); + cookie_cnt = bpf_fsession_cookie_cnt(tnodes); /* room for session cookies */ stack_size += cookie_cnt * 8; cookie_off = stack_size; @@ -3385,7 +3518,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im } } - if (bpf_fsession_cnt(tlinks)) { + if (bpf_fsession_cnt(tnodes)) { /* clear all the session cookies' value */ for (int i = 0; i < cookie_cnt; i++) emit_store_stack_imm64(&prog, BPF_REG_0, -cookie_off + 8 * i, 0); @@ -3393,15 +3526,15 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im emit_store_stack_imm64(&prog, BPF_REG_0, -8, 0); } - if (fentry->nr_links) { + if (fentry->nr_nodes) { if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off, func_meta_off, flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image, func_meta, cookie_off)) return -EINVAL; } - if (fmod_ret->nr_links) { - branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *), + if (fmod_ret->nr_nodes) { + branches = kcalloc(fmod_ret->nr_nodes, sizeof(u8 *), GFP_KERNEL); if (!branches) return -ENOMEM; @@ -3440,7 +3573,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im emit_nops(&prog, X86_PATCH_SIZE); } - if (fmod_ret->nr_links) { + if (fmod_ret->nr_nodes) { /* From Intel 64 and IA-32 Architectures Optimization * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler * Coding Rule 11: All branch targets should be 16-byte @@ -3450,7 +3583,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im /* Update the branches saved in invoke_bpf_mod_ret with the * aligned address of do_fexit. */ - for (i = 0; i < fmod_ret->nr_links; i++) { + for (i = 0; i < fmod_ret->nr_nodes; i++) { emit_cond_near_jump(&branches[i], image + (prog - (u8 *)rw_image), image + (branches[i] - (u8 *)rw_image), X86_JNE); } @@ -3458,10 +3591,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im /* set the "is_return" flag for fsession */ func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT); - if (bpf_fsession_cnt(tlinks)) + if (bpf_fsession_cnt(tnodes)) emit_store_stack_imm64(&prog, BPF_REG_0, -func_meta_off, func_meta); - if (fexit->nr_links) { + if (fexit->nr_nodes) { if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, func_meta_off, false, image, rw_image, func_meta, cookie_off)) { ret = -EINVAL; @@ -3535,7 +3668,7 @@ int arch_protect_bpf_trampoline(void *image, unsigned int size) int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_tramp_links *tlinks, + struct bpf_tramp_nodes *tnodes, void *func_addr) { void *rw_image, *tmp; @@ -3550,7 +3683,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i return -ENOMEM; ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m, - flags, tlinks, func_addr); + flags, tnodes, func_addr); if (ret < 0) goto out; @@ -3563,7 +3696,7 @@ out: } int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, - struct bpf_tramp_links *tlinks, void *func_addr) + struct bpf_tramp_nodes *tnodes, void *func_addr) { struct bpf_tramp_image im; void *image; @@ -3581,7 +3714,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, return -ENOMEM; ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image, - m, flags, tlinks, func_addr); + m, flags, tnodes, func_addr); bpf_jit_free_exec(image); return ret; } @@ -3793,7 +3926,8 @@ skip_init_addrs: for (pass = 0; pass < MAX_PASSES || image; pass++) { if (!padding && pass >= PADDING_PASSES) padding = true; - proglen = do_jit(env, prog, addrs, image, rw_image, oldproglen, &ctx, padding); + proglen = do_jit(env, prog, addrs, image, rw_image, oldproglen, + &ctx, padding); if (proglen <= 0) { out_image: image = NULL; @@ -3910,6 +4044,11 @@ bool bpf_jit_supports_kfunc_call(void) return true; } +bool bpf_jit_supports_stack_args(void) +{ + return true; +} + void *bpf_arch_text_copy(void *dst, void *src, size_t len) { if (text_poke_copy(dst, src, len) == NULL) |
