diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-17 15:58:22 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-17 15:58:22 -0700 |
| commit | eb0d6d97c27c29cd7392c8fd74f46edf7dff7ec2 (patch) | |
| tree | faec73a955172291535f227e5f20119292c1ca1c | |
| parent | 12bffaef28820e0b94c644c75708195c61af78f7 (diff) | |
| parent | e1d486445af3c392628532229f7ce5f5cf7891b6 (diff) | |
| download | lwn-master.tar.gz lwn-master.zip | |
Pull bpf fixes from Alexei Starovoitov:
"Most of the diff stat comes from Xu Kuohai's fix to emit ENDBR/BTI,
since all JITs had to be touched to move constant blinding out and
pass bpf_verifier_env in.
- Fix use-after-free in arena_vm_close on fork (Alexei Starovoitov)
- Dissociate struct_ops program with map if map_update fails (Amery
Hung)
- Fix out-of-range and off-by-one bugs in arm64 JIT (Daniel Borkmann)
- Fix precedence bug in convert_bpf_ld_abs alignment check (Daniel
Borkmann)
- Fix arg tracking for imprecise/multi-offset in BPF_ST/STX insns
(Eduard Zingerman)
- Copy token from main to subprogs to fix missing kallsyms (Eduard
Zingerman)
- Prevent double close and leak of btf objects in libbpf (Jiri Olsa)
- Fix af_unix null-ptr-deref in sockmap (Michal Luczaj)
- Fix NULL deref in map_kptr_match_type for scalar regs (Mykyta
Yatsenko)
- Avoid unnecessary IPIs. Remove redundant bpf_flush_icache() in
arm64 and riscv JITs (Puranjay Mohan)
- Fix out of bounds access. Validate node_id in arena_alloc_pages()
(Puranjay Mohan)
- Reject BPF-to-BPF calls and callbacks in arm32 JIT (Puranjay Mohan)
- Refactor all JITs to pass bpf_verifier_env to emit ENDBR/BTI for
indirect jump targets on x86-64, arm64 JITs (Xu Kuohai)
- Allow UTF-8 literals in bpf_bprintf_prepare() (Yihan Ding)"
* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf: (32 commits)
bpf, arm32: Reject BPF-to-BPF calls and callbacks in the JIT
bpf: Dissociate struct_ops program with map if map_update fails
bpf: Validate node_id in arena_alloc_pages()
libbpf: Prevent double close and leak of btf objects
selftests/bpf: cover UTF-8 trace_printk output
bpf: allow UTF-8 literals in bpf_bprintf_prepare()
selftests/bpf: Reject scalar store into kptr slot
bpf: Fix NULL deref in map_kptr_match_type for scalar regs
bpf: Fix precedence bug in convert_bpf_ld_abs alignment check
bpf, arm64: Emit BTI for indirect jump target
bpf, x86: Emit ENDBR for indirect jump targets
bpf: Add helper to detect indirect jump targets
bpf: Pass bpf_verifier_env to JIT
bpf: Move constants blinding out of arch-specific JITs
bpf, sockmap: Take state lock for af_unix iter
bpf, sockmap: Fix af_unix null-ptr-deref in proto update
selftests/bpf: Extend bpf_iter_unix to attempt deadlocking
bpf, sockmap: Fix af_unix iter deadlock
bpf, sockmap: Annotate af_unix sock::sk_state data-races
selftests/bpf: verify kallsyms entries for token-loaded subprograms
...
46 files changed, 1173 insertions, 707 deletions
diff --git a/arch/arc/net/bpf_jit_core.c b/arch/arc/net/bpf_jit_core.c index 1421eeced0f5..639a2736f029 100644 --- a/arch/arc/net/bpf_jit_core.c +++ b/arch/arc/net/bpf_jit_core.c @@ -79,7 +79,6 @@ struct arc_jit_data { * The JIT pertinent context that is used by different functions. * * prog: The current eBPF program being handled. - * orig_prog: The original eBPF program before any possible change. * jit: The JIT buffer and its length. * bpf_header: The JITed program header. "jit.buf" points inside it. * emit: If set, opcodes are written to memory; else, a dry-run. @@ -94,12 +93,10 @@ struct arc_jit_data { * need_extra_pass: A forecast if an "extra_pass" will occur. * is_extra_pass: Indicates if the current pass is an extra pass. * user_bpf_prog: True, if VM opcodes come from a real program. - * blinded: True if "constant blinding" step returned a new "prog". * success: Indicates if the whole JIT went OK. */ struct jit_context { struct bpf_prog *prog; - struct bpf_prog *orig_prog; struct jit_buffer jit; struct bpf_binary_header *bpf_header; bool emit; @@ -114,7 +111,6 @@ struct jit_context { bool need_extra_pass; bool is_extra_pass; bool user_bpf_prog; - bool blinded; bool success; }; @@ -161,13 +157,7 @@ static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog) { memset(ctx, 0, sizeof(*ctx)); - ctx->orig_prog = prog; - - /* If constant blinding was requested but failed, scram. */ - ctx->prog = bpf_jit_blind_constants(prog); - if (IS_ERR(ctx->prog)) - return PTR_ERR(ctx->prog); - ctx->blinded = (ctx->prog != ctx->orig_prog); + ctx->prog = prog; /* If the verifier doesn't zero-extend, then we have to do it. */ ctx->do_zext = !ctx->prog->aux->verifier_zext; @@ -214,14 +204,6 @@ static inline void maybe_free(struct jit_context *ctx, void **mem) */ static void jit_ctx_cleanup(struct jit_context *ctx) { - if (ctx->blinded) { - /* if all went well, release the orig_prog. 
*/ - if (ctx->success) - bpf_jit_prog_release_other(ctx->prog, ctx->orig_prog); - else - bpf_jit_prog_release_other(ctx->orig_prog, ctx->prog); - } - maybe_free(ctx, (void **)&ctx->bpf2insn); maybe_free(ctx, (void **)&ctx->jit_data); @@ -229,12 +211,19 @@ static void jit_ctx_cleanup(struct jit_context *ctx) ctx->bpf2insn_valid = false; /* Freeing "bpf_header" is enough. "jit.buf" is a sub-array of it. */ - if (!ctx->success && ctx->bpf_header) { - bpf_jit_binary_free(ctx->bpf_header); - ctx->bpf_header = NULL; - ctx->jit.buf = NULL; - ctx->jit.index = 0; - ctx->jit.len = 0; + if (!ctx->success) { + if (ctx->bpf_header) { + bpf_jit_binary_free(ctx->bpf_header); + ctx->bpf_header = NULL; + ctx->jit.buf = NULL; + ctx->jit.index = 0; + ctx->jit.len = 0; + } + if (ctx->is_extra_pass) { + ctx->prog->bpf_func = NULL; + ctx->prog->jited = 0; + ctx->prog->jited_len = 0; + } } ctx->emit = false; @@ -1411,7 +1400,7 @@ static struct bpf_prog *do_extra_pass(struct bpf_prog *prog) * (re)locations involved that their addresses are not known * during the first run. 
*/ -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog) { vm_dump(prog); diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index deeb8f292454..9ede81afbc50 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1852,6 +1852,9 @@ exit: { u64 val = (u32)imm | (u64)insn[1].imm << 32; + if (insn->src_reg == BPF_PSEUDO_FUNC) + goto notyet; + emit_a32_mov_i64(dst, val, ctx); return 1; @@ -2055,6 +2058,9 @@ go_jmp: const s8 *r5 = bpf2a32[BPF_REG_5]; const u32 func = (u32)__bpf_call_base + (u32)imm; + if (insn->src_reg == BPF_PSEUDO_CALL) + goto notyet; + emit_a32_mov_r64(true, r0, r1, ctx); emit_a32_mov_r64(true, r1, r2, ctx); emit_push_r64(r5, ctx); @@ -2142,11 +2148,9 @@ bool bpf_jit_needs_zext(void) return true; } -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog) { - struct bpf_prog *tmp, *orig_prog = prog; struct bpf_binary_header *header; - bool tmp_blinded = false; struct jit_ctx ctx; unsigned int tmp_idx; unsigned int image_size; @@ -2156,20 +2160,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) * the interpreter. */ if (!prog->jit_requested) - return orig_prog; - - /* If constant blinding was enabled and we failed during blinding - * then we must fall back to the interpreter. Otherwise, we save - * the new JITed code. 
- */ - tmp = bpf_jit_blind_constants(prog); - - if (IS_ERR(tmp)) - return orig_prog; - if (tmp != prog) { - tmp_blinded = true; - prog = tmp; - } + return prog; memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; @@ -2179,10 +2170,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) * we must fall back to the interpreter */ ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL); - if (ctx.offsets == NULL) { - prog = orig_prog; - goto out; - } + if (ctx.offsets == NULL) + return prog; /* 1) fake pass to find in the length of the JITed code, * to compute ctx->offsets and other context variables @@ -2194,10 +2183,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) * being successful in the second pass, so just fall back * to the interpreter. */ - if (build_body(&ctx)) { - prog = orig_prog; + if (build_body(&ctx)) goto out_off; - } tmp_idx = ctx.idx; build_prologue(&ctx); @@ -2213,10 +2200,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.idx += ctx.imm_count; if (ctx.imm_count) { ctx.imms = kcalloc(ctx.imm_count, sizeof(u32), GFP_KERNEL); - if (ctx.imms == NULL) { - prog = orig_prog; + if (ctx.imms == NULL) goto out_off; - } } #else /* there's nothing about the epilogue on ARMv7 */ @@ -2238,10 +2223,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* Not able to allocate memory for the structure then * we must fall back to the interpretation */ - if (header == NULL) { - prog = orig_prog; + if (header == NULL) goto out_imms; - } /* 2.) Actual pass to generate final JIT code */ ctx.target = (u32 *) image_ptr; @@ -2278,16 +2261,12 @@ out_imms: #endif out_off: kfree(ctx.offsets); -out: - if (tmp_blinded) - bpf_jit_prog_release_other(prog, prog == orig_prog ? 
- tmp : orig_prog); + return prog; out_free: image_ptr = NULL; bpf_jit_binary_free(header); - prog = orig_prog; goto out_imms; } diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index cc5b40917d0d..37ce75f7f1f0 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -338,6 +338,8 @@ u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr, long offset; offset = label_imm_common(pc, addr, SZ_1M); + if (offset >= SZ_1M) + return AARCH64_BREAK_FAULT; insn = aarch64_insn_get_bcond_value(); diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index adf84962d579..0816c40fc7af 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -18,7 +18,6 @@ #include <asm/asm-extable.h> #include <asm/byteorder.h> -#include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/debug-monitors.h> #include <asm/insn.h> @@ -35,8 +34,8 @@ #define ARENA_VM_START (MAX_BPF_JIT_REG + 5) #define check_imm(bits, imm) do { \ - if ((((imm) > 0) && ((imm) >> (bits))) || \ - (((imm) < 0) && (~(imm) >> (bits)))) { \ + if ((((imm) > 0) && ((imm) >> ((bits) - 1))) || \ + (((imm) < 0) && (~(imm) >> ((bits) - 1)))) { \ pr_info("[%2d] imm=%d(0x%x) out of range\n", \ i, imm, imm); \ return -EINVAL; \ @@ -1198,8 +1197,8 @@ static int add_exception_handler(const struct bpf_insn *insn, * >0 - successfully JITed a 16-byte eBPF instruction. * <0 - failed to JIT. 
*/ -static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, - bool extra_pass) +static int build_insn(const struct bpf_verifier_env *env, const struct bpf_insn *insn, + struct jit_ctx *ctx, bool extra_pass) { const u8 code = insn->code; u8 dst = bpf2a64[insn->dst_reg]; @@ -1224,6 +1223,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, int ret; bool sign_extend; + if (bpf_insn_is_indirect_target(env, ctx->prog, i)) + emit_bti(A64_BTI_J, ctx); + switch (code) { /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: @@ -1899,7 +1901,7 @@ emit_cond_jmp: return 0; } -static int build_body(struct jit_ctx *ctx, bool extra_pass) +static int build_body(struct bpf_verifier_env *env, struct jit_ctx *ctx, bool extra_pass) { const struct bpf_prog *prog = ctx->prog; int i; @@ -1918,7 +1920,7 @@ static int build_body(struct jit_ctx *ctx, bool extra_pass) int ret; ctx->offset[i] = ctx->idx; - ret = build_insn(insn, ctx, extra_pass); + ret = build_insn(env, insn, ctx, extra_pass); if (ret > 0) { i++; ctx->offset[i] = ctx->idx; @@ -1961,11 +1963,6 @@ static int validate_ctx(struct jit_ctx *ctx) return 0; } -static inline void bpf_flush_icache(void *start, void *end) -{ - flush_icache_range((unsigned long)start, (unsigned long)end); -} - static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size) { int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3; @@ -2006,17 +2003,15 @@ struct arm64_jit_data { struct jit_ctx ctx; }; -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog) { int image_size, prog_size, extable_size, extable_align, extable_offset; - struct bpf_prog *tmp, *orig_prog = prog; struct bpf_binary_header *header; struct bpf_binary_header *ro_header = NULL; struct arm64_jit_data *jit_data; void __percpu *priv_stack_ptr = NULL; bool was_classic = bpf_prog_was_classic(prog); int priv_stack_alloc_sz; - bool 
tmp_blinded = false; bool extra_pass = false; struct jit_ctx ctx; u8 *image_ptr; @@ -2025,26 +2020,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) int exentry_idx; if (!prog->jit_requested) - return orig_prog; - - tmp = bpf_jit_blind_constants(prog); - /* If blinding was requested and we failed during blinding, - * we must fall back to the interpreter. - */ - if (IS_ERR(tmp)) - return orig_prog; - if (tmp != prog) { - tmp_blinded = true; - prog = tmp; - } + return prog; jit_data = prog->aux->jit_data; if (!jit_data) { jit_data = kzalloc_obj(*jit_data); - if (!jit_data) { - prog = orig_prog; - goto out; - } + if (!jit_data) + return prog; prog->aux->jit_data = jit_data; } priv_stack_ptr = prog->aux->priv_stack_ptr; @@ -2056,10 +2038,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) + 2 * PRIV_STACK_GUARD_SZ; priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 16, GFP_KERNEL); - if (!priv_stack_ptr) { - prog = orig_prog; + if (!priv_stack_ptr) goto out_priv_stack; - } priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz); prog->aux->priv_stack_ptr = priv_stack_ptr; @@ -2079,10 +2059,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.prog = prog; ctx.offset = kvzalloc_objs(int, prog->len + 1); - if (ctx.offset == NULL) { - prog = orig_prog; + if (ctx.offset == NULL) goto out_off; - } ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena); @@ -2095,15 +2073,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) * BPF line info needs ctx->offset[i] to be the offset of * instruction[i] in jited image, so build prologue first. 
*/ - if (build_prologue(&ctx, was_classic)) { - prog = orig_prog; + if (build_prologue(&ctx, was_classic)) goto out_off; - } - if (build_body(&ctx, extra_pass)) { - prog = orig_prog; + if (build_body(env, &ctx, extra_pass)) goto out_off; - } ctx.epilogue_offset = ctx.idx; build_epilogue(&ctx, was_classic); @@ -2121,10 +2095,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, sizeof(u64), &header, &image_ptr, jit_fill_hole); - if (!ro_header) { - prog = orig_prog; + if (!ro_header) goto out_off; - } /* Pass 2: Determine jited position and result for each instruction */ @@ -2152,10 +2124,8 @@ skip_init_ctx: /* Dont write body instructions to memory for now */ ctx.write = false; - if (build_body(&ctx, extra_pass)) { - prog = orig_prog; + if (build_body(env, &ctx, extra_pass)) goto out_free_hdr; - } ctx.epilogue_offset = ctx.idx; ctx.exentry_idx = exentry_idx; @@ -2163,20 +2133,16 @@ skip_init_ctx: ctx.write = true; /* Pass 3: Adjust jump offset and write final image */ - if (build_body(&ctx, extra_pass) || - WARN_ON_ONCE(ctx.idx != ctx.epilogue_offset)) { - prog = orig_prog; + if (build_body(env, &ctx, extra_pass) || + WARN_ON_ONCE(ctx.idx != ctx.epilogue_offset)) goto out_free_hdr; - } build_epilogue(&ctx, was_classic); build_plt(&ctx); /* Extra pass to validate JITed code. 
*/ - if (validate_ctx(&ctx)) { - prog = orig_prog; + if (validate_ctx(&ctx)) goto out_free_hdr; - } /* update the real prog size */ prog_size = sizeof(u32) * ctx.idx; @@ -2193,23 +2159,14 @@ skip_init_ctx: if (extra_pass && ctx.idx > jit_data->ctx.idx) { pr_err_once("multi-func JIT bug %d > %d\n", ctx.idx, jit_data->ctx.idx); - prog->bpf_func = NULL; - prog->jited = 0; - prog->jited_len = 0; goto out_free_hdr; } if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) { - /* ro_header has been freed */ + /* ro_header and header has been freed */ ro_header = NULL; - prog = orig_prog; - goto out_off; + header = NULL; + goto out_free_hdr; } - /* - * The instructions have now been copied to the ROX region from - * where they will execute. Now the data cache has to be cleaned to - * the PoU and the I-cache has to be invalidated for the VAs. - */ - bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx); } else { jit_data->ctx = ctx; jit_data->ro_image = ro_image_ptr; @@ -2245,13 +2202,15 @@ out_priv_stack: kfree(jit_data); prog->aux->jit_data = NULL; } -out: - if (tmp_blinded) - bpf_jit_prog_release_other(prog, prog == orig_prog ? - tmp : orig_prog); + return prog; out_free_hdr: + if (extra_pass) { + prog->bpf_func = NULL; + prog->jited = 0; + prog->jited_len = 0; + } if (header) { bpf_arch_text_copy(&ro_header->size, &header->size, sizeof(header->size)); diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 9cb796e16379..5149ce4cef7e 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1920,45 +1920,28 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, return ret < 0 ? 
ret : ret * LOONGARCH_INSN_SIZE; } -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog) { - bool tmp_blinded = false, extra_pass = false; + bool extra_pass = false; u8 *image_ptr, *ro_image_ptr; int image_size, prog_size, extable_size; struct jit_ctx ctx; struct jit_data *jit_data; struct bpf_binary_header *header; struct bpf_binary_header *ro_header; - struct bpf_prog *tmp, *orig_prog = prog; /* * If BPF JIT was not enabled then we must fall back to * the interpreter. */ if (!prog->jit_requested) - return orig_prog; - - tmp = bpf_jit_blind_constants(prog); - /* - * If blinding was requested and we failed during blinding, - * we must fall back to the interpreter. Otherwise, we save - * the new JITed code. - */ - if (IS_ERR(tmp)) - return orig_prog; - - if (tmp != prog) { - tmp_blinded = true; - prog = tmp; - } + return prog; jit_data = prog->aux->jit_data; if (!jit_data) { jit_data = kzalloc_obj(*jit_data); - if (!jit_data) { - prog = orig_prog; - goto out; - } + if (!jit_data) + return prog; prog->aux->jit_data = jit_data; } if (jit_data->ctx.offset) { @@ -1978,17 +1961,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL); - if (ctx.offset == NULL) { - prog = orig_prog; + if (ctx.offset == NULL) goto out_offset; - } /* 1. 
Initial fake pass to compute ctx->idx and set ctx->flags */ build_prologue(&ctx); - if (build_body(&ctx, extra_pass)) { - prog = orig_prog; + if (build_body(&ctx, extra_pass)) goto out_offset; - } ctx.epilogue_offset = ctx.idx; build_epilogue(&ctx); @@ -2004,10 +1983,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* Now we know the size of the structure to make */ ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, sizeof(u32), &header, &image_ptr, jit_fill_hole); - if (!ro_header) { - prog = orig_prog; + if (!ro_header) goto out_offset; - } /* 2. Now, the actual pass to generate final JIT code */ /* @@ -2027,17 +2004,13 @@ skip_init_ctx: ctx.num_exentries = 0; build_prologue(&ctx); - if (build_body(&ctx, extra_pass)) { - prog = orig_prog; + if (build_body(&ctx, extra_pass)) goto out_free; - } build_epilogue(&ctx); /* 3. Extra pass to validate JITed code */ - if (validate_ctx(&ctx)) { - prog = orig_prog; + if (validate_ctx(&ctx)) goto out_free; - } /* And we're done */ if (bpf_jit_enable > 1) @@ -2050,9 +2023,9 @@ skip_init_ctx: goto out_free; } if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) { - /* ro_header has been freed */ + /* ro_header and header have been freed */ ro_header = NULL; - prog = orig_prog; + header = NULL; goto out_free; } /* @@ -2084,13 +2057,15 @@ out_offset: prog->aux->jit_data = NULL; } -out: - if (tmp_blinded) - bpf_jit_prog_release_other(prog, prog == orig_prog ? 
tmp : orig_prog); - return prog; out_free: + if (extra_pass) { + prog->bpf_func = NULL; + prog->jited = 0; + prog->jited_len = 0; + } + if (header) { bpf_arch_text_copy(&ro_header->size, &header->size, sizeof(header->size)); bpf_jit_binary_pack_free(ro_header, header); diff --git a/arch/mips/net/bpf_jit_comp.c b/arch/mips/net/bpf_jit_comp.c index e355dfca4400..6ee4abe6a1f7 100644 --- a/arch/mips/net/bpf_jit_comp.c +++ b/arch/mips/net/bpf_jit_comp.c @@ -909,12 +909,10 @@ bool bpf_jit_needs_zext(void) return true; } -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog) { - struct bpf_prog *tmp, *orig_prog = prog; struct bpf_binary_header *header = NULL; struct jit_context ctx; - bool tmp_blinded = false; unsigned int tmp_idx; unsigned int image_size; u8 *image_ptr; @@ -925,19 +923,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) * the interpreter. */ if (!prog->jit_requested) - return orig_prog; - /* - * If constant blinding was enabled and we failed during blinding - * then we must fall back to the interpreter. Otherwise, we save - * the new JITed code. - */ - tmp = bpf_jit_blind_constants(prog); - if (IS_ERR(tmp)) - return orig_prog; - if (tmp != prog) { - tmp_blinded = true; - prog = tmp; - } + return prog; memset(&ctx, 0, sizeof(ctx)); ctx.program = prog; @@ -1025,14 +1011,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog->jited_len = image_size; out: - if (tmp_blinded) - bpf_jit_prog_release_other(prog, prog == orig_prog ? 
- tmp : orig_prog); kfree(ctx.descriptors); return prog; out_err: - prog = orig_prog; if (header) bpf_jit_binary_free(header); goto out; diff --git a/arch/parisc/net/bpf_jit_core.c b/arch/parisc/net/bpf_jit_core.c index a5eb6b51e27a..172770132440 100644 --- a/arch/parisc/net/bpf_jit_core.c +++ b/arch/parisc/net/bpf_jit_core.c @@ -41,33 +41,22 @@ bool bpf_jit_needs_zext(void) return true; } -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog) { unsigned int prog_size = 0, extable_size = 0; - bool tmp_blinded = false, extra_pass = false; - struct bpf_prog *tmp, *orig_prog = prog; + bool extra_pass = false; int pass = 0, prev_ninsns = 0, prologue_len, i; struct hppa_jit_data *jit_data; struct hppa_jit_context *ctx; if (!prog->jit_requested) - return orig_prog; - - tmp = bpf_jit_blind_constants(prog); - if (IS_ERR(tmp)) - return orig_prog; - if (tmp != prog) { - tmp_blinded = true; - prog = tmp; - } + return prog; jit_data = prog->aux->jit_data; if (!jit_data) { jit_data = kzalloc_obj(*jit_data); - if (!jit_data) { - prog = orig_prog; - goto out; - } + if (!jit_data) + return prog; prog->aux->jit_data = jit_data; } @@ -81,10 +70,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx->prog = prog; ctx->offset = kzalloc_objs(int, prog->len); - if (!ctx->offset) { - prog = orig_prog; - goto out_offset; - } + if (!ctx->offset) + goto out_err; for (i = 0; i < prog->len; i++) { prev_ninsns += 20; ctx->offset[i] = prev_ninsns; @@ -93,10 +80,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) for (i = 0; i < NR_JIT_ITERATIONS; i++) { pass++; ctx->ninsns = 0; - if (build_body(ctx, extra_pass, ctx->offset)) { - prog = orig_prog; - goto out_offset; - } + if (build_body(ctx, extra_pass, ctx->offset)) + goto out_err; ctx->body_len = ctx->ninsns; bpf_jit_build_prologue(ctx); ctx->prologue_len = ctx->ninsns - ctx->body_len; @@ -116,10 +101,8 @@ struct bpf_prog 
*bpf_int_jit_compile(struct bpf_prog *prog) &jit_data->image, sizeof(long), bpf_fill_ill_insns); - if (!jit_data->header) { - prog = orig_prog; - goto out_offset; - } + if (!jit_data->header) + goto out_err; ctx->insns = (u32 *)jit_data->image; /* @@ -134,8 +117,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) pr_err("bpf-jit: image did not converge in <%d passes!\n", i); if (jit_data->header) bpf_jit_binary_free(jit_data->header); - prog = orig_prog; - goto out_offset; + goto out_err; } if (extable_size) @@ -148,8 +130,7 @@ skip_init_ctx: bpf_jit_build_prologue(ctx); if (build_body(ctx, extra_pass, NULL)) { bpf_jit_binary_free(jit_data->header); - prog = orig_prog; - goto out_offset; + goto out_err; } bpf_jit_build_epilogue(ctx); @@ -160,20 +141,19 @@ skip_init_ctx: { extern int machine_restart(char *); machine_restart(""); } } + if (!prog->is_func || extra_pass) { + if (bpf_jit_binary_lock_ro(jit_data->header)) { + bpf_jit_binary_free(jit_data->header); + goto out_err; + } + bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns); + } + prog->bpf_func = (void *)ctx->insns; prog->jited = 1; prog->jited_len = prog_size; - bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns); - if (!prog->is_func || extra_pass) { - if (bpf_jit_binary_lock_ro(jit_data->header)) { - bpf_jit_binary_free(jit_data->header); - prog->bpf_func = NULL; - prog->jited = 0; - prog->jited_len = 0; - goto out_offset; - } prologue_len = ctx->epilogue_offset - ctx->body_len; for (i = 0; i < prog->len; i++) ctx->offset[i] += prologue_len; @@ -183,14 +163,19 @@ out_offset: kfree(jit_data); prog->aux->jit_data = NULL; } -out: + if (HPPA_JIT_REBOOT) { extern int machine_restart(char *); machine_restart(""); } - if (tmp_blinded) - bpf_jit_prog_release_other(prog, prog == orig_prog ? 
- tmp : orig_prog); return prog; + +out_err: + if (extra_pass) { + prog->bpf_func = NULL; + prog->jited = 0; + prog->jited_len = 0; + } + goto out_offset; } u64 hppa_div64(u64 div, u64 divisor) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 50103b3794fb..53ab97ad6074 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -162,7 +162,7 @@ static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size } } -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) +struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *fp) { u32 proglen; u32 alloclen; @@ -177,9 +177,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) void __percpu *priv_stack_ptr = NULL; struct bpf_binary_header *fhdr = NULL; struct bpf_binary_header *hdr = NULL; - struct bpf_prog *org_fp = fp; - struct bpf_prog *tmp_fp = NULL; - bool bpf_blinded = false; bool extra_pass = false; u8 *fimage = NULL; u32 *fcode_base = NULL; @@ -187,24 +184,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) u32 fixup_len; if (!fp->jit_requested) - return org_fp; - - tmp_fp = bpf_jit_blind_constants(org_fp); - if (IS_ERR(tmp_fp)) - return org_fp; - - if (tmp_fp != org_fp) { - bpf_blinded = true; - fp = tmp_fp; - } + return fp; jit_data = fp->aux->jit_data; if (!jit_data) { jit_data = kzalloc_obj(*jit_data); - if (!jit_data) { - fp = org_fp; - goto out; - } + if (!jit_data) + return fp; fp->aux->jit_data = jit_data; } @@ -219,10 +205,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) + 2 * PRIV_STACK_GUARD_SZ; priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_size, 16, GFP_KERNEL); - if (!priv_stack_ptr) { - fp = org_fp; + if (!priv_stack_ptr) goto out_priv_stack; - } priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_size); fp->aux->priv_stack_ptr = priv_stack_ptr; @@ -249,10 +233,8 @@ struct bpf_prog 
*bpf_int_jit_compile(struct bpf_prog *fp) } addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL); - if (addrs == NULL) { - fp = org_fp; - goto out_addrs; - } + if (addrs == NULL) + goto out_err; memset(&cgctx, 0, sizeof(struct codegen_context)); bpf_jit_init_reg_mapping(&cgctx); @@ -279,11 +261,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) } /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) { + if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) /* We hit something illegal or unsupported. */ - fp = org_fp; - goto out_addrs; - } + goto out_err; /* * If we have seen a tail call, we need a second pass. @@ -294,10 +274,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) */ if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) { cgctx.idx = 0; - if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) { - fp = org_fp; - goto out_addrs; - } + if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) + goto out_err; } bpf_jit_realloc_regs(&cgctx); @@ -318,10 +296,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image, bpf_jit_fill_ill_insns); - if (!fhdr) { - fp = org_fp; - goto out_addrs; - } + if (!fhdr) + goto out_err; if (extable_len) fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len; @@ -340,8 +316,7 @@ skip_init_ctx: extra_pass)) { bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size)); bpf_jit_binary_pack_free(fhdr, hdr); - fp = org_fp; - goto out_addrs; + goto out_err; } bpf_jit_build_epilogue(code_base, &cgctx); @@ -363,15 +338,16 @@ skip_init_ctx: ((u64 *)image)[1] = local_paca->kernel_toc; #endif + if (!fp->is_func || extra_pass) { + if (bpf_jit_binary_pack_finalize(fhdr, hdr)) + goto out_err; + } + fp->bpf_func = (void *)fimage; fp->jited = 1; fp->jited_len = cgctx.idx * 4 + FUNCTION_DESCR_SIZE; if (!fp->is_func 
|| extra_pass) { - if (bpf_jit_binary_pack_finalize(fhdr, hdr)) { - fp = org_fp; - goto out_addrs; - } bpf_prog_fill_jited_linfo(fp, addrs); /* * On ABI V1, executable code starts after the function @@ -398,11 +374,15 @@ out_priv_stack: jit_data->hdr = hdr; } -out: - if (bpf_blinded) - bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); - return fp; + +out_err: + if (extra_pass) { + fp->bpf_func = NULL; + fp->jited = 0; + fp->jited_len = 0; + } + goto out_addrs; } /* diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h index 632ced07bca4..da0271790244 100644 --- a/arch/riscv/net/bpf_jit.h +++ b/arch/riscv/net/bpf_jit.h @@ -11,7 +11,6 @@ #include <linux/bpf.h> #include <linux/filter.h> -#include <asm/cacheflush.h> /* verify runtime detection extension status */ #define rv_ext_enabled(ext) \ @@ -105,11 +104,6 @@ static inline void bpf_fill_ill_insns(void *area, unsigned int size) memset(area, 0, size); } -static inline void bpf_flush_icache(void *start, void *end) -{ - flush_icache_range((unsigned long)start, (unsigned long)end); -} - /* Emit a 4-byte riscv instruction. 
*/ static inline void emit(const u32 insn, struct rv_jit_context *ctx) { diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index b3581e926436..4365d07aaf54 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -41,32 +41,22 @@ bool bpf_jit_needs_zext(void) return true; } -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog) { unsigned int prog_size = 0, extable_size = 0; - bool tmp_blinded = false, extra_pass = false; - struct bpf_prog *tmp, *orig_prog = prog; + bool extra_pass = false; int pass = 0, prev_ninsns = 0, i; struct rv_jit_data *jit_data; struct rv_jit_context *ctx; if (!prog->jit_requested) - return orig_prog; - - tmp = bpf_jit_blind_constants(prog); - if (IS_ERR(tmp)) - return orig_prog; - if (tmp != prog) { - tmp_blinded = true; - prog = tmp; - } + return prog; jit_data = prog->aux->jit_data; if (!jit_data) { jit_data = kzalloc_obj(*jit_data); if (!jit_data) { - prog = orig_prog; - goto out; + return prog; } prog->aux->jit_data = jit_data; } @@ -83,15 +73,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx->user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); ctx->prog = prog; ctx->offset = kzalloc_objs(int, prog->len); - if (!ctx->offset) { - prog = orig_prog; + if (!ctx->offset) goto out_offset; - } - if (build_body(ctx, extra_pass, NULL)) { - prog = orig_prog; + if (build_body(ctx, extra_pass, NULL)) goto out_offset; - } for (i = 0; i < prog->len; i++) { prev_ninsns += 32; @@ -105,10 +91,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bpf_jit_build_prologue(ctx, bpf_is_subprog(prog)); ctx->prologue_len = ctx->ninsns; - if (build_body(ctx, extra_pass, ctx->offset)) { - prog = orig_prog; + if (build_body(ctx, extra_pass, ctx->offset)) goto out_offset; - } ctx->epilogue_offset = ctx->ninsns; bpf_jit_build_epilogue(ctx); @@ -126,10 +110,8 @@ struct bpf_prog 
*bpf_int_jit_compile(struct bpf_prog *prog) &jit_data->ro_image, sizeof(u32), &jit_data->header, &jit_data->image, bpf_fill_ill_insns); - if (!jit_data->ro_header) { - prog = orig_prog; + if (!jit_data->ro_header) goto out_offset; - } /* * Use the image(RW) for writing the JITed instructions. But also save @@ -150,7 +132,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (i == NR_JIT_ITERATIONS) { pr_err("bpf-jit: image did not converge in <%d passes!\n", i); - prog = orig_prog; goto out_free_hdr; } @@ -163,33 +144,27 @@ skip_init_ctx: ctx->nexentries = 0; bpf_jit_build_prologue(ctx, bpf_is_subprog(prog)); - if (build_body(ctx, extra_pass, NULL)) { - prog = orig_prog; + if (build_body(ctx, extra_pass, NULL)) goto out_free_hdr; - } bpf_jit_build_epilogue(ctx); if (bpf_jit_enable > 1) bpf_jit_dump(prog->len, prog_size, pass, ctx->insns); - prog->bpf_func = (void *)ctx->ro_insns + cfi_get_offset(); - prog->jited = 1; - prog->jited_len = prog_size - cfi_get_offset(); - if (!prog->is_func || extra_pass) { if (WARN_ON(bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header))) { /* ro_header has been freed */ jit_data->ro_header = NULL; - prog = orig_prog; - goto out_offset; + jit_data->header = NULL; + goto out_free_hdr; } - /* - * The instructions have now been copied to the ROX region from - * where they will execute. - * Write any modified data cache blocks out to memory and - * invalidate the corresponding blocks in the instruction cache. 
- */ - bpf_flush_icache(jit_data->ro_header, ctx->ro_insns + ctx->ninsns); + } + + prog->bpf_func = (void *)ctx->ro_insns + cfi_get_offset(); + prog->jited = 1; + prog->jited_len = prog_size - cfi_get_offset(); + + if (!prog->is_func || extra_pass) { for (i = 0; i < prog->len; i++) ctx->offset[i] = ninsns_rvoff(ctx->offset[i]); bpf_prog_fill_jited_linfo(prog, ctx->offset); @@ -198,14 +173,15 @@ out_offset: kfree(jit_data); prog->aux->jit_data = NULL; } -out: - if (tmp_blinded) - bpf_jit_prog_release_other(prog, prog == orig_prog ? - tmp : orig_prog); return prog; out_free_hdr: + if (extra_pass) { + prog->bpf_func = NULL; + prog->jited = 0; + prog->jited_len = 0; + } if (jit_data->header) { bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size, sizeof(jit_data->header->size)); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index d08d159b6319..94128fe6be23 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -2312,38 +2312,22 @@ static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit, /* * Compile eBPF program "fp" */ -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) +struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *fp) { - struct bpf_prog *tmp, *orig_fp = fp; struct bpf_binary_header *header; struct s390_jit_data *jit_data; - bool tmp_blinded = false; bool extra_pass = false; struct bpf_jit jit; int pass; if (!fp->jit_requested) - return orig_fp; - - tmp = bpf_jit_blind_constants(fp); - /* - * If blinding was requested and we failed during blinding, - * we must fall back to the interpreter. 
- */ - if (IS_ERR(tmp)) - return orig_fp; - if (tmp != fp) { - tmp_blinded = true; - fp = tmp; - } + return fp; jit_data = fp->aux->jit_data; if (!jit_data) { jit_data = kzalloc_obj(*jit_data); - if (!jit_data) { - fp = orig_fp; - goto out; - } + if (!jit_data) + return fp; fp->aux->jit_data = jit_data; } if (jit_data->ctx.addrs) { @@ -2356,34 +2340,27 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) memset(&jit, 0, sizeof(jit)); jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL); - if (jit.addrs == NULL) { - fp = orig_fp; - goto free_addrs; - } + if (jit.addrs == NULL) + goto out_err; /* * Three initial passes: * - 1/2: Determine clobbered registers * - 3: Calculate program size and addrs array */ for (pass = 1; pass <= 3; pass++) { - if (bpf_jit_prog(&jit, fp, extra_pass)) { - fp = orig_fp; - goto free_addrs; - } + if (bpf_jit_prog(&jit, fp, extra_pass)) + goto out_err; } /* * Final pass: Allocate and generate program */ header = bpf_jit_alloc(&jit, fp); - if (!header) { - fp = orig_fp; - goto free_addrs; - } + if (!header) + goto out_err; skip_init_ctx: if (bpf_jit_prog(&jit, fp, extra_pass)) { bpf_jit_binary_free(header); - fp = orig_fp; - goto free_addrs; + goto out_err; } if (bpf_jit_enable > 1) { bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf); @@ -2392,8 +2369,7 @@ skip_init_ctx: if (!fp->is_func || extra_pass) { if (bpf_jit_binary_lock_ro(header)) { bpf_jit_binary_free(header); - fp = orig_fp; - goto free_addrs; + goto out_err; } } else { jit_data->header = header; @@ -2411,11 +2387,16 @@ free_addrs: kfree(jit_data); fp->aux->jit_data = NULL; } -out: - if (tmp_blinded) - bpf_jit_prog_release_other(fp, fp == orig_fp ? 
- tmp : orig_fp); + return fp; + +out_err: + if (extra_pass) { + fp->bpf_func = NULL; + fp->jited = 0; + fp->jited_len = 0; + } + goto free_addrs; } bool bpf_jit_supports_kfunc_call(void) diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index b23d1c645ae5..2fa0e9375127 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1477,39 +1477,24 @@ struct sparc64_jit_data { struct jit_ctx ctx; }; -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog) { - struct bpf_prog *tmp, *orig_prog = prog; struct sparc64_jit_data *jit_data; struct bpf_binary_header *header; u32 prev_image_size, image_size; - bool tmp_blinded = false; bool extra_pass = false; struct jit_ctx ctx; u8 *image_ptr; int pass, i; if (!prog->jit_requested) - return orig_prog; - - tmp = bpf_jit_blind_constants(prog); - /* If blinding was requested and we failed during blinding, - * we must fall back to the interpreter. - */ - if (IS_ERR(tmp)) - return orig_prog; - if (tmp != prog) { - tmp_blinded = true; - prog = tmp; - } + return prog; jit_data = prog->aux->jit_data; if (!jit_data) { jit_data = kzalloc_obj(*jit_data); - if (!jit_data) { - prog = orig_prog; - goto out; - } + if (!jit_data) + return prog; prog->aux->jit_data = jit_data; } if (jit_data->ctx.offset) { @@ -1527,10 +1512,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.prog = prog; ctx.offset = kmalloc_array(prog->len, sizeof(unsigned int), GFP_KERNEL); - if (ctx.offset == NULL) { - prog = orig_prog; - goto out_off; - } + if (ctx.offset == NULL) + goto out_err; /* Longest sequence emitted is for bswap32, 12 instructions. Pre-cook * the offset array so that we converge faster. 
@@ -1543,10 +1526,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.idx = 0; build_prologue(&ctx); - if (build_body(&ctx)) { - prog = orig_prog; - goto out_off; - } + if (build_body(&ctx)) + goto out_err; build_epilogue(&ctx); if (bpf_jit_enable > 1) @@ -1569,10 +1550,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) image_size = sizeof(u32) * ctx.idx; header = bpf_jit_binary_alloc(image_size, &image_ptr, sizeof(u32), jit_fill_hole); - if (header == NULL) { - prog = orig_prog; - goto out_off; - } + if (header == NULL) + goto out_err; ctx.image = (u32 *)image_ptr; skip_init_ctx: @@ -1582,8 +1561,7 @@ skip_init_ctx: if (build_body(&ctx)) { bpf_jit_binary_free(header); - prog = orig_prog; - goto out_off; + goto out_err; } build_epilogue(&ctx); @@ -1592,8 +1570,7 @@ skip_init_ctx: pr_err("bpf_jit: Failed to converge, prev_size=%u size=%d\n", prev_image_size, ctx.idx * 4); bpf_jit_binary_free(header); - prog = orig_prog; - goto out_off; + goto out_err; } if (bpf_jit_enable > 1) @@ -1604,8 +1581,7 @@ skip_init_ctx: if (!prog->is_func || extra_pass) { if (bpf_jit_binary_lock_ro(header)) { bpf_jit_binary_free(header); - prog = orig_prog; - goto out_off; + goto out_err; } } else { jit_data->ctx = ctx; @@ -1624,9 +1600,14 @@ out_off: kfree(jit_data); prog->aux->jit_data = NULL; } -out: - if (tmp_blinded) - bpf_jit_prog_release_other(prog, prog == orig_prog ? 
- tmp : orig_prog); + return prog; + +out_err: + if (extra_pass) { + prog->bpf_func = NULL; + prog->jited = 0; + prog->jited_len = 0; + } + goto out_off; } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index e9b78040d703..ea9e707e8abf 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -58,8 +58,8 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) #define EMIT_ENDBR() EMIT(gen_endbr(), 4) #define EMIT_ENDBR_POISON() EMIT(gen_endbr_poison(), 4) #else -#define EMIT_ENDBR() -#define EMIT_ENDBR_POISON() +#define EMIT_ENDBR() do { } while (0) +#define EMIT_ENDBR_POISON() do { } while (0) #endif static bool is_imm8(int value) @@ -1649,8 +1649,8 @@ static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip, return 0; } -static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image, - int oldproglen, struct jit_context *ctx, bool jmp_padding) +static int do_jit(struct bpf_verifier_env *env, struct bpf_prog *bpf_prog, int *addrs, u8 *image, + u8 *rw_image, int oldproglen, struct jit_context *ctx, bool jmp_padding) { bool tail_call_reachable = bpf_prog->aux->tail_call_reachable; struct bpf_insn *insn = bpf_prog->insnsi; @@ -1663,7 +1663,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image void __percpu *priv_stack_ptr; int i, excnt = 0; int ilen, proglen = 0; - u8 *prog = temp; + u8 *ip, *prog = temp; u32 stack_depth; int err; @@ -1734,6 +1734,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image dst_reg = X86_REG_R9; } + if (bpf_insn_is_indirect_target(env, bpf_prog, i - 1)) + EMIT_ENDBR(); + + ip = image + addrs[i - 1] + (prog - temp); + switch (insn->code) { /* ALU */ case BPF_ALU | BPF_ADD | BPF_X: @@ -2440,8 +2445,6 @@ populate_extable: /* call */ case BPF_JMP | BPF_CALL: { - u8 *ip = image + addrs[i - 1]; - func = (u8 *) __bpf_call_base + imm32; if (src_reg == BPF_PSEUDO_CALL && tail_call_reachable) { 
LOAD_TAIL_CALL_CNT_PTR(stack_depth); @@ -2465,7 +2468,8 @@ populate_extable: if (imm32) emit_bpf_tail_call_direct(bpf_prog, &bpf_prog->aux->poke_tab[imm32 - 1], - &prog, image + addrs[i - 1], + &prog, + ip, callee_regs_used, stack_depth, ctx); @@ -2474,7 +2478,7 @@ populate_extable: &prog, callee_regs_used, stack_depth, - image + addrs[i - 1], + ip, ctx); break; @@ -2639,7 +2643,7 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */ break; case BPF_JMP | BPF_JA | BPF_X: - emit_indirect_jump(&prog, insn->dst_reg, image + addrs[i - 1]); + emit_indirect_jump(&prog, insn->dst_reg, ip); break; case BPF_JMP | BPF_JA: case BPF_JMP32 | BPF_JA: @@ -2729,8 +2733,6 @@ emit_jmp: ctx->cleanup_addr = proglen; if (bpf_prog_was_classic(bpf_prog) && !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) { - u8 *ip = image + addrs[i - 1]; - if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog)) return -EINVAL; } @@ -3713,17 +3715,15 @@ struct x64_jit_data { #define MAX_PASSES 20 #define PADDING_PASSES (MAX_PASSES - 5) -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog) { struct bpf_binary_header *rw_header = NULL; struct bpf_binary_header *header = NULL; - struct bpf_prog *tmp, *orig_prog = prog; void __percpu *priv_stack_ptr = NULL; struct x64_jit_data *jit_data; int priv_stack_alloc_sz; int proglen, oldproglen = 0; struct jit_context ctx = {}; - bool tmp_blinded = false; bool extra_pass = false; bool padding = false; u8 *rw_image = NULL; @@ -3733,27 +3733,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) int i; if (!prog->jit_requested) - return orig_prog; - - tmp = bpf_jit_blind_constants(prog); - /* - * If blinding was requested and we failed during blinding, - * we must fall back to the interpreter. 
- */ - if (IS_ERR(tmp)) - return orig_prog; - if (tmp != prog) { - tmp_blinded = true; - prog = tmp; - } + return prog; jit_data = prog->aux->jit_data; if (!jit_data) { jit_data = kzalloc_obj(*jit_data); - if (!jit_data) { - prog = orig_prog; - goto out; - } + if (!jit_data) + return prog; prog->aux->jit_data = jit_data; } priv_stack_ptr = prog->aux->priv_stack_ptr; @@ -3765,10 +3751,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 8) + 2 * PRIV_STACK_GUARD_SZ; priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 8, GFP_KERNEL); - if (!priv_stack_ptr) { - prog = orig_prog; + if (!priv_stack_ptr) goto out_priv_stack; - } priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz); prog->aux->priv_stack_ptr = priv_stack_ptr; @@ -3786,10 +3770,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto skip_init_addrs; } addrs = kvmalloc_objs(*addrs, prog->len + 1); - if (!addrs) { - prog = orig_prog; + if (!addrs) goto out_addrs; - } /* * Before first pass, make a rough estimation of addrs[] @@ -3811,7 +3793,7 @@ skip_init_addrs: for (pass = 0; pass < MAX_PASSES || image; pass++) { if (!padding && pass >= PADDING_PASSES) padding = true; - proglen = do_jit(prog, addrs, image, rw_image, oldproglen, &ctx, padding); + proglen = do_jit(env, prog, addrs, image, rw_image, oldproglen, &ctx, padding); if (proglen <= 0) { out_image: image = NULL; @@ -3820,8 +3802,6 @@ out_image: sizeof(rw_header->size)); bpf_jit_binary_pack_free(header, rw_header); } - /* Fall back to interpreter mode */ - prog = orig_prog; if (extra_pass) { prog->bpf_func = NULL; prog->jited = 0; @@ -3852,10 +3832,8 @@ out_image: header = bpf_jit_binary_pack_alloc(roundup(proglen, align) + extable_size, &image, align, &rw_header, &rw_image, jit_fill_hole); - if (!header) { - prog = orig_prog; + if (!header) goto out_addrs; - } prog->aux->extable = (void *) image + roundup(proglen, align); } oldproglen = proglen; @@ -3908,8 
+3886,6 @@ out_image: prog->bpf_func = (void *)image + cfi_get_offset(); prog->jited = 1; prog->jited_len = proglen - cfi_get_offset(); - } else { - prog = orig_prog; } if (!image || !prog->is_func || extra_pass) { @@ -3925,10 +3901,7 @@ out_priv_stack: kfree(jit_data); prog->aux->jit_data = NULL; } -out: - if (tmp_blinded) - bpf_jit_prog_release_other(prog, prog == orig_prog ? - tmp : orig_prog); + return prog; } diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index dda423025c3d..852baf2e4db4 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -2518,38 +2518,22 @@ bool bpf_jit_needs_zext(void) return true; } -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog) { struct bpf_binary_header *header = NULL; - struct bpf_prog *tmp, *orig_prog = prog; int proglen, oldproglen = 0; struct jit_context ctx = {}; - bool tmp_blinded = false; u8 *image = NULL; int *addrs; int pass; int i; if (!prog->jit_requested) - return orig_prog; - - tmp = bpf_jit_blind_constants(prog); - /* - * If blinding was requested and we failed during blinding, - * we must fall back to the interpreter. 
- */ - if (IS_ERR(tmp)) - return orig_prog; - if (tmp != prog) { - tmp_blinded = true; - prog = tmp; - } + return prog; addrs = kmalloc_objs(*addrs, prog->len); - if (!addrs) { - prog = orig_prog; - goto out; - } + if (!addrs) + return prog; /* * Before first pass, make a rough estimation of addrs[] @@ -2574,7 +2558,6 @@ out_image: image = NULL; if (header) bpf_jit_binary_free(header); - prog = orig_prog; goto out_addrs; } if (image) { @@ -2588,10 +2571,8 @@ out_image: if (proglen == oldproglen) { header = bpf_jit_binary_alloc(proglen, &image, 1, jit_fill_hole); - if (!header) { - prog = orig_prog; + if (!header) goto out_addrs; - } } oldproglen = proglen; cond_resched(); @@ -2604,16 +2585,10 @@ out_image: prog->bpf_func = (void *)image; prog->jited = 1; prog->jited_len = proglen; - } else { - prog = orig_prog; } out_addrs: kfree(addrs); -out: - if (tmp_blinded) - bpf_jit_prog_release_other(prog, prog == orig_prog ? - tmp : orig_prog); return prog; } diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0136a108d083..b4b703c90ca9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1541,6 +1541,8 @@ bool bpf_has_frame_pointer(unsigned long ip); int bpf_jit_charge_modmem(u32 size); void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); +bool bpf_insn_is_indirect_target(const struct bpf_verifier_env *env, const struct bpf_prog *prog, + int insn_idx); #else static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 53e8664cb566..b148f816f25b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -630,16 +630,17 @@ struct bpf_insn_aux_data { /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ - bool jmp_point; - bool prune_point; + u32 jmp_point:1; + u32 prune_point:1; /* ensure we check state equivalence and save state 
checkpoint and * this instruction, regardless of any heuristics */ - bool force_checkpoint; + u32 force_checkpoint:1; /* true if instruction is a call to a helper function that * accepts callback function as a parameter. */ - bool calls_callback; + u32 calls_callback:1; + u32 indirect_target:1; /* if it is an indirect jump target */ /* * CFG strongly connected component this instruction belongs to, * zero if it is a singleton SCC. diff --git a/include/linux/filter.h b/include/linux/filter.h index f552170eacf4..1ec6d5ba64cc 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1108,6 +1108,8 @@ sk_filter_reason(struct sock *sk, struct sk_buff *skb) return sk_filter_trim_cap(sk, skb, 1); } +struct bpf_prog *__bpf_prog_select_runtime(struct bpf_verifier_env *env, struct bpf_prog *fp, + int *err); struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err); void bpf_prog_free(struct bpf_prog *fp); @@ -1153,7 +1155,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); ((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \ (void *)__bpf_call_base) -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); +struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); bool bpf_jit_inlines_helper_call(s32 imm); @@ -1184,6 +1186,31 @@ static inline bool bpf_dump_raw_ok(const struct cred *cred) struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); + +#ifdef CONFIG_BPF_SYSCALL +struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, + const struct bpf_insn *patch, u32 len); +struct bpf_insn_aux_data *bpf_dup_insn_aux_data(struct bpf_verifier_env *env); +void bpf_restore_insn_aux_data(struct bpf_verifier_env *env, + struct bpf_insn_aux_data *orig_insn_aux); +#else +static inline struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 
off, + const struct bpf_insn *patch, u32 len) +{ + return ERR_PTR(-ENOTSUPP); +} + +static inline struct bpf_insn_aux_data *bpf_dup_insn_aux_data(struct bpf_verifier_env *env) +{ + return NULL; +} + +static inline void bpf_restore_insn_aux_data(struct bpf_verifier_env *env, + struct bpf_insn_aux_data *orig_insn_aux) +{ +} +#endif /* CONFIG_BPF_SYSCALL */ + int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt); static inline bool xdp_return_frame_no_direct(void) @@ -1310,9 +1337,14 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog, const char *bpf_jit_get_prog_name(struct bpf_prog *prog); -struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp); +struct bpf_prog *bpf_jit_blind_constants(struct bpf_verifier_env *env, struct bpf_prog *prog); void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other); +static inline bool bpf_prog_need_blind(const struct bpf_prog *prog) +{ + return prog->blinding_requested && !prog->blinded; +} + static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, u32 pass, void *image) { @@ -1451,6 +1483,20 @@ static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) { } +static inline bool bpf_prog_need_blind(const struct bpf_prog *prog) +{ + return false; +} + +static inline +struct bpf_prog *bpf_jit_blind_constants(struct bpf_verifier_env *env, struct bpf_prog *prog) +{ + return prog; +} + +static inline void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other) +{ +} #endif /* CONFIG_BPF_JIT */ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp); diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 08d008cc471e..802656c6fd3c 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -341,6 +341,16 @@ static void arena_vm_open(struct vm_area_struct *vma) refcount_inc(&vml->mmap_count); } +static int arena_vm_may_split(struct vm_area_struct *vma, unsigned long addr) +{ + return -EINVAL; +} + +static int arena_vm_mremap(struct vm_area_struct *vma) +{ + 
return -EINVAL; +} + static void arena_vm_close(struct vm_area_struct *vma) { struct bpf_map *map = vma->vm_file->private_data; @@ -417,6 +427,8 @@ out_unlock_sigsegv: static const struct vm_operations_struct arena_vm_ops = { .open = arena_vm_open, + .may_split = arena_vm_may_split, + .mremap = arena_vm_mremap, .close = arena_vm_close, .fault = arena_vm_fault, }; @@ -486,10 +498,11 @@ static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) arena->user_vm_end = vma->vm_end; /* * bpf_map_mmap() checks that it's being mmaped as VM_SHARED and - * clears VM_MAYEXEC. Set VM_DONTEXPAND as well to avoid - * potential change of user_vm_start. + * clears VM_MAYEXEC. Set VM_DONTEXPAND to avoid potential change + * of user_vm_start. Set VM_DONTCOPY to prevent arena VMA from + * being copied into the child process on fork. */ - vm_flags_set(vma, VM_DONTEXPAND); + vm_flags_set(vma, VM_DONTEXPAND | VM_DONTCOPY); vma->vm_ops = &arena_vm_ops; return 0; } @@ -549,6 +562,10 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt u32 uaddr32; int ret, i; + if (node_id != NUMA_NO_NODE && + ((unsigned int)node_id >= nr_node_ids || !node_online(node_id))) + return 0; + if (page_cnt > page_cnt_max) return 0; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 05b366b821c3..521cb9d7e8c7 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -811,9 +811,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, goto reset_unlock; } - /* Poison pointer on error instead of return for backward compatibility */ - bpf_prog_assoc_struct_ops(prog, &st_map->map); - link = kzalloc_obj(*link, GFP_USER); if (!link) { bpf_prog_put(prog); @@ -824,6 +821,9 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, &bpf_struct_ops_link_lops, prog, prog->expected_attach_type); *plink++ = &link->link; + /* Poison pointer on error instead of return for backward compatibility 
*/ + bpf_prog_assoc_struct_ops(prog, &st_map->map); + ksym = kzalloc_obj(*ksym, GFP_USER); if (!ksym) { err = -ENOMEM; @@ -906,6 +906,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, reset_unlock: bpf_struct_ops_map_free_ksyms(st_map); bpf_struct_ops_map_free_image(st_map); + bpf_struct_ops_map_dissoc_progs(st_map); bpf_struct_ops_map_put_progs(st_map); memset(uvalue, 0, map->value_size); memset(kvalue, 0, map->value_size); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 066b86e7233c..8b018ff48875 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1491,24 +1491,11 @@ void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other) bpf_prog_clone_free(fp_other); } -static void adjust_insn_arrays(struct bpf_prog *prog, u32 off, u32 len) -{ -#ifdef CONFIG_BPF_SYSCALL - struct bpf_map *map; - int i; - - if (len <= 1) - return; - - for (i = 0; i < prog->aux->used_map_cnt; i++) { - map = prog->aux->used_maps[i]; - if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) - bpf_insn_array_adjust(map, off, len); - } -#endif -} - -struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) +/* + * Now this function is used only to blind the main prog and must be invoked only when + * bpf_prog_need_blind() returns true. 
+ */ +struct bpf_prog *bpf_jit_blind_constants(struct bpf_verifier_env *env, struct bpf_prog *prog) { struct bpf_insn insn_buff[16], aux[2]; struct bpf_prog *clone, *tmp; @@ -1516,13 +1503,17 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) struct bpf_insn *insn; int i, rewritten; - if (!prog->blinding_requested || prog->blinded) - return prog; + if (WARN_ON_ONCE(env && env->prog != prog)) + return ERR_PTR(-EINVAL); clone = bpf_prog_clone_create(prog, GFP_USER); if (!clone) return ERR_PTR(-ENOMEM); + /* make sure bpf_patch_insn_data() patches the correct prog */ + if (env) + env->prog = clone; + insn_cnt = clone->len; insn = clone->insnsi; @@ -1550,21 +1541,28 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) if (!rewritten) continue; - tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten); - if (IS_ERR(tmp)) { + if (env) + tmp = bpf_patch_insn_data(env, i, insn_buff, rewritten); + else + tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten); + + if (IS_ERR_OR_NULL(tmp)) { + if (env) + /* restore the original prog */ + env->prog = prog; /* Patching may have repointed aux->prog during * realloc from the original one, so we need to * fix it up here on error. */ bpf_jit_prog_release_other(prog, clone); - return tmp; + return IS_ERR(tmp) ? tmp : ERR_PTR(-ENOMEM); } clone = tmp; insn_delta = rewritten - 1; - /* Instructions arrays must be updated using absolute xlated offsets */ - adjust_insn_arrays(clone, prog->aux->subprog_start + i, rewritten); + if (env) + env->prog = clone; /* Walk new program and skip insns we just inserted. 
*/ insn = clone->insnsi + i + insn_delta; @@ -1575,6 +1573,15 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) clone->blinded = 1; return clone; } + +bool bpf_insn_is_indirect_target(const struct bpf_verifier_env *env, const struct bpf_prog *prog, + int insn_idx) +{ + if (!env) + return false; + insn_idx += prog->aux->subprog_start; + return env->insn_aux_data[insn_idx].indirect_target; +} #endif /* CONFIG_BPF_JIT */ /* Base function for offset calculation. Needs to go into .text section, @@ -2533,18 +2540,55 @@ static bool bpf_prog_select_interpreter(struct bpf_prog *fp) return select_interpreter; } -/** - * bpf_prog_select_runtime - select exec runtime for BPF program - * @fp: bpf_prog populated with BPF program - * @err: pointer to error variable - * - * Try to JIT eBPF program, if JIT is not available, use interpreter. - * The BPF program will be executed via bpf_prog_run() function. - * - * Return: the &fp argument along with &err set to 0 for success or - * a negative errno code on failure - */ -struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) +static struct bpf_prog *bpf_prog_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog) +{ +#ifdef CONFIG_BPF_JIT + struct bpf_prog *orig_prog; + struct bpf_insn_aux_data *orig_insn_aux; + + if (!bpf_prog_need_blind(prog)) + return bpf_int_jit_compile(env, prog); + + if (env) { + /* + * If env is not NULL, we are called from the end of bpf_check(), at this + * point, only insn_aux_data is used after failure, so it should be restored + * on failure. + */ + orig_insn_aux = bpf_dup_insn_aux_data(env); + if (!orig_insn_aux) + return prog; + } + + orig_prog = prog; + prog = bpf_jit_blind_constants(env, prog); + /* + * If blinding was requested and we failed during blinding, we must fall + * back to the interpreter. 
+ */ + if (IS_ERR(prog)) + goto out_restore; + + prog = bpf_int_jit_compile(env, prog); + if (prog->jited) { + bpf_jit_prog_release_other(prog, orig_prog); + if (env) + vfree(orig_insn_aux); + return prog; + } + + bpf_jit_prog_release_other(orig_prog, prog); + +out_restore: + prog = orig_prog; + if (env) + bpf_restore_insn_aux_data(env, orig_insn_aux); +#endif + return prog; +} + +struct bpf_prog *__bpf_prog_select_runtime(struct bpf_verifier_env *env, struct bpf_prog *fp, + int *err) { /* In case of BPF to BPF calls, verifier did all the prep * work with regards to JITing, etc. @@ -2572,7 +2616,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) if (*err) return fp; - fp = bpf_int_jit_compile(fp); + fp = bpf_prog_jit_compile(env, fp); bpf_prog_jit_attempt_done(fp); if (!fp->jited && jit_needed) { *err = -ENOTSUPP; @@ -2598,6 +2642,22 @@ finalize: return fp; } + +/** + * bpf_prog_select_runtime - select exec runtime for BPF program + * @fp: bpf_prog populated with BPF program + * @err: pointer to error variable + * + * Try to JIT eBPF program, if JIT is not available, use interpreter. + * The BPF program will be executed via bpf_prog_run() function. + * + * Return: the &fp argument along with &err set to 0 for success or + * a negative errno code on failure + */ +struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) +{ + return __bpf_prog_select_runtime(NULL, fp, err); +} EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); static unsigned int __bpf_prog_ret1(const void *ctx, @@ -3085,7 +3145,7 @@ const struct bpf_func_proto bpf_tail_call_proto = { * It is encouraged to implement bpf_int_jit_compile() instead, so that * eBPF and implicitly also cBPF can get JITed! 
*/ -struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog) +struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog) { return prog; } diff --git a/kernel/bpf/fixups.c b/kernel/bpf/fixups.c index 67c9b28767e1..fba9e8c00878 100644 --- a/kernel/bpf/fixups.c +++ b/kernel/bpf/fixups.c @@ -183,6 +183,18 @@ static void adjust_insn_aux_data(struct bpf_verifier_env *env, data[i].seen = old_seen; data[i].zext_dst = insn_has_def32(insn + i); } + + /* + * The indirect_target flag of the original instruction was moved to the last of the + * new instructions by the above memmove and memset, but the indirect jump target is + * actually the first instruction, so move it back. This also matches with the behavior + * of bpf_insn_array_adjust(), which preserves xlated_off to point to the first new + * instruction. + */ + if (data[off + cnt - 1].indirect_target) { + data[off].indirect_target = 1; + data[off + cnt - 1].indirect_target = 0; + } } static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len) @@ -232,8 +244,8 @@ static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len) } } -static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, - const struct bpf_insn *patch, u32 len) +struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, + const struct bpf_insn *patch, u32 len) { struct bpf_prog *new_prog; struct bpf_insn_aux_data *new_data = NULL; @@ -973,7 +985,47 @@ patch_insn_buf: return 0; } -int bpf_jit_subprogs(struct bpf_verifier_env *env) +static u32 *bpf_dup_subprog_starts(struct bpf_verifier_env *env) +{ + u32 *starts = NULL; + + starts = kvmalloc_objs(u32, env->subprog_cnt, GFP_KERNEL_ACCOUNT); + if (starts) { + for (int i = 0; i < env->subprog_cnt; i++) + starts[i] = env->subprog_info[i].start; + } + return starts; +} + +static void bpf_restore_subprog_starts(struct bpf_verifier_env *env, u32 *orig_starts) +{ + for (int i = 0; i < 
env->subprog_cnt; i++) + env->subprog_info[i].start = orig_starts[i]; + /* restore the start of fake 'exit' subprog as well */ + env->subprog_info[env->subprog_cnt].start = env->prog->len; +} + +struct bpf_insn_aux_data *bpf_dup_insn_aux_data(struct bpf_verifier_env *env) +{ + size_t size; + void *new_aux; + + size = array_size(sizeof(struct bpf_insn_aux_data), env->prog->len); + new_aux = __vmalloc(size, GFP_KERNEL_ACCOUNT); + if (new_aux) + memcpy(new_aux, env->insn_aux_data, size); + return new_aux; +} + +void bpf_restore_insn_aux_data(struct bpf_verifier_env *env, + struct bpf_insn_aux_data *orig_insn_aux) +{ + /* the expanded elements are zero-filled, so no special handling is required */ + vfree(env->insn_aux_data); + env->insn_aux_data = orig_insn_aux; +} + +static int jit_subprogs(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog, **func, *tmp; int i, j, subprog_start, subprog_end = 0, len, subprog; @@ -981,10 +1033,6 @@ int bpf_jit_subprogs(struct bpf_verifier_env *env) struct bpf_insn *insn; void *old_bpf_func; int err, num_exentries; - int old_len, subprog_start_adjustment = 0; - - if (env->subprog_cnt <= 1) - return 0; for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn)) @@ -1053,10 +1101,11 @@ int bpf_jit_subprogs(struct bpf_verifier_env *env) goto out_free; func[i]->is_func = 1; func[i]->sleepable = prog->sleepable; + func[i]->blinded = prog->blinded; func[i]->aux->func_idx = i; /* Below members will be freed only at prog->aux */ func[i]->aux->btf = prog->aux->btf; - func[i]->aux->subprog_start = subprog_start + subprog_start_adjustment; + func[i]->aux->subprog_start = subprog_start; func[i]->aux->func_info = prog->aux->func_info; func[i]->aux->func_info_cnt = prog->aux->func_info_cnt; func[i]->aux->poke_tab = prog->aux->poke_tab; @@ -1110,17 +1159,10 @@ int bpf_jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb; 
func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data; func[i]->aux->might_sleep = env->subprog_info[i].might_sleep; + func[i]->aux->token = prog->aux->token; if (!i) func[i]->aux->exception_boundary = env->seen_exception; - - /* - * To properly pass the absolute subprog start to jit - * all instruction adjustments should be accumulated - */ - old_len = func[i]->len; - func[i] = bpf_int_jit_compile(func[i]); - subprog_start_adjustment += func[i]->len - old_len; - + func[i] = bpf_int_jit_compile(env, func[i]); if (!func[i]->jited) { err = -ENOTSUPP; goto out_free; @@ -1164,7 +1206,7 @@ int bpf_jit_subprogs(struct bpf_verifier_env *env) } for (i = 0; i < env->subprog_cnt; i++) { old_bpf_func = func[i]->bpf_func; - tmp = bpf_int_jit_compile(func[i]); + tmp = bpf_int_jit_compile(env, func[i]); if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) { verbose(env, "JIT doesn't support bpf-to-bpf calls\n"); err = -ENOTSUPP; @@ -1246,16 +1288,87 @@ out_free: } kfree(func); out_undo_insn: + bpf_prog_jit_attempt_done(prog); + return err; +} + +int bpf_jit_subprogs(struct bpf_verifier_env *env) +{ + int err, i; + bool blinded = false; + struct bpf_insn *insn; + struct bpf_prog *prog, *orig_prog; + struct bpf_insn_aux_data *orig_insn_aux; + u32 *orig_subprog_starts; + + if (env->subprog_cnt <= 1) + return 0; + + prog = orig_prog = env->prog; + if (bpf_prog_need_blind(prog)) { + orig_insn_aux = bpf_dup_insn_aux_data(env); + if (!orig_insn_aux) { + err = -ENOMEM; + goto out_cleanup; + } + orig_subprog_starts = bpf_dup_subprog_starts(env); + if (!orig_subprog_starts) { + vfree(orig_insn_aux); + err = -ENOMEM; + goto out_cleanup; + } + prog = bpf_jit_blind_constants(env, prog); + if (IS_ERR(prog)) { + err = -ENOMEM; + prog = orig_prog; + goto out_restore; + } + blinded = true; + } + + err = jit_subprogs(env); + if (err) + goto out_jit_err; + + if (blinded) { + bpf_jit_prog_release_other(prog, orig_prog); + kvfree(orig_subprog_starts); + vfree(orig_insn_aux); + } 
+ + return 0; + +out_jit_err: + if (blinded) { + bpf_jit_prog_release_other(orig_prog, prog); + /* roll back to the clean original prog */ + prog = env->prog = orig_prog; + goto out_restore; + } else { + if (err != -EFAULT) { + /* + * We will fall back to interpreter mode when err is not -EFAULT, before + * that, insn->off and insn->imm should be restored to their original + * values since they were modified by jit_subprogs. + */ + for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + if (!bpf_pseudo_call(insn)) + continue; + insn->off = 0; + insn->imm = env->insn_aux_data[i].call_imm; + } + } + goto out_cleanup; + } + +out_restore: + bpf_restore_subprog_starts(env, orig_subprog_starts); + bpf_restore_insn_aux_data(env, orig_insn_aux); + kvfree(orig_subprog_starts); +out_cleanup: /* cleanup main prog to be interpreted */ prog->jit_requested = 0; prog->blinding_requested = 0; - for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { - if (!bpf_pseudo_call(insn)) - continue; - insn->off = 0; - insn->imm = env->insn_aux_data[i].call_imm; - } - bpf_prog_jit_attempt_done(prog); return err; } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index bb95e287b0dc..2bb60200c266 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -845,7 +845,13 @@ int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args, data->buf = buffers->buf; for (i = 0; i < fmt_size; i++) { - if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { + unsigned char c = fmt[i]; + + /* + * Permit bytes >= 0x80 in plain text so UTF-8 literals can pass + * through unchanged, while still rejecting ASCII control bytes. 
+ */ + if (isascii(c) && !isprint(c) && !isspace(c)) { err = -EINVAL; goto out; } @@ -867,6 +873,15 @@ int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args, * always access fmt[i + 1], in the worst case it will be a 0 */ i++; + c = fmt[i]; + /* + * The format parser below only understands ASCII conversion + * specifiers and modifiers, so reject non-ASCII after '%'. + */ + if (!isascii(c)) { + err = -EINVAL; + goto out; + } /* skip optional "[0 +-][num]" width formatting field */ while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' || diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c index 1fb4c511db5a..332e6e003f27 100644 --- a/kernel/bpf/liveness.c +++ b/kernel/bpf/liveness.c @@ -574,7 +574,7 @@ static int print_instances(struct bpf_verifier_env *env) * * precise {frame=N, off=V} -- known absolute frame index and byte offset * | - * offset-imprecise {frame=N, off=OFF_IMPRECISE} + * offset-imprecise {frame=N, cnt=0} * | -- known frame identity, unknown offset * fully-imprecise {frame=ARG_IMPRECISE, mask=bitmask} * -- unknown frame identity; .mask is a @@ -607,8 +607,6 @@ enum arg_track_state { ARG_IMPRECISE = -3, /* lost identity; .mask is arg bitmask */ }; -#define OFF_IMPRECISE S16_MIN /* arg identity known but offset unknown */ - /* Track callee stack slots fp-8 through fp-512 (64 slots of 8 bytes each) */ #define MAX_ARG_SPILL_SLOTS 64 @@ -622,28 +620,6 @@ static bool arg_is_fp(const struct arg_track *at) return at->frame >= 0 || at->frame == ARG_IMPRECISE; } -/* - * Clear all tracked callee stack slots overlapping the byte range - * [off, off+sz-1] where off is a negative FP-relative offset. 
- */ -static void clear_overlapping_stack_slots(struct arg_track *at_stack, s16 off, u32 sz) -{ - struct arg_track none = { .frame = ARG_NONE }; - - if (off == OFF_IMPRECISE) { - for (int i = 0; i < MAX_ARG_SPILL_SLOTS; i++) - at_stack[i] = none; - return; - } - for (int i = 0; i < MAX_ARG_SPILL_SLOTS; i++) { - int slot_start = -((i + 1) * 8); - int slot_end = slot_start + 8; - - if (slot_start < off + (int)sz && slot_end > off) - at_stack[i] = none; - } -} - static void verbose_arg_track(struct bpf_verifier_env *env, struct arg_track *at) { int i; @@ -863,16 +839,13 @@ static void arg_track_alu64(struct arg_track *dst, const struct arg_track *src) *dst = arg_join_imprecise(*dst, *src); } -static s16 arg_add(s16 off, s64 delta) +static bool arg_add(s16 off, s64 delta, s16 *out) { - s64 res; - - if (off == OFF_IMPRECISE) - return OFF_IMPRECISE; - res = (s64)off + delta; - if (res < S16_MIN + 1 || res > S16_MAX) - return OFF_IMPRECISE; - return res; + s16 d = delta; + + if (d != delta) + return true; + return check_add_overflow(off, d, out); } static void arg_padd(struct arg_track *at, s64 delta) @@ -882,9 +855,9 @@ static void arg_padd(struct arg_track *at, s64 delta) if (at->off_cnt == 0) return; for (i = 0; i < at->off_cnt; i++) { - s16 new_off = arg_add(at->off[i], delta); + s16 new_off; - if (new_off == OFF_IMPRECISE) { + if (arg_add(at->off[i], delta, &new_off)) { at->off_cnt = 0; return; } @@ -899,8 +872,6 @@ static void arg_padd(struct arg_track *at, s64 delta) */ static int fp_off_to_slot(s16 off) { - if (off == OFF_IMPRECISE) - return -1; if (off >= 0 || off < -(int)(MAX_ARG_SPILL_SLOTS * 8)) return -1; if (off % 8) @@ -930,9 +901,11 @@ static struct arg_track fill_from_stack(struct bpf_insn *insn, return imp; for (i = 0; i < cnt; i++) { - s16 fp_off = arg_add(at_out[reg].off[i], insn->off); - int slot = fp_off_to_slot(fp_off); + s16 fp_off, slot; + if (arg_add(at_out[reg].off[i], insn->off, &fp_off)) + return imp; + slot = fp_off_to_slot(fp_off); if (slot 
< 0) return imp; result = __arg_track_join(result, at_stack_out[slot]); @@ -968,9 +941,12 @@ static void spill_to_stack(struct bpf_insn *insn, struct arg_track *at_out, return; } for (i = 0; i < cnt; i++) { - s16 fp_off = arg_add(at_out[reg].off[i], insn->off); - int slot = fp_off_to_slot(fp_off); + s16 fp_off; + int slot; + if (arg_add(at_out[reg].off[i], insn->off, &fp_off)) + continue; + slot = fp_off_to_slot(fp_off); if (slot < 0) continue; if (cnt == 1) @@ -981,6 +957,32 @@ static void spill_to_stack(struct bpf_insn *insn, struct arg_track *at_out, } /* + * Clear all tracked callee stack slots overlapping the byte range + * [off, off+sz-1] where off is a negative FP-relative offset. + */ +static void clear_overlapping_stack_slots(struct arg_track *at_stack, s16 off, u32 sz, int cnt) +{ + struct arg_track none = { .frame = ARG_NONE }; + + if (cnt == 0) { + for (int i = 0; i < MAX_ARG_SPILL_SLOTS; i++) + at_stack[i] = __arg_track_join(at_stack[i], none); + return; + } + for (int i = 0; i < MAX_ARG_SPILL_SLOTS; i++) { + int slot_start = -((i + 1) * 8); + int slot_end = slot_start + 8; + + if (slot_start < off + (int)sz && slot_end > off) { + if (cnt == 1) + at_stack[i] = none; + else + at_stack[i] = __arg_track_join(at_stack[i], none); + } + } +} + +/* * Clear stack slots overlapping all possible FP offsets in @reg. 
*/ static void clear_stack_for_all_offs(struct bpf_insn *insn, @@ -990,18 +992,22 @@ static void clear_stack_for_all_offs(struct bpf_insn *insn, int cnt, i; if (reg == BPF_REG_FP) { - clear_overlapping_stack_slots(at_stack_out, insn->off, sz); + clear_overlapping_stack_slots(at_stack_out, insn->off, sz, 1); return; } cnt = at_out[reg].off_cnt; if (cnt == 0) { - clear_overlapping_stack_slots(at_stack_out, OFF_IMPRECISE, sz); + clear_overlapping_stack_slots(at_stack_out, 0, sz, cnt); return; } for (i = 0; i < cnt; i++) { - s16 fp_off = arg_add(at_out[reg].off[i], insn->off); + s16 fp_off; - clear_overlapping_stack_slots(at_stack_out, fp_off, sz); + if (arg_add(at_out[reg].off[i], insn->off, &fp_off)) { + clear_overlapping_stack_slots(at_stack_out, 0, sz, 0); + break; + } + clear_overlapping_stack_slots(at_stack_out, fp_off, sz, cnt); } } @@ -1042,6 +1048,12 @@ static void arg_track_log(struct bpf_verifier_env *env, struct bpf_insn *insn, i verbose(env, "\n"); } +static bool can_be_local_fp(int depth, int regno, struct arg_track *at) +{ + return regno == BPF_REG_FP || at->frame == depth || + (at->frame == ARG_IMPRECISE && (at->mask & BIT(depth))); +} + /* * Pure dataflow transfer function for arg_track state. * Updates at_out[] based on how the instruction modifies registers. 
@@ -1111,8 +1123,7 @@ static void arg_track_xfer(struct bpf_verifier_env *env, struct bpf_insn *insn, at_out[r] = none; } else if (class == BPF_LDX) { u32 sz = bpf_size_to_bytes(BPF_SIZE(insn->code)); - bool src_is_local_fp = insn->src_reg == BPF_REG_FP || src->frame == depth || - (src->frame == ARG_IMPRECISE && (src->mask & BIT(depth))); + bool src_is_local_fp = can_be_local_fp(depth, insn->src_reg, src); /* * Reload from callee stack: if src is current-frame FP-derived @@ -1147,7 +1158,7 @@ static void arg_track_xfer(struct bpf_verifier_env *env, struct bpf_insn *insn, bool dst_is_local_fp; /* Track spills to current-frame FP-derived callee stack */ - dst_is_local_fp = insn->dst_reg == BPF_REG_FP || dst->frame == depth; + dst_is_local_fp = can_be_local_fp(depth, insn->dst_reg, dst); if (dst_is_local_fp && BPF_MODE(insn->code) == BPF_MEM) spill_to_stack(insn, at_out, insn->dst_reg, at_stack_out, src, sz); @@ -1166,7 +1177,7 @@ static void arg_track_xfer(struct bpf_verifier_env *env, struct bpf_insn *insn, } } else if (class == BPF_ST && BPF_MODE(insn->code) == BPF_MEM) { u32 sz = bpf_size_to_bytes(BPF_SIZE(insn->code)); - bool dst_is_local_fp = insn->dst_reg == BPF_REG_FP || dst->frame == depth; + bool dst_is_local_fp = can_be_local_fp(depth, insn->dst_reg, dst); /* BPF_ST to FP-derived dst: clear overlapping stack slots */ if (dst_is_local_fp) @@ -1316,8 +1327,7 @@ static int record_load_store_access(struct bpf_verifier_env *env, resolved.off_cnt = ptr->off_cnt; resolved.frame = ptr->frame; for (oi = 0; oi < ptr->off_cnt; oi++) { - resolved.off[oi] = arg_add(ptr->off[oi], insn->off); - if (resolved.off[oi] == OFF_IMPRECISE) { + if (arg_add(ptr->off[oi], insn->off, &resolved.off[oi])) { resolved.off_cnt = 0; break; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b73b25c63073..a3c0214ca934 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3083,10 +3083,6 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 
uattr_size) if (err < 0) goto free_used_maps; - prog = bpf_prog_select_runtime(prog, &err); - if (err < 0) - goto free_used_maps; - err = bpf_prog_mark_insn_arrays_ready(prog); if (err < 0) goto free_used_maps; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9e4980128151..69d75515ed3f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3497,6 +3497,11 @@ static int insn_stack_access_flags(int frameno, int spi) return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno; } +static void mark_indirect_target(struct bpf_verifier_env *env, int idx) +{ + env->insn_aux_data[idx].indirect_target = true; +} + #define LR_FRAMENO_BITS 3 #define LR_SPI_BITS 6 #define LR_ENTRY_BITS (LR_SPI_BITS + LR_FRAMENO_BITS + 1) @@ -4544,6 +4549,9 @@ static int map_kptr_match_type(struct bpf_verifier_env *env, int perm_flags; const char *reg_name = ""; + if (base_type(reg->type) != PTR_TO_BTF_ID) + goto bad_type; + if (btf_is_kernel(reg->btf)) { perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU; @@ -4556,7 +4564,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env, perm_flags |= MEM_PERCPU; } - if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags)) + if (type_flag(reg->type) & ~perm_flags) goto bad_type; /* We need to verify reg->type and reg->btf, before accessing reg->btf */ @@ -17545,12 +17553,14 @@ static int check_indirect_jump(struct bpf_verifier_env *env, struct bpf_insn *in } for (i = 0; i < n - 1; i++) { + mark_indirect_target(env, env->gotox_tmp_buf->items[i]); other_branch = push_stack(env, env->gotox_tmp_buf->items[i], env->insn_idx, env->cur_state->speculative); if (IS_ERR(other_branch)) return PTR_ERR(other_branch); } env->insn_idx = env->gotox_tmp_buf->items[n-1]; + mark_indirect_target(env, env->insn_idx); return INSN_IDX_UPDATED; } @@ -20155,6 +20165,14 @@ skip_full_check: adjust_btf_func(env); + /* extension progs temporarily inherit the attach_type of their targets + for verification purposes, 
so set it back to zero before returning + */ + if (env->prog->type == BPF_PROG_TYPE_EXT) + env->prog->expected_attach_type = 0; + + env->prog = __bpf_prog_select_runtime(env, env->prog, &ret); + err_release_maps: if (ret) release_insn_arrays(env); @@ -20166,12 +20184,6 @@ err_release_maps: if (!env->prog->aux->used_btfs) release_btfs(env); - /* extension progs temporarily inherit the attach_type of their targets - for verification purposes, so set it back to zero before returning - */ - if (env->prog->type == BPF_PROG_TYPE_EXT) - env->prog->expected_attach_type = 0; - *prog = env->prog; module_put(env->attach_btf_mod); diff --git a/net/core/filter.c b/net/core/filter.c index fcfcb72663ca..5fa9189eb772 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -503,7 +503,7 @@ static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp) ((unaligned_ok && offset >= 0) || (!unaligned_ok && offset >= 0 && offset + ip_align >= 0 && - offset + ip_align % size == 0))) { + (offset + ip_align) % size == 0))) { bool ldx_off_ok = offset <= S16_MAX; *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index b0e96337a269..02a68be3002a 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -530,7 +530,7 @@ static bool sock_map_redirect_allowed(const struct sock *sk) if (sk_is_tcp(sk)) return sk->sk_state != TCP_LISTEN; else - return sk->sk_state == TCP_ESTABLISHED; + return READ_ONCE(sk->sk_state) == TCP_ESTABLISHED; } static bool sock_map_sk_is_suitable(const struct sock *sk) @@ -543,7 +543,7 @@ static bool sock_map_sk_state_allowed(const struct sock *sk) if (sk_is_tcp(sk)) return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); if (sk_is_stream_unix(sk)) - return (1 << sk->sk_state) & TCPF_ESTABLISHED; + return (1 << READ_ONCE(sk->sk_state)) & TCPF_ESTABLISHED; if (sk_is_vsock(sk) && (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) return (1 << sk->sk_state) & TCPF_ESTABLISHED; 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 4c4a8d23ddd2..f668ff107722 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3735,15 +3735,15 @@ static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) struct bpf_prog *prog; struct sock *sk = v; uid_t uid; - bool slow; int ret; if (v == SEQ_START_TOKEN) return 0; - slow = lock_sock_fast(sk); + lock_sock(sk); + unix_state_lock(sk); - if (unlikely(sk_unhashed(sk))) { + if (unlikely(sock_flag(sk, SOCK_DEAD))) { ret = SEQ_SKIP; goto unlock; } @@ -3753,7 +3753,8 @@ static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) prog = bpf_iter_get_info(&meta, false); ret = unix_prog_seq_show(prog, &meta, v, uid); unlock: - unlock_sock_fast(sk, slow); + unix_state_unlock(sk); + release_sock(sk); return ret; } diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index d14cd5454a8d..f86ff19e9764 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -185,6 +185,9 @@ int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool r */ if (!psock->sk_pair) { sk_pair = unix_peer(sk); + if (unlikely(!sk_pair)) + return -EINVAL; + sock_hold(sk_pair); psock->sk_pair = sk_pair; } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8b0c3246097f..3a80a018fc7d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5852,11 +5852,12 @@ static int load_module_btfs(struct bpf_object *obj) info.name = ptr_to_u64(name); info.name_len = sizeof(name); + btf = NULL; err = bpf_btf_get_info_by_fd(fd, &info, &len); if (err) { err = -errno; pr_warn("failed to get BTF object #%d info: %s\n", id, errstr(err)); - goto err_out; + break; } /* ignore non-module BTFs */ @@ -5870,15 +5871,15 @@ static int load_module_btfs(struct bpf_object *obj) if (err) { pr_warn("failed to load module [%s]'s BTF object #%d: %s\n", name, id, errstr(err)); - goto err_out; + break; } err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap, sizeof(*obj->btf_modules), 
obj->btf_module_cnt + 1); if (err) - goto err_out; + break; - mod_btf = &obj->btf_modules[obj->btf_module_cnt++]; + mod_btf = &obj->btf_modules[obj->btf_module_cnt]; mod_btf->btf = btf; mod_btf->id = id; @@ -5886,16 +5887,16 @@ static int load_module_btfs(struct bpf_object *obj) mod_btf->name = strdup(name); if (!mod_btf->name) { err = -ENOMEM; - goto err_out; + break; } - continue; + obj->btf_module_cnt++; + } -err_out: + if (err) { + btf__free(btf); close(fd); - return err; } - - return 0; + return err; } static struct bpf_core_cand_list * diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 78e60040811e..6ef6872adbc3 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -751,6 +751,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \ btf_helpers.c \ cap_helpers.c \ unpriv_helpers.c \ + sysctl_helpers.c \ netlink_helpers.c \ jit_disasm_helpers.c \ io_helpers.c \ diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c index 594441acb707..4e4a82d54f79 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c @@ -114,7 +114,8 @@ static void test_snprintf_negative(void) ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5"); ASSERT_ERR(load_single_snprintf("%lc"), "invalid specifier 6"); ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7"); - ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character"); + ASSERT_OK(load_single_snprintf("\x80"), "non ascii plain text"); + ASSERT_ERR(load_single_snprintf("%\x80"), "non ascii in specifier"); ASSERT_ERR(load_single_snprintf("\x1"), "non printable character"); ASSERT_ERR(load_single_snprintf("%p%"), "invalid specifier 8"); ASSERT_ERR(load_single_snprintf("%s%"), "invalid specifier 9"); diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_data.h 
b/tools/testing/selftests/bpf/prog_tests/task_local_data.h index 1e5c67c78ffb..8ae4fb2027f7 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_data.h +++ b/tools/testing/selftests/bpf/prog_tests/task_local_data.h @@ -99,14 +99,20 @@ struct tld_meta_u { struct tld_metadata metadata[]; }; +/* + * The unused field ensures map_val.start > 0. On the BPF side, __tld_fetch_key() + * calculates off by summing map_val.start and tld_key_t.off and treats off == 0 + * as key not cached. + */ struct tld_data_u { - __u64 start; /* offset of tld_data_u->data in a page */ + __u64 unused; char data[] __attribute__((aligned(8))); }; struct tld_map_value { void *data; struct tld_meta_u *meta; + __u16 start; /* offset of tld_data_u->data in a page */ }; struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak)); @@ -182,7 +188,7 @@ static int __tld_init_data_p(int map_fd) * is a page in BTF. */ map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data); - data->start = (~TLD_PAGE_MASK & (intptr_t)data) + sizeof(struct tld_data_u); + map_val.start = (~TLD_PAGE_MASK & (intptr_t)data) + sizeof(struct tld_data_u); map_val.meta = tld_meta_p; err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0); @@ -241,7 +247,8 @@ retry: * TLD_DYN_DATA_SIZE is allocated for tld_create_key() */ if (dyn_data) { - if (off + TLD_ROUND_UP(size, 8) > tld_meta_p->size) + if (off + TLD_ROUND_UP(size, 8) > tld_meta_p->size || + tld_meta_p->size > TLD_PAGE_SIZE - sizeof(struct tld_data_u)) return (tld_key_t){-E2BIG}; } else { if (off + TLD_ROUND_UP(size, 8) > TLD_PAGE_SIZE - sizeof(struct tld_data_u)) diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c index e219ff506b56..6a5806b36113 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c +++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c @@ -3,8 +3,14 @@ #include <bpf/btf.h> #include <test_progs.h> +/* + * Only a page is pinned 
to kernel, so the maximum amount of dynamic data + * allowed is page_size - sizeof(struct tld_data_u) - static TLD fields. + */ +#define TLD_DYN_DATA_SIZE_MAX (getpagesize() - sizeof(struct tld_data_u) - 8) + #define TLD_FREE_DATA_ON_THREAD_EXIT -#define TLD_DYN_DATA_SIZE (getpagesize() - 8) +#define TLD_DYN_DATA_SIZE TLD_DYN_DATA_SIZE_MAX #include "task_local_data.h" struct test_tld_struct { @@ -24,12 +30,12 @@ TLD_DEFINE_KEY(value0_key, "value0", sizeof(int)); * sequentially. Users of task local data library should not touch * library internal. */ -static void reset_tld(void) +static void reset_tld(__u16 dyn_data_size) { if (tld_meta_p) { /* Remove TLDs created by tld_create_key() */ tld_meta_p->cnt = 1; - tld_meta_p->size = TLD_DYN_DATA_SIZE; + tld_meta_p->size = dyn_data_size + 8; memset(&tld_meta_p->metadata[1], 0, (TLD_MAX_DATA_CNT - 1) * sizeof(struct tld_metadata)); } @@ -127,7 +133,7 @@ static void test_task_local_data_basic(void) tld_key_t key; int i, err; - reset_tld(); + reset_tld(TLD_DYN_DATA_SIZE_MAX); ASSERT_OK(pthread_mutex_init(&global_mutex, NULL), "pthread_mutex_init"); @@ -147,11 +153,13 @@ static void test_task_local_data_basic(void) /* * Shouldn't be able to store data exceed a page. Create a TLD just big - * enough to exceed a page. TLDs already created are int value0, int - * value1, and struct test_tld_struct value2. + * enough to exceed a page. Data already contains struct tld_data_u, + * value0 and value1 of int type, and value 2 of struct test_tld_struct. 
*/ - key = tld_create_key("value_not_exist", - TLD_PAGE_SIZE - 2 * sizeof(int) - sizeof(struct test_tld_struct) + 1); + key = tld_create_key("value_not_exist", TLD_PAGE_SIZE + 1 - + sizeof(struct tld_data_u) - + TLD_ROUND_UP(sizeof(int), 8) * 2 - + TLD_ROUND_UP(sizeof(struct test_tld_struct), 8)); ASSERT_EQ(tld_key_err_or_zero(key), -E2BIG, "tld_create_key"); key = tld_create_key("value2", sizeof(struct test_tld_struct)); @@ -239,7 +247,7 @@ static void test_task_local_data_race(void) tld_keys[0] = value0_key; for (j = 0; j < 100; j++) { - reset_tld(); + reset_tld(TLD_DYN_DATA_SIZE_MAX); for (i = 0; i < TEST_RACE_THREAD_NUM; i++) { /* @@ -288,10 +296,80 @@ out: test_task_local_data__destroy(skel); } +static void test_task_local_data_dyn_size(__u16 dyn_data_size) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct test_task_local_data *skel; + int max_keys, i, err, fd, *data; + char name[TLD_NAME_LEN]; + tld_key_t key; + + reset_tld(dyn_data_size); + + skel = test_task_local_data__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t)); + if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys")) + goto out; + + fd = bpf_map__fd(skel->maps.tld_data_map); + + /* Create as many int-sized TLDs as the dynamic data size allows */ + max_keys = dyn_data_size / TLD_ROUND_UP(sizeof(int), 8); + for (i = 0; i < max_keys; i++) { + snprintf(name, TLD_NAME_LEN, "value_%d", i); + tld_keys[i] = tld_create_key(name, sizeof(int)); + if (!ASSERT_FALSE(tld_key_is_err(tld_keys[i]), "tld_create_key")) + goto out; + + data = tld_get_data(fd, tld_keys[i]); + if (!ASSERT_OK_PTR(data, "tld_get_data")) + goto out; + *data = i; + } + + /* The next key should fail with E2BIG */ + key = tld_create_key("overflow", sizeof(int)); + ASSERT_EQ(tld_key_err_or_zero(key), -E2BIG, "tld_create_key overflow"); + + /* Verify data for value_i do not overlap */ + for (i = 0; i < max_keys; i++) { + data = tld_get_data(fd, tld_keys[i]); + if 
(!ASSERT_OK_PTR(data, "tld_get_data")) + goto out; + + ASSERT_EQ(*data, i, "tld_get_data value_i"); + } + + /* Verify BPF side can still read the static key */ + data = tld_get_data(fd, value0_key); + if (!ASSERT_OK_PTR(data, "tld_get_data value0")) + goto out; + *data = 0xdeadbeef; + + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts); + ASSERT_OK(err, "run task_main"); + ASSERT_EQ(skel->bss->test_value0, 0xdeadbeef, "tld_get_data value0"); + +out: + if (tld_keys) { + free(tld_keys); + tld_keys = NULL; + } + tld_free(); + test_task_local_data__destroy(skel); +} + void test_task_local_data(void) { if (test__start_subtest("task_local_data_basic")) test_task_local_data_basic(); if (test__start_subtest("task_local_data_race")) test_task_local_data_race(); + if (test__start_subtest("task_local_data_dyn_size_small")) + test_task_local_data_dyn_size(64); + if (test__start_subtest("task_local_data_dyn_size_zero")) + test_task_local_data_dyn_size(0); } diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index b81dde283052..f2f5d36ae00a 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -1,9 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ #define _GNU_SOURCE -#include <test_progs.h> #include <bpf/btf.h> -#include "cap_helpers.h" #include <fcntl.h> #include <sched.h> #include <signal.h> @@ -15,9 +13,17 @@ #include <sys/stat.h> #include <sys/syscall.h> #include <sys/un.h> + +#include "bpf_util.h" +#include "cap_helpers.h" +#include "sysctl_helpers.h" +#include "test_progs.h" +#include "trace_helpers.h" + #include "priv_map.skel.h" #include "priv_prog.skel.h" #include "dummy_st_ops_success.skel.h" +#include "token_kallsyms.skel.h" #include "token_lsm.skel.h" #include "priv_freplace_prog.skel.h" @@ -1045,6 +1051,58 @@ err_out: return -EINVAL; } +static bool kallsyms_has_bpf_func(struct ksyms *ksyms, const char *func_name) +{ + char name[256]; + int i; + + for (i = 0; i < ksyms->sym_cnt; i++) { + if (sscanf(ksyms->syms[i].name, "bpf_prog_%*[^_]_%255s", name) == 1 && + strcmp(name, func_name) == 0) + return true; + } + return false; +} + +static int userns_obj_priv_prog_kallsyms(int mnt_fd, struct token_lsm *lsm_skel) +{ + const char *func_names[] = { "xdp_main", "token_ksym_subprog" }; + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct token_kallsyms *skel; + struct ksyms *ksyms = NULL; + char buf[256]; + int i, err; + + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = token_kallsyms__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "token_kallsyms__open_opts")) + return -EINVAL; + + err = token_kallsyms__load(skel); + if (!ASSERT_OK(err, "token_kallsyms__load")) + goto cleanup; + + ksyms = load_kallsyms_local(); + if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local")) { + err = -EINVAL; + goto cleanup; + } + + for (i = 0; i < ARRAY_SIZE(func_names); i++) { + if (!ASSERT_TRUE(kallsyms_has_bpf_func(ksyms, func_names[i]), + func_names[i])) { + err = -EINVAL; + break; + } + } + +cleanup: + free_kallsyms_local(ksyms); + token_kallsyms__destroy(skel); + return err; +} + #define bit(n) (1ULL << (n)) static int userns_bpf_token_info(int mnt_fd, struct token_lsm *lsm_skel) 
@@ -1082,7 +1140,7 @@ cleanup: return err; } -void test_token(void) +void serial_test_token(void) { if (test__start_subtest("map_token")) { struct bpffs_opts opts = { @@ -1194,4 +1252,26 @@ void test_token(void) subtest_userns(&opts, userns_bpf_token_info); } + if (test__start_subtest("obj_priv_prog_kallsyms")) { + char perf_paranoid_orig[32] = {}; + char kptr_restrict_orig[32] = {}; + struct bpffs_opts opts = { + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD), + .progs = bit(BPF_PROG_TYPE_XDP), + .attachs = ~0ULL, + }; + + if (sysctl_set_or_fail("/proc/sys/kernel/perf_event_paranoid", perf_paranoid_orig, "0")) + goto cleanup; + if (sysctl_set_or_fail("/proc/sys/kernel/kptr_restrict", kptr_restrict_orig, "0")) + goto cleanup; + + subtest_userns(&opts, userns_obj_priv_prog_kallsyms); + +cleanup: + if (perf_paranoid_orig[0]) + sysctl_set_or_fail("/proc/sys/kernel/perf_event_paranoid", NULL, perf_paranoid_orig); + if (kptr_restrict_orig[0]) + sysctl_set_or_fail("/proc/sys/kernel/kptr_restrict", NULL, kptr_restrict_orig); + } } diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c index e56e88596d64..a5a8104c1ddd 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c @@ -6,18 +6,21 @@ #include "trace_printk.lskel.h" #define SEARCHMSG "testing,testing" +#define SEARCHMSG_UTF8 "中文,测试" static void trace_pipe_cb(const char *str, void *data) { if (strstr(str, SEARCHMSG) != NULL) - (*(int *)data)++; + ((int *)data)[0]++; + if (strstr(str, SEARCHMSG_UTF8)) + ((int *)data)[1]++; } void serial_test_trace_printk(void) { struct trace_printk_lskel__bss *bss; struct trace_printk_lskel *skel; - int err = 0, found = 0; + int err = 0, found[2] = {}; skel = trace_printk_lskel__open(); if (!ASSERT_OK_PTR(skel, "trace_printk__open")) @@ -46,11 +49,24 @@ void serial_test_trace_printk(void) if (!ASSERT_GT(bss->trace_printk_ret, 0, 
"bss->trace_printk_ret")) goto cleanup; - /* verify our search string is in the trace buffer */ - ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, &found, 1000), - "read_trace_pipe_iter"); + if (!ASSERT_GT(bss->trace_printk_utf8_ran, 0, "bss->trace_printk_utf8_ran")) + goto cleanup; + + if (!ASSERT_GT(bss->trace_printk_utf8_ret, 0, "bss->trace_printk_utf8_ret")) + goto cleanup; + + if (!ASSERT_LT(bss->trace_printk_invalid_spec_ret, 0, + "bss->trace_printk_invalid_spec_ret")) + goto cleanup; + + /* verify our search strings are in the trace buffer */ + ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, found, 1000), + "read_trace_pipe_iter"); + + if (!ASSERT_EQ(found[0], bss->trace_printk_ran, "found")) + goto cleanup; - if (!ASSERT_EQ(found, bss->trace_printk_ran, "found")) + if (!ASSERT_EQ(found[1], bss->trace_printk_utf8_ran, "found_utf8")) goto cleanup; cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c index 472f4f9fa95f..64404602b9ab 100644 --- a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c +++ b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c @@ -8,6 +8,7 @@ #include "cap_helpers.h" #include "bpf_util.h" +#include "sysctl_helpers.h" /* Using CAP_LAST_CAP is risky here, since it can get pulled in from * an old /usr/include/linux/capability.h and be < CAP_BPF; as a result @@ -36,26 +37,6 @@ static void process_perfbuf(void *ctx, int cpu, void *data, __u32 len) got_perfbuf_val = *(__u32 *)data; } -static int sysctl_set(const char *sysctl_path, char *old_val, const char *new_val) -{ - int ret = 0; - FILE *fp; - - fp = fopen(sysctl_path, "r+"); - if (!fp) - return -errno; - if (old_val && fscanf(fp, "%s", old_val) <= 0) { - ret = -ENOENT; - } else if (!old_val || strcmp(old_val, new_val) != 0) { - fseek(fp, 0, SEEK_SET); - if (fprintf(fp, "%s", new_val) < 0) - ret = -errno; - } - fclose(fp); - - return ret; -} - static void 
test_unpriv_bpf_disabled_positive(struct test_unpriv_bpf_disabled *skel, __u32 prog_id, int prog_fd, int perf_fd, char **map_paths, int *map_fds) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c index fea275df9e22..a2652c8c3616 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c @@ -7,6 +7,13 @@ char _license[] SEC("license") = "GPL"; +SEC(".maps") struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} sockmap; + static long sock_i_ino(const struct sock *sk) { const struct socket *sk_socket = sk->sk_socket; @@ -76,5 +83,8 @@ int dump_unix(struct bpf_iter__unix *ctx) BPF_SEQ_PRINTF(seq, "\n"); + /* Test for deadlock. */ + bpf_map_update_elem(&sockmap, &(int){0}, sk, 0); + return 0; } diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c index 6443b320c732..ee053b24e6ca 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -385,4 +385,19 @@ int kptr_xchg_possibly_null(struct __sk_buff *ctx) return 0; } +SEC("?tc") +__failure __msg("invalid kptr access, R") +int reject_scalar_store_to_kptr(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + *(volatile u64 *)&v->unref_ptr = 0xBADC0DE; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/task_local_data.bpf.h b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h index 1f396711f487..0df8a12fd61e 100644 --- a/tools/testing/selftests/bpf/progs/task_local_data.bpf.h +++ b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h @@ -86,13 +86,14 @@ struct tld_meta_u { }; struct tld_data_u { - __u64 start; /* offset of tld_data_u->data in a page */ + __u64 unused; 
char data[__PAGE_SIZE - sizeof(__u64)] __attribute__((aligned(8))); }; struct tld_map_value { struct tld_data_u __uptr *data; struct tld_meta_u __uptr *meta; + __u16 start; /* offset of tld_data_u->data in a page */ }; typedef struct tld_uptr_dummy { @@ -176,7 +177,7 @@ static int __tld_fetch_key(struct tld_object *tld_obj, const char *name, int i_s if (!tld_obj->data_map || !tld_obj->data_map->data || !tld_obj->data_map->meta) return 0; - start = tld_obj->data_map->data->start; + start = tld_obj->data_map->start; cnt = tld_obj->data_map->meta->cnt; metadata = tld_obj->data_map->meta->metadata; diff --git a/tools/testing/selftests/bpf/progs/timer_start_deadlock.c b/tools/testing/selftests/bpf/progs/timer_start_deadlock.c index 019518ee18cd..afabd15bdac4 100644 --- a/tools/testing/selftests/bpf/progs/timer_start_deadlock.c +++ b/tools/testing/selftests/bpf/progs/timer_start_deadlock.c @@ -27,13 +27,13 @@ static int timer_cb(void *map, int *key, struct elem *value) return 0; } -SEC("tp_btf/hrtimer_cancel") -int BPF_PROG(tp_hrtimer_cancel, struct hrtimer *hrtimer) +SEC("tp_btf/hrtimer_start") +int BPF_PROG(tp_hrtimer_start, struct hrtimer *hrtimer, enum hrtimer_mode mode, bool was_armed) { struct bpf_timer *timer; int key = 0; - if (!in_timer_start) + if (!in_timer_start || !was_armed) return 0; tp_called = 1; @@ -60,7 +60,7 @@ int start_timer(void *ctx) /* * call hrtimer_start() twice, so that 2nd call does - * remove_hrtimer() and trace_hrtimer_cancel() tracepoint. + * trace_hrtimer_start(was_armed=1) tracepoint. */ in_timer_start = 1; bpf_timer_start(timer, 1000000000, 0); diff --git a/tools/testing/selftests/bpf/progs/token_kallsyms.c b/tools/testing/selftests/bpf/progs/token_kallsyms.c new file mode 100644 index 000000000000..c9f9344f3eb2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/token_kallsyms.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
*/ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +__weak +int token_ksym_subprog(void) +{ + return 0; +} + +SEC("xdp") +int xdp_main(struct xdp_md *xdp) +{ + return token_ksym_subprog(); +} diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c index 6695478c2b25..f4c538ec3ebd 100644 --- a/tools/testing/selftests/bpf/progs/trace_printk.c +++ b/tools/testing/selftests/bpf/progs/trace_printk.c @@ -10,13 +10,23 @@ char _license[] SEC("license") = "GPL"; int trace_printk_ret = 0; int trace_printk_ran = 0; +int trace_printk_invalid_spec_ret = 0; +int trace_printk_utf8_ret = 0; +int trace_printk_utf8_ran = 0; const char fmt[] = "Testing,testing %d\n"; +static const char utf8_fmt[] = "中文,测试 %d\n"; +/* Non-ASCII bytes after '%' must still be rejected. */ +static const char invalid_spec_fmt[] = "%\x80\n"; SEC("fentry/" SYS_PREFIX "sys_nanosleep") int sys_enter(void *ctx) { trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt), ++trace_printk_ran); + trace_printk_utf8_ret = bpf_trace_printk(utf8_fmt, sizeof(utf8_fmt), + ++trace_printk_utf8_ran); + trace_printk_invalid_spec_ret = bpf_trace_printk(invalid_spec_fmt, + sizeof(invalid_spec_fmt)); return 0; } diff --git a/tools/testing/selftests/bpf/progs/verifier_live_stack.c b/tools/testing/selftests/bpf/progs/verifier_live_stack.c index b7a9fa10e84d..401152b2b64f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_live_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_live_stack.c @@ -2647,3 +2647,196 @@ __naked void spill_join_with_imprecise_off(void) "exit;" ::: __clobber_all); } + +/* + * Same as spill_join_with_multi_off but the write is BPF_ST (store + * immediate) instead of BPF_STX. BPF_ST goes through + * clear_stack_for_all_offs() rather than spill_to_stack(), and that + * path also needs to join instead of overwriting. 
+ * + * fp-8 = &fp-24 + * fp-16 = &fp-32 + * r1 = fp-8 or fp-16 (two offsets from branch) + * *(u64 *)(r1 + 0) = 0 -- BPF_ST with immediate + * r0 = *(u64 *)(r10 - 16) -- fill from fp-16 + * r0 = *(u64 *)(r0 + 0) -- deref: should produce use + */ +SEC("socket") +__log_level(2) +__failure +__msg("15: (7a) *(u64 *)(r1 +0) = 0 fp-8: fp0-24 -> fp0-24|fp0+0 fp-16: fp0-32 -> fp0-32|fp0+0") +__msg("17: (79) r0 = *(u64 *)(r0 +0) ; use: fp0-32") +__naked void st_imm_join_with_multi_off(void) +{ + asm volatile ( + "*(u64 *)(r10 - 24) = 0;" + "*(u64 *)(r10 - 32) = 0;" + "r1 = r10;" + "r1 += -24;" + "*(u64 *)(r10 - 8) = r1;" + "r1 = r10;" + "r1 += -32;" + "*(u64 *)(r10 - 16) = r1;" + /* create r1 with two candidate offsets: fp-8 or fp-16 */ + "call %[bpf_get_prandom_u32];" + "if r0 == 0 goto 1f;" + "r1 = r10;" + "r1 += -8;" + "goto 2f;" +"1:" + "r1 = r10;" + "r1 += -16;" +"2:" + /* BPF_ST: store immediate through multi-offset r1 */ + "*(u64 *)(r1 + 0) = 0;" + /* read back fp-16 and deref */ + "r0 = *(u64 *)(r10 - 16);" + "r0 = *(u64 *)(r0 + 0);" + "r0 = 0;" + "exit;" + :: __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* + * Check that BPF_ST with a known offset fully overwrites stack slot + * from the arg tracking point of view. + */ +SEC("socket") +__log_level(2) +__success +__msg("5: (7a) *(u64 *)(r1 +0) = 0 fp-8: fp0-16 -> _{{$}}") +__naked void st_imm_join_with_single_off(void) +{ + asm volatile ( + "r2 = r10;" + "r2 += -16;" + "*(u64 *)(r10 - 8) = r2;" + "r1 = r10;" + "r1 += -8;" + "*(u64 *)(r1 + 0) = 0;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +/* + * Same as spill_join_with_imprecise_off but the write is BPF_ST. + * Use "r2 = -8; r1 += r2" to make arg tracking lose offset + * precision while the main verifier keeps r1 as fixed-offset. 
+ * + * fp-8 = &fp-24 + * fp-16 = &fp-32 + * r1 = fp-8 (imprecise to arg tracking) + * *(u64 *)(r1 + 0) = 0 -- BPF_ST with immediate + * r0 = *(u64 *)(r10 - 16) -- fill from fp-16 + * r0 = *(u64 *)(r0 + 0) -- deref: should produce use + */ +SEC("socket") +__log_level(2) +__success +__msg("13: (79) r0 = *(u64 *)(r0 +0) ; use: fp0-32") +__naked void st_imm_join_with_imprecise_off(void) +{ + asm volatile ( + "*(u64 *)(r10 - 24) = 0;" + "*(u64 *)(r10 - 32) = 0;" + "r1 = r10;" + "r1 += -24;" + "*(u64 *)(r10 - 8) = r1;" + "r1 = r10;" + "r1 += -32;" + "*(u64 *)(r10 - 16) = r1;" + /* r1 = fp-8 but arg tracking sees off_cnt == 0 */ + "r1 = r10;" + "r2 = -8;" + "r1 += r2;" + /* store immediate through imprecise r1 */ + "*(u64 *)(r1 + 0) = 0;" + /* read back fp-16 */ + "r0 = *(u64 *)(r10 - 16);" + /* deref: should produce use */ + "r0 = *(u64 *)(r0 + 0);" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +/* + * Test that spilling through an ARG_IMPRECISE pointer joins with + * existing at_stack values. Subprog receives r1 = fp0-24 and + * r2 = map_value, creates an ARG_IMPRECISE pointer by joining caller + * and callee FP on two branches. + * + * Setup: callee spills &fp1-16 to fp1-8 (precise, tracked). + * Then writes map_value through ARG_IMPRECISE r1 — on path A + * this hits fp1-8, on path B it hits caller stack. + * Since spill_to_stack is skipped for ARG_IMPRECISE dst, + * fp1-8 tracking isn't joined with none. 
+ * + * Expected after the imprecise write: + * - arg tracking should show fp1-8 = fp1-16|fp1+0 (joined with none) + * - read from fp1-8 and deref should produce use for fp1-16 + * - write through it should NOT produce def for fp1-16 + */ +SEC("socket") +__log_level(2) +__success +__msg("26: (79) r0 = *(u64 *)(r10 -8) // r1=IMP3 r6=fp0-24 r7=fp1-16 fp-8=fp1-16|fp1+0") +__naked void imprecise_dst_spill_join(void) +{ + asm volatile ( + "*(u64 *)(r10 - 24) = 0;" + /* map lookup for a valid non-FP pointer */ + "*(u32 *)(r10 - 32) = 0;" + "r1 = %[map] ll;" + "r2 = r10;" + "r2 += -32;" + "call %[bpf_map_lookup_elem];" + "if r0 == 0 goto 1f;" + /* r1 = &caller_fp-24, r2 = map_value */ + "r1 = r10;" + "r1 += -24;" + "r2 = r0;" + "call imprecise_dst_spill_join_sub;" +"1:" + "r0 = 0;" + "exit;" + :: __imm_addr(map), + __imm(bpf_map_lookup_elem) + : __clobber_all); +} + +static __used __naked void imprecise_dst_spill_join_sub(void) +{ + asm volatile ( + /* r6 = &caller_fp-24 (frame=0), r8 = map_value */ + "r6 = r1;" + "r8 = r2;" + /* spill &fp1-16 to fp1-8: at_stack[0] = fp1-16 */ + "*(u64 *)(r10 - 16) = 0;" + "r7 = r10;" + "r7 += -16;" + "*(u64 *)(r10 - 8) = r7;" + /* branch to create ARG_IMPRECISE pointer */ + "call %[bpf_get_prandom_u32];" + /* path B: r1 = caller fp-24 (frame=0) */ + "r1 = r6;" + "if r0 == 0 goto 1f;" + /* path A: r1 = callee fp-8 (frame=1) */ + "r1 = r10;" + "r1 += -8;" +"1:" + /* r1 = ARG_IMPRECISE{mask=BIT(0)|BIT(1)}. + * Write map_value (non-FP) through r1. On path A this overwrites fp1-8. + * Should join at_stack[0] with none: fp1-16|fp1+0. 
+ */ + "*(u64 *)(r1 + 0) = r8;" + /* read fp1-8: should be fp1-16|fp1+0 (joined) */ + "r0 = *(u64 *)(r10 - 8);" + "*(u64 *)(r0 + 0) = 42;" + "r0 = 0;" + "exit;" + :: __imm(bpf_get_prandom_u32) + : __clobber_all); +} diff --git a/tools/testing/selftests/bpf/sysctl_helpers.c b/tools/testing/selftests/bpf/sysctl_helpers.c new file mode 100644 index 000000000000..e2bd824f12d5 --- /dev/null +++ b/tools/testing/selftests/bpf/sysctl_helpers.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <errno.h> +#include <string.h> + +#include "sysctl_helpers.h" +#include "test_progs.h" + +int sysctl_set(const char *sysctl_path, char *old_val, const char *new_val) +{ + int ret = 0; + FILE *fp; + + fp = fopen(sysctl_path, "r+"); + if (!fp) + return -errno; + if (old_val && fscanf(fp, "%s", old_val) <= 0) { + ret = -ENOENT; + } else if (!old_val || strcmp(old_val, new_val) != 0) { + fseek(fp, 0, SEEK_SET); + if (fprintf(fp, "%s", new_val) < 0) + ret = -errno; + } + fclose(fp); + + return ret; +} + +int sysctl_set_or_fail(const char *sysctl_path, char *old_val, const char *new_val) +{ + int err; + + err = sysctl_set(sysctl_path, old_val, new_val); + if (err) + PRINT_FAIL("failed to set %s to %s: %s\n", sysctl_path, new_val, strerror(-err)); + return err; +} diff --git a/tools/testing/selftests/bpf/sysctl_helpers.h b/tools/testing/selftests/bpf/sysctl_helpers.h new file mode 100644 index 000000000000..35e37bfe1b3b --- /dev/null +++ b/tools/testing/selftests/bpf/sysctl_helpers.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SYSCTL_HELPERS_H +#define __SYSCTL_HELPERS_H + +int sysctl_set(const char *sysctl_path, char *old_val, const char *new_val); +int sysctl_set_or_fail(const char *sysctl_path, char *old_val, const char *new_val); + +#endif |
