From 12df58ad294253ac1d8df0c9bb9cf726397a671d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Jul 2025 01:47:30 +0200 Subject: bpf: Add cookie object to bpf maps Add a cookie to BPF maps to uniquely identify BPF maps for the timespan when the node is up. This is different to comparing a pointer or BPF map id which could get rolled over and reused. Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20250730234733.530041-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- kernel/bpf/syscall.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e63039817af3..7a814e98d5f5 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -53,6 +54,7 @@ #define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY) DEFINE_PER_CPU(int, bpf_prog_active); +DEFINE_COOKIE(bpf_map_cookie); static DEFINE_IDR(prog_idr); static DEFINE_SPINLOCK(prog_idr_lock); static DEFINE_IDR(map_idr); @@ -1487,6 +1489,10 @@ static int map_create(union bpf_attr *attr, bool kernel) if (err < 0) goto free_map; + preempt_disable(); + map->cookie = gen_cookie_next(&bpf_map_cookie); + preempt_enable(); + atomic64_set(&map->refcnt, 1); atomic64_set(&map->usercnt, 1); mutex_init(&map->freeze_mutex); -- cgit v1.2.3 From fd1c98f0ef5cbcec842209776505d9e70d8fcd53 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Jul 2025 01:47:31 +0200 Subject: bpf: Move bpf map owner out of common struct Given this is only relevant for BPF tail call maps, it is adding up space and penalizing other map types. We also need to extend this with further objects to track / compare to. Therefore, lets move this out into a separate structure and dynamically allocate it only for BPF tail call maps. Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20250730234733.530041-2-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 36 ++++++++++++++++++++++++------------ kernel/bpf/core.c | 35 ++++++++++++++++++----------------- kernel/bpf/syscall.c | 13 +++++++------ 3 files changed, 49 insertions(+), 35 deletions(-) (limited to 'kernel') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 308530c8326b..a87646cc5398 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -260,6 +260,18 @@ struct bpf_list_node_kern { void *owner; } __attribute__((aligned(8))); +/* 'Ownership' of program-containing map is claimed by the first program + * that is going to use this map or by the first program which FD is + * stored in the map to make sure that all callers and callees have the + * same prog type, JITed flag and xdp_has_frags flag. + */ +struct bpf_map_owner { + enum bpf_prog_type type; + bool jited; + bool xdp_has_frags; + const struct btf_type *attach_func_proto; +}; + struct bpf_map { const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; @@ -292,18 +304,8 @@ struct bpf_map { struct rcu_head rcu; }; atomic64_t writecnt; - /* 'Ownership' of program-containing map is claimed by the first program - * that is going to use this map or by the first program which FD is - * stored in the map to make sure that all callers and callees have the - * same prog type, JITed flag and xdp_has_frags flag. - */ - struct { - const struct btf_type *attach_func_proto; - spinlock_t lock; - enum bpf_prog_type type; - bool jited; - bool xdp_has_frags; - } owner; + spinlock_t owner_lock; + struct bpf_map_owner *owner; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ bool free_after_mult_rcu_gp; @@ -2109,6 +2111,16 @@ static inline bool bpf_map_flags_access_ok(u32 access_flags) (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); } +static inline struct bpf_map_owner *bpf_map_owner_alloc(struct bpf_map *map) +{ + return kzalloc(sizeof(*map->owner), GFP_ATOMIC); +} + +static inline void bpf_map_owner_free(struct bpf_map *map) +{ + kfree(map->owner); +} + struct bpf_event_entry { struct perf_event *event; struct file *perf_file; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 09dde5b00d0c..6e5b3a67e87f 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2377,28 +2377,29 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp) { enum bpf_prog_type prog_type = resolve_prog_type(fp); - bool ret; struct bpf_prog_aux *aux = fp->aux; + bool ret = false; if (fp->kprobe_override) - return false; + return ret; - spin_lock(&map->owner.lock); - if (!map->owner.type) { - /* There's no owner yet where we could check for - * compatibility. - */ - map->owner.type = prog_type; - map->owner.jited = fp->jited; - map->owner.xdp_has_frags = aux->xdp_has_frags; - map->owner.attach_func_proto = aux->attach_func_proto; + spin_lock(&map->owner_lock); + /* There's no owner yet where we could check for compatibility. */ + if (!map->owner) { + map->owner = bpf_map_owner_alloc(map); + if (!map->owner) + goto err; + map->owner->type = prog_type; + map->owner->jited = fp->jited; + map->owner->xdp_has_frags = aux->xdp_has_frags; + map->owner->attach_func_proto = aux->attach_func_proto; ret = true; } else { - ret = map->owner.type == prog_type && - map->owner.jited == fp->jited && - map->owner.xdp_has_frags == aux->xdp_has_frags; + ret = map->owner->type == prog_type && + map->owner->jited == fp->jited && + map->owner->xdp_has_frags == aux->xdp_has_frags; if (ret && - map->owner.attach_func_proto != aux->attach_func_proto) { + map->owner->attach_func_proto != aux->attach_func_proto) { switch (prog_type) { case BPF_PROG_TYPE_TRACING: case BPF_PROG_TYPE_LSM: @@ -2411,8 +2412,8 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map, } } } - spin_unlock(&map->owner.lock); - +err: + spin_unlock(&map->owner_lock); return ret; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7a814e98d5f5..0fbfa8532c39 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -887,6 +887,7 @@ static void bpf_map_free_deferred(struct work_struct *work) security_bpf_map_free(map); bpf_map_release_memcg(map); + bpf_map_owner_free(map); bpf_map_free(map); } @@ -981,12 +982,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) struct bpf_map *map = filp->private_data; u32 type = 0, jited = 0; - if (map_type_contains_progs(map)) { - spin_lock(&map->owner.lock); - type = map->owner.type; - jited = map->owner.jited; - spin_unlock(&map->owner.lock); + spin_lock(&map->owner_lock); + if (map->owner) { + type = map->owner->type; + jited = map->owner->jited; } + spin_unlock(&map->owner_lock); seq_printf(m, "map_type:\t%u\n" @@ -1496,7 +1497,7 @@ static int map_create(union bpf_attr *attr, bool kernel) atomic64_set(&map->refcnt, 1); atomic64_set(&map->usercnt, 1); mutex_init(&map->freeze_mutex); - spin_lock_init(&map->owner.lock); + spin_lock_init(&map->owner_lock); if (attr->btf_key_type_id || attr->btf_value_type_id || /* Even the map's value is a kernel's struct, -- cgit v1.2.3 From abad3d0bad72a52137e0c350c59542d75ae4f513 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Jul 2025 01:47:33 +0200 Subject: bpf: Fix oob access in cgroup local storage Lonial reported that an out-of-bounds access in cgroup local storage can be crafted via tail calls. Given two programs each utilizing a cgroup local storage with a different value size, and one program doing a tail call into the other. The verifier will validate each of the indivial programs just fine. However, in the runtime context the bpf_cg_run_ctx holds an bpf_prog_array_item which contains the BPF program as well as any cgroup local storage flavor the program uses. Helpers such as bpf_get_local_storage() pick this up from the runtime context: ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); storage = ctx->prog_item->cgroup_storage[stype]; if (stype == BPF_CGROUP_STORAGE_SHARED) ptr = &READ_ONCE(storage->buf)->data[0]; else ptr = this_cpu_ptr(storage->percpu_buf); For the second program which was called from the originally attached one, this means bpf_get_local_storage() will pick up the former program's map, not its own. With mismatching sizes, this can result in an unintended out-of-bounds access. To fix this issue, we need to extend bpf_map_owner with an array of storage_cookie[] to match on i) the exact maps from the original program if the second program was using bpf_get_local_storage(), or ii) allow the tail call combination if the second program was not using any of the cgroup local storage maps. Fixes: 7d9c3427894f ("bpf: Make cgroup storages shared between programs on the same cgroup") Reported-by: Lonial Con Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20250730234733.530041-4-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + kernel/bpf/core.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'kernel') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 02aa41e301a5..cc700925b802 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -283,6 +283,7 @@ struct bpf_map_owner { enum bpf_prog_type type; bool jited; bool xdp_has_frags; + u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE]; const struct btf_type *attach_func_proto; }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 6e5b3a67e87f..5d1650af899d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2378,7 +2378,9 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map, { enum bpf_prog_type prog_type = resolve_prog_type(fp); struct bpf_prog_aux *aux = fp->aux; + enum bpf_cgroup_storage_type i; bool ret = false; + u64 cookie; if (fp->kprobe_override) return ret; @@ -2393,11 +2395,24 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map, map->owner->jited = fp->jited; map->owner->xdp_has_frags = aux->xdp_has_frags; map->owner->attach_func_proto = aux->attach_func_proto; + for_each_cgroup_storage_type(i) { + map->owner->storage_cookie[i] = + aux->cgroup_storage[i] ? + aux->cgroup_storage[i]->cookie : 0; + } ret = true; } else { ret = map->owner->type == prog_type && map->owner->jited == fp->jited && map->owner->xdp_has_frags == aux->xdp_has_frags; + for_each_cgroup_storage_type(i) { + if (!ret) + break; + cookie = aux->cgroup_storage[i] ? + aux->cgroup_storage[i]->cookie : 0; + ret = map->owner->storage_cookie[i] == cookie || + !cookie; + } if (ret && map->owner->attach_func_proto != aux->attach_func_proto) { switch (prog_type) { -- cgit v1.2.3 From f1befc82addda926c8301436123d041bf3249505 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 1 Aug 2025 00:10:07 +0000 Subject: cfi: Move BPF CFI types and helpers to generic code Instead of duplicating the same code for each architecture, move the CFI type hash variables for BPF function types and related helper functions to generic CFI code, and allow architectures to override the function definitions if needed. Signed-off-by: Sami Tolvanen Link: https://lore.kernel.org/r/20250801001004.1859976-7-samitolvanen@google.com Signed-off-by: Alexei Starovoitov --- arch/riscv/include/asm/cfi.h | 16 --------------- arch/riscv/kernel/cfi.c | 24 ---------------------- arch/x86/include/asm/cfi.h | 10 ++------- arch/x86/kernel/alternative.c | 12 ----------- include/linux/cfi.h | 47 +++++++++++++++++++++++++++++++++++-------- kernel/cfi.c | 15 ++++++++++++++ 6 files changed, 56 insertions(+), 68 deletions(-) (limited to 'kernel') diff --git a/arch/riscv/include/asm/cfi.h b/arch/riscv/include/asm/cfi.h index fb9696d7a3f2..4508aaa7a2fd 100644 --- a/arch/riscv/include/asm/cfi.h +++ b/arch/riscv/include/asm/cfi.h @@ -14,27 +14,11 @@ struct pt_regs; #ifdef CONFIG_CFI_CLANG enum bug_trap_type handle_cfi_failure(struct pt_regs *regs); #define __bpfcall -static inline int cfi_get_offset(void) -{ - return 4; -} - -#define cfi_get_offset cfi_get_offset -extern u32 cfi_bpf_hash; -extern u32 cfi_bpf_subprog_hash; -extern u32 cfi_get_func_hash(void *func); #else static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs) { return BUG_TRAP_TYPE_NONE; } - -#define cfi_bpf_hash 0U -#define cfi_bpf_subprog_hash 0U -static inline u32 cfi_get_func_hash(void *func) -{ - return 0; -} #endif /* CONFIG_CFI_CLANG */ #endif /* _ASM_RISCV_CFI_H */ diff --git a/arch/riscv/kernel/cfi.c b/arch/riscv/kernel/cfi.c index e7aec5f36dd5..6ec9dbd7292e 100644 --- a/arch/riscv/kernel/cfi.c +++ b/arch/riscv/kernel/cfi.c @@ -4,7 +4,6 @@ * * Copyright (C) 2023 Google LLC */ -#include #include #include @@ -76,26 +75,3 @@ enum bug_trap_type handle_cfi_failure(struct pt_regs *regs) return report_cfi_failure(regs, regs->epc, &target, type); } - -#ifdef CONFIG_CFI_CLANG -struct bpf_insn; - -/* Must match bpf_func_t / DEFINE_BPF_PROG_RUN() */ -extern unsigned int __bpf_prog_runX(const void *ctx, - const struct bpf_insn *insn); -DEFINE_CFI_TYPE(cfi_bpf_hash, __bpf_prog_runX); - -/* Must match bpf_callback_t */ -extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64); -DEFINE_CFI_TYPE(cfi_bpf_subprog_hash, __bpf_callback_fn); - -u32 cfi_get_func_hash(void *func) -{ - u32 hash; - - if (get_kernel_nofault(hash, func - cfi_get_offset())) - return 0; - - return hash; -} -#endif diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h index 3e51ba459154..1751f1eb95ef 100644 --- a/arch/x86/include/asm/cfi.h +++ b/arch/x86/include/asm/cfi.h @@ -116,8 +116,6 @@ struct pt_regs; #ifdef CONFIG_CFI_CLANG enum bug_trap_type handle_cfi_failure(struct pt_regs *regs); #define __bpfcall -extern u32 cfi_bpf_hash; -extern u32 cfi_bpf_subprog_hash; static inline int cfi_get_offset(void) { @@ -135,6 +133,8 @@ static inline int cfi_get_offset(void) #define cfi_get_offset cfi_get_offset extern u32 cfi_get_func_hash(void *func); +#define cfi_get_func_hash cfi_get_func_hash + extern int cfi_get_func_arity(void *func); #ifdef CONFIG_FINEIBT @@ -153,12 +153,6 @@ static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs) { return BUG_TRAP_TYPE_NONE; } -#define cfi_bpf_hash 0U -#define cfi_bpf_subprog_hash 0U -static inline u32 cfi_get_func_hash(void *func) -{ - return 0; -} static inline int cfi_get_func_arity(void *func) { return 0; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a555665b4d9c..9f6b7dab2d9a 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -2,7 +2,6 @@ #define pr_fmt(fmt) "SMP alternatives: " fmt #include -#include #include #include #include @@ -1185,17 +1184,6 @@ bool cfi_bhi __ro_after_init = false; #endif #ifdef CONFIG_CFI_CLANG -struct bpf_insn; - -/* Must match bpf_func_t / DEFINE_BPF_PROG_RUN() */ -extern unsigned int __bpf_prog_runX(const void *ctx, - const struct bpf_insn *insn); -DEFINE_CFI_TYPE(cfi_bpf_hash, __bpf_prog_runX); - -/* Must match bpf_callback_t */ -extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64); -DEFINE_CFI_TYPE(cfi_bpf_subprog_hash, __bpf_callback_fn); - u32 cfi_get_func_hash(void *func) { u32 hash; diff --git a/include/linux/cfi.h b/include/linux/cfi.h index 1db17ecbb86c..52a98886a455 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -11,16 +11,9 @@ #include #include +#ifdef CONFIG_CFI_CLANG extern bool cfi_warn; -#ifndef cfi_get_offset -static inline int cfi_get_offset(void) -{ - return 0; -} -#endif - -#ifdef CONFIG_CFI_CLANG enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, unsigned long *target, u32 type); @@ -29,6 +22,44 @@ static inline enum bug_trap_type report_cfi_failure_noaddr(struct pt_regs *regs, { return report_cfi_failure(regs, addr, NULL, 0); } + +#ifndef cfi_get_offset +/* + * Returns the CFI prefix offset. By default, the compiler emits only + * a 4-byte CFI type hash before the function. If an architecture + * uses -fpatchable-function-entry=N,M where M>0 to change the prefix + * offset, they must override this function. + */ +static inline int cfi_get_offset(void) +{ + return 4; +} +#endif + +#ifndef cfi_get_func_hash +static inline u32 cfi_get_func_hash(void *func) +{ + u32 hash; + + if (get_kernel_nofault(hash, func - cfi_get_offset())) + return 0; + + return hash; +} +#endif + +/* CFI type hashes for BPF function types */ +extern u32 cfi_bpf_hash; +extern u32 cfi_bpf_subprog_hash; + +#else /* CONFIG_CFI_CLANG */ + +static inline int cfi_get_offset(void) { return 0; } +static inline u32 cfi_get_func_hash(void *func) { return 0; } + +#define cfi_bpf_hash 0U +#define cfi_bpf_subprog_hash 0U + #endif /* CONFIG_CFI_CLANG */ #ifdef CONFIG_ARCH_USES_CFI_TRAPS diff --git a/kernel/cfi.c b/kernel/cfi.c index 422fa4f958ae..4dad04ead06c 100644 --- a/kernel/cfi.c +++ b/kernel/cfi.c @@ -5,6 +5,8 @@ * Copyright (C) 2022 Google LLC */ +#include +#include #include bool cfi_warn __ro_after_init = IS_ENABLED(CONFIG_CFI_PERMISSIVE); @@ -27,6 +29,19 @@ enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, return BUG_TRAP_TYPE_BUG; } +/* + * Declare two non-existent functions with types that match bpf_func_t and + * bpf_callback_t pointers, and use DEFINE_CFI_TYPE to define type hash + * variables for each function type. The cfi_bpf_* variables are used by + * arch-specific BPF JIT implementations to ensure indirectly callable JIT + * code has matching CFI type hashes. + */ +extern typeof(*(bpf_func_t)0) __bpf_prog_runX; +DEFINE_CFI_TYPE(cfi_bpf_hash, __bpf_prog_runX); + +extern typeof(*(bpf_callback_t)0) __bpf_callback_fn; +DEFINE_CFI_TYPE(cfi_bpf_subprog_hash, __bpf_callback_fn); + #ifdef CONFIG_ARCH_USES_CFI_TRAPS static inline unsigned long trap_address(s32 *p) { -- cgit v1.2.3 From f914876eec9e72ae94b5cee81a9dc7935c255b2f Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 1 Aug 2025 11:49:15 +0200 Subject: bpf: Improve ctx access verifier error message We've already had two "error during ctx access conversion" warnings triggered by syzkaller. Let's improve the error message by dumping the cnt variable so that we can more easily differentiate between the different error cases. Signed-off-by: Paul Chaignon Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/cc94316c30dd76fae4a75a664b61a2dbfe68e205.1754039605.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 399f03e62508..0806295945e4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -21445,7 +21445,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) &target_size); if (cnt == 0 || cnt >= INSN_BUF_SIZE || (ctx_field_size && !target_size)) { - verifier_bug(env, "error during ctx access conversion"); + verifier_bug(env, "error during ctx access conversion (%d)", cnt); return -EFAULT; } -- cgit v1.2.3