diff options
Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r-- | kernel/bpf/verifier.c | 1293 |
1 files changed, 1044 insertions, 249 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7ee218827259..272563a0b770 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -190,6 +190,10 @@ struct bpf_verifier_stack_elem { static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx); static int release_reference(struct bpf_verifier_env *env, int ref_obj_id); +static void invalidate_non_owning_refs(struct bpf_verifier_env *env); +static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env); +static int ref_set_non_owning(struct bpf_verifier_env *env, + struct bpf_reg_state *reg); static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) { @@ -255,6 +259,7 @@ struct bpf_call_arg_meta { int mem_size; u64 msize_max_value; int ref_obj_id; + int dynptr_id; int map_uid; int func_id; struct btf *btf; @@ -456,6 +461,11 @@ static bool type_is_ptr_alloc_obj(u32 type) return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC; } +static bool type_is_non_owning_ref(u32 type) +{ + return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF; +} + static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg) { struct btf_record *rec = NULL; @@ -638,31 +648,57 @@ static void print_liveness(struct bpf_verifier_env *env, verbose(env, "D"); } -static int get_spi(s32 off) +static int __get_spi(s32 off) { return (-off - 1) / BPF_REG_SIZE; } +static struct bpf_func_state *func(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg) +{ + struct bpf_verifier_state *cur = env->cur_state; + + return cur->frame[reg->frameno]; +} + static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots) { - int allocated_slots = state->allocated_stack / BPF_REG_SIZE; + int allocated_slots = state->allocated_stack / BPF_REG_SIZE; - /* We need to check that slots between [spi - nr_slots + 1, spi] are - * within [0, allocated_stack). - * - * Please note that the spi grows downwards. For example, a dynptr - * takes the size of two stack slots; the first slot will be at - * spi and the second slot will be at spi - 1. - */ - return spi - nr_slots + 1 >= 0 && spi < allocated_slots; + /* We need to check that slots between [spi - nr_slots + 1, spi] are + * within [0, allocated_stack). + * + * Please note that the spi grows downwards. For example, a dynptr + * takes the size of two stack slots; the first slot will be at + * spi and the second slot will be at spi - 1. + */ + return spi - nr_slots + 1 >= 0 && spi < allocated_slots; } -static struct bpf_func_state *func(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg) +static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg) { - struct bpf_verifier_state *cur = env->cur_state; + int off, spi; - return cur->frame[reg->frameno]; + if (!tnum_is_const(reg->var_off)) { + verbose(env, "dynptr has to be at a constant offset\n"); + return -EINVAL; + } + + off = reg->off + reg->var_off.value; + if (off % BPF_REG_SIZE) { + verbose(env, "cannot pass in dynptr at an offset=%d\n", off); + return -EINVAL; + } + + spi = __get_spi(off); + if (spi < 1) { + verbose(env, "cannot pass in dynptr at an offset=%d\n", off); + return -EINVAL; + } + + if (!is_spi_bounds_valid(func(env, reg), spi, BPF_DYNPTR_NR_SLOTS)) + return -ERANGE; + return spi; } static const char *kernel_type_name(const struct btf* btf, u32 id) @@ -727,37 +763,58 @@ static bool dynptr_type_refcounted(enum bpf_dynptr_type type) static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type, - bool first_slot); + bool first_slot, int dynptr_id); static void __mark_reg_not_init(const struct bpf_verifier_env *env, struct bpf_reg_state *reg); -static void mark_dynptr_stack_regs(struct bpf_reg_state *sreg1, +static void mark_dynptr_stack_regs(struct bpf_verifier_env *env, + struct bpf_reg_state *sreg1, struct bpf_reg_state *sreg2, enum bpf_dynptr_type type) { - __mark_dynptr_reg(sreg1, type, true); - __mark_dynptr_reg(sreg2, type, false); + int id = ++env->id_gen; + + __mark_dynptr_reg(sreg1, type, true, id); + __mark_dynptr_reg(sreg2, type, false, id); } -static void mark_dynptr_cb_reg(struct bpf_reg_state *reg, +static void mark_dynptr_cb_reg(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, enum bpf_dynptr_type type) { - __mark_dynptr_reg(reg, type, true); + __mark_dynptr_reg(reg, type, true, ++env->id_gen); } +static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env, + struct bpf_func_state *state, int spi); static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg, enum bpf_arg_type arg_type, int insn_idx) { struct bpf_func_state *state = func(env, reg); enum bpf_dynptr_type type; - int spi, i, id; - - spi = get_spi(reg->off); - - if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) - return -EINVAL; + int spi, i, id, err; + + spi = dynptr_get_spi(env, reg); + if (spi < 0) + return spi; + + /* We cannot assume both spi and spi - 1 belong to the same dynptr, + * hence we need to call destroy_if_dynptr_stack_slot twice for both, + * to ensure that for the following example: + * [d1][d1][d2][d2] + * spi 3 2 1 0 + * So marking spi = 2 should lead to destruction of both d1 and d2. In + * case they do belong to same dynptr, second call won't see slot_type + * as STACK_DYNPTR and will simply skip destruction. + */ + err = destroy_if_dynptr_stack_slot(env, state, spi); + if (err) + return err; + err = destroy_if_dynptr_stack_slot(env, state, spi - 1); + if (err) + return err; for (i = 0; i < BPF_REG_SIZE; i++) { state->stack[spi].slot_type[i] = STACK_DYNPTR; @@ -768,7 +825,7 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_ if (type == BPF_DYNPTR_TYPE_INVALID) return -EINVAL; - mark_dynptr_stack_regs(&state->stack[spi].spilled_ptr, + mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr, &state->stack[spi - 1].spilled_ptr, type); if (dynptr_type_refcounted(type)) { @@ -781,6 +838,9 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_ state->stack[spi - 1].spilled_ptr.ref_obj_id = id; } + state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; + state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN; + return 0; } @@ -789,10 +849,9 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re struct bpf_func_state *state = func(env, reg); int spi, i; - spi = get_spi(reg->off); - - if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) - return -EINVAL; + spi = dynptr_get_spi(env, reg); + if (spi < 0) + return spi; for (i = 0; i < BPF_REG_SIZE; i++) { state->stack[spi].slot_type[i] = STACK_INVALID; @@ -805,43 +864,133 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re __mark_reg_not_init(env, &state->stack[spi].spilled_ptr); __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr); + + /* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot? + * + * While we don't allow reading STACK_INVALID, it is still possible to + * do <8 byte writes marking some but not all slots as STACK_MISC. Then, + * helpers or insns can do partial read of that part without failing, + * but check_stack_range_initialized, check_stack_read_var_off, and + * check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of + * the slot conservatively. Hence we need to prevent those liveness + * marking walks. + * + * This was not a problem before because STACK_INVALID is only set by + * default (where the default reg state has its reg->parent as NULL), or + * in clean_live_states after REG_LIVE_DONE (at which point + * mark_reg_read won't walk reg->parent chain), but not randomly during + * verifier state exploration (like we did above). Hence, for our case + * parentage chain will still be live (i.e. reg->parent may be + * non-NULL), while earlier reg->parent was NULL, so we need + * REG_LIVE_WRITTEN to screen off read marker propagation when it is + * done later on reads or by mark_dynptr_read as well to unnecessary + * mark registers in verifier state. + */ + state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; + state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN; + return 0; } -static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +static void __mark_reg_unknown(const struct bpf_verifier_env *env, + struct bpf_reg_state *reg); + +static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env, + struct bpf_func_state *state, int spi) { - struct bpf_func_state *state = func(env, reg); - int spi, i; + struct bpf_func_state *fstate; + struct bpf_reg_state *dreg; + int i, dynptr_id; - if (reg->type == CONST_PTR_TO_DYNPTR) - return false; + /* We always ensure that STACK_DYNPTR is never set partially, + * hence just checking for slot_type[0] is enough. This is + * different for STACK_SPILL, where it may be only set for + * 1 byte, so code has to use is_spilled_reg. + */ + if (state->stack[spi].slot_type[0] != STACK_DYNPTR) + return 0; - spi = get_spi(reg->off); - if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) - return true; + /* Reposition spi to first slot */ + if (!state->stack[spi].spilled_ptr.dynptr.first_slot) + spi = spi + 1; + if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) { + verbose(env, "cannot overwrite referenced dynptr\n"); + return -EINVAL; + } + + mark_stack_slot_scratched(env, spi); + mark_stack_slot_scratched(env, spi - 1); + + /* Writing partially to one dynptr stack slot destroys both. */ for (i = 0; i < BPF_REG_SIZE; i++) { - if (state->stack[spi].slot_type[i] == STACK_DYNPTR || - state->stack[spi - 1].slot_type[i] == STACK_DYNPTR) - return false; + state->stack[spi].slot_type[i] = STACK_INVALID; + state->stack[spi - 1].slot_type[i] = STACK_INVALID; } + dynptr_id = state->stack[spi].spilled_ptr.id; + /* Invalidate any slices associated with this dynptr */ + bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({ + /* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */ + if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM) + continue; + if (dreg->dynptr_id == dynptr_id) { + if (!env->allow_ptr_leaks) + __mark_reg_not_init(env, dreg); + else + __mark_reg_unknown(env, dreg); + } + })); + + /* Do not release reference state, we are destroying dynptr on stack, + * not using some helper to release it. Just reset register. + */ + __mark_reg_not_init(env, &state->stack[spi].spilled_ptr); + __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr); + + /* Same reason as unmark_stack_slots_dynptr above */ + state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; + state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN; + + return 0; +} + +static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + int spi) +{ + if (reg->type == CONST_PTR_TO_DYNPTR) + return false; + + /* For -ERANGE (i.e. spi not falling into allocated stack slots), we + * will do check_mem_access to check and update stack bounds later, so + * return true for that case. + */ + if (spi < 0) + return spi == -ERANGE; + /* We allow overwriting existing unreferenced STACK_DYNPTR slots, see + * mark_stack_slots_dynptr which calls destroy_if_dynptr_stack_slot to + * ensure dynptr objects at the slots we are touching are completely + * destructed before we reinitialize them for a new one. For referenced + * ones, destroy_if_dynptr_stack_slot returns an error early instead of + * delaying it until the end where the user will get "Unreleased + * reference" error. + */ return true; } -static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + int spi) { struct bpf_func_state *state = func(env, reg); - int spi; int i; /* This already represents first slot of initialized bpf_dynptr */ if (reg->type == CONST_PTR_TO_DYNPTR) return true; - spi = get_spi(reg->off); - if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) || - !state->stack[spi].spilled_ptr.dynptr.first_slot) + if (spi < 0) + return false; + if (!state->stack[spi].spilled_ptr.dynptr.first_slot) return false; for (i = 0; i < BPF_REG_SIZE; i++) { @@ -868,7 +1017,9 @@ static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg if (reg->type == CONST_PTR_TO_DYNPTR) { return reg->dynptr.type == dynptr_type; } else { - spi = get_spi(reg->off); + spi = dynptr_get_spi(env, reg); + if (spi < 0) + return false; return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type; } } @@ -931,6 +1082,8 @@ static void print_verifier_state(struct bpf_verifier_env *env, verbose_a("id=%d", reg->id); if (reg->ref_obj_id) verbose_a("ref_obj_id=%d", reg->ref_obj_id); + if (type_is_non_owning_ref(reg->type)) + verbose_a("%s", "non_own_ref"); if (t != SCALAR_VALUE) verbose_a("off=%d", reg->off); if (type_is_pkt_pointer(t)) @@ -1404,9 +1557,11 @@ static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm) */ static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) { - /* Clear id, off, and union(map_ptr, range) */ + /* Clear off and union(map_ptr, range) */ memset(((u8 *)reg) + sizeof(reg->type), 0, offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type)); + reg->id = 0; + reg->ref_obj_id = 0; ___mark_reg_known(reg, imm); } @@ -1447,7 +1602,7 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env, } static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type, - bool first_slot) + bool first_slot, int dynptr_id) { /* reg->type has no meaning for STACK_DYNPTR, but when we set reg for * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply @@ -1455,6 +1610,8 @@ static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type ty */ __mark_reg_known_zero(reg); reg->type = CONST_PTR_TO_DYNPTR; + /* Give each dynptr a unique id to uniquely associate slices to it. */ + reg->id = dynptr_id; reg->dynptr.type = type; reg->dynptr.first_slot = first_slot; } @@ -1486,6 +1643,16 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) reg->type &= ~PTR_MAYBE_NULL; } +static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno, + struct btf_field_graph_root *ds_head) +{ + __mark_reg_known_zero(®s[regno]); + regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC; + regs[regno].btf = ds_head->btf; + regs[regno].btf_id = ds_head->value_btf_id; + regs[regno].off = ds_head->node_offset; +} + static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg) { return type_is_pkt_pointer(reg->type); @@ -1752,11 +1919,13 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env, struct bpf_reg_state *reg) { /* - * Clear type, id, off, and union(map_ptr, range) and + * Clear type, off, and union(map_ptr, range) and * padding between 'type' and union */ memset(reg, 0, offsetof(struct bpf_reg_state, var_off)); reg->type = SCALAR_VALUE; + reg->id = 0; + reg->ref_obj_id = 0; reg->var_off = tnum_unknown; reg->frameno = 0; reg->precise = !env->bpf_capable; @@ -2185,6 +2354,12 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) return -EINVAL; } + if (bpf_dev_bound_kfunc_id(func_id)) { + err = bpf_dev_bound_kfunc_check(&env->log, prog_aux); + if (err) + return err; + } + desc = &tab->descs[tab->nr_descs++]; desc->func_id = func_id; desc->imm = call_imm; @@ -2386,6 +2561,32 @@ static int mark_reg_read(struct bpf_verifier_env *env, return 0; } +static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi, ret; + + /* For CONST_PTR_TO_DYNPTR, it must have already been done by + * check_reg_arg in check_helper_call and mark_btf_func_reg_size in + * check_kfunc_call. + */ + if (reg->type == CONST_PTR_TO_DYNPTR) + return 0; + spi = dynptr_get_spi(env, reg); + if (spi < 0) + return spi; + /* Caller ensures dynptr is valid and initialized, which means spi is in + * bounds and spi is the first dynptr slot. Simply mark stack slot as + * read. + */ + ret = mark_reg_read(env, &state->stack[spi].spilled_ptr, + state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64); + if (ret) + return ret; + return mark_reg_read(env, &state->stack[spi - 1].spilled_ptr, + state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64); +} + /* This function is supposed to be used by the following 32-bit optimization * code only. It returns TRUE if the source or destination register operates * on 64-bit, otherwise return FALSE. @@ -3272,6 +3473,11 @@ static void save_register_state(struct bpf_func_state *state, scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]); } +static bool is_bpf_st_mem(struct bpf_insn *insn) +{ + return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM; +} + /* check_stack_{read,write}_fixed_off functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ @@ -3283,8 +3489,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, { struct bpf_func_state *cur; /* state of the current function */ int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; - u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg; + struct bpf_insn *insn = &env->prog->insnsi[insn_idx]; struct bpf_reg_state *reg = NULL; + u32 dst_reg = insn->dst_reg; err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE)); if (err) @@ -3318,6 +3525,10 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, env->insn_aux_data[insn_idx].sanitize_stack_spill = true; } + err = destroy_if_dynptr_stack_slot(env, state, spi); + if (err) + return err; + mark_stack_slot_scratched(env, spi); if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && !register_is_null(reg) && env->bpf_capable) { @@ -3333,6 +3544,13 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, return err; } save_register_state(state, spi, reg, size); + } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) && + insn->imm != 0 && env->bpf_capable) { + struct bpf_reg_state fake_reg = {}; + + __mark_reg_known(&fake_reg, (u32)insn->imm); + fake_reg.type = SCALAR_VALUE; + save_register_state(state, spi, &fake_reg, size); } else if (reg && is_spillable_regtype(reg->type)) { /* register containing pointer is being spilled into stack */ if (size != BPF_REG_SIZE) { @@ -3367,7 +3585,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; /* when we zero initialize stack slots mark them as such */ - if (reg && register_is_null(reg)) { + if ((reg && register_is_null(reg)) || + (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) { /* backtracking doesn't work for STACK_ZERO yet. */ err = mark_chain_precision(env, value_regno); if (err) @@ -3412,6 +3631,7 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env, int min_off, max_off; int i, err; struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL; + struct bpf_insn *insn = &env->prog->insnsi[insn_idx]; bool writing_zero = false; /* set if the fact that we're writing a zero is used to let any * stack slots remain STACK_ZERO @@ -3424,13 +3644,22 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env, max_off = ptr_reg->smax_value + off + size; if (value_regno >= 0) value_reg = &cur->regs[value_regno]; - if (value_reg && register_is_null(value_reg)) + if ((value_reg && register_is_null(value_reg)) || + (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0)) writing_zero = true; err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE)); if (err) return err; + for (i = min_off; i < max_off; i++) { + int spi; + + spi = __get_spi(i); + err = destroy_if_dynptr_stack_slot(env, state, spi); + if (err) + return err; + } /* Variable offset writes destroy any spilled pointers in range. */ for (i = min_off; i < max_off; i++) { @@ -4770,6 +4999,25 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) return 0; } +#define BTF_TYPE_SAFE_NESTED(__type) __PASTE(__type, __safe_fields) + +BTF_TYPE_SAFE_NESTED(struct task_struct) { + const cpumask_t *cpus_ptr; +}; + +static bool nested_ptr_is_trusted(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + int off) +{ + /* If its parent is not trusted, it can't regain its trusted status. */ + if (!is_trusted_reg(reg)) + return false; + + BTF_TYPE_EMIT(BTF_TYPE_SAFE_NESTED(struct task_struct)); + + return btf_nested_type_is_trusted(&env->log, reg, off); +} + static int check_ptr_to_btf_access(struct bpf_verifier_env *env, struct bpf_reg_state *regs, int regno, int off, int size, @@ -4841,7 +5089,8 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, return -EACCES; } - if (type_is_alloc(reg->type) && !reg->ref_obj_id) { + if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) && + !reg->ref_obj_id) { verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n"); return -EFAULT; } @@ -4858,10 +5107,17 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, if (type_flag(reg->type) & PTR_UNTRUSTED) flag |= PTR_UNTRUSTED; - /* By default any pointer obtained from walking a trusted pointer is - * no longer trusted except the rcu case below. + /* By default any pointer obtained from walking a trusted pointer is no + * longer trusted, unless the field being accessed has explicitly been + * marked as inheriting its parent's state of trust. + * + * An RCU-protected pointer can also be deemed trusted if we are in an + * RCU read region. This case is handled below. */ - flag &= ~PTR_TRUSTED; + if (nested_ptr_is_trusted(env, reg, off)) + flag |= PTR_TRUSTED; + else + flag &= ~PTR_TRUSTED; if (flag & MEM_RCU) { /* Mark value register as MEM_RCU only if it is protected by @@ -5458,6 +5714,31 @@ static int check_stack_range_initialized( } if (meta && meta->raw_mode) { + /* Ensure we won't be overwriting dynptrs when simulating byte + * by byte access in check_helper_call using meta.access_size. + * This would be a problem if we have a helper in the future + * which takes: + * + * helper(uninit_mem, len, dynptr) + * + * Now, uninint_mem may overlap with dynptr pointer. Hence, it + * may end up writing to dynptr itself when touching memory from + * arg 1. This can be relaxed on a case by case basis for known + * safe cases, but reject due to the possibilitiy of aliasing by + * default. + */ + for (i = min_off; i < max_off + access_size; i++) { + int stack_off = -i - 1; + + spi = __get_spi(i); + /* raw_mode may write past allocated_stack */ + if (state->allocated_stack <= stack_off) + continue; + if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) { + verbose(env, "potential write to dynptr at off=%d disallowed\n", i); + return -EACCES; + } + } meta->access_size = access_size; meta->regno = regno; return 0; @@ -5799,9 +6080,7 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, cur->active_lock.ptr = btf; cur->active_lock.id = reg->id; } else { - struct bpf_func_state *fstate = cur_func(env); void *ptr; - int i; if (map) ptr = map; @@ -5817,25 +6096,11 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, verbose(env, "bpf_spin_unlock of different lock\n"); return -EINVAL; } - cur->active_lock.ptr = NULL; - cur->active_lock.id = 0; - for (i = fstate->acquired_refs - 1; i >= 0; i--) { - int err; + invalidate_non_owning_refs(env); - /* Complain on error because this reference state cannot - * be freed before this point, as bpf_spin_lock critical - * section does not allow functions that release the - * allocated object immediately. - */ - if (!fstate->refs[i].release_on_unlock) - continue; - err = release_reference(env, fstate->refs[i].id); - if (err) { - verbose(env, "failed to release release_on_unlock reference"); - return err; - } - } + cur->active_lock.ptr = NULL; + cur->active_lock.id = 0; } return 0; } @@ -5945,6 +6210,7 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno, enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + int spi = 0; /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*): @@ -5955,12 +6221,14 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno, } /* CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to * check_func_arg_reg_off's logic. We only need to check offset - * alignment for PTR_TO_STACK. + * and its alignment for PTR_TO_STACK. */ - if (reg->type == PTR_TO_STACK && (reg->off % BPF_REG_SIZE)) { - verbose(env, "cannot pass in dynptr at an offset=%d\n", reg->off); - return -EINVAL; + if (reg->type == PTR_TO_STACK) { + spi = dynptr_get_spi(env, reg); + if (spi < 0 && spi != -ERANGE) + return spi; } + /* MEM_UNINIT - Points to memory that is an appropriate candidate for * constructing a mutable bpf_dynptr object. * @@ -5977,7 +6245,7 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno, * to. */ if (arg_type & MEM_UNINIT) { - if (!is_dynptr_reg_valid_uninit(env, reg)) { + if (!is_dynptr_reg_valid_uninit(env, reg, spi)) { verbose(env, "Dynptr has to be an uninitialized dynptr\n"); return -EINVAL; } @@ -5992,13 +6260,15 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno, meta->uninit_dynptr_regno = regno; } else /* MEM_RDONLY and None case from above */ { + int err; + /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */ if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) { verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n"); return -EINVAL; } - if (!is_dynptr_reg_valid_init(env, reg)) { + if (!is_dynptr_reg_valid_init(env, reg, spi)) { verbose(env, "Expected an initialized dynptr as arg #%d\n", regno); @@ -6025,6 +6295,10 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno, err_extra, regno); return -EINVAL; } + + err = mark_dynptr_read(env, reg); + if (err) + return err; } return 0; } @@ -6294,6 +6568,23 @@ found: return 0; } +static struct btf_field * +reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields) +{ + struct btf_field *field; + struct btf_record *rec; + + rec = reg_btf_record(reg); + if (!rec) + return NULL; + + field = btf_record_find(rec, off, fields); + if (!field) + return NULL; + + return field; +} + int check_func_arg_reg_off(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, enum bpf_arg_type arg_type) @@ -6315,6 +6606,18 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, */ if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK) return 0; + + if ((type_is_ptr_alloc_obj(type) || type_is_non_owning_ref(type)) && reg->off) { + if (reg_find_field_offset(reg, reg->off, BPF_GRAPH_NODE_OR_ROOT)) + return __check_ptr_off_reg(env, reg, regno, true); + + verbose(env, "R%d must have zero offset when passed to release func\n", + regno); + verbose(env, "No graph node or root found at R%d type:%s off:%d\n", regno, + kernel_type_name(reg->btf, reg->btf_id), reg->off); + return -EINVAL; + } + /* Doing check_ptr_off_reg check for the offset will catch this * because fixed_off_ok is false, but checking here allows us * to give the user a better error message. @@ -6349,6 +6652,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, case PTR_TO_BTF_ID | PTR_TRUSTED: case PTR_TO_BTF_ID | MEM_RCU: case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED: + case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF: /* When referenced PTR_TO_BTF_ID is passed to release function, * its fixed offset must be 0. In the other cases, fixed offset * can be non-zero. This was already checked above. So pass @@ -6362,15 +6666,29 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, } } -static u32 dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg) { struct bpf_func_state *state = func(env, reg); int spi; if (reg->type == CONST_PTR_TO_DYNPTR) - return reg->ref_obj_id; + return reg->id; + spi = dynptr_get_spi(env, reg); + if (spi < 0) + return spi; + return state->stack[spi].spilled_ptr.id; +} - spi = get_spi(reg->off); +static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi; + + if (reg->type == CONST_PTR_TO_DYNPTR) + return reg->ref_obj_id; + spi = dynptr_get_spi(env, reg); + if (spi < 0) + return spi; return state->stack[spi].spilled_ptr.ref_obj_id; } @@ -6444,9 +6762,8 @@ skip_type_check: * PTR_TO_STACK. */ if (reg->type == PTR_TO_STACK) { - spi = get_spi(reg->off); - if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) || - !state->stack[spi].spilled_ptr.ref_obj_id) { + spi = dynptr_get_spi(env, reg); + if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) { verbose(env, "arg %d is an unacquired reference\n", regno); return -EINVAL; } @@ -6547,6 +6864,10 @@ skip_type_check: meta->ret_btf_id = reg->btf_id; break; case ARG_PTR_TO_SPIN_LOCK: + if (in_rbtree_lock_required_cb(env)) { + verbose(env, "can't spin_{lock,unlock} in rbtree cb\n"); + return -EACCES; + } if (meta->func_id == BPF_FUNC_spin_lock) { err = process_spin_lock(env, regno, true); if (err) @@ -7098,6 +7419,17 @@ static int release_reference(struct bpf_verifier_env *env, return 0; } +static void invalidate_non_owning_refs(struct bpf_verifier_env *env) +{ + struct bpf_func_state *unused; + struct bpf_reg_state *reg; + + bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({ + if (type_is_non_owning_ref(reg->type)) + __mark_reg_unknown(env, reg); + })); +} + static void clear_caller_saved_regs(struct bpf_verifier_env *env, struct bpf_reg_state *regs) { @@ -7119,6 +7451,8 @@ static int set_callee_state(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee, int insn_idx); +static bool is_callback_calling_kfunc(u32 btf_id); + static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx, int subprog, set_callee_state_fn set_callee_state_cb) @@ -7173,10 +7507,18 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn * interested in validating only BPF helpers that can call subprogs as * callbacks */ - if (set_callee_state_cb != set_callee_state && !is_callback_calling_function(insn->imm)) { - verbose(env, "verifier bug: helper %s#%d is not marked as callback-calling\n", - func_id_name(insn->imm), insn->imm); - return -EFAULT; + if (set_callee_state_cb != set_callee_state) { + if (bpf_pseudo_kfunc_call(insn) && + !is_callback_calling_kfunc(insn->imm)) { + verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n", + func_id_name(insn->imm), insn->imm); + return -EFAULT; + } else if (!bpf_pseudo_kfunc_call(insn) && + !is_callback_calling_function(insn->imm)) { /* helper */ + verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n", + func_id_name(insn->imm), insn->imm); + return -EFAULT; + } } if (insn->code == (BPF_JMP | BPF_CALL) && @@ -7428,7 +7770,7 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env, * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx); */ __mark_reg_not_init(env, &callee->regs[BPF_REG_0]); - mark_dynptr_cb_reg(&callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL); + mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL); callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3]; /* unused */ @@ -7441,6 +7783,63 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env, return 0; } +static int set_rbtree_add_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) +{ + /* void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, + * bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b)); + * + * 'struct bpf_rb_node *node' arg to bpf_rbtree_add is the same PTR_TO_BTF_ID w/ offset + * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd + * by this point, so look at 'root' + */ + struct btf_field *field; + + field = reg_find_field_offset(&caller->regs[BPF_REG_1], caller->regs[BPF_REG_1].off, + BPF_RB_ROOT); + if (!field || !field->graph_root.value_btf_id) + return -EFAULT; + + mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root); + ref_set_non_owning(env, &callee->regs[BPF_REG_1]); + mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root); + ref_set_non_owning(env, &callee->regs[BPF_REG_2]); + + __mark_reg_not_init(env, &callee->regs[BPF_REG_3]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + callee->in_callback_fn = true; + callee->callback_ret_range = tnum_range(0, 1); + return 0; +} + +static bool is_rbtree_lock_required_kfunc(u32 btf_id); + +/* Are we currently verifying the callback for a rbtree helper that must + * be called with lock held? If so, no need to complain about unreleased + * lock + */ +static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env) +{ + struct bpf_verifier_state *state = env->cur_state; + struct bpf_insn *insn = env->prog->insnsi; + struct bpf_func_state *callee; + int kfunc_btf_id; + + if (!state->curframe) + return false; + + callee = state->frame[state->curframe]; + + if (!callee->in_callback_fn) + return false; + + kfunc_btf_id = insn[callee->callsite].imm; + return is_rbtree_lock_required_kfunc(kfunc_btf_id); +} + static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) { struct bpf_verifier_state *state = env->cur_state; @@ -7633,6 +8032,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, struct bpf_reg_state *fmt_reg = ®s[BPF_REG_3]; struct bpf_reg_state *data_len_reg = ®s[BPF_REG_5]; struct bpf_map *fmt_map = fmt_reg->map_ptr; + struct bpf_bprintf_data data = {}; int err, fmt_map_off, num_args; u64 fmt_addr; char *fmt; @@ -7657,7 +8057,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we * can focus on validating the format specifiers. */ - err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args); + err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data); if (err < 0) verbose(env, "Invalid format string\n"); @@ -7933,13 +8333,32 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { if (arg_type_is_dynptr(fn->arg_type[i])) { struct bpf_reg_state *reg = ®s[BPF_REG_1 + i]; + int id, ref_obj_id; + + if (meta.dynptr_id) { + verbose(env, "verifier internal error: meta.dynptr_id already set\n"); + return -EFAULT; + } if (meta.ref_obj_id) { verbose(env, "verifier internal error: meta.ref_obj_id already set\n"); return -EFAULT; } - meta.ref_obj_id = dynptr_ref_obj_id(env, reg); + id = dynptr_id(env, reg); + if (id < 0) { + verbose(env, "verifier internal error: failed to obtain dynptr id\n"); + return id; + } + + ref_obj_id = dynptr_ref_obj_id(env, reg); + if (ref_obj_id < 0) { + verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n"); + return ref_obj_id; + } + + meta.dynptr_id = id; + meta.ref_obj_id = ref_obj_id; break; } } @@ -8095,6 +8514,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EFAULT; } + if (is_dynptr_ref_function(func_id)) + regs[BPF_REG_0].dynptr_id = meta.dynptr_id; + if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) { /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; @@ -8186,6 +8608,7 @@ struct bpf_kfunc_call_arg_meta { bool r0_rdonly; u32 ret_btf_id; u64 r0_size; + u32 subprogno; struct { u64 value; bool found; @@ -8197,6 +8620,9 @@ struct bpf_kfunc_call_arg_meta { struct { struct btf_field *field; } arg_list_head; + struct { + struct btf_field *field; + } arg_rbtree_root; }; static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta) @@ -8308,12 +8734,16 @@ enum { KF_ARG_DYNPTR_ID, KF_ARG_LIST_HEAD_ID, KF_ARG_LIST_NODE_ID, + KF_ARG_RB_ROOT_ID, + KF_ARG_RB_NODE_ID, }; BTF_ID_LIST(kf_arg_btf_ids) BTF_ID(struct, bpf_dynptr_kern) BTF_ID(struct, bpf_list_head) BTF_ID(struct, bpf_list_node) +BTF_ID(struct, bpf_rb_root) +BTF_ID(struct, bpf_rb_node) static bool __is_kfunc_ptr_arg_type(const struct btf *btf, const struct btf_param *arg, int type) @@ -8347,6 +8777,28 @@ static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID); } +static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg) +{ + return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID); +} + +static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg) +{ + return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID); +} + +static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf, + const struct btf_param *arg) +{ + const struct btf_type *t; + + t = btf_type_resolve_func_ptr(btf, arg->type, NULL); + if (!t) + return false; + + return true; +} + /* Returns true if struct is composed of scalars, 4 levels of nesting allowed */ static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env, const struct btf *btf, @@ -8406,6 +8858,9 @@ enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */ KF_ARG_PTR_TO_MEM, KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */ + KF_ARG_PTR_TO_CALLBACK, + KF_ARG_PTR_TO_RB_ROOT, + KF_ARG_PTR_TO_RB_NODE, }; enum special_kfunc_type { @@ -8419,6 +8874,9 @@ enum special_kfunc_type { KF_bpf_rdonly_cast, KF_bpf_rcu_read_lock, KF_bpf_rcu_read_unlock, + KF_bpf_rbtree_remove, + KF_bpf_rbtree_add, + KF_bpf_rbtree_first, }; BTF_SET_START(special_kfunc_set) @@ -8430,6 +8888,9 @@ BTF_ID(func, bpf_list_pop_front) BTF_ID(func, bpf_list_pop_back) BTF_ID(func, bpf_cast_to_kern_ctx) BTF_ID(func, bpf_rdonly_cast) +BTF_ID(func, bpf_rbtree_remove) +BTF_ID(func, bpf_rbtree_add) +BTF_ID(func, bpf_rbtree_first) BTF_SET_END(special_kfunc_set) BTF_ID_LIST(special_kfunc_list) @@ -8443,6 +8904,9 @@ BTF_ID(func, bpf_cast_to_kern_ctx) BTF_ID(func, bpf_rdonly_cast) BTF_ID(func, bpf_rcu_read_lock) BTF_ID(func, bpf_rcu_read_unlock) +BTF_ID(func, bpf_rbtree_remove) +BTF_ID(func, bpf_rbtree_add) +BTF_ID(func, bpf_rbtree_first) static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta) { @@ -8504,6 +8968,12 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_list_node(meta->btf, &args[argno])) return KF_ARG_PTR_TO_LIST_NODE; + if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_RB_ROOT; + + if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_RB_NODE; + if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) { if (!btf_type_is_struct(ref_t)) { verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n", @@ -8513,6 +8983,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, return KF_ARG_PTR_TO_BTF_ID; } + if (is_kfunc_arg_callback(env, meta->btf, &args[argno])) + return KF_ARG_PTR_TO_CALLBACK; + if (argno + 1 < nargs && is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], ®s[regno + 1])) arg_mem_size = true; @@ -8551,9 +9024,37 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, reg_ref_id = *reg2btf_ids[base_type(reg->type)]; } - if (is_kfunc_trusted_args(meta) || (is_kfunc_release(meta) && reg->ref_obj_id)) + /* Enforce strict type matching for calls to kfuncs that are acquiring + * or releasing a reference, or are no-cast aliases. We do _not_ + * enforce strict matching for plain KF_TRUSTED_ARGS kfuncs by default, + * as we want to enable BPF programs to pass types that are bitwise + * equivalent without forcing them to explicitly cast with something + * like bpf_cast_to_kern_ctx(). + * + * For example, say we had a type like the following: + * + * struct bpf_cpumask { + * cpumask_t cpumask; + * refcount_t usage; + * }; + * + * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed + * to a struct cpumask, so it would be safe to pass a struct + * bpf_cpumask * to a kfunc expecting a struct cpumask *. + * + * The philosophy here is similar to how we allow scalars of different + * types to be passed to kfuncs as long as the size is the same. The + * only difference here is that we're simply allowing + * btf_struct_ids_match() to walk the struct at the 0th offset, and + * resolve types. + */ + if (is_kfunc_acquire(meta) || + (is_kfunc_release(meta) && reg->ref_obj_id) || + btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id)) strict_type_match = true; + WARN_ON_ONCE(is_kfunc_trusted_args(meta) && reg->off); + reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, ®_ref_id); reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off); if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) { @@ -8599,38 +9100,54 @@ static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env, return 0; } -static int ref_set_release_on_unlock(struct bpf_verifier_env *env, u32 ref_obj_id) +static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg) { - struct bpf_func_state *state = cur_func(env); + struct bpf_verifier_state *state = env->cur_state; + + if (!state->active_lock.ptr) { + verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n"); + return -EFAULT; + } + + if (type_flag(reg->type) & NON_OWN_REF) { + verbose(env, "verifier internal error: NON_OWN_REF already set\n"); + return -EFAULT; + } + + reg->type |= NON_OWN_REF; + return 0; +} + +static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id) +{ + struct bpf_func_state *state, *unused; struct bpf_reg_state *reg; int i; - /* bpf_spin_lock only allows calling list_push and list_pop, no BPF - * subprogs, no global functions. This means that the references would - * not be released inside the critical section but they may be added to - * the reference state, and the acquired_refs are never copied out for a - * different frame as BPF to BPF calls don't work in bpf_spin_lock - * critical sections. - */ + state = cur_func(env); + if (!ref_obj_id) { - verbose(env, "verifier internal error: ref_obj_id is zero for release_on_unlock\n"); + verbose(env, "verifier internal error: ref_obj_id is zero for " + "owning -> non-owning conversion\n"); return -EFAULT; } + for (i = 0; i < state->acquired_refs; i++) { - if (state->refs[i].id == ref_obj_id) { - if (state->refs[i].release_on_unlock) { - verbose(env, "verifier internal error: expected false release_on_unlock"); - return -EFAULT; + if (state->refs[i].id != ref_obj_id) + continue; + + /* Clear ref_obj_id here so release_reference doesn't clobber + * the whole reg + */ + bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({ + if (reg->ref_obj_id == ref_obj_id) { + reg->ref_obj_id = 0; + ref_set_non_owning(env, reg); } - state->refs[i].release_on_unlock = true; - /* Now mark everyone sharing same ref_obj_id as untrusted */ - bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ - if (reg->ref_obj_id == ref_obj_id) - reg->type |= PTR_UNTRUSTED; - })); - return 0; - } + })); + return 0; } + verbose(env, "verifier internal error: ref state missing for ref_obj_id\n"); return -EFAULT; } @@ -8716,101 +9233,226 @@ static bool is_bpf_list_api_kfunc(u32 btf_id) btf_id == special_kfunc_list[KF_bpf_list_pop_back]; } -static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, u32 regno, - struct bpf_kfunc_call_arg_meta *meta) +static bool is_bpf_rbtree_api_kfunc(u32 btf_id) +{ + return btf_id == special_kfunc_list[KF_bpf_rbtree_add] || + btf_id == special_kfunc_list[KF_bpf_rbtree_remove] || + btf_id == special_kfunc_list[KF_bpf_rbtree_first]; +} + +static bool is_bpf_graph_api_kfunc(u32 btf_id) +{ + return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id); +} + +static bool is_callback_calling_kfunc(u32 btf_id) +{ + return btf_id == special_kfunc_list[KF_bpf_rbtree_add]; +} + +static bool is_rbtree_lock_required_kfunc(u32 btf_id) +{ + return is_bpf_rbtree_api_kfunc(btf_id); +} + +static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env, + enum btf_field_type head_field_type, + u32 kfunc_btf_id) +{ + bool ret; + + switch (head_field_type) { + case BPF_LIST_HEAD: + ret = is_bpf_list_api_kfunc(kfunc_btf_id); + break; + case BPF_RB_ROOT: + ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id); + break; + default: + verbose(env, "verifier internal error: unexpected graph root argument type %s\n", + btf_field_type_name(head_field_type)); + return false; + } + + if (!ret) + verbose(env, "verifier internal error: %s head arg for unknown kfunc\n", + btf_field_type_name(head_field_type)); + return ret; +} + +static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env, + enum btf_field_type node_field_type, + u32 kfunc_btf_id) { + bool ret; + + switch (node_field_type) { + case BPF_LIST_NODE: + ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front] || + kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back]); + break; + case BPF_RB_NODE: + ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] || + kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add]); + break; + default: + verbose(env, "verifier internal error: unexpected graph node argument type %s\n", + btf_field_type_name(node_field_type)); + return false; + } + + if (!ret) + verbose(env, "verifier internal error: %s node arg for unknown kfunc\n", + btf_field_type_name(node_field_type)); + return ret; +} + +static int +__process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno, + struct bpf_kfunc_call_arg_meta *meta, + enum btf_field_type head_field_type, + struct btf_field **head_field) +{ + const char *head_type_name; struct btf_field *field; struct btf_record *rec; - u32 list_head_off; + u32 head_off; - if (meta->btf != btf_vmlinux || !is_bpf_list_api_kfunc(meta->func_id)) { - verbose(env, "verifier internal error: bpf_list_head argument for unknown kfunc\n"); + if (meta->btf != btf_vmlinux) { + verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n"); return -EFAULT; } + if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id)) + return -EFAULT; + + head_type_name = btf_field_type_name(head_field_type); if (!tnum_is_const(reg->var_off)) { verbose(env, - "R%d doesn't have constant offset. bpf_list_head has to be at the constant offset\n", - regno); + "R%d doesn't have constant offset. %s has to be at the constant offset\n", + regno, head_type_name); return -EINVAL; } rec = reg_btf_record(reg); - list_head_off = reg->off + reg->var_off.value; - field = btf_record_find(rec, list_head_off, BPF_LIST_HEAD); + head_off = reg->off + reg->var_off.value; + field = btf_record_find(rec, head_off, head_field_type); if (!field) { - verbose(env, "bpf_list_head not found at offset=%u\n", list_head_off); + verbose(env, "%s not found at offset=%u\n", head_type_name, head_off); return -EINVAL; } /* All functions require bpf_list_head to be protected using a bpf_spin_lock */ if (check_reg_allocation_locked(env, reg)) { - verbose(env, "bpf_spin_lock at off=%d must be held for bpf_list_head\n", - rec->spin_lock_off); + verbose(env, "bpf_spin_lock at off=%d must be held for %s\n", + rec->spin_lock_off, head_type_name); return -EINVAL; } - if (meta->arg_list_head.field) { - verbose(env, "verifier internal error: repeating bpf_list_head arg\n"); + if (*head_field) { + verbose(env, "verifier internal error: repeating %s arg\n", head_type_name); return -EFAULT; } - meta->arg_list_head.field = field; + *head_field = field; return 0; } -static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env, +static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, struct bpf_kfunc_call_arg_meta *meta) { + return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_LIST_HEAD, + &meta->arg_list_head.field); +} + +static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno, + struct bpf_kfunc_call_arg_meta *meta) +{ + return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_RB_ROOT, + &meta->arg_rbtree_root.field); +} + +static int +__process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno, + struct bpf_kfunc_call_arg_meta *meta, + enum btf_field_type head_field_type, + enum btf_field_type node_field_type, + struct btf_field **node_field) +{ + const char *node_type_name; const struct btf_type *et, *t; struct btf_field *field; - struct btf_record *rec; - u32 list_node_off; + u32 node_off; - if (meta->btf != btf_vmlinux || - (meta->func_id != special_kfunc_list[KF_bpf_list_push_front] && - meta->func_id != special_kfunc_list[KF_bpf_list_push_back])) { - verbose(env, "verifier internal error: bpf_list_node argument for unknown kfunc\n"); + if (meta->btf != btf_vmlinux) { + verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n"); return -EFAULT; } + if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id)) + return -EFAULT; + + node_type_name = btf_field_type_name(node_field_type); if (!tnum_is_const(reg->var_off)) { verbose(env, - "R%d doesn't have constant offset. bpf_list_node has to be at the constant offset\n", - regno); + "R%d doesn't have constant offset. %s has to be at the constant offset\n", + regno, node_type_name); return -EINVAL; } - rec = reg_btf_record(reg); - list_node_off = reg->off + reg->var_off.value; - field = btf_record_find(rec, list_node_off, BPF_LIST_NODE); - if (!field || field->offset != list_node_off) { - verbose(env, "bpf_list_node not found at offset=%u\n", list_node_off); + node_off = reg->off + reg->var_off.value; + field = reg_find_field_offset(reg, node_off, node_field_type); + if (!field || field->offset != node_off) { + verbose(env, "%s not found at offset=%u\n", node_type_name, node_off); return -EINVAL; } - field = meta->arg_list_head.field; + field = *node_field; - et = btf_type_by_id(field->list_head.btf, field->list_head.value_btf_id); + et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id); t = btf_type_by_id(reg->btf, reg->btf_id); - if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->list_head.btf, - field->list_head.value_btf_id, true)) { - verbose(env, "operation on bpf_list_head expects arg#1 bpf_list_node at offset=%d " + if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf, + field->graph_root.value_btf_id, true)) { + verbose(env, "operation on %s expects arg#1 %s at offset=%d " "in struct %s, but arg is at offset=%d in struct %s\n", - field->list_head.node_offset, btf_name_by_offset(field->list_head.btf, et->name_off), - list_node_off, btf_name_by_offset(reg->btf, t->name_off)); + btf_field_type_name(head_field_type), + btf_field_type_name(node_field_type), + field->graph_root.node_offset, + btf_name_by_offset(field->graph_root.btf, et->name_off), + node_off, btf_name_by_offset(reg->btf, t->name_off)); return -EINVAL; } - if (list_node_off != field->list_head.node_offset) { - verbose(env, "arg#1 offset=%d, but expected bpf_list_node at offset=%d in struct %s\n", - list_node_off, field->list_head.node_offset, - btf_name_by_offset(field->list_head.btf, et->name_off)); + if (node_off != field->graph_root.node_offset) { + verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n", + node_off, btf_field_type_name(node_field_type), + field->graph_root.node_offset, + btf_name_by_offset(field->graph_root.btf, et->name_off)); return -EINVAL; } - /* Set arg#1 for expiration after unlock */ - return ref_set_release_on_unlock(env, reg->ref_obj_id); + + return 0; +} + +static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno, + struct bpf_kfunc_call_arg_meta *meta) +{ + return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta, + BPF_LIST_HEAD, BPF_LIST_NODE, + &meta->arg_list_head.field); +} + +static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno, + struct bpf_kfunc_call_arg_meta *meta) +{ + return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta, + BPF_RB_ROOT, BPF_RB_NODE, + &meta->arg_rbtree_root.field); } static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta) @@ -8896,6 +9538,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return -EINVAL; } + if (is_kfunc_trusted_args(meta) && + (register_is_null(reg) || type_may_be_null(reg->type))) { + verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i); + return -EACCES; + } + if (reg->ref_obj_id) { if (is_kfunc_release(meta) && meta->ref_obj_id) { verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", @@ -8941,8 +9589,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_DYNPTR: case KF_ARG_PTR_TO_LIST_HEAD: case KF_ARG_PTR_TO_LIST_NODE: + case KF_ARG_PTR_TO_RB_ROOT: + case KF_ARG_PTR_TO_RB_NODE: case KF_ARG_PTR_TO_MEM: case KF_ARG_PTR_TO_MEM_SIZE: + case KF_ARG_PTR_TO_CALLBACK: /* Trusted by default */ break; default: @@ -9019,6 +9670,20 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (ret < 0) return ret; break; + case KF_ARG_PTR_TO_RB_ROOT: + if (reg->type != PTR_TO_MAP_VALUE && + reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { + verbose(env, "arg#%d expected pointer to map value or allocated object\n", i); + return -EINVAL; + } + if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) { + verbose(env, "allocated object must be referenced\n"); + return -EINVAL; + } + ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta); + if (ret < 0) + return ret; + break; case KF_ARG_PTR_TO_LIST_NODE: if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { verbose(env, "arg#%d expected pointer to allocated object\n", i); @@ -9032,6 +9697,31 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (ret < 0) return ret; break; + case KF_ARG_PTR_TO_RB_NODE: + if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) { + if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) { + verbose(env, "rbtree_remove node input must be non-owning ref\n"); + return -EINVAL; + } + if (in_rbtree_lock_required_cb(env)) { + verbose(env, "rbtree_remove not allowed in rbtree cb\n"); + return -EINVAL; + } + } else { + if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { + verbose(env, "arg#%d expected pointer to allocated object\n", i); + return -EINVAL; + } + if (!reg->ref_obj_id) { + verbose(env, "allocated object must be referenced\n"); + return -EINVAL; + } + } + + ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta); + if (ret < 0) + return ret; + break; case KF_ARG_PTR_TO_BTF_ID: /* Only base_type is checked, further checks are done here */ if ((base_type(reg->type) != PTR_TO_BTF_ID || @@ -9067,6 +9757,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ /* Skip next '__sz' argument */ i++; break; + case KF_ARG_PTR_TO_CALLBACK: + meta->subprogno = reg->subprogno; + break; } } @@ -9083,11 +9776,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { const struct btf_type *t, *func, *func_proto, *ptr_type; + u32 i, nargs, func_id, ptr_type_id, release_ref_obj_id; struct bpf_reg_state *regs = cur_regs(env); const char *func_name, *ptr_type_name; bool sleepable, rcu_lock, rcu_unlock; struct bpf_kfunc_call_arg_meta meta; - u32 i, nargs, func_id, ptr_type_id; int err, insn_idx = *insn_idx_p; const struct btf_param *args; const struct btf_type *ret_t; @@ -9182,6 +9875,35 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } } + if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front] || + meta.func_id == special_kfunc_list[KF_bpf_list_push_back] || + meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) { + release_ref_obj_id = regs[BPF_REG_2].ref_obj_id; + err = ref_convert_owning_non_owning(env, release_ref_obj_id); + if (err) { + verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n", + func_name, func_id); + return err; + } + + err = release_reference(env, release_ref_obj_id); + if (err) { + verbose(env, "kfunc %s#%d reference has not been acquired before\n", + func_name, func_id); + return err; + } + } + + if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) { + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_rbtree_add_callback_state); + if (err) { + verbose(env, "kfunc %s#%d failed callback verification\n", + func_name, func_id); + return err; + } + } + for (i = 0; i < CALLER_SAVED_REGS; i++) mark_reg_not_init(env, regs, caller_saved[i]); @@ -9246,11 +9968,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) { struct btf_field *field = meta.arg_list_head.field; - mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; - regs[BPF_REG_0].btf = field->list_head.btf; - regs[BPF_REG_0].btf_id = field->list_head.value_btf_id; - regs[BPF_REG_0].off = field->list_head.node_offset; + mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root); + } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] || + meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) { + struct btf_field *field = meta.arg_rbtree_root.field; + + mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root); } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) { mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED; @@ -9316,7 +10039,13 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (is_kfunc_ret_null(&meta)) regs[BPF_REG_0].id = id; regs[BPF_REG_0].ref_obj_id = id; + } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) { + ref_set_non_owning(env, ®s[BPF_REG_0]); } + + if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove]) + invalidate_non_owning_refs(env); + if (reg_may_point_to_spin_lock(®s[BPF_REG_0]) && !regs[BPF_REG_0].id) regs[BPF_REG_0].id = ++env->id_gen; } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ @@ -11502,8 +12231,10 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, */ if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0))) return; - if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL) && WARN_ON_ONCE(reg->off)) + if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) && + WARN_ON_ONCE(reg->off)) return; + if (is_null) { reg->type = SCALAR_VALUE; /* We don't need id and ref_obj_id from this point @@ -12969,6 +13700,13 @@ static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap) { unsigned int i; + /* either both IDs should be set or both should be zero */ + if (!!old_id != !!cur_id) + return false; + + if (old_id == 0) /* cur_id == 0 as well */ + return true; + for (i = 0; i < BPF_ID_MAP_SIZE; i++) { if (!idmap[i].old) { /* Reached an empty slot; haven't seen this id before */ @@ -13080,79 +13818,74 @@ next: } } +static bool regs_exact(const struct bpf_reg_state *rold, + const struct bpf_reg_state *rcur, + struct bpf_id_pair *idmap) +{ + return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && + check_ids(rold->id, rcur->id, idmap) && + check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap); +} + /* Returns true if (rold safe implies rcur safe) */ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, struct bpf_reg_state *rcur, struct bpf_id_pair *idmap) { - bool equal; - if (!(rold->live & REG_LIVE_READ)) /* explored state didn't use this */ return true; - - equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0; - if (rold->type == NOT_INIT) /* explored state can't have used this */ return true; if (rcur->type == NOT_INIT) return false; + + /* Enforce that register types have to match exactly, including their + * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general + * rule. + * + * One can make a point that using a pointer register as unbounded + * SCALAR would be technically acceptable, but this could lead to + * pointer leaks because scalars are allowed to leak while pointers + * are not. We could make this safe in special cases if root is + * calling us, but it's probably not worth the hassle. + * + * Also, register types that are *not* MAYBE_NULL could technically be + * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE + * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point + * to the same map). + * However, if the old MAYBE_NULL register then got NULL checked, + * doing so could have affected others with the same id, and we can't + * check for that because we lost the id when we converted to + * a non-MAYBE_NULL variant. + * So, as a general rule we don't allow mixing MAYBE_NULL and + * non-MAYBE_NULL registers as well. + */ + if (rold->type != rcur->type) + return false; + switch (base_type(rold->type)) { case SCALAR_VALUE: - if (equal) + if (regs_exact(rold, rcur, idmap)) return true; if (env->explore_alu_limits) return false; - if (rcur->type == SCALAR_VALUE) { - if (!rold->precise) - return true; - /* new val must satisfy old val knowledge */ - return range_within(rold, rcur) && - tnum_in(rold->var_off, rcur->var_off); - } else { - /* We're trying to use a pointer in place of a scalar. - * Even if the scalar was unbounded, this could lead to - * pointer leaks because scalars are allowed to leak - * while pointers are not. We could make this safe in - * special cases if root is calling us, but it's - * probably not worth the hassle. - */ - return false; - } + if (!rold->precise) + return true; + /* new val must satisfy old val knowledge */ + return range_within(rold, rcur) && + tnum_in(rold->var_off, rcur->var_off); case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: - /* a PTR_TO_MAP_VALUE could be safe to use as a - * PTR_TO_MAP_VALUE_OR_NULL into the same map. - * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- - * checked, doing so could have affected others with the same - * id, and we can't check for that because we lost the id when - * we converted to a PTR_TO_MAP_VALUE. - */ - if (type_may_be_null(rold->type)) { - if (!type_may_be_null(rcur->type)) - return false; - if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) - return false; - /* Check our ids match any regs they're supposed to */ - return check_ids(rold->id, rcur->id, idmap); - } - /* If the new min/max/var_off satisfy the old ones and * everything else matches, we are OK. - * 'id' is not compared, since it's only used for maps with - * bpf_spin_lock inside map element and in such cases if - * the rest of the prog is valid for one map element then - * it's valid for all map elements regardless of the key - * used in bpf_map_lookup() */ - return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && + return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 && range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off) && check_ids(rold->id, rcur->id, idmap); case PTR_TO_PACKET_META: case PTR_TO_PACKET: - if (rcur->type != rold->type) - return false; /* We must have at least as much range as the old ptr * did, so that any accesses which were safe before are * still safe. This is true even if old range < old off, @@ -13167,7 +13900,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, if (rold->off != rcur->off) return false; /* id relations must be preserved */ - if (rold->id && !check_ids(rold->id, rcur->id, idmap)) + if (!check_ids(rold->id, rcur->id, idmap)) return false; /* new val must satisfy old val knowledge */ return range_within(rold, rcur) && @@ -13176,15 +13909,10 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, /* two stack pointers are equal only if they're pointing to * the same stack frame, since fp-8 in foo != fp-8 in bar */ - return equal && rold->frameno == rcur->frameno; + return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno; default: - /* Only valid matches are exact, which memcmp() */ - return equal; + return regs_exact(rold, rcur, idmap); } - - /* Shouldn't get here; if we do, say it's not safe */ - WARN_ON_ONCE(1); - return false; } static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, @@ -13231,10 +13959,9 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, return false; if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1) continue; - if (!is_spilled_reg(&old->stack[spi])) - continue; - if (!regsafe(env, &old->stack[spi].spilled_ptr, - &cur->stack[spi].spilled_ptr, idmap)) + /* Both old and cur are having same slot_type */ + switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) { + case STACK_SPILL: /* when explored and current stack slot are both storing * spilled registers, check that stored pointers types * are the same as well. @@ -13245,17 +13972,48 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, * such verifier states are not equivalent. * return false to continue verification of this path */ + if (!regsafe(env, &old->stack[spi].spilled_ptr, + &cur->stack[spi].spilled_ptr, idmap)) + return false; + break; + case STACK_DYNPTR: + { + const struct bpf_reg_state *old_reg, *cur_reg; + + old_reg = &old->stack[spi].spilled_ptr; + cur_reg = &cur->stack[spi].spilled_ptr; + if (old_reg->dynptr.type != cur_reg->dynptr.type || + old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot || + !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) + return false; + break; + } + case STACK_MISC: + case STACK_ZERO: + case STACK_INVALID: + continue; + /* Ensure that new unhandled slot types return false by default */ + default: return false; + } } return true; } -static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur) +static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur, + struct bpf_id_pair *idmap) { + int i; + if (old->acquired_refs != cur->acquired_refs) return false; - return !memcmp(old->refs, cur->refs, - sizeof(*old->refs) * old->acquired_refs); + + for (i = 0; i < old->acquired_refs; i++) { + if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap)) + return false; + } + + return true; } /* compare two verifier states @@ -13297,7 +14055,7 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat if (!stacksafe(env, old, cur, env->idmap_scratch)) return false; - if (!refsafe(old, cur)) + if (!refsafe(old, cur, env->idmap_scratch)) return false; return true; @@ -13834,7 +14592,7 @@ static int do_check(struct bpf_verifier_env *env) env->prev_log_len = env->log.len_used; } - if (bpf_prog_is_dev_bound(env->prog->aux)) { + if (bpf_prog_is_offloaded(env->prog->aux)) { err = bpf_prog_offload_verify_insn(env, env->insn_idx, env->prev_insn_idx); if (err) @@ -13985,7 +14743,7 @@ static int do_check(struct bpf_verifier_env *env) if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) || (insn->src_reg == BPF_PSEUDO_CALL) || (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && - (insn->off != 0 || !is_bpf_list_api_kfunc(insn->imm)))) { + (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) { verbose(env, "function calls are not allowed while holding a lock\n"); return -EINVAL; } @@ -14021,7 +14779,8 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - if (env->cur_state->active_lock.ptr) { + if (env->cur_state->active_lock.ptr && + !in_rbtree_lock_required_cb(env)) { verbose(env, "bpf_spin_unlock is missing\n"); return -EINVAL; } @@ -14283,9 +15042,10 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, { enum bpf_prog_type prog_type = resolve_prog_type(prog); - if (btf_record_has_field(map->record, BPF_LIST_HEAD)) { + if (btf_record_has_field(map->record, BPF_LIST_HEAD) || + btf_record_has_field(map->record, BPF_RB_ROOT)) { if (is_tracing_prog_type(prog_type)) { - verbose(env, "tracing progs cannot use bpf_list_head yet\n"); + verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n"); return -EINVAL; } } @@ -14314,7 +15074,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, } } - if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && + if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) && !bpf_offload_prog_map_match(prog, map)) { verbose(env, "offload device mismatch between prog and map\n"); return -EINVAL; @@ -14795,7 +15555,7 @@ static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) unsigned int orig_prog_len = env->prog->len; int err; - if (bpf_prog_is_dev_bound(env->prog->aux)) + if (bpf_prog_is_offloaded(env->prog->aux)) bpf_prog_offload_remove_insns(env, off, cnt); err = bpf_remove_insns(env->prog, off, cnt); @@ -14876,7 +15636,7 @@ static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) else continue; - if (bpf_prog_is_dev_bound(env->prog->aux)) + if (bpf_prog_is_offloaded(env->prog->aux)) bpf_prog_offload_replace_insn(env, i, &ja); memcpy(insn, &ja, sizeof(ja)); @@ -15063,7 +15823,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) } } - if (bpf_prog_is_dev_bound(env->prog->aux)) + if (bpf_prog_is_offloaded(env->prog->aux)) return 0; insn = env->prog->insnsi + delta; @@ -15463,7 +16223,7 @@ static int fixup_call_args(struct bpf_verifier_env *env) int err = 0; if (env->prog->jit_requested && - !bpf_prog_is_dev_bound(env->prog->aux)) { + !bpf_prog_is_offloaded(env->prog->aux)) { err = jit_subprogs(env); if (err == 0) return 0; @@ -15507,12 +16267,25 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_insn *insn_buf, int insn_idx, int *cnt) { const struct bpf_kfunc_desc *desc; + void *xdp_kfunc; if (!insn->imm) { verbose(env, "invalid kernel function call not eliminated in verifier pass\n"); return -EINVAL; } + *cnt = 0; + + if (bpf_dev_bound_kfunc_id(insn->imm)) { + xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm); + if (xdp_kfunc) { + insn->imm = BPF_CALL_IMM(xdp_kfunc); + return 0; + } + + /* fallback to default kfunc when not supported by netdev */ + } + /* insn->imm has the btf func_id. Replace it with * an address (relative to __bpf_call_base). */ @@ -15523,7 +16296,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EFAULT; } - *cnt = 0; insn->imm = desc->imm; if (insn->off) return 0; @@ -16449,7 +17221,7 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env) } if (st_ops->check_member) { - int err = st_ops->check_member(t, member); + int err = st_ops->check_member(t, member, prog); if (err) { verbose(env, "attach to unsupported member %s of struct %s\n", @@ -16530,6 +17302,12 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, if (tgt_prog) { struct bpf_prog_aux *aux = tgt_prog->aux; + if (bpf_prog_is_dev_bound(prog->aux) && + !bpf_prog_dev_bound_match(prog, tgt_prog)) { + bpf_log(log, "Target program bound device mismatch"); + return -EINVAL; + } + for (i = 0; i < aux->func_info_cnt; i++) if (aux->func_info[i].type_id == btf_id) { subprog = i; @@ -16751,6 +17529,24 @@ BTF_ID(func, rcu_read_unlock_strict) #endif BTF_SET_END(btf_id_deny) +static bool can_be_sleepable(struct bpf_prog *prog) +{ + if (prog->type == BPF_PROG_TYPE_TRACING) { + switch (prog->expected_attach_type) { + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + case BPF_MODIFY_RETURN: + case BPF_TRACE_ITER: + return true; + default: + return false; + } + } + return prog->type == BPF_PROG_TYPE_LSM || + prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ || + prog->type == BPF_PROG_TYPE_STRUCT_OPS; +} + static int check_attach_btf_id(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; @@ -16769,9 +17565,8 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) return -EINVAL; } - if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING && - prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_KPROBE) { - verbose(env, "Only fentry/fexit/fmod_ret, lsm, and kprobe/uprobe programs can be sleepable\n"); + if (prog->aux->sleepable && !can_be_sleepable(prog)) { + verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n"); return -EINVAL; } @@ -16950,7 +17745,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) if (ret < 0) goto skip_full_check; - if (bpf_prog_is_dev_bound(env->prog->aux)) { + if (bpf_prog_is_offloaded(env->prog->aux)) { ret = bpf_prog_offload_verifier_prep(env->prog); if (ret) goto skip_full_check; @@ -16963,7 +17758,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) ret = do_check_subprogs(env); ret = ret ?: do_check_main(env); - if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux)) + if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux)) ret = bpf_prog_offload_finalize(env); skip_full_check: @@ -16998,7 +17793,7 @@ skip_full_check: /* do 32-bit optimization after insn patching has done so those patched * insns could be handled correctly. */ - if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) { + if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) { ret = opt_subreg_zext_lo32_rnd_hi32(env, attr); env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret : false; |