Diffstat (limited to 'include/linux/bpf.h')
-rw-r--r-- | include/linux/bpf.h | 164
1 file changed, 114 insertions(+), 50 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f715e8863f4d..fa517ae604ad 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -193,7 +193,7 @@ struct bpf_map {
 	atomic64_t usercnt;
 	struct work_struct work;
 	struct mutex freeze_mutex;
-	u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */
+	atomic64_t writecnt;
 };
 
 static inline bool map_value_has_spin_lock(const struct bpf_map *map)
@@ -297,6 +297,39 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
 
 extern const struct bpf_map_ops bpf_map_offload_ops;
 
+/* bpf_type_flag contains a set of flags that are applicable to the values of
+ * arg_type, ret_type and reg_type. For example, a pointer value may be null,
+ * or a memory is read-only. We classify types into two categories: base types
+ * and extended types. Extended types are base types combined with a type flag.
+ *
+ * Currently there are no more than 32 base types in arg_type, ret_type and
+ * reg_types.
+ */
+#define BPF_BASE_TYPE_BITS	8
+
+enum bpf_type_flag {
+	/* PTR may be NULL. */
+	PTR_MAYBE_NULL		= BIT(0 + BPF_BASE_TYPE_BITS),
+
+	/* MEM is read-only. When applied on bpf_arg, it indicates the arg is
+	 * compatible with both mutable and immutable memory.
+	 */
+	MEM_RDONLY		= BIT(1 + BPF_BASE_TYPE_BITS),
+
+	/* MEM was "allocated" from a different helper, and cannot be mixed
+	 * with regular non-MEM_ALLOC'ed MEM types.
+	 */
+	MEM_ALLOC		= BIT(2 + BPF_BASE_TYPE_BITS),
+
+	__BPF_TYPE_LAST_FLAG	= MEM_ALLOC,
+};
+
+/* Max number of base types. */
+#define BPF_BASE_TYPE_LIMIT	(1UL << BPF_BASE_TYPE_BITS)
+
+/* Max number of all types. */
+#define BPF_TYPE_LIMIT		(__BPF_TYPE_LAST_FLAG | (__BPF_TYPE_LAST_FLAG - 1))
+
 /* function argument constraints */
 enum bpf_arg_type {
 	ARG_DONTCARE = 0,	/* unused argument in helper function */
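The comment block above defines the encoding, but its accessors live outside this header, so here is a minimal standalone sketch of the idea: the low BPF_BASE_TYPE_BITS carry the base type and the bits above carry flags. BIT() and the constants are re-declared so the snippet compiles in userspace; base_type(), type_flag() and the ordinal picked for ARG_PTR_TO_MAP_VALUE are illustrative stand-ins, not this header's API.

/* Standalone model of the base-type/flag split introduced above. */
#include <assert.h>
#include <stdio.h>

#define BIT(n)			(1UL << (n))
#define BPF_BASE_TYPE_BITS	8
#define BPF_BASE_TYPE_MASK	(BIT(BPF_BASE_TYPE_BITS) - 1)

enum { ARG_PTR_TO_MAP_VALUE = 3 };			/* example ordinal only */
enum { PTR_MAYBE_NULL = BIT(0 + BPF_BASE_TYPE_BITS) };	/* a type flag */

static unsigned long base_type(unsigned long t)
{
	return t & BPF_BASE_TYPE_MASK;	/* low 8 bits: the base type */
}

static unsigned long type_flag(unsigned long t)
{
	return t & ~BPF_BASE_TYPE_MASK;	/* high bits: the flags */
}

int main(void)
{
	unsigned long t = PTR_MAYBE_NULL | ARG_PTR_TO_MAP_VALUE;

	assert(base_type(t) == ARG_PTR_TO_MAP_VALUE);
	assert(type_flag(t) == PTR_MAYBE_NULL);
	printf("base=%lu flags=%#lx\n", base_type(t), type_flag(t));
	return 0;
}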
@@ -308,13 +341,11 @@ enum bpf_arg_type {
 	ARG_PTR_TO_MAP_KEY,	/* pointer to stack used as map key */
 	ARG_PTR_TO_MAP_VALUE,	/* pointer to stack used as map value */
 	ARG_PTR_TO_UNINIT_MAP_VALUE,	/* pointer to valid memory used to store a map value */
-	ARG_PTR_TO_MAP_VALUE_OR_NULL,	/* pointer to stack used as map value or NULL */
 
 	/* the following constraints used to prototype bpf_memcmp() and other
 	 * functions that access data on eBPF program stack
 	 */
 	ARG_PTR_TO_MEM,		/* pointer to valid memory (stack, packet, map value) */
-	ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */
 	ARG_PTR_TO_UNINIT_MEM,	/* pointer to memory does not need to be initialized,
 				 * helper function must fill all bytes or clear
 				 * them in error case.
@@ -324,42 +355,65 @@ enum bpf_arg_type {
 	ARG_CONST_SIZE_OR_ZERO,	/* number of bytes accessed from memory or 0 */
 
 	ARG_PTR_TO_CTX,		/* pointer to context */
-	ARG_PTR_TO_CTX_OR_NULL,	/* pointer to context or NULL */
 	ARG_ANYTHING,		/* any (initialized) argument is ok */
 	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
 	ARG_PTR_TO_SOCK_COMMON,	/* pointer to sock_common */
 	ARG_PTR_TO_INT,		/* pointer to int */
 	ARG_PTR_TO_LONG,	/* pointer to long */
 	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock (fullsock) */
-	ARG_PTR_TO_SOCKET_OR_NULL, /* pointer to bpf_sock (fullsock) or NULL */
 	ARG_PTR_TO_BTF_ID,	/* pointer to in-kernel struct */
 	ARG_PTR_TO_ALLOC_MEM,	/* pointer to dynamically allocated memory */
-	ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */
 	ARG_CONST_ALLOC_SIZE_OR_ZERO,	/* number of allocated bytes requested */
 	ARG_PTR_TO_BTF_ID_SOCK_COMMON,	/* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
 	ARG_PTR_TO_PERCPU_BTF_ID,	/* pointer to in-kernel percpu type */
 	ARG_PTR_TO_FUNC,	/* pointer to a bpf program function */
-	ARG_PTR_TO_STACK_OR_NULL,	/* pointer to stack or NULL */
+	ARG_PTR_TO_STACK,	/* pointer to stack */
 	ARG_PTR_TO_CONST_STR,	/* pointer to a null terminated read-only string */
 	ARG_PTR_TO_TIMER,	/* pointer to bpf_timer */
 	__BPF_ARG_TYPE_MAX,
+
+	/* Extended arg_types. */
+	ARG_PTR_TO_MAP_VALUE_OR_NULL	= PTR_MAYBE_NULL | ARG_PTR_TO_MAP_VALUE,
+	ARG_PTR_TO_MEM_OR_NULL		= PTR_MAYBE_NULL | ARG_PTR_TO_MEM,
+	ARG_PTR_TO_CTX_OR_NULL		= PTR_MAYBE_NULL | ARG_PTR_TO_CTX,
+	ARG_PTR_TO_SOCKET_OR_NULL	= PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET,
+	ARG_PTR_TO_ALLOC_MEM_OR_NULL	= PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM,
+	ARG_PTR_TO_STACK_OR_NULL	= PTR_MAYBE_NULL | ARG_PTR_TO_STACK,
+
+	/* This must be the last entry. Its purpose is to ensure the enum is
+	 * wide enough to hold the higher bits reserved for bpf_type_flag.
+	 */
+	__BPF_ARG_TYPE_LIMIT	= BPF_TYPE_LIMIT,
 };
+static_assert(__BPF_ARG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
 
 /* type of values returned from helper functions */
 enum bpf_return_type {
 	RET_INTEGER,			/* function returns integer */
 	RET_VOID,			/* function doesn't return anything */
 	RET_PTR_TO_MAP_VALUE,		/* returns a pointer to map elem value */
-	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
-	RET_PTR_TO_SOCKET_OR_NULL,	/* returns a pointer to a socket or NULL */
-	RET_PTR_TO_TCP_SOCK_OR_NULL,	/* returns a pointer to a tcp_sock or NULL */
-	RET_PTR_TO_SOCK_COMMON_OR_NULL,	/* returns a pointer to a sock_common or NULL */
-	RET_PTR_TO_ALLOC_MEM_OR_NULL,	/* returns a pointer to dynamically allocated memory or NULL */
-	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
-	RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
+	RET_PTR_TO_SOCKET,		/* returns a pointer to a socket */
+	RET_PTR_TO_TCP_SOCK,		/* returns a pointer to a tcp_sock */
+	RET_PTR_TO_SOCK_COMMON,		/* returns a pointer to a sock_common */
+	RET_PTR_TO_ALLOC_MEM,		/* returns a pointer to dynamically allocated memory */
 	RET_PTR_TO_MEM_OR_BTF_ID,	/* returns a pointer to a valid memory or a btf_id */
 	RET_PTR_TO_BTF_ID,		/* returns a pointer to a btf_id */
+	__BPF_RET_TYPE_MAX,
+
+	/* Extended ret_types. */
+	RET_PTR_TO_MAP_VALUE_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_MAP_VALUE,
+	RET_PTR_TO_SOCKET_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_SOCKET,
+	RET_PTR_TO_TCP_SOCK_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK,
+	RET_PTR_TO_SOCK_COMMON_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
+	RET_PTR_TO_ALLOC_MEM_OR_NULL	= PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM,
+	RET_PTR_TO_BTF_ID_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
+
+	/* This must be the last entry. Its purpose is to ensure the enum is
+	 * wide enough to hold the higher bits reserved for bpf_type_flag.
+	 */
+	__BPF_RET_TYPE_LIMIT	= BPF_TYPE_LIMIT,
 };
+static_assert(__BPF_RET_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
  * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
@@ -421,18 +475,15 @@ enum bpf_reg_type {
 	PTR_TO_CTX,		 /* reg points to bpf_context */
 	CONST_PTR_TO_MAP,	 /* reg points to struct bpf_map */
 	PTR_TO_MAP_VALUE,	 /* reg points to map element value */
-	PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
+	PTR_TO_MAP_KEY,		 /* reg points to a map element key */
 	PTR_TO_STACK,		 /* reg == frame_pointer + offset */
 	PTR_TO_PACKET_META,	 /* skb->data - meta_len */
 	PTR_TO_PACKET,		 /* reg points to skb->data */
 	PTR_TO_PACKET_END,	 /* skb->data + headlen */
 	PTR_TO_FLOW_KEYS,	 /* reg points to bpf_flow_keys */
 	PTR_TO_SOCKET,		 /* reg points to struct bpf_sock */
-	PTR_TO_SOCKET_OR_NULL,	 /* reg points to struct bpf_sock or NULL */
 	PTR_TO_SOCK_COMMON,	 /* reg points to sock_common */
-	PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
 	PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */
-	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
 	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
 	PTR_TO_XDP_SOCK,	 /* reg points to struct xdp_sock */
 	/* PTR_TO_BTF_ID points to a kernel struct that does not need
@@ -450,18 +501,25 @@ enum bpf_reg_type {
 	 * been checked for null. Used primarily to inform the verifier
 	 * an explicit null check is required for this struct.
 	 */
-	PTR_TO_BTF_ID_OR_NULL,
 	PTR_TO_MEM,		 /* reg points to valid memory region */
-	PTR_TO_MEM_OR_NULL,	 /* reg points to valid memory region or NULL */
-	PTR_TO_RDONLY_BUF,	 /* reg points to a readonly buffer */
-	PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
-	PTR_TO_RDWR_BUF,	 /* reg points to a read/write buffer */
-	PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
+	PTR_TO_BUF,		 /* reg points to a read/write buffer */
 	PTR_TO_PERCPU_BTF_ID,	 /* reg points to a percpu kernel variable */
 	PTR_TO_FUNC,		 /* reg points to a bpf program function */
-	PTR_TO_MAP_KEY,		 /* reg points to a map element key */
 	__BPF_REG_TYPE_MAX,
+
+	/* Extended reg_types. */
+	PTR_TO_MAP_VALUE_OR_NULL	= PTR_MAYBE_NULL | PTR_TO_MAP_VALUE,
+	PTR_TO_SOCKET_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_SOCKET,
+	PTR_TO_SOCK_COMMON_OR_NULL	= PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON,
+	PTR_TO_TCP_SOCK_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_TCP_SOCK,
+	PTR_TO_BTF_ID_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_BTF_ID,
+
+	/* This must be the last entry. Its purpose is to ensure the enum is
+	 * wide enough to hold the higher bits reserved for bpf_type_flag.
+	 */
+	__BPF_REG_TYPE_LIMIT	= BPF_TYPE_LIMIT,
 };
+static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
 
 /* The information passed from prog-specific *_is_valid_access
  * back to the verifier.
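A practical payoff of flag-encoded reg types: once the program has null-checked a pointer, a verifier-like consumer only needs to clear PTR_MAYBE_NULL instead of mapping every *_OR_NULL enumerator to its non-NULL twin, and any other flags (such as MEM_ALLOC) survive unchanged. The sketch below models that idea; the constants are local stand-ins and mark_ptr_not_null() is a made-up name, not the verifier's code.

#include <assert.h>

#define BIT(n)			(1UL << (n))
#define BPF_BASE_TYPE_BITS	8

enum { PTR_TO_SOCKET = 9 };				/* example ordinal only */
enum {
	PTR_MAYBE_NULL	= BIT(0 + BPF_BASE_TYPE_BITS),
	MEM_ALLOC	= BIT(2 + BPF_BASE_TYPE_BITS),
};

#define PTR_TO_SOCKET_OR_NULL	(PTR_MAYBE_NULL | PTR_TO_SOCKET)

/* After a successful NULL check, drop only the maybe-NULL bit;
 * all other flags stay intact.
 */
static unsigned long mark_ptr_not_null(unsigned long reg_type)
{
	return reg_type & ~PTR_MAYBE_NULL;
}

int main(void)
{
	assert(mark_ptr_not_null(PTR_TO_SOCKET_OR_NULL) == PTR_TO_SOCKET);
	/* 7 is an arbitrary base type; MEM_ALLOC is preserved. */
	assert(mark_ptr_not_null(PTR_MAYBE_NULL | MEM_ALLOC | 7) ==
	       (MEM_ALLOC | 7));
	return 0;
}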
@@ -732,6 +790,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
 struct bpf_trampoline *bpf_trampoline_get(u64 key,
 					  struct bpf_attach_target_info *tgt_info);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
+int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs);
 #define BPF_DISPATCHER_INIT(_name) {				\
 	.mutex = __MUTEX_INITIALIZER(_name.mutex),		\
 	.func = &_name##_func,					\
@@ -776,6 +835,7 @@ void bpf_ksym_add(struct bpf_ksym *ksym);
 void bpf_ksym_del(struct bpf_ksym *ksym);
 int bpf_jit_charge_modmem(u32 pages);
 void bpf_jit_uncharge_modmem(u32 pages);
+bool bpf_prog_has_trampoline(const struct bpf_prog *prog);
 #else
 static inline int bpf_trampoline_link_prog(struct bpf_prog *prog,
 					   struct bpf_trampoline *tr)
@@ -804,6 +864,10 @@ static inline bool is_bpf_image_address(unsigned long address)
 {
 	return false;
 }
+static inline bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
+{
+	return false;
+}
 #endif
 
 struct bpf_func_info_aux {
@@ -1081,7 +1145,7 @@ struct bpf_array {
 };
 
 #define BPF_COMPLEXITY_LIMIT_INSNS	1000000 /* yes. 1M insns */
-#define MAX_TAIL_CALL_CNT 32
+#define MAX_TAIL_CALL_CNT 33
 
 #define BPF_F_ACCESS_MASK	(BPF_F_RDONLY |		\
 				 BPF_F_RDONLY_PROG |	\
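The MAX_TAIL_CALL_CNT bump from 32 to 33 aligns the macro with the limit the runtime is generally described as enforcing: a program may chain through up to 33 tail calls. A toy userspace model of that counter check follows (this is not the interpreter's code, just the bound it implies):

#include <stdio.h>

#define MAX_TAIL_CALL_CNT 33

int main(void)
{
	int tail_call_cnt = 0, jumps = 0;

	/* Each iteration models one bpf_tail_call() attempt. */
	for (;;) {
		if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
			break;		/* refused: program falls through */
		tail_call_cnt++;
		jumps++;		/* jump into the next program */
	}
	printf("tail calls taken: %d\n", jumps);	/* prints 33 */
	return 0;
}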
@@ -1352,28 +1416,16 @@ extern struct mutex bpf_stats_enabled_mutex;
  * kprobes, tracepoints) to prevent deadlocks on map operations as any of
  * these events can happen inside a region which holds a map bucket lock
  * and can deadlock on it.
- *
- * Use the preemption safe inc/dec variants on RT because migrate disable
- * is preemptible on RT and preemption in the middle of the RMW operation
- * might lead to inconsistent state. Use the raw variants for non RT
- * kernels as migrate_disable() maps to preempt_disable() so the slightly
- * more expensive save operation can be avoided.
  */
 static inline void bpf_disable_instrumentation(void)
 {
 	migrate_disable();
-	if (IS_ENABLED(CONFIG_PREEMPT_RT))
-		this_cpu_inc(bpf_prog_active);
-	else
-		__this_cpu_inc(bpf_prog_active);
+	this_cpu_inc(bpf_prog_active);
 }
 
 static inline void bpf_enable_instrumentation(void)
 {
-	if (IS_ENABLED(CONFIG_PREEMPT_RT))
-		this_cpu_dec(bpf_prog_active);
-	else
-		__this_cpu_dec(bpf_prog_active);
+	this_cpu_dec(bpf_prog_active);
 	migrate_enable();
 }
 
@@ -1419,6 +1471,7 @@ void bpf_map_put(struct bpf_map *map);
 void *bpf_map_area_alloc(u64 size, int numa_node);
 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
 void bpf_map_area_free(void *base);
+bool bpf_map_write_active(const struct bpf_map *map);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
 int  generic_map_lookup_batch(struct bpf_map *map,
 			      const union bpf_attr *attr,
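bpf_map_write_active() pairs with writecnt becoming atomic64_t in the first hunk: writable-mmap accounting no longer hides behind freeze_mutex, so the count can be read locklessly. Below is a userspace model of that accounting, assuming the kernel-side query is essentially an atomic read; the types and function bodies here are stand-ins, not the kernel implementation.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct bpf_map {
	atomic_long writecnt;	/* stand-in for the kernel's atomic64_t */
};

/* Models a writable mmap of the map opening/closing. */
static void map_writable_mmap_open(struct bpf_map *map)
{
	atomic_fetch_add(&map->writecnt, 1);
}

static void map_writable_mmap_close(struct bpf_map *map)
{
	atomic_fetch_sub(&map->writecnt, 1);
}

/* Plausibly just a lockless read: "is anyone write-mmapping this map?" */
static bool bpf_map_write_active(struct bpf_map *map)
{
	return atomic_load(&map->writecnt) != 0;
}

int main(void)
{
	struct bpf_map m = { 0 };

	map_writable_mmap_open(&m);
	printf("write active: %d\n", bpf_map_write_active(&m));	/* 1 */
	map_writable_mmap_close(&m);
	printf("write active: %d\n", bpf_map_write_active(&m));	/* 0 */
	return 0;
}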
@@ -1621,17 +1674,17 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
 struct btf *bpf_get_btf_vmlinux(void);
 
 /* Map specifics */
-struct xdp_buff;
+struct xdp_frame;
 struct sk_buff;
 struct bpf_dtab_netdev;
 struct bpf_cpu_map_entry;
 
 void __dev_flush(void);
-int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx);
-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx);
-int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
 			  struct bpf_map *map, bool exclude_ingress);
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     struct bpf_prog *xdp_prog);
@@ -1640,7 +1693,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
 			   bool exclude_ingress);
 
 void __cpu_map_flush(void);
-int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
+int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx);
 int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
 			     struct sk_buff *skb);
@@ -1732,6 +1785,14 @@ bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
 const struct btf_func_model *
 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
 			 const struct bpf_insn *insn);
+struct bpf_core_ctx {
+	struct bpf_verifier_log *log;
+	const struct btf *btf;
+};
+
+int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
+		   int relo_idx, void *insn);
+
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -1810,26 +1871,26 @@ static inline void __dev_flush(void)
 {
 }
 
-struct xdp_buff;
+struct xdp_frame;
 struct bpf_dtab_netdev;
 struct bpf_cpu_map_entry;
 
 static inline
-int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx)
 {
 	return 0;
 }
 
 static inline
-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx)
 {
 	return 0;
 }
 
 static inline
-int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
 			  struct bpf_map *map, bool exclude_ingress)
 {
 	return 0;
@@ -1857,7 +1918,7 @@ static inline void __cpu_map_flush(void)
 }
 
 static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
-				  struct xdp_buff *xdp,
+				  struct xdp_frame *xdpf,
 				  struct net_device *dev_rx)
 {
 	return 0;
@@ -2163,6 +2224,9 @@ extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
 extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
 extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
 extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
+extern const struct bpf_func_proto bpf_find_vma_proto;
+extern const struct bpf_func_proto bpf_loop_proto;
+extern const struct bpf_func_proto bpf_strncmp_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
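The three new externs in the final hunk expose prototypes for helpers added this cycle (bpf_find_vma, bpf_loop, bpf_strncmp). Below is a hypothetical BPF-C usage of two of them, assuming a clang/libbpf toolchain whose generated helper declarations already include bpf_loop() and bpf_strncmp(); the section name and program logic are illustrative only, not taken from this patch.

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Callback for bpf_loop(); return 0 to continue, 1 to break early. */
static int sum_cb(__u32 index, void *ctx)
{
	*(long *)ctx += index;
	return 0;
}

SEC("tracepoint/syscalls/sys_enter_execve")
int demo(void *ctx)
{
	char comm[16] = {};
	long sum = 0;

	bpf_get_current_comm(comm, sizeof(comm));

	/* The third argument must be a constant string
	 * (ARG_PTR_TO_CONST_STR in the helper's proto).
	 */
	if (bpf_strncmp(comm, sizeof(comm), "bash") != 0)
		return 0;

	/* Bounded, verifier-friendly loop: run sum_cb 32 times. */
	bpf_loop(32, sum_cb, &sum, 0);
	bpf_printk("sum=%ld", sum);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";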