summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/bpf.h54
-rw-r--r--include/linux/bpf_local_storage.h19
-rw-r--r--include/linux/bpf_mem_alloc.h2
-rw-r--r--include/linux/bpf_verifier.h72
-rw-r--r--include/linux/btf.h8
-rw-r--r--include/linux/filter.h9
-rw-r--r--include/linux/module.h127
-rw-r--r--include/linux/sched.h7
-rw-r--r--include/linux/skbuff.h40
-rw-r--r--include/net/fou.h2
-rw-r--r--include/net/ip_tunnels.h27
-rw-r--r--include/net/tcp.h3
-rw-r--r--include/net/xdp.h29
-rw-r--r--include/net/xsk_buff_pool.h9
-rw-r--r--include/uapi/linux/bpf.h61
15 files changed, 281 insertions, 188 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6792a7940e1e..2c6095bd7d69 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -96,11 +96,11 @@ struct bpf_map_ops {
/* funcs callable from userspace and from eBPF programs */
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
- int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
- int (*map_delete_elem)(struct bpf_map *map, void *key);
- int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
- int (*map_pop_elem)(struct bpf_map *map, void *value);
- int (*map_peek_elem)(struct bpf_map *map, void *value);
+ long (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
+ long (*map_delete_elem)(struct bpf_map *map, void *key);
+ long (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
+ long (*map_pop_elem)(struct bpf_map *map, void *value);
+ long (*map_peek_elem)(struct bpf_map *map, void *value);
void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu);
/* funcs called by prog_array and perf_event_array map */
@@ -139,7 +139,7 @@ struct bpf_map_ops {
struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
/* Misc helpers.*/
- int (*map_redirect)(struct bpf_map *map, u64 key, u64 flags);
+ long (*map_redirect)(struct bpf_map *map, u64 key, u64 flags);
/* map_meta_equal must be implemented for maps that can be
* used as an inner map. It is a runtime check to ensure
@@ -157,7 +157,7 @@ struct bpf_map_ops {
int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee);
- int (*map_for_each_callback)(struct bpf_map *map,
+ long (*map_for_each_callback)(struct bpf_map *map,
bpf_callback_t callback_fn,
void *callback_ctx, u64 flags);
@@ -189,9 +189,14 @@ enum btf_field_type {
BPF_RB_NODE | BPF_RB_ROOT,
};
+typedef void (*btf_dtor_kfunc_t)(void *);
+
struct btf_field_kptr {
struct btf *btf;
struct module *module;
+ /* dtor used if btf_is_kernel(btf), otherwise the type is
+ * program-allocated, dtor is NULL, and __bpf_obj_drop_impl is used
+ */
btf_dtor_kfunc_t dtor;
u32 btf_id;
};
@@ -888,8 +893,7 @@ struct bpf_verifier_ops {
struct bpf_prog *prog, u32 *target_size);
int (*btf_struct_access)(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id, enum bpf_type_flag *flag);
+ int off, int size);
};
struct bpf_prog_offload_ops {
@@ -1098,6 +1102,7 @@ struct bpf_trampoline {
struct bpf_attach_target_info {
struct btf_func_model fmodel;
long tgt_addr;
+ struct module *tgt_mod;
const char *tgt_name;
const struct btf_type *tgt_type;
};
@@ -1401,6 +1406,7 @@ struct bpf_prog_aux {
* main prog always has linfo_idx == 0
*/
u32 linfo_idx;
+ struct module *mod;
u32 num_exentries;
struct exception_table_entry *extable;
union {
@@ -1469,6 +1475,8 @@ struct bpf_link_ops {
void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq);
int (*fill_link_info)(const struct bpf_link *link,
struct bpf_link_info *info);
+ int (*update_map)(struct bpf_link *link, struct bpf_map *new_map,
+ struct bpf_map *old_map);
};
struct bpf_tramp_link {
@@ -1511,6 +1519,8 @@ struct bpf_struct_ops {
void *kdata, const void *udata);
int (*reg)(void *kdata);
void (*unreg)(void *kdata);
+ int (*update)(void *kdata, void *old_kdata);
+ int (*validate)(void *kdata);
const struct btf_type *type;
const struct btf_type *value_type;
const char *name;
@@ -1545,6 +1555,7 @@ static inline void bpf_module_put(const void *data, struct module *owner)
else
module_put(owner);
}
+int bpf_struct_ops_link_create(union bpf_attr *attr);
#ifdef CONFIG_NET
/* Define it here to avoid the use of forward declaration */
@@ -1585,6 +1596,11 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
{
return -EINVAL;
}
+static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
+{
+ return -EOPNOTSUPP;
+}
+
#endif
#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
@@ -1617,8 +1633,12 @@ struct bpf_array {
#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */
#define MAX_TAIL_CALL_CNT 33
-/* Maximum number of loops for bpf_loop */
-#define BPF_MAX_LOOPS BIT(23)
+/* Maximum number of loops for bpf_loop and bpf_iter_num.
+ * It's an enum so that it is exposed (and thus discoverable) through BTF.
+ */
+enum {
+ BPF_MAX_LOOPS = 8 * 1024 * 1024,
+};
#define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \
BPF_F_RDONLY_PROG | \
@@ -1921,7 +1941,7 @@ void bpf_prog_free_id(struct bpf_prog *prog);
void bpf_map_free_id(struct bpf_map *map);
struct btf_field *btf_record_find(const struct btf_record *rec,
- u32 offset, enum btf_field_type type);
+ u32 offset, u32 field_mask);
void btf_record_free(struct btf_record *rec);
void bpf_map_free_record(struct bpf_map *map);
struct btf_record *btf_record_dup(const struct btf_record *rec);
@@ -1934,6 +1954,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd);
struct bpf_map *__bpf_map_get(struct fd f);
void bpf_map_inc(struct bpf_map *map);
void bpf_map_inc_with_uref(struct bpf_map *map);
+struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref);
struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map);
void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
@@ -2154,7 +2175,7 @@ int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size,
size_t actual_size);
/* verify correctness of eBPF program */
-int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr);
+int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size);
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
@@ -2242,7 +2263,7 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size,
int btf_struct_access(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id, enum bpf_type_flag *flag);
+ u32 *next_btf_id, enum bpf_type_flag *flag, const char **field_name);
bool btf_struct_ids_match(struct bpf_verifier_log *log,
const struct btf *btf, u32 id, int off,
const struct btf *need_btf, u32 need_type_id,
@@ -2281,7 +2302,7 @@ struct bpf_core_ctx {
bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off, const char *suffix);
+ const char *field_name, u32 btf_id, const char *suffix);
bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log,
const struct btf *reg_btf, u32 reg_id,
@@ -2496,7 +2517,8 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id)
static inline int btf_struct_access(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id, enum bpf_type_flag *flag)
+ u32 *next_btf_id, enum bpf_type_flag *flag,
+ const char **field_name)
{
return -EACCES;
}
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index d934248b8e81..173ec7f43ed1 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -13,6 +13,7 @@
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
+#include <linux/bpf_mem_alloc.h>
#include <uapi/linux/btf.h>
#define BPF_LOCAL_STORAGE_CACHE_SIZE 16
@@ -55,6 +56,9 @@ struct bpf_local_storage_map {
u32 bucket_log;
u16 elem_size;
u16 cache_idx;
+ struct bpf_mem_alloc selem_ma;
+ struct bpf_mem_alloc storage_ma;
+ bool bpf_ma;
};
struct bpf_local_storage_data {
@@ -83,6 +87,7 @@ struct bpf_local_storage_elem {
struct bpf_local_storage {
struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE];
+ struct bpf_local_storage_map __rcu *smap;
struct hlist_head list; /* List of bpf_local_storage_elem */
void *owner; /* The object that owns the above "list" of
* bpf_local_storage_elem.
@@ -121,14 +126,15 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
- struct bpf_local_storage_cache *cache);
+ struct bpf_local_storage_cache *cache,
+ bool bpf_ma);
struct bpf_local_storage_data *
bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
struct bpf_local_storage_map *smap,
bool cacheit_lockit);
-bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage);
+void bpf_local_storage_destroy(struct bpf_local_storage *local_storage);
void bpf_local_storage_map_free(struct bpf_map *map,
struct bpf_local_storage_cache *cache,
@@ -142,17 +148,19 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem);
-void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu);
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now);
void bpf_selem_link_map(struct bpf_local_storage_map *smap,
struct bpf_local_storage_elem *selem);
-void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
-
struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
bool charge_mem, gfp_t gfp_flags);
+void bpf_selem_free(struct bpf_local_storage_elem *selem,
+ struct bpf_local_storage_map *smap,
+ bool reuse_now);
+
int
bpf_local_storage_alloc(void *owner,
struct bpf_local_storage_map *smap,
@@ -163,7 +171,6 @@ struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
void *value, u64 map_flags, gfp_t gfp_flags);
-void bpf_local_storage_free_rcu(struct rcu_head *rcu);
u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map);
#endif /* _BPF_LOCAL_STORAGE_H */
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
index a7104af61ab4..3929be5743f4 100644
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -31,5 +31,7 @@ void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr);
/* kmem_cache_alloc/free equivalent: */
void *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma);
void bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr);
+void bpf_mem_cache_raw_free(void *ptr);
+void *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags);
#endif /* _BPF_MEM_ALLOC_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 18538bad2b8c..f03852b89d28 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -59,6 +59,14 @@ struct bpf_active_lock {
u32 id;
};
+#define ITER_PREFIX "bpf_iter_"
+
+enum bpf_iter_state {
+ BPF_ITER_STATE_INVALID, /* for non-first slot */
+ BPF_ITER_STATE_ACTIVE,
+ BPF_ITER_STATE_DRAINED,
+};
+
struct bpf_reg_state {
/* Ordering of fields matters. See states_equal() */
enum bpf_reg_type type;
@@ -103,6 +111,18 @@ struct bpf_reg_state {
bool first_slot;
} dynptr;
+ /* For bpf_iter stack slots */
+ struct {
+ /* BTF container and BTF type ID describing
+ * struct bpf_iter_<type> of an iterator state
+ */
+ struct btf *btf;
+ u32 btf_id;
+ /* packing following two fields to fit iter state into 16 bytes */
+ enum bpf_iter_state state:2;
+ int depth:30;
+ } iter;
+
/* Max size from any of the above. */
struct {
unsigned long raw1;
@@ -141,6 +161,8 @@ struct bpf_reg_state {
* same reference to the socket, to determine proper reference freeing.
* For stack slots that are dynptrs, this is used to track references to
* the dynptr to determine proper reference freeing.
+ * Similarly to dynptrs, we use ID to track "belonging" of a reference
+ * to a specific instance of bpf_iter.
*/
u32 id;
/* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
@@ -211,9 +233,11 @@ enum bpf_stack_slot_type {
* is stored in bpf_stack_state->spilled_ptr.dynptr.type
*/
STACK_DYNPTR,
+ STACK_ITER,
};
#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
+
#define BPF_DYNPTR_SIZE sizeof(struct bpf_dynptr_kern)
#define BPF_DYNPTR_NR_SLOTS (BPF_DYNPTR_SIZE / BPF_REG_SIZE)
@@ -448,12 +472,17 @@ struct bpf_insn_aux_data {
bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
bool zext_dst; /* this insn zero extends dst reg */
bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
+ bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */
u8 alu_state; /* used in combination with alu_limit */
/* below fields are initialized once */
unsigned int orig_idx; /* original instruction index */
- bool prune_point;
bool jmp_point;
+ bool prune_point;
+ /* ensure we check state equivalence and save state checkpoint at
+ * this instruction, regardless of any heuristics
+ */
+ bool force_checkpoint;
};
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
@@ -462,39 +491,36 @@ struct bpf_insn_aux_data {
#define BPF_VERIFIER_TMP_LOG_SIZE 1024
struct bpf_verifier_log {
- u32 level;
- char kbuf[BPF_VERIFIER_TMP_LOG_SIZE];
+ /* Logical start and end positions of a "log window" of the verifier log.
+ * start_pos == 0 means we haven't truncated anything.
+ * Once truncation starts to happen, start_pos + len_total == end_pos,
+ * except during log reset situations, in which (end_pos - start_pos)
+ * might get smaller than len_total (see bpf_vlog_reset()).
+ * Generally, (end_pos - start_pos) gives the amount of useful data in
+ * user log buffer.
+ */
+ u64 start_pos;
+ u64 end_pos;
char __user *ubuf;
- u32 len_used;
+ u32 level;
u32 len_total;
+ u32 len_max;
+ char kbuf[BPF_VERIFIER_TMP_LOG_SIZE];
};
-static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
-{
- return log->len_used >= log->len_total - 1;
-}
-
#define BPF_LOG_LEVEL1 1
#define BPF_LOG_LEVEL2 2
#define BPF_LOG_STATS 4
+#define BPF_LOG_FIXED 8
#define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2)
-#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS)
+#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS | BPF_LOG_FIXED)
#define BPF_LOG_KERNEL (BPF_LOG_MASK + 1) /* kernel internal flag */
#define BPF_LOG_MIN_ALIGNMENT 8U
#define BPF_LOG_ALIGNMENT 40U
static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
{
- return log &&
- ((log->level && log->ubuf && !bpf_verifier_log_full(log)) ||
- log->level == BPF_LOG_KERNEL);
-}
-
-static inline bool
-bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log)
-{
- return log->len_total >= 128 && log->len_total <= UINT_MAX >> 2 &&
- log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK);
+ return log && log->level;
}
#define BPF_MAX_SUBPROGS 256
@@ -574,7 +600,7 @@ struct bpf_verifier_env {
u32 scratched_regs;
/* Same as scratched_regs but for stack slots */
u64 scratched_stack_slots;
- u32 prev_log_len, prev_insn_print_len;
+ u64 prev_log_pos, prev_insn_print_pos;
/* buffer used in reg_type_str() to generate reg_type string */
char type_str_buf[TYPE_STR_BUF_LEN];
};
@@ -585,6 +611,10 @@ __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
const char *fmt, ...);
__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
const char *fmt, ...);
+int bpf_vlog_init(struct bpf_verifier_log *log, u32 log_level,
+ char __user *log_buf, u32 log_size);
+void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos);
+int bpf_vlog_finalize(struct bpf_verifier_log *log, u32 *log_size_actual);
static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
{
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 556b3e2e7471..495250162422 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -71,6 +71,10 @@
#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */
#define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */
#define KF_RCU (1 << 7) /* kfunc takes either rcu or trusted pointer arguments */
+/* only one of KF_ITER_{NEW,NEXT,DESTROY} could be specified per kfunc */
+#define KF_ITER_NEW (1 << 8) /* kfunc implements BPF iter constructor */
+#define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */
+#define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */
/*
* Tag marking a kernel function as a kfunc. This is meant to minimize the
@@ -117,13 +121,11 @@ struct btf_struct_metas {
struct btf_struct_meta types[];
};
-typedef void (*btf_dtor_kfunc_t)(void *);
-
extern const struct file_operations btf_fops;
void btf_get(struct btf *btf);
void btf_put(struct btf *btf);
-int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr);
+int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz);
struct btf *btf_get_by_fd(int fd);
int btf_get_info_by_fd(const struct btf *btf,
const union bpf_attr *attr,
diff --git a/include/linux/filter.h b/include/linux/filter.h
index efa5d4a1677e..5364b0c52c1d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -571,8 +571,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
extern struct mutex nf_conn_btf_access_lock;
extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id, enum bpf_type_flag *flag);
+ int off, int size);
typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx,
const struct bpf_insn *insnsi,
@@ -1504,9 +1503,9 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
}
#endif /* IS_ENABLED(CONFIG_IPV6) */
-static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index,
- u64 flags, const u64 flag_mask,
- void *lookup_elem(struct bpf_map *map, u32 key))
+static __always_inline long __bpf_xdp_redirect_map(struct bpf_map *map, u64 index,
+ u64 flags, const u64 flag_mask,
+ void *lookup_elem(struct bpf_map *map, u32 key))
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
const u64 action_mask = XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX;
diff --git a/include/linux/module.h b/include/linux/module.h
index 4435ad9439ab..886d24877c7c 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -608,14 +608,6 @@ static inline bool within_module(unsigned long addr, const struct module *mod)
/* Search for module by name: must be in a RCU-sched critical section. */
struct module *find_module(const char *name);
-/* Returns 0 and fills in value, defined and namebuf, or -ERANGE if
- symnum out of range. */
-int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
- char *name, char *module_name, int *exported);
-
-/* Look for this name: can be of form module:name. */
-unsigned long module_kallsyms_lookup_name(const char *name);
-
extern void __noreturn __module_put_and_kthread_exit(struct module *mod,
long code);
#define module_put_and_kthread_exit(code) __module_put_and_kthread_exit(THIS_MODULE, code)
@@ -662,17 +654,6 @@ static inline void __module_get(struct module *module)
/* Dereference module function descriptor */
void *dereference_module_function_descriptor(struct module *mod, void *ptr);
-/* For kallsyms to ask for address resolution. namebuf should be at
- * least KSYM_NAME_LEN long: a pointer to namebuf is returned if
- * found, otherwise NULL. */
-const char *module_address_lookup(unsigned long addr,
- unsigned long *symbolsize,
- unsigned long *offset,
- char **modname, const unsigned char **modbuildid,
- char *namebuf);
-int lookup_module_symbol_name(unsigned long addr, char *symname);
-int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name);
-
int register_module_notifier(struct notifier_block *nb);
int unregister_module_notifier(struct notifier_block *nb);
@@ -763,39 +744,6 @@ static inline void module_put(struct module *module)
#define module_name(mod) "kernel"
-/* For kallsyms to ask for address resolution. NULL means not found. */
-static inline const char *module_address_lookup(unsigned long addr,
- unsigned long *symbolsize,
- unsigned long *offset,
- char **modname,
- const unsigned char **modbuildid,
- char *namebuf)
-{
- return NULL;
-}
-
-static inline int lookup_module_symbol_name(unsigned long addr, char *symname)
-{
- return -ERANGE;
-}
-
-static inline int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name)
-{
- return -ERANGE;
-}
-
-static inline int module_get_kallsym(unsigned int symnum, unsigned long *value,
- char *type, char *name,
- char *module_name, int *exported)
-{
- return -ERANGE;
-}
-
-static inline unsigned long module_kallsyms_lookup_name(const char *name)
-{
- return 0;
-}
-
static inline int register_module_notifier(struct notifier_block *nb)
{
/* no events will happen anyway, so this can always succeed */
@@ -891,7 +839,36 @@ int module_kallsyms_on_each_symbol(const char *modname,
int (*fn)(void *, const char *,
struct module *, unsigned long),
void *data);
-#else
+
+/* For kallsyms to ask for address resolution. namebuf should be at
+ * least KSYM_NAME_LEN long: a pointer to namebuf is returned if
+ * found, otherwise NULL.
+ */
+const char *module_address_lookup(unsigned long addr,
+ unsigned long *symbolsize,
+ unsigned long *offset,
+ char **modname, const unsigned char **modbuildid,
+ char *namebuf);
+int lookup_module_symbol_name(unsigned long addr, char *symname);
+int lookup_module_symbol_attrs(unsigned long addr,
+ unsigned long *size,
+ unsigned long *offset,
+ char *modname,
+ char *name);
+
+/* Returns 0 and fills in value, defined and namebuf, or -ERANGE if
+ * symnum out of range.
+ */
+int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
+ char *name, char *module_name, int *exported);
+
+/* Look for this name: can be of form module:name. */
+unsigned long module_kallsyms_lookup_name(const char *name);
+
+unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name);
+
+#else /* CONFIG_MODULES && CONFIG_KALLSYMS */
+
static inline int module_kallsyms_on_each_symbol(const char *modname,
int (*fn)(void *, const char *,
struct module *, unsigned long),
@@ -899,6 +876,50 @@ static inline int module_kallsyms_on_each_symbol(const char *modname,
{
return -EOPNOTSUPP;
}
+
+/* For kallsyms to ask for address resolution. NULL means not found. */
+static inline const char *module_address_lookup(unsigned long addr,
+ unsigned long *symbolsize,
+ unsigned long *offset,
+ char **modname,
+ const unsigned char **modbuildid,
+ char *namebuf)
+{
+ return NULL;
+}
+
+static inline int lookup_module_symbol_name(unsigned long addr, char *symname)
+{
+ return -ERANGE;
+}
+
+static inline int lookup_module_symbol_attrs(unsigned long addr,
+ unsigned long *size,
+ unsigned long *offset,
+ char *modname,
+ char *name)
+{
+ return -ERANGE;
+}
+
+static inline int module_get_kallsym(unsigned int symnum, unsigned long *value,
+ char *type, char *name,
+ char *module_name, int *exported)
+{
+ return -ERANGE;
+}
+
+static inline unsigned long module_kallsyms_lookup_name(const char *name)
+{
+ return 0;
+}
+
+static inline unsigned long find_kallsyms_symbol_value(struct module *mod,
+ const char *name)
+{
+ return 0;
+}
+
#endif /* CONFIG_MODULES && CONFIG_KALLSYMS */
#endif /* _LINUX_MODULE_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 63d242164b1a..b11b4517760f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1318,11 +1318,6 @@ struct task_struct {
struct tlbflush_unmap_batch tlb_ubc;
- union {
- refcount_t rcu_users;
- struct rcu_head rcu;
- };
-
/* Cache last used pipe for splice(): */
struct pipe_inode_info *splice_pipe;
@@ -1459,6 +1454,8 @@ struct task_struct {
unsigned long saved_state_change;
# endif
#endif
+ struct rcu_head rcu;
+ refcount_t rcu_users;
int pagefault_disabled;
#ifdef CONFIG_MMU
struct task_struct *oom_reaper_list;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 82511b2f61ea..494a23a976b0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -938,6 +938,19 @@ struct sk_buff {
__u8 ip_summed:2;
__u8 ooo_okay:1;
+ /* private: */
+ __u8 __mono_tc_offset[0];
+ /* public: */
+ __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */
+#ifdef CONFIG_NET_CLS_ACT
+ __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
+ __u8 tc_skip_classify:1;
+#endif
+ __u8 remcsum_offload:1;
+ __u8 csum_complete_sw:1;
+ __u8 csum_level:2;
+ __u8 dst_pending_confirm:1;
+
__u8 l4_hash:1;
__u8 sw_hash:1;
__u8 wifi_acked_valid:1;
@@ -947,19 +960,6 @@ struct sk_buff {
__u8 encapsulation:1;
__u8 encap_hdr_csum:1;
__u8 csum_valid:1;
-
- /* private: */
- __u8 __pkt_vlan_present_offset[0];
- /* public: */
- __u8 remcsum_offload:1;
- __u8 csum_complete_sw:1;
- __u8 csum_level:2;
- __u8 dst_pending_confirm:1;
- __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */
-#ifdef CONFIG_NET_CLS_ACT
- __u8 tc_skip_classify:1;
- __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
-#endif
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8 ndisc_nodetype:2;
#endif
@@ -1066,13 +1066,13 @@ struct sk_buff {
* around, you also must adapt these constants.
*/
#ifdef __BIG_ENDIAN_BITFIELD
-#define TC_AT_INGRESS_MASK (1 << 0)
-#define SKB_MONO_DELIVERY_TIME_MASK (1 << 2)
+#define SKB_MONO_DELIVERY_TIME_MASK (1 << 7)
+#define TC_AT_INGRESS_MASK (1 << 6)
#else
-#define TC_AT_INGRESS_MASK (1 << 7)
-#define SKB_MONO_DELIVERY_TIME_MASK (1 << 5)
+#define SKB_MONO_DELIVERY_TIME_MASK (1 << 0)
+#define TC_AT_INGRESS_MASK (1 << 1)
#endif
-#define PKT_VLAN_PRESENT_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset)
+#define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset)
#ifdef __KERNEL__
/*
@@ -5063,12 +5063,12 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb)
#endif
}
-#ifdef CONFIG_PAGE_POOL
static inline void skb_mark_for_recycle(struct sk_buff *skb)
{
+#ifdef CONFIG_PAGE_POOL
skb->pp_recycle = 1;
-}
#endif
+}
#endif /* __KERNEL__ */
#endif /* _LINUX_SKBUFF_H */
diff --git a/include/net/fou.h b/include/net/fou.h
index 80f56e275b08..824eb4b231fd 100644
--- a/include/net/fou.h
+++ b/include/net/fou.h
@@ -17,4 +17,6 @@ int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
u8 *protocol, __be16 *sport, int type);
+int register_fou_bpf(void);
+
#endif
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 255b32a90850..ed4b6ad3fcac 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -57,6 +57,13 @@ struct ip_tunnel_key {
__u8 flow_flags;
};
+struct ip_tunnel_encap {
+ u16 type;
+ u16 flags;
+ __be16 sport;
+ __be16 dport;
+};
+
/* Flags for ip_tunnel_info mode. */
#define IP_TUNNEL_INFO_TX 0x01 /* represents tx tunnel parameters */
#define IP_TUNNEL_INFO_IPV6 0x02 /* key contains IPv6 addresses */
@@ -75,6 +82,7 @@ struct ip_tunnel_key {
struct ip_tunnel_info {
struct ip_tunnel_key key;
+ struct ip_tunnel_encap encap;
#ifdef CONFIG_DST_CACHE
struct dst_cache dst_cache;
#endif
@@ -92,13 +100,6 @@ struct ip_tunnel_6rd_parm {
};
#endif
-struct ip_tunnel_encap {
- u16 type;
- u16 flags;
- __be16 sport;
- __be16 dport;
-};
-
struct ip_tunnel_prl_entry {
struct ip_tunnel_prl_entry __rcu *next;
__be32 addr;
@@ -299,6 +300,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
__be32 remote, __be32 local,
__be32 key);
+void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info);
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
bool log_ecn_error);
@@ -377,22 +379,23 @@ static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
return hlen;
}
-static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
+static inline int ip_tunnel_encap(struct sk_buff *skb,
+ struct ip_tunnel_encap *e,
u8 *protocol, struct flowi4 *fl4)
{
const struct ip_tunnel_encap_ops *ops;
int ret = -EINVAL;
- if (t->encap.type == TUNNEL_ENCAP_NONE)
+ if (e->type == TUNNEL_ENCAP_NONE)
return 0;
- if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
+ if (e->type >= MAX_IPTUN_ENCAP_OPS)
return -EINVAL;
rcu_read_lock();
- ops = rcu_dereference(iptun_encaps[t->encap.type]);
+ ops = rcu_dereference(iptun_encaps[e->type]);
if (likely(ops && ops->build_header))
- ret = ops->build_header(skb, &t->encap, protocol, fl4);
+ ret = ops->build_header(skb, e, protocol, fl4);
rcu_read_unlock();
return ret;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a0a91a988272..04a31643cda3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1117,6 +1117,9 @@ struct tcp_congestion_ops {
int tcp_register_congestion_control(struct tcp_congestion_ops *type);
void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
+int tcp_update_congestion_control(struct tcp_congestion_ops *type,
+ struct tcp_congestion_ops *old_type);
+int tcp_validate_congestion_control(struct tcp_congestion_ops *ca);
void tcp_assign_congestion_control(struct sock *sk);
void tcp_init_congestion_control(struct sock *sk);
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 76aa748e7923..d1c5381fc95f 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -318,35 +318,6 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
struct xdp_frame_bulk *bq);
-/* When sending xdp_frame into the network stack, then there is no
- * return point callback, which is needed to release e.g. DMA-mapping
- * resources with page_pool. Thus, have explicit function to release
- * frame resources.
- */
-void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
-static inline void xdp_release_frame(struct xdp_frame *xdpf)
-{
- struct xdp_mem_info *mem = &xdpf->mem;
- struct skb_shared_info *sinfo;
- int i;
-
- /* Curr only page_pool needs this */
- if (mem->type != MEM_TYPE_PAGE_POOL)
- return;
-
- if (likely(!xdp_frame_has_frags(xdpf)))
- goto out;
-
- sinfo = xdp_get_shared_info_from_frame(xdpf);
- for (i = 0; i < sinfo->nr_frags; i++) {
- struct page *page = skb_frag_page(&sinfo->frags[i]);
-
- __xdp_release_frame(page_address(page), mem);
- }
-out:
- __xdp_release_frame(xdpf->data, mem);
-}
-
static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
{
struct skb_shared_info *sinfo;
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 3e952e569418..d318c769b445 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -180,13 +180,8 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
if (likely(!cross_pg))
return false;
- if (pool->dma_pages_cnt) {
- return !(pool->dma_pages[addr >> PAGE_SHIFT] &
- XSK_NEXT_PG_CONTIG_MASK);
- }
-
- /* skb path */
- return addr + len > pool->addrs_cnt;
+ return pool->dma_pages_cnt &&
+ !(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK);
}
static inline u64 xp_aligned_extract_addr(struct xsk_buff_pool *pool, u64 addr)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 976b194eb775..3823100b7934 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1033,6 +1033,7 @@ enum bpf_attach_type {
BPF_PERF_EVENT,
BPF_TRACE_KPROBE_MULTI,
BPF_LSM_CGROUP,
+ BPF_STRUCT_OPS,
__MAX_BPF_ATTACH_TYPE
};
@@ -1108,7 +1109,7 @@ enum bpf_link_type {
*/
#define BPF_F_STRICT_ALIGNMENT (1U << 0)
-/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the
+/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the
* verifier will allow any alignment whatsoever. On platforms
* with strict alignment requirements for loads and stores (such
* as sparc and mips) the verifier validates that all loads and
@@ -1266,6 +1267,9 @@ enum {
/* Create a map that is suitable to be an inner map with dynamic max entries */
BPF_F_INNER_MAP = (1U << 12),
+
+/* Create a map that will be registered/unregistered by the backing bpf_link */
+ BPF_F_LINK = (1U << 13),
};
/* Flags for BPF_PROG_QUERY. */
@@ -1403,6 +1407,11 @@ union bpf_attr {
__aligned_u64 fd_array; /* array of FDs */
__aligned_u64 core_relos;
__u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */
+ /* output: actual total log contents size (including terminating zero).
+ * It could be both larger than original log_size (if log was
+ * truncated), or smaller (if log buffer wasn't filled completely).
+ */
+ __u32 log_true_size;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -1488,6 +1497,11 @@ union bpf_attr {
__u32 btf_size;
__u32 btf_log_size;
__u32 btf_log_level;
+ /* output: actual total log contents size (including terminating zero).
+ * It could be both larger than original btf_log_size (if log was
+ * truncated), or smaller (if log buffer wasn't filled completely).
+ */
+ __u32 btf_log_true_size;
};
struct {
@@ -1507,7 +1521,10 @@ union bpf_attr {
} task_fd_query;
struct { /* struct used by BPF_LINK_CREATE command */
- __u32 prog_fd; /* eBPF program to attach */
+ union {
+ __u32 prog_fd; /* eBPF program to attach */
+ __u32 map_fd; /* struct_ops to attach */
+ };
union {
__u32 target_fd; /* object to attach to */
__u32 target_ifindex; /* target ifindex */
@@ -1548,12 +1565,23 @@ union bpf_attr {
struct { /* struct used by BPF_LINK_UPDATE command */
__u32 link_fd; /* link fd */
- /* new program fd to update link with */
- __u32 new_prog_fd;
+ union {
+ /* new program fd to update link with */
+ __u32 new_prog_fd;
+ /* new struct_ops map fd to update link with */
+ __u32 new_map_fd;
+ };
__u32 flags; /* extra flags */
- /* expected link's program fd; is specified only if
- * BPF_F_REPLACE flag is set in flags */
- __u32 old_prog_fd;
+ union {
+ /* expected link's program fd; is specified only if
+ * BPF_F_REPLACE flag is set in flags.
+ */
+ __u32 old_prog_fd;
+ /* expected link's map fd; is specified only
+ * if BPF_F_REPLACE flag is set.
+ */
+ __u32 old_map_fd;
+ };
} link_update;
struct {
@@ -1647,17 +1675,17 @@ union bpf_attr {
* Description
* This helper is a "printk()-like" facility for debugging. It
* prints a message defined by format *fmt* (of size *fmt_size*)
- * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * to file *\/sys/kernel/tracing/trace* from TraceFS, if
* available. It can take up to three additional **u64**
* arguments (as with all eBPF helpers, the total number of arguments is
* limited to five).
*
* Each time the helper is called, it appends a line to the trace.
- * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
- * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this.
+ * Lines are discarded while *\/sys/kernel/tracing/trace* is
+ * open, use *\/sys/kernel/tracing/trace_pipe* to avoid this.
* The format of the trace is customizable, and the exact output
* one will get depends on the options set in
- * *\/sys/kernel/debug/tracing/trace_options* (see also the
+ * *\/sys/kernel/tracing/trace_options* (see also the
* *README* file under the same directory). However, it usually
* defaults to something like:
*
@@ -6379,6 +6407,9 @@ struct bpf_link_info {
struct {
__u32 ifindex;
} xdp;
+ struct {
+ __u32 map_id;
+ } struct_ops;
};
} __attribute__((aligned(8)));
@@ -7112,4 +7143,12 @@ enum {
BPF_F_TIMER_ABS = (1ULL << 0),
};
+/* BPF numbers iterator state */
+struct bpf_iter_num {
+ /* opaque iterator state; having __u64 here allows to preserve correct
+ * alignment requirements in vmlinux.h, generated from BTF
+ */
+ __u64 __opaque[1];
+} __attribute__((aligned(8)));
+
#endif /* _UAPI__LINUX_BPF_H__ */