Diffstat (limited to 'kernel/bpf')
-rw-r--r--  kernel/bpf/arraymap.c           |  30
-rw-r--r--  kernel/bpf/bpf_local_storage.c  |  20
-rw-r--r--  kernel/bpf/bpf_lsm.c            |  52
-rw-r--r--  kernel/bpf/bpf_struct_ops.c     |  19
-rw-r--r--  kernel/bpf/btf.c                |  70
-rw-r--r--  kernel/bpf/core.c               |  23
-rw-r--r--  kernel/bpf/cpumap.c             |  37
-rw-r--r--  kernel/bpf/devmap.c             |  25
-rw-r--r--  kernel/bpf/hashtab.c            |  43
-rw-r--r--  kernel/bpf/helpers.c            |  13
-rw-r--r--  kernel/bpf/local_storage.c      |  44
-rw-r--r--  kernel/bpf/lpm_trie.c           |  19
-rw-r--r--  kernel/bpf/queue_stack_maps.c   |  16
-rw-r--r--  kernel/bpf/reuseport_array.c    |  12
-rw-r--r--  kernel/bpf/ringbuf.c            |  35
-rw-r--r--  kernel/bpf/stackmap.c           |  16
-rw-r--r--  kernel/bpf/syscall.c            | 310
-rw-r--r--  kernel/bpf/task_iter.c          |  54
-rw-r--r--  kernel/bpf/verifier.c           | 256
19 files changed, 514 insertions(+), 580 deletions(-)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index c6c81eceb68f..1f8453343bf2 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -34,8 +34,8 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
int i;
for (i = 0; i < array->map.max_entries; i++) {
- ptr = __alloc_percpu_gfp(array->elem_size, 8,
- GFP_USER | __GFP_NOWARN);
+ ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8,
+ GFP_USER | __GFP_NOWARN);
if (!ptr) {
bpf_array_free_percpu(array);
return -ENOMEM;
@@ -81,11 +81,10 @@ int array_map_alloc_check(union bpf_attr *attr)
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
- int ret, numa_node = bpf_map_attr_numa_node(attr);
+ int numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool bypass_spec_v1 = bpf_bypass_spec_v1();
- u64 cost, array_size, mask64;
- struct bpf_map_memory mem;
+ u64 array_size, mask64;
struct bpf_array *array;
elem_size = round_up(attr->value_size, 8);
@@ -126,44 +125,29 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
}
}
- /* make sure there is no u32 overflow later in round_up() */
- cost = array_size;
- if (percpu)
- cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
-
- ret = bpf_map_charge_init(&mem, cost);
- if (ret < 0)
- return ERR_PTR(ret);
-
/* allocate all map elements and zero-initialize them */
if (attr->map_flags & BPF_F_MMAPABLE) {
void *data;
/* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
data = bpf_map_area_mmapable_alloc(array_size, numa_node);
- if (!data) {
- bpf_map_charge_finish(&mem);
+ if (!data)
return ERR_PTR(-ENOMEM);
- }
array = data + PAGE_ALIGN(sizeof(struct bpf_array))
- offsetof(struct bpf_array, value);
} else {
array = bpf_map_area_alloc(array_size, numa_node);
}
- if (!array) {
- bpf_map_charge_finish(&mem);
+ if (!array)
return ERR_PTR(-ENOMEM);
- }
array->index_mask = index_mask;
array->map.bypass_spec_v1 = bypass_spec_v1;
/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
- bpf_map_charge_move(&array->map.memory, &mem);
array->elem_size = elem_size;
if (percpu && bpf_array_alloc_percpu(array)) {
- bpf_map_charge_finish(&array->map.memory);
bpf_map_area_free(array);
return ERR_PTR(-ENOMEM);
}
@@ -1018,7 +1002,7 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
struct bpf_array_aux *aux;
struct bpf_map *map;
- aux = kzalloc(sizeof(*aux), GFP_KERNEL);
+ aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT);
if (!aux)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 5d3a7af9ba9b..dd5aedee99e7 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -67,7 +67,8 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
if (charge_mem && mem_charge(smap, owner, smap->elem_size))
return NULL;
- selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+ selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
+ GFP_ATOMIC | __GFP_NOWARN);
if (selem) {
if (value)
memcpy(SDATA(selem)->data, value, smap->map.value_size);
@@ -264,7 +265,8 @@ int bpf_local_storage_alloc(void *owner,
if (err)
return err;
- storage = kzalloc(sizeof(*storage), GFP_ATOMIC | __GFP_NOWARN);
+ storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
+ GFP_ATOMIC | __GFP_NOWARN);
if (!storage) {
err = -ENOMEM;
goto uncharge;
@@ -543,10 +545,8 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
struct bpf_local_storage_map *smap;
unsigned int i;
u32 nbuckets;
- u64 cost;
- int ret;
- smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN);
+ smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
if (!smap)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&smap->map, attr);
@@ -555,18 +555,10 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
nbuckets = max_t(u32, 2, nbuckets);
smap->bucket_log = ilog2(nbuckets);
- cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
-
- ret = bpf_map_charge_init(&smap->map.memory, cost);
- if (ret < 0) {
- kfree(smap);
- return ERR_PTR(ret);
- }
smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
- GFP_USER | __GFP_NOWARN);
+ GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
if (!smap->buckets) {
- bpf_map_charge_finish(&smap->map.memory);
kfree(smap);
return ERR_PTR(-ENOMEM);
}
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 553107f4706a..70e5e0b6d69d 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -7,6 +7,7 @@
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/btf.h>
+#include <linux/binfmts.h>
#include <linux/lsm_hooks.h>
#include <linux/bpf_lsm.h>
#include <linux/kallsyms.h>
@@ -14,6 +15,7 @@
#include <net/bpf_sk_storage.h>
#include <linux/bpf_local_storage.h>
#include <linux/btf_ids.h>
+#include <linux/ima.h>
/* For every LSM hook that allows attachment of BPF programs, declare a nop
* function where a BPF program can be attached.
@@ -51,6 +53,52 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
return 0;
}
+/* Mask for all the currently supported BPRM option flags */
+#define BPF_F_BRPM_OPTS_MASK BPF_F_BPRM_SECUREEXEC
+
+BPF_CALL_2(bpf_bprm_opts_set, struct linux_binprm *, bprm, u64, flags)
+{
+ if (flags & ~BPF_F_BRPM_OPTS_MASK)
+ return -EINVAL;
+
+ bprm->secureexec = (flags & BPF_F_BPRM_SECUREEXEC);
+ return 0;
+}
+
+BTF_ID_LIST_SINGLE(bpf_bprm_opts_set_btf_ids, struct, linux_binprm)
+
+const static struct bpf_func_proto bpf_bprm_opts_set_proto = {
+ .func = bpf_bprm_opts_set,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_bprm_opts_set_btf_ids[0],
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_ima_inode_hash, struct inode *, inode, void *, dst, u32, size)
+{
+ return ima_inode_hash(inode, dst, size);
+}
+
+static bool bpf_ima_inode_hash_allowed(const struct bpf_prog *prog)
+{
+ return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id);
+}
+
+BTF_ID_LIST_SINGLE(bpf_ima_inode_hash_btf_ids, struct, inode)
+
+const static struct bpf_func_proto bpf_ima_inode_hash_proto = {
+ .func = bpf_ima_inode_hash,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_ima_inode_hash_btf_ids[0],
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .allowed = bpf_ima_inode_hash_allowed,
+};
+
static const struct bpf_func_proto *
bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -71,6 +119,10 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_task_storage_get_proto;
case BPF_FUNC_task_storage_delete:
return &bpf_task_storage_delete_proto;
+ case BPF_FUNC_bprm_opts_set:
+ return &bpf_bprm_opts_set_proto;
+ case BPF_FUNC_ima_inode_hash:
+ return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL;
default:
return tracing_prog_func_proto(func_id, prog);
}
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 4c3b543bb33b..1a666a975416 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -548,12 +548,10 @@ static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
{
const struct bpf_struct_ops *st_ops;
- size_t map_total_size, st_map_size;
+ size_t st_map_size;
struct bpf_struct_ops_map *st_map;
const struct btf_type *t, *vt;
- struct bpf_map_memory mem;
struct bpf_map *map;
- int err;
if (!bpf_capable())
return ERR_PTR(-EPERM);
@@ -573,20 +571,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
* struct bpf_struct_ops_tcp_congestions_ops
*/
(vt->size - sizeof(struct bpf_struct_ops_value));
- map_total_size = st_map_size +
- /* uvalue */
- sizeof(vt->size) +
- /* struct bpf_progs **progs */
- btf_type_vlen(t) * sizeof(struct bpf_prog *);
- err = bpf_map_charge_init(&mem, map_total_size);
- if (err < 0)
- return ERR_PTR(err);
st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
- if (!st_map) {
- bpf_map_charge_finish(&mem);
+ if (!st_map)
return ERR_PTR(-ENOMEM);
- }
+
st_map->st_ops = st_ops;
map = &st_map->map;
@@ -597,14 +586,12 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
if (!st_map->uvalue || !st_map->progs || !st_map->image) {
bpf_struct_ops_map_free(map);
- bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
}
mutex_init(&st_map->lock);
set_vm_flush_reset_perms(st_map->image);
bpf_map_init_from_attr(map, attr);
- bpf_map_charge_move(&map->memory, &mem);
return map;
}
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 6b2d508b33d4..8d6bdb4f4d61 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -1524,6 +1524,11 @@ static void btf_free_rcu(struct rcu_head *rcu)
btf_free(btf);
}
+void btf_get(struct btf *btf)
+{
+ refcount_inc(&btf->refcnt);
+}
+
void btf_put(struct btf *btf)
{
if (btf && refcount_dec_and_test(&btf->refcnt)) {
@@ -4555,11 +4560,10 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
{
struct bpf_prog *tgt_prog = prog->aux->dst_prog;
- if (tgt_prog) {
+ if (tgt_prog)
return tgt_prog->aux->btf;
- } else {
- return btf_vmlinux;
- }
+ else
+ return prog->aux->attach_btf;
}
static bool is_string_ptr(struct btf *btf, const struct btf_type *t)
@@ -4700,6 +4704,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
if (ctx_arg_info->offset == off) {
info->reg_type = ctx_arg_info->reg_type;
+ info->btf = btf_vmlinux;
info->btf_id = ctx_arg_info->btf_id;
return true;
}
@@ -4716,6 +4721,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
ret = btf_translate_to_vmlinux(log, btf, t, tgt_type, arg);
if (ret > 0) {
+ info->btf = btf_vmlinux;
info->btf_id = ret;
return true;
} else {
@@ -4723,6 +4729,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
}
+ info->btf = btf;
info->btf_id = t->type;
t = btf_type_by_id(btf, t->type);
/* skip modifiers */
@@ -4749,7 +4756,7 @@ enum bpf_struct_walk_result {
WALK_STRUCT,
};
-static int btf_struct_walk(struct bpf_verifier_log *log,
+static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, int off, int size,
u32 *next_btf_id)
{
@@ -4760,7 +4767,7 @@ static int btf_struct_walk(struct bpf_verifier_log *log,
u32 vlen, elem_id, mid;
again:
- tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
+ tname = __btf_name_by_offset(btf, t->name_off);
if (!btf_type_is_struct(t)) {
bpf_log(log, "Type '%s' is not a struct\n", tname);
return -EINVAL;
@@ -4777,7 +4784,7 @@ again:
goto error;
member = btf_type_member(t) + vlen - 1;
- mtype = btf_type_skip_modifiers(btf_vmlinux, member->type,
+ mtype = btf_type_skip_modifiers(btf, member->type,
NULL);
if (!btf_type_is_array(mtype))
goto error;
@@ -4793,7 +4800,7 @@ again:
/* Only allow structure for now, can be relaxed for
* other types later.
*/
- t = btf_type_skip_modifiers(btf_vmlinux, array_elem->type,
+ t = btf_type_skip_modifiers(btf, array_elem->type,
NULL);
if (!btf_type_is_struct(t))
goto error;
@@ -4851,10 +4858,10 @@ error:
/* type of the field */
mid = member->type;
- mtype = btf_type_by_id(btf_vmlinux, member->type);
- mname = __btf_name_by_offset(btf_vmlinux, member->name_off);
+ mtype = btf_type_by_id(btf, member->type);
+ mname = __btf_name_by_offset(btf, member->name_off);
- mtype = __btf_resolve_size(btf_vmlinux, mtype, &msize,
+ mtype = __btf_resolve_size(btf, mtype, &msize,
&elem_type, &elem_id, &total_nelems,
&mid);
if (IS_ERR(mtype)) {
@@ -4949,7 +4956,7 @@ error:
mname, moff, tname, off, size);
return -EACCES;
}
- stype = btf_type_skip_modifiers(btf_vmlinux, mtype->type, &id);
+ stype = btf_type_skip_modifiers(btf, mtype->type, &id);
if (btf_type_is_struct(stype)) {
*next_btf_id = id;
return WALK_PTR;
@@ -4975,7 +4982,7 @@ error:
return -EINVAL;
}
-int btf_struct_access(struct bpf_verifier_log *log,
+int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, int off, int size,
enum bpf_access_type atype __maybe_unused,
u32 *next_btf_id)
@@ -4984,7 +4991,7 @@ int btf_struct_access(struct bpf_verifier_log *log,
u32 id;
do {
- err = btf_struct_walk(log, t, off, size, &id);
+ err = btf_struct_walk(log, btf, t, off, size, &id);
switch (err) {
case WALK_PTR:
@@ -5000,7 +5007,7 @@ int btf_struct_access(struct bpf_verifier_log *log,
* by diving in it. At this point the offset is
* aligned with the new type, so set it to 0.
*/
- t = btf_type_by_id(btf_vmlinux, id);
+ t = btf_type_by_id(btf, id);
off = 0;
break;
default:
@@ -5016,21 +5023,37 @@ int btf_struct_access(struct bpf_verifier_log *log,
return -EINVAL;
}
+/* Check that two BTF types, each specified as an BTF object + id, are exactly
+ * the same. Trivial ID check is not enough due to module BTFs, because we can
+ * end up with two different module BTFs, but IDs point to the common type in
+ * vmlinux BTF.
+ */
+static bool btf_types_are_same(const struct btf *btf1, u32 id1,
+ const struct btf *btf2, u32 id2)
+{
+ if (id1 != id2)
+ return false;
+ if (btf1 == btf2)
+ return true;
+ return btf_type_by_id(btf1, id1) == btf_type_by_id(btf2, id2);
+}
+
bool btf_struct_ids_match(struct bpf_verifier_log *log,
- int off, u32 id, u32 need_type_id)
+ const struct btf *btf, u32 id, int off,
+ const struct btf *need_btf, u32 need_type_id)
{
const struct btf_type *type;
int err;
/* Are we already done? */
- if (need_type_id == id && off == 0)
+ if (off == 0 && btf_types_are_same(btf, id, need_btf, need_type_id))
return true;
again:
- type = btf_type_by_id(btf_vmlinux, id);
+ type = btf_type_by_id(btf, id);
if (!type)
return false;
- err = btf_struct_walk(log, type, off, 1, &id);
+ err = btf_struct_walk(log, btf, type, off, 1, &id);
if (err != WALK_STRUCT)
return false;
@@ -5039,7 +5062,7 @@ again:
* continue the search with offset 0 in the new
* type.
*/
- if (need_type_id != id) {
+ if (!btf_types_are_same(btf, id, need_btf, need_type_id)) {
off = 0;
goto again;
}
@@ -5710,11 +5733,16 @@ int btf_get_fd_by_id(u32 id)
return fd;
}
-u32 btf_id(const struct btf *btf)
+u32 btf_obj_id(const struct btf *btf)
{
return btf->id;
}
+bool btf_is_kernel(const struct btf *btf)
+{
+ return btf->kernel_btf;
+}
+
static int btf_id_cmp_func(const void *a, const void *b)
{
const int *pa = a, *pb = b;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 55454d2278b1..261f8692d0d2 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -77,7 +77,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
+ gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog_aux *aux;
struct bpf_prog *fp;
@@ -86,7 +86,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
if (fp == NULL)
return NULL;
- aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags);
+ aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT | gfp_extra_flags);
if (aux == NULL) {
vfree(fp);
return NULL;
@@ -106,7 +106,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
+ gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog *prog;
int cpu;
@@ -138,7 +138,7 @@ int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
prog->aux->jited_linfo = kcalloc(prog->aux->nr_linfo,
sizeof(*prog->aux->jited_linfo),
- GFP_KERNEL | __GFP_NOWARN);
+ GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!prog->aux->jited_linfo)
return -ENOMEM;
@@ -219,25 +219,17 @@ void bpf_prog_free_linfo(struct bpf_prog *prog)
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
+ gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog *fp;
- u32 pages, delta;
- int ret;
+ u32 pages;
size = round_up(size, PAGE_SIZE);
pages = size / PAGE_SIZE;
if (pages <= fp_old->pages)
return fp_old;
- delta = pages - fp_old->pages;
- ret = __bpf_prog_charge(fp_old->aux->user, delta);
- if (ret)
- return NULL;
-
fp = __vmalloc(size, gfp_flags);
- if (fp == NULL) {
- __bpf_prog_uncharge(fp_old->aux->user, delta);
- } else {
+ if (fp) {
memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
fp->pages = pages;
fp->aux->prog = fp;
@@ -2211,6 +2203,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak;
+const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak;
const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index c61a23b564aa..747313698178 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -84,8 +84,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
u32 value_size = attr->value_size;
struct bpf_cpu_map *cmap;
int err = -ENOMEM;
- u64 cost;
- int ret;
if (!bpf_capable())
return ERR_PTR(-EPERM);
@@ -97,7 +95,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
attr->map_flags & ~BPF_F_NUMA_NODE)
return ERR_PTR(-EINVAL);
- cmap = kzalloc(sizeof(*cmap), GFP_USER);
+ cmap = kzalloc(sizeof(*cmap), GFP_USER | __GFP_ACCOUNT);
if (!cmap)
return ERR_PTR(-ENOMEM);
@@ -109,26 +107,14 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
goto free_cmap;
}
- /* make sure page count doesn't overflow */
- cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
-
- /* Notice returns -EPERM on if map size is larger than memlock limit */
- ret = bpf_map_charge_init(&cmap->map.memory, cost);
- if (ret) {
- err = ret;
- goto free_cmap;
- }
-
/* Alloc array for possible remote "destination" CPUs */
cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
sizeof(struct bpf_cpu_map_entry *),
cmap->map.numa_node);
if (!cmap->cpu_map)
- goto free_charge;
+ goto free_cmap;
return &cmap->map;
-free_charge:
- bpf_map_charge_finish(&cmap->map.memory);
free_cmap:
kfree(cmap);
return ERR_PTR(err);
@@ -412,7 +398,8 @@ static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
}
static struct bpf_cpu_map_entry *
-__cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
+__cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
+ u32 cpu)
{
int numa, err, i, fd = value->bpf_prog.fd;
gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
@@ -422,13 +409,13 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
/* Have map->numa_node, but choose node of redirect target CPU */
numa = cpu_to_node(cpu);
- rcpu = kzalloc_node(sizeof(*rcpu), gfp, numa);
+ rcpu = bpf_map_kmalloc_node(map, sizeof(*rcpu), gfp | __GFP_ZERO, numa);
if (!rcpu)
return NULL;
/* Alloc percpu bulkq */
- rcpu->bulkq = __alloc_percpu_gfp(sizeof(*rcpu->bulkq),
- sizeof(void *), gfp);
+ rcpu->bulkq = bpf_map_alloc_percpu(map, sizeof(*rcpu->bulkq),
+ sizeof(void *), gfp);
if (!rcpu->bulkq)
goto free_rcu;
@@ -438,7 +425,8 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
}
/* Alloc queue */
- rcpu->queue = kzalloc_node(sizeof(*rcpu->queue), gfp, numa);
+ rcpu->queue = bpf_map_kmalloc_node(map, sizeof(*rcpu->queue), gfp,
+ numa);
if (!rcpu->queue)
goto free_bulkq;
@@ -447,7 +435,7 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
goto free_queue;
rcpu->cpu = cpu;
- rcpu->map_id = map_id;
+ rcpu->map_id = map->id;
rcpu->value.qsize = value->qsize;
if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd))
@@ -455,7 +443,8 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
/* Setup kthread */
rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
- "cpumap/%d/map:%d", cpu, map_id);
+ "cpumap/%d/map:%d", cpu,
+ map->id);
if (IS_ERR(rcpu->kthread))
goto free_prog;
@@ -571,7 +560,7 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
rcpu = NULL; /* Same as deleting */
} else {
/* Updating qsize cause re-allocation of bpf_cpu_map_entry */
- rcpu = __cpu_map_entry_alloc(&cpumap_value, key_cpu, map->id);
+ rcpu = __cpu_map_entry_alloc(map, &cpumap_value, key_cpu);
if (!rcpu)
return -ENOMEM;
rcpu->cmap = cmap;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 2b5ca93c17de..f6e9c68afdd4 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -109,8 +109,6 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
{
u32 valsize = attr->value_size;
- u64 cost = 0;
- int err;
/* check sanity of attributes. 2 value sizes supported:
* 4 bytes: ifindex
@@ -135,21 +133,13 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
if (!dtab->n_buckets) /* Overflow check */
return -EINVAL;
- cost += (u64) sizeof(struct hlist_head) * dtab->n_buckets;
- } else {
- cost += (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
}
- /* if map size is larger than memlock limit, reject it */
- err = bpf_map_charge_init(&dtab->map.memory, cost);
- if (err)
- return -EINVAL;
-
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets,
dtab->map.numa_node);
if (!dtab->dev_index_head)
- goto free_charge;
+ return -ENOMEM;
spin_lock_init(&dtab->index_lock);
} else {
@@ -157,14 +147,10 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
sizeof(struct bpf_dtab_netdev *),
dtab->map.numa_node);
if (!dtab->netdev_map)
- goto free_charge;
+ return -ENOMEM;
}
return 0;
-
-free_charge:
- bpf_map_charge_finish(&dtab->map.memory);
- return -ENOMEM;
}
static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
@@ -175,7 +161,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
- dtab = kzalloc(sizeof(*dtab), GFP_USER);
+ dtab = kzalloc(sizeof(*dtab), GFP_USER | __GFP_ACCOUNT);
if (!dtab)
return ERR_PTR(-ENOMEM);
@@ -602,8 +588,9 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
struct bpf_prog *prog = NULL;
struct bpf_dtab_netdev *dev;
- dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
- dtab->map.numa_node);
+ dev = bpf_map_kmalloc_node(&dtab->map, sizeof(*dev),
+ GFP_ATOMIC | __GFP_NOWARN,
+ dtab->map.numa_node);
if (!dev)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index ec46266aaf1c..fe7a0733a63a 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -292,7 +292,8 @@ static int prealloc_init(struct bpf_htab *htab)
u32 size = round_up(htab->map.value_size, 8);
void __percpu *pptr;
- pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN);
+ pptr = bpf_map_alloc_percpu(&htab->map, size, 8,
+ GFP_USER | __GFP_NOWARN);
if (!pptr)
goto free_elems;
htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
@@ -346,8 +347,8 @@ static int alloc_extra_elems(struct bpf_htab *htab)
struct pcpu_freelist_node *l;
int cpu;
- pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8,
- GFP_USER | __GFP_NOWARN);
+ pptr = bpf_map_alloc_percpu(&htab->map, sizeof(struct htab_elem *), 8,
+ GFP_USER | __GFP_NOWARN);
if (!pptr)
return -ENOMEM;
@@ -442,9 +443,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
struct bpf_htab *htab;
int err, i;
- u64 cost;
- htab = kzalloc(sizeof(*htab), GFP_USER);
+ htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT);
if (!htab)
return ERR_PTR(-ENOMEM);
@@ -480,30 +480,18 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
htab->n_buckets > U32_MAX / sizeof(struct bucket))
goto free_htab;
- cost = (u64) htab->n_buckets * sizeof(struct bucket) +
- (u64) htab->elem_size * htab->map.max_entries;
-
- if (percpu)
- cost += (u64) round_up(htab->map.value_size, 8) *
- num_possible_cpus() * htab->map.max_entries;
- else
- cost += (u64) htab->elem_size * num_possible_cpus();
-
- /* if map size is larger than memlock limit, reject it */
- err = bpf_map_charge_init(&htab->map.memory, cost);
- if (err)
- goto free_htab;
-
err = -ENOMEM;
htab->buckets = bpf_map_area_alloc(htab->n_buckets *
sizeof(struct bucket),
htab->map.numa_node);
if (!htab->buckets)
- goto free_charge;
+ goto free_htab;
for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) {
- htab->map_locked[i] = __alloc_percpu_gfp(sizeof(int),
- sizeof(int), GFP_USER);
+ htab->map_locked[i] = bpf_map_alloc_percpu(&htab->map,
+ sizeof(int),
+ sizeof(int),
+ GFP_USER);
if (!htab->map_locked[i])
goto free_map_locked;
}
@@ -538,8 +526,6 @@ free_map_locked:
for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
free_percpu(htab->map_locked[i]);
bpf_map_area_free(htab->buckets);
-free_charge:
- bpf_map_charge_finish(&htab->map.memory);
free_htab:
lockdep_unregister_key(&htab->lockdep_key);
kfree(htab);
@@ -925,8 +911,9 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
l_new = ERR_PTR(-E2BIG);
goto dec_count;
}
- l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
- htab->map.numa_node);
+ l_new = bpf_map_kmalloc_node(&htab->map, htab->elem_size,
+ GFP_ATOMIC | __GFP_NOWARN,
+ htab->map.numa_node);
if (!l_new) {
l_new = ERR_PTR(-ENOMEM);
goto dec_count;
@@ -942,8 +929,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
pptr = htab_elem_get_ptr(l_new, key_size);
} else {
/* alloc_percpu zero-fills */
- pptr = __alloc_percpu_gfp(size, 8,
- GFP_ATOMIC | __GFP_NOWARN);
+ pptr = bpf_map_alloc_percpu(&htab->map, size, 8,
+ GFP_ATOMIC | __GFP_NOWARN);
if (!pptr) {
kfree(l_new);
l_new = ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 25520f5eeaf6..2c395deae279 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -167,6 +167,17 @@ const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
.ret_type = RET_INTEGER,
};
+BPF_CALL_0(bpf_ktime_get_coarse_ns)
+{
+ return ktime_get_coarse_ns();
+}
+
+const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
+ .func = bpf_ktime_get_coarse_ns,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+};
+
BPF_CALL_0(bpf_get_current_pid_tgid)
{
struct task_struct *task = current;
@@ -685,6 +696,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_ktime_get_ns_proto;
case BPF_FUNC_ktime_get_boot_ns:
return &bpf_ktime_get_boot_ns_proto;
+ case BPF_FUNC_ktime_get_coarse_ns:
+ return &bpf_ktime_get_coarse_ns_proto;
case BPF_FUNC_ringbuf_output:
return &bpf_ringbuf_output_proto;
case BPF_FUNC_ringbuf_reserve:
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 571bb351ed3b..2d4f9ac12377 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -164,10 +164,10 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *key,
return 0;
}
- new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
- map->value_size,
- __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
- map->numa_node);
+ new = bpf_map_kmalloc_node(map, sizeof(struct bpf_storage_buffer) +
+ map->value_size,
+ __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
+ map->numa_node);
if (!new)
return -ENOMEM;
@@ -287,8 +287,6 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_cgroup_storage_map *map;
- struct bpf_map_memory mem;
- int ret;
if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) &&
attr->key_size != sizeof(__u64))
@@ -308,18 +306,10 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
/* max_entries is not used and enforced to be 0 */
return ERR_PTR(-EINVAL);
- ret = bpf_map_charge_init(&mem, sizeof(struct bpf_cgroup_storage_map));
- if (ret < 0)
- return ERR_PTR(ret);
-
map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
- __GFP_ZERO | GFP_USER, numa_node);
- if (!map) {
- bpf_map_charge_finish(&mem);
+ __GFP_ZERO | GFP_USER | __GFP_ACCOUNT, numa_node);
+ if (!map)
return ERR_PTR(-ENOMEM);
- }
-
- bpf_map_charge_move(&map->map.memory, &mem);
/* copy mandatory map attributes */
bpf_map_init_from_attr(&map->map, attr);
@@ -496,9 +486,9 @@ static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
enum bpf_cgroup_storage_type stype)
{
+ const gfp_t gfp = __GFP_ZERO | GFP_USER;
struct bpf_cgroup_storage *storage;
struct bpf_map *map;
- gfp_t flags;
size_t size;
u32 pages;
@@ -508,23 +498,19 @@ struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
size = bpf_cgroup_storage_calculate_size(map, &pages);
- if (bpf_map_charge_memlock(map, pages))
- return ERR_PTR(-EPERM);
-
- storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
- __GFP_ZERO | GFP_USER, map->numa_node);
+ storage = bpf_map_kmalloc_node(map, sizeof(struct bpf_cgroup_storage),
+ gfp, map->numa_node);
if (!storage)
goto enomem;
- flags = __GFP_ZERO | GFP_USER;
-
if (stype == BPF_CGROUP_STORAGE_SHARED) {
- storage->buf = kmalloc_node(size, flags, map->numa_node);
+ storage->buf = bpf_map_kmalloc_node(map, size, gfp,
+ map->numa_node);
if (!storage->buf)
goto enomem;
check_and_init_map_lock(map, storage->buf->data);
} else {
- storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
+ storage->percpu_buf = bpf_map_alloc_percpu(map, size, 8, gfp);
if (!storage->percpu_buf)
goto enomem;
}
@@ -534,7 +520,6 @@ struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
return storage;
enomem:
- bpf_map_uncharge_memlock(map, pages);
kfree(storage);
return ERR_PTR(-ENOMEM);
}
@@ -561,16 +546,11 @@ void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
enum bpf_cgroup_storage_type stype;
struct bpf_map *map;
- u32 pages;
if (!storage)
return;
map = &storage->map->map;
-
- bpf_cgroup_storage_calculate_size(map, &pages);
- bpf_map_uncharge_memlock(map, pages);
-
stype = cgroup_storage_type(map);
if (stype == BPF_CGROUP_STORAGE_SHARED)
call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 00e32f2ec3e6..cec792a17e5f 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -282,8 +282,8 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
if (value)
size += trie->map.value_size;
- node = kmalloc_node(size, GFP_ATOMIC | __GFP_NOWARN,
- trie->map.numa_node);
+ node = bpf_map_kmalloc_node(&trie->map, size, GFP_ATOMIC | __GFP_NOWARN,
+ trie->map.numa_node);
if (!node)
return NULL;
@@ -540,8 +540,6 @@ out:
static struct bpf_map *trie_alloc(union bpf_attr *attr)
{
struct lpm_trie *trie;
- u64 cost = sizeof(*trie), cost_per_node;
- int ret;
if (!bpf_capable())
return ERR_PTR(-EPERM);
@@ -557,7 +555,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
attr->value_size > LPM_VAL_SIZE_MAX)
return ERR_PTR(-EINVAL);
- trie = kzalloc(sizeof(*trie), GFP_USER | __GFP_NOWARN);
+ trie = kzalloc(sizeof(*trie), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
if (!trie)
return ERR_PTR(-ENOMEM);
@@ -567,20 +565,9 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
offsetof(struct bpf_lpm_trie_key, data);
trie->max_prefixlen = trie->data_size * 8;
- cost_per_node = sizeof(struct lpm_trie_node) +
- attr->value_size + trie->data_size;
- cost += (u64) attr->max_entries * cost_per_node;
-
- ret = bpf_map_charge_init(&trie->map.memory, cost);
- if (ret)
- goto out_err;
-
spin_lock_init(&trie->lock);
return &trie->map;
-out_err:
- kfree(trie);
- return ERR_PTR(ret);
}
static void trie_free(struct bpf_map *map)
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index 0ee2347ba510..f9c734aaa990 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -66,29 +66,21 @@ static int queue_stack_map_alloc_check(union bpf_attr *attr)
static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
{
- int ret, numa_node = bpf_map_attr_numa_node(attr);
- struct bpf_map_memory mem = {0};
+ int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_queue_stack *qs;
- u64 size, queue_size, cost;
+ u64 size, queue_size;
size = (u64) attr->max_entries + 1;
- cost = queue_size = sizeof(*qs) + size * attr->value_size;
-
- ret = bpf_map_charge_init(&mem, cost);
- if (ret < 0)
- return ERR_PTR(ret);
+ queue_size = sizeof(*qs) + size * attr->value_size;
qs = bpf_map_area_alloc(queue_size, numa_node);
- if (!qs) {
- bpf_map_charge_finish(&mem);
+ if (!qs)
return ERR_PTR(-ENOMEM);
- }
memset(qs, 0, sizeof(*qs));
bpf_map_init_from_attr(&qs->map, attr);
- bpf_map_charge_move(&qs->map.memory, &mem);
qs->size = size;
raw_spin_lock_init(&qs->lock);
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index a55cd542f2ce..4838922f723d 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -150,9 +150,8 @@ static void reuseport_array_free(struct bpf_map *map)
static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
{
- int err, numa_node = bpf_map_attr_numa_node(attr);
+ int numa_node = bpf_map_attr_numa_node(attr);
struct reuseport_array *array;
- struct bpf_map_memory mem;
u64 array_size;
if (!bpf_capable())
@@ -161,20 +160,13 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
array_size = sizeof(*array);
array_size += (u64)attr->max_entries * sizeof(struct sock *);
- err = bpf_map_charge_init(&mem, array_size);
- if (err)
- return ERR_PTR(err);
-
/* allocate all map elements and zero-initialize them */
array = bpf_map_area_alloc(array_size, numa_node);
- if (!array) {
- bpf_map_charge_finish(&mem);
+ if (!array)
return ERR_PTR(-ENOMEM);
- }
/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
- bpf_map_charge_move(&array->map.memory, &mem);
return &array->map;
}
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 31cb04a4dd2d..f25b719ac786 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -48,7 +48,6 @@ struct bpf_ringbuf {
struct bpf_ringbuf_map {
struct bpf_map map;
- struct bpf_map_memory memory;
struct bpf_ringbuf *rb;
};
@@ -60,8 +59,8 @@ struct bpf_ringbuf_hdr {
static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node)
{
- const gfp_t flags = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN |
- __GFP_ZERO;
+ const gfp_t flags = GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL |
+ __GFP_NOWARN | __GFP_ZERO;
int nr_meta_pages = RINGBUF_PGOFF + RINGBUF_POS_PAGES;
int nr_data_pages = data_sz >> PAGE_SHIFT;
int nr_pages = nr_meta_pages + nr_data_pages;
@@ -88,10 +87,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node)
* user-space implementations significantly.
*/
array_size = (nr_meta_pages + 2 * nr_data_pages) * sizeof(*pages);
- if (array_size > PAGE_SIZE)
- pages = vmalloc_node(array_size, numa_node);
- else
- pages = kmalloc_node(array_size, flags, numa_node);
+ pages = bpf_map_area_alloc(array_size, numa_node);
if (!pages)
return NULL;
@@ -134,7 +130,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
rb = bpf_ringbuf_area_alloc(data_sz, numa_node);
if (!rb)
- return ERR_PTR(-ENOMEM);
+ return NULL;
spin_lock_init(&rb->spinlock);
init_waitqueue_head(&rb->waitq);
@@ -150,8 +146,6 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr)
{
struct bpf_ringbuf_map *rb_map;
- u64 cost;
- int err;
if (attr->map_flags & ~RINGBUF_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
@@ -167,32 +161,19 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr)
return ERR_PTR(-E2BIG);
#endif
- rb_map = kzalloc(sizeof(*rb_map), GFP_USER);
+ rb_map = kzalloc(sizeof(*rb_map), GFP_USER | __GFP_ACCOUNT);
if (!rb_map)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&rb_map->map, attr);
- cost = sizeof(struct bpf_ringbuf_map) +
- sizeof(struct bpf_ringbuf) +
- attr->max_entries;
- err = bpf_map_charge_init(&rb_map->map.memory, cost);
- if (err)
- goto err_free_map;
-
rb_map->rb = bpf_ringbuf_alloc(attr->max_entries, rb_map->map.numa_node);
- if (IS_ERR(rb_map->rb)) {
- err = PTR_ERR(rb_map->rb);
- goto err_uncharge;
+ if (!rb_map->rb) {
+ kfree(rb_map);
+ return ERR_PTR(-ENOMEM);
}
return &rb_map->map;
-
-err_uncharge:
- bpf_map_charge_finish(&rb_map->map.memory);
-err_free_map:
- kfree(rb_map);
- return ERR_PTR(err);
}
static void bpf_ringbuf_free(struct bpf_ringbuf *rb)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 06065fa27124..3325add8e629 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -90,7 +90,6 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
{
u32 value_size = attr->value_size;
struct bpf_stack_map *smap;
- struct bpf_map_memory mem;
u64 cost, n_buckets;
int err;
@@ -119,15 +118,9 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
- err = bpf_map_charge_init(&mem, cost);
- if (err)
- return ERR_PTR(err);
-
smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
- if (!smap) {
- bpf_map_charge_finish(&mem);
+ if (!smap)
return ERR_PTR(-ENOMEM);
- }
bpf_map_init_from_attr(&smap->map, attr);
smap->map.value_size = value_size;
@@ -135,20 +128,17 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
err = get_callchain_buffers(sysctl_perf_event_max_stack);
if (err)
- goto free_charge;
+ goto free_smap;
err = prealloc_elems_and_freelist(smap);
if (err)
goto put_buffers;
- bpf_map_charge_move(&smap->map.memory, &mem);
-
return &smap->map;
put_buffers:
put_callchain_buffers();
-free_charge:
- bpf_map_charge_finish(&mem);
+free_smap:
bpf_map_area_free(smap);
return ERR_PTR(err);
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f3fe9f53f93c..0cd3cc2af9c1 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -31,6 +31,7 @@
#include <linux/poll.h>
#include <linux/bpf-netns.h>
#include <linux/rcupdate_trace.h>
+#include <linux/memcontrol.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -127,7 +128,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
return map;
}
-static u32 bpf_map_value_size(struct bpf_map *map)
+static u32 bpf_map_value_size(const struct bpf_map *map)
{
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
@@ -267,6 +268,10 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
return err;
}
+/* Please, do not use this function outside from the map creation path
+ * (e.g. in map update path) without taking care of setting the active
+ * memory cgroup (see at bpf_map_kmalloc_node() for example).
+ */
static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
{
/* We really just want to fail instead of triggering OOM killer
@@ -279,7 +284,7 @@ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
* __GFP_RETRY_MAYFAIL to avoid such situations.
*/
- const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO;
+ const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_ACCOUNT;
unsigned int flags = 0;
unsigned long align = 1;
void *area;
@@ -341,77 +346,6 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
map->numa_node = bpf_map_attr_numa_node(attr);
}
-static int bpf_charge_memlock(struct user_struct *user, u32 pages)
-{
- unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
- if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) {
- atomic_long_sub(pages, &user->locked_vm);
- return -EPERM;
- }
- return 0;
-}
-
-static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
-{
- if (user)
- atomic_long_sub(pages, &user->locked_vm);
-}
-
-int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size)
-{
- u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
- struct user_struct *user;
- int ret;
-
- if (size >= U32_MAX - PAGE_SIZE)
- return -E2BIG;
-
- user = get_current_user();
- ret = bpf_charge_memlock(user, pages);
- if (ret) {
- free_uid(user);
- return ret;
- }
-
- mem->pages = pages;
- mem->user = user;
-
- return 0;
-}
-
-void bpf_map_charge_finish(struct bpf_map_memory *mem)
-{
- bpf_uncharge_memlock(mem->user, mem->pages);
- free_uid(mem->user);
-}
-
-void bpf_map_charge_move(struct bpf_map_memory *dst,
- struct bpf_map_memory *src)
-{
- *dst = *src;
-
- /* Make sure src will not be used for the redundant uncharging. */
- memset(src, 0, sizeof(struct bpf_map_memory));
-}
-
-int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
-{
- int ret;
-
- ret = bpf_charge_memlock(map->memory.user, pages);
- if (ret)
- return ret;
- map->memory.pages += pages;
- return ret;
-}
-
-void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
-{
- bpf_uncharge_memlock(map->memory.user, pages);
- map->memory.pages -= pages;
-}
-
static int bpf_map_alloc_id(struct bpf_map *map)
{
int id;
@@ -456,17 +390,74 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
__release(&map_idr_lock);
}
+#ifdef CONFIG_MEMCG_KMEM
+static void bpf_map_save_memcg(struct bpf_map *map)
+{
+ map->memcg = get_mem_cgroup_from_mm(current->mm);
+}
+
+static void bpf_map_release_memcg(struct bpf_map *map)
+{
+ mem_cgroup_put(map->memcg);
+}
+
+void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
+ int node)
+{
+ struct mem_cgroup *old_memcg;
+ void *ptr;
+
+ old_memcg = set_active_memcg(map->memcg);
+ ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
+ set_active_memcg(old_memcg);
+
+ return ptr;
+}
+
+void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
+{
+ struct mem_cgroup *old_memcg;
+ void *ptr;
+
+ old_memcg = set_active_memcg(map->memcg);
+ ptr = kzalloc(size, flags | __GFP_ACCOUNT);
+ set_active_memcg(old_memcg);
+
+ return ptr;
+}
+
+void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
+ size_t align, gfp_t flags)
+{
+ struct mem_cgroup *old_memcg;
+ void __percpu *ptr;
+
+ old_memcg = set_active_memcg(map->memcg);
+ ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
+ set_active_memcg(old_memcg);
+
+ return ptr;
+}
+
+#else
+static void bpf_map_save_memcg(struct bpf_map *map)
+{
+}
+
+static void bpf_map_release_memcg(struct bpf_map *map)
+{
+}
+#endif
+
/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_map, work);
- struct bpf_map_memory mem;
- bpf_map_charge_move(&mem, &map->memory);
security_bpf_map_free(map);
+ bpf_map_release_memcg(map);
/* implementation dependent freeing */
map->ops->map_free(map);
- bpf_map_charge_finish(&mem);
}
static void bpf_map_put_uref(struct bpf_map *map)
@@ -527,6 +518,19 @@ static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
}
#ifdef CONFIG_PROC_FS
+/* Provides an approximation of the map's memory footprint.
+ * Used only to provide a backward compatibility and display
+ * a reasonable "memlock" info.
+ */
+static unsigned long bpf_map_memory_footprint(const struct bpf_map *map)
+{
+ unsigned long size;
+
+ size = round_up(map->key_size + bpf_map_value_size(map), 8);
+
+ return round_up(map->max_entries * size, PAGE_SIZE);
+}
+
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
const struct bpf_map *map = filp->private_data;
@@ -545,7 +549,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
"value_size:\t%u\n"
"max_entries:\t%u\n"
"map_flags:\t%#x\n"
- "memlock:\t%llu\n"
+ "memlock:\t%lu\n"
"map_id:\t%u\n"
"frozen:\t%u\n",
map->map_type,
@@ -553,7 +557,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
map->value_size,
map->max_entries,
map->map_flags,
- map->memory.pages * 1ULL << PAGE_SHIFT,
+ bpf_map_memory_footprint(map),
map->id,
READ_ONCE(map->frozen));
if (type) {
@@ -796,7 +800,6 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
static int map_create(union bpf_attr *attr)
{
int numa_node = bpf_map_attr_numa_node(attr);
- struct bpf_map_memory mem;
struct bpf_map *map;
int f_flags;
int err;
@@ -875,6 +878,8 @@ static int map_create(union bpf_attr *attr)
if (err)
goto free_map_sec;
+ bpf_map_save_memcg(map);
+
err = bpf_map_new_fd(map, f_flags);
if (err < 0) {
/* failed to allocate fd.
@@ -893,9 +898,7 @@ free_map_sec:
security_bpf_map_free(map);
free_map:
btf_put(map->btf);
- bpf_map_charge_move(&mem, &map->memory);
map->ops->map_free(map);
- bpf_map_charge_finish(&mem);
return err;
}
@@ -1629,51 +1632,6 @@ static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
audit_log_end(ab);
}
-int __bpf_prog_charge(struct user_struct *user, u32 pages)
-{
- unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- unsigned long user_bufs;
-
- if (user) {
- user_bufs = atomic_long_add_return(pages, &user->locked_vm);
- if (user_bufs > memlock_limit) {
- atomic_long_sub(pages, &user->locked_vm);
- return -EPERM;
- }
- }
-
- return 0;
-}
-
-void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
-{
- if (user)
- atomic_long_sub(pages, &user->locked_vm);
-}
-
-static int bpf_prog_charge_memlock(struct bpf_prog *prog)
-{
- struct user_struct *user = get_current_user();
- int ret;
-
- ret = __bpf_prog_charge(user, prog->pages);
- if (ret) {
- free_uid(user);
- return ret;
- }
-
- prog->aux->user = user;
- return 0;
-}
-
-static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
-{
- struct user_struct *user = prog->aux->user;
-
- __bpf_prog_uncharge(user, prog->pages);
- free_uid(user);
-}
-
static int bpf_prog_alloc_id(struct bpf_prog *prog)
{
int id;
@@ -1723,7 +1681,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
kvfree(aux->func_info);
kfree(aux->func_info_aux);
- bpf_prog_uncharge_memlock(aux->prog);
+ free_uid(aux->user);
security_bpf_prog_free(aux);
bpf_prog_free(aux->prog);
}
@@ -1733,6 +1691,8 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
bpf_prog_kallsyms_del_all(prog);
btf_put(prog->aux->btf);
bpf_prog_free_linfo(prog);
+ if (prog->aux->attach_btf)
+ btf_put(prog->aux->attach_btf);
if (deferred) {
if (prog->aux->sleepable)
@@ -1966,12 +1926,16 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
static int
bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
enum bpf_attach_type expected_attach_type,
- u32 btf_id, u32 prog_fd)
+ struct btf *attach_btf, u32 btf_id,
+ struct bpf_prog *dst_prog)
{
if (btf_id) {
if (btf_id > BTF_MAX_TYPE)
return -EINVAL;
+ if (!attach_btf && !dst_prog)
+ return -EINVAL;
+
switch (prog_type) {
case BPF_PROG_TYPE_TRACING:
case BPF_PROG_TYPE_LSM:
@@ -1983,7 +1947,10 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
}
}
- if (prog_fd && prog_type != BPF_PROG_TYPE_TRACING &&
+ if (attach_btf && (!btf_id || dst_prog))
+ return -EINVAL;
+
+ if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING &&
prog_type != BPF_PROG_TYPE_EXT)
return -EINVAL;
@@ -2100,7 +2067,8 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
{
enum bpf_prog_type type = attr->prog_type;
- struct bpf_prog *prog;
+ struct bpf_prog *prog, *dst_prog = NULL;
+ struct btf *attach_btf = NULL;
int err;
char license[128];
bool is_gpl;
@@ -2142,47 +2110,70 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
if (is_perfmon_prog_type(type) && !perfmon_capable())
return -EPERM;
+ /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog
+ * or btf, we need to check which one it is
+ */
+ if (attr->attach_prog_fd) {
+ dst_prog = bpf_prog_get(attr->attach_prog_fd);
+ if (IS_ERR(dst_prog)) {
+ dst_prog = NULL;
+ attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd);
+ if (IS_ERR(attach_btf))
+ return -EINVAL;
+ if (!btf_is_kernel(attach_btf)) {
+ btf_put(attach_btf);
+ return -EINVAL;
+ }
+ }
+ } else if (attr->attach_btf_id) {
+ /* fall back to vmlinux BTF, if BTF type ID is specified */
+ attach_btf = bpf_get_btf_vmlinux();
+ if (IS_ERR(attach_btf))
+ return PTR_ERR(attach_btf);
+ if (!attach_btf)
+ return -EINVAL;
+ btf_get(attach_btf);
+ }
+
bpf_prog_load_fixup_attach_type(attr);
if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
- attr->attach_btf_id,
- attr->attach_prog_fd))
+ attach_btf, attr->attach_btf_id,
+ dst_prog)) {
+ if (dst_prog)
+ bpf_prog_put(dst_prog);
+ if (attach_btf)
+ btf_put(attach_btf);
return -EINVAL;
+ }
/* plain bpf_prog allocation */
prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
- if (!prog)
+ if (!prog) {
+ if (dst_prog)
+ bpf_prog_put(dst_prog);
+ if (attach_btf)
+ btf_put(attach_btf);
return -ENOMEM;
+ }
prog->expected_attach_type = attr->expected_attach_type;
+ prog->aux->attach_btf = attach_btf;
prog->aux->attach_btf_id = attr->attach_btf_id;
- if (attr->attach_prog_fd) {
- struct bpf_prog *dst_prog;
-
- dst_prog = bpf_prog_get(attr->attach_prog_fd);
- if (IS_ERR(dst_prog)) {
- err = PTR_ERR(dst_prog);
- goto free_prog_nouncharge;
- }
- prog->aux->dst_prog = dst_prog;
- }
-
+ prog->aux->dst_prog = dst_prog;
prog->aux->offload_requested = !!attr->prog_ifindex;
prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
err = security_bpf_prog_alloc(prog->aux);
if (err)
- goto free_prog_nouncharge;
-
- err = bpf_prog_charge_memlock(prog);
- if (err)
- goto free_prog_sec;
+ goto free_prog;
+ prog->aux->user = get_current_user();
prog->len = attr->insn_cnt;
err = -EFAULT;
if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
bpf_prog_insn_size(prog)) != 0)
- goto free_prog;
+ goto free_prog_sec;
prog->orig_prog = NULL;
prog->jited = 0;
@@ -2193,19 +2184,19 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
if (bpf_prog_is_dev_bound(prog->aux)) {
err = bpf_prog_offload_init(prog, attr);
if (err)
- goto free_prog;
+ goto free_prog_sec;
}
/* find program type: socket_filter vs tracing_filter */
err = find_prog_type(type, prog);
if (err < 0)
- goto free_prog;
+ goto free_prog_sec;
prog->aux->load_time = ktime_get_boottime_ns();
err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name,
sizeof(attr->prog_name));
if (err < 0)
- goto free_prog;
+ goto free_prog_sec;
/* run eBPF verifier */
err = bpf_check(&prog, attr, uattr);
@@ -2250,11 +2241,12 @@ free_used_maps:
*/
__bpf_prog_put_noref(prog, prog->aux->func_cnt);
return err;
-free_prog:
- bpf_prog_uncharge_memlock(prog);
free_prog_sec:
+ free_uid(prog->aux->user);
security_bpf_prog_free(prog->aux);
-free_prog_nouncharge:
+free_prog:
+ if (prog->aux->attach_btf)
+ btf_put(prog->aux->attach_btf);
bpf_prog_free(prog);
return err;
}
@@ -2612,7 +2604,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
goto out_put_prog;
}
- key = bpf_trampoline_compute_key(tgt_prog, btf_id);
+ key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id);
}
link = kzalloc(sizeof(*link), GFP_USER);
@@ -3589,7 +3581,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
}
if (prog->aux->btf)
- info.btf_id = btf_id(prog->aux->btf);
+ info.btf_id = btf_obj_id(prog->aux->btf);
ulen = info.nr_func_info;
info.nr_func_info = prog->aux->func_info_cnt;
@@ -3692,7 +3684,7 @@ static int bpf_map_get_info_by_fd(struct file *file,
memcpy(info.name, map->name, sizeof(map->name));
if (map->btf) {
- info.btf_id = btf_id(map->btf);
+ info.btf_id = btf_obj_id(map->btf);
info.btf_key_type_id = map->btf_key_type_id;
info.btf_value_type_id = map->btf_value_type_id;
}
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 1fdb2fc196cd..0458a40edf10 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -136,8 +136,7 @@ struct bpf_iter_seq_task_file_info {
};
static struct file *
-task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
- struct task_struct **task, struct files_struct **fstruct)
+task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
{
struct pid_namespace *ns = info->common.ns;
u32 curr_tid = info->tid, max_fds;
@@ -150,14 +149,17 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
* Otherwise, it does not hold any reference.
*/
again:
- if (*task) {
- curr_task = *task;
- curr_files = *fstruct;
+ if (info->task) {
+ curr_task = info->task;
+ curr_files = info->files;
curr_fd = info->fd;
} else {
curr_task = task_seq_get_next(ns, &curr_tid, true);
- if (!curr_task)
+ if (!curr_task) {
+ info->task = NULL;
+ info->files = NULL;
return NULL;
+ }
curr_files = get_files_struct(curr_task);
if (!curr_files) {
@@ -167,9 +169,8 @@ again:
goto again;
}
- /* set *fstruct, *task and info->tid */
- *fstruct = curr_files;
- *task = curr_task;
+ info->files = curr_files;
+ info->task = curr_task;
if (curr_tid == info->tid) {
curr_fd = info->fd;
} else {
@@ -199,8 +200,8 @@ again:
rcu_read_unlock();
put_files_struct(curr_files);
put_task_struct(curr_task);
- *task = NULL;
- *fstruct = NULL;
+ info->task = NULL;
+ info->files = NULL;
info->fd = 0;
curr_tid = ++(info->tid);
goto again;
@@ -209,21 +210,13 @@ again:
static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
{
struct bpf_iter_seq_task_file_info *info = seq->private;
- struct files_struct *files = NULL;
- struct task_struct *task = NULL;
struct file *file;
- file = task_file_seq_get_next(info, &task, &files);
- if (!file) {
- info->files = NULL;
- info->task = NULL;
- return NULL;
- }
-
- if (*pos == 0)
+ info->task = NULL;
+ info->files = NULL;
+ file = task_file_seq_get_next(info);
+ if (file && *pos == 0)
++*pos;
- info->task = task;
- info->files = files;
return file;
}
@@ -231,24 +224,11 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct bpf_iter_seq_task_file_info *info = seq->private;
- struct files_struct *files = info->files;
- struct task_struct *task = info->task;
- struct file *file;
++*pos;
++info->fd;
fput((struct file *)v);
- file = task_file_seq_get_next(info, &task, &files);
- if (!file) {
- info->files = NULL;
- info->task = NULL;
- return NULL;
- }
-
- info->task = task;
- info->files = files;
-
- return file;
+ return task_file_seq_get_next(info);
}
struct bpf_iter__task_file {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ab2d6a02aee0..93def76cf32b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -238,7 +238,9 @@ struct bpf_call_arg_meta {
u64 msize_max_value;
int ref_obj_id;
int func_id;
+ struct btf *btf;
u32 btf_id;
+ struct btf *ret_btf;
u32 ret_btf_id;
};
@@ -556,10 +558,9 @@ static struct bpf_func_state *func(struct bpf_verifier_env *env,
return cur->frame[reg->frameno];
}
-const char *kernel_type_name(u32 id)
+static const char *kernel_type_name(const struct btf* btf, u32 id)
{
- return btf_name_by_offset(btf_vmlinux,
- btf_type_by_id(btf_vmlinux, id)->name_off);
+ return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
}
static void print_verifier_state(struct bpf_verifier_env *env,
@@ -589,7 +590,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
if (t == PTR_TO_BTF_ID ||
t == PTR_TO_BTF_ID_OR_NULL ||
t == PTR_TO_PERCPU_BTF_ID)
- verbose(env, "%s", kernel_type_name(reg->btf_id));
+ verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
verbose(env, "(id=%d", reg->id);
if (reg_type_may_be_refcounted_or_null(t))
verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
@@ -1383,7 +1384,8 @@ static void mark_reg_not_init(struct bpf_verifier_env *env,
static void mark_btf_ld_reg(struct bpf_verifier_env *env,
struct bpf_reg_state *regs, u32 regno,
- enum bpf_reg_type reg_type, u32 btf_id)
+ enum bpf_reg_type reg_type,
+ struct btf *btf, u32 btf_id)
{
if (reg_type == SCALAR_VALUE) {
mark_reg_unknown(env, regs, regno);
@@ -1391,6 +1393,7 @@ static void mark_btf_ld_reg(struct bpf_verifier_env *env,
}
mark_reg_known_zero(env, regs, regno);
regs[regno].type = PTR_TO_BTF_ID;
+ regs[regno].btf = btf;
regs[regno].btf_id = btf_id;
}
@@ -2764,7 +2767,7 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
enum bpf_access_type t, enum bpf_reg_type *reg_type,
- u32 *btf_id)
+ struct btf **btf, u32 *btf_id)
{
struct bpf_insn_access_aux info = {
.reg_type = *reg_type,
@@ -2782,10 +2785,12 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
*/
*reg_type = info.reg_type;
- if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL)
+ if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL) {
+ *btf = info.btf;
*btf_id = info.btf_id;
- else
+ } else {
env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
+ }
/* remember the offset of last byte accessed in ctx */
if (env->prog->aux->max_ctx_offset < off + size)
env->prog->aux->max_ctx_offset = off + size;
@@ -3297,8 +3302,8 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
int value_regno)
{
struct bpf_reg_state *reg = regs + regno;
- const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id);
- const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+ const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
+ const char *tname = btf_name_by_offset(reg->btf, t->name_off);
u32 btf_id;
int ret;
@@ -3319,23 +3324,23 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
}
if (env->ops->btf_struct_access) {
- ret = env->ops->btf_struct_access(&env->log, t, off, size,
- atype, &btf_id);
+ ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
+ off, size, atype, &btf_id);
} else {
if (atype != BPF_READ) {
verbose(env, "only read is supported\n");
return -EACCES;
}
- ret = btf_struct_access(&env->log, t, off, size, atype,
- &btf_id);
+ ret = btf_struct_access(&env->log, reg->btf, t, off, size,
+ atype, &btf_id);
}
if (ret < 0)
return ret;
if (atype == BPF_READ && value_regno >= 0)
- mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
+ mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id);
return 0;
}
@@ -3385,12 +3390,12 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
return -EACCES;
}
- ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
+ ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id);
if (ret < 0)
return ret;
if (value_regno >= 0)
- mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
+ mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id);
return 0;
}
@@ -3466,6 +3471,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_CTX) {
enum bpf_reg_type reg_type = SCALAR_VALUE;
+ struct btf *btf = NULL;
u32 btf_id = 0;
if (t == BPF_WRITE && value_regno >= 0 &&
@@ -3478,7 +3484,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err < 0)
return err;
- err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
+ err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf, &btf_id);
if (err)
verbose_linfo(env, insn_idx, "; ");
if (!err && t == BPF_READ && value_regno >= 0) {
@@ -3500,8 +3506,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
*/
regs[value_regno].subreg_def = DEF_NOT_SUBREG;
if (reg_type == PTR_TO_BTF_ID ||
- reg_type == PTR_TO_BTF_ID_OR_NULL)
+ reg_type == PTR_TO_BTF_ID_OR_NULL) {
+ regs[value_regno].btf = btf;
regs[value_regno].btf_id = btf_id;
+ }
}
regs[value_regno].type = reg_type;
}
@@ -4118,11 +4126,11 @@ found:
arg_btf_id = compatible->btf_id;
}
- if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id,
- *arg_btf_id)) {
+ if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
+ btf_vmlinux, *arg_btf_id)) {
verbose(env, "R%d is of type %s but %s is expected\n",
- regno, kernel_type_name(reg->btf_id),
- kernel_type_name(*arg_btf_id));
+ regno, kernel_type_name(reg->btf, reg->btf_id),
+ kernel_type_name(btf_vmlinux, *arg_btf_id));
return -EACCES;
}
@@ -4244,6 +4252,7 @@ skip_type_check:
verbose(env, "Helper has invalid btf_id in R%d\n", regno);
return -EACCES;
}
+ meta->ret_btf = reg->btf;
meta->ret_btf_id = reg->btf_id;
} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
if (meta->func_id == BPF_FUNC_spin_lock) {
@@ -5190,16 +5199,16 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
const struct btf_type *t;
mark_reg_known_zero(env, regs, BPF_REG_0);
- t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
+ t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
if (!btf_type_is_struct(t)) {
u32 tsize;
const struct btf_type *ret;
const char *tname;
/* resolve the type size of ksym. */
- ret = btf_resolve_size(btf_vmlinux, t, &tsize);
+ ret = btf_resolve_size(meta.ret_btf, t, &tsize);
if (IS_ERR(ret)) {
- tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+ tname = btf_name_by_offset(meta.ret_btf, t->name_off);
verbose(env, "unable to resolve the size of type '%s': %ld\n",
tname, PTR_ERR(ret));
return -EINVAL;
@@ -5212,6 +5221,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
regs[BPF_REG_0].type =
fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
+ regs[BPF_REG_0].btf = meta.ret_btf;
regs[BPF_REG_0].btf_id = meta.ret_btf_id;
}
} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL ||
@@ -5228,6 +5238,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
fn->ret_type, func_id_name(func_id), func_id);
return -EINVAL;
}
+ /* current BPF helper definitions are only coming from
+ * built-in code with type IDs from vmlinux BTF
+ */
+ regs[BPF_REG_0].btf = btf_vmlinux;
regs[BPF_REG_0].btf_id = ret_btf_id;
} else {
verbose(env, "unknown return type %d of func %s#%d\n",
@@ -5627,7 +5641,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
if (reg_is_pkt_pointer(ptr_reg)) {
dst_reg->id = ++env->id_gen;
/* something was added to pkt_ptr, set range to zero */
- dst_reg->raw = 0;
+ memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
}
break;
case BPF_SUB:
@@ -5692,7 +5706,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst_reg->id = ++env->id_gen;
/* something was added to pkt_ptr, set range to zero */
if (smin_val < 0)
- dst_reg->raw = 0;
+ memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
}
break;
case BPF_AND:
@@ -7744,6 +7758,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
break;
case PTR_TO_BTF_ID:
case PTR_TO_PERCPU_BTF_ID:
+ dst_reg->btf = aux->btf_var.btf;
dst_reg->btf_id = aux->btf_var.btf_id;
break;
default:
@@ -8058,6 +8073,11 @@ static void init_explored_state(struct bpf_verifier_env *env, int idx)
env->insn_aux_data[idx].prune_point = true;
}
+enum {
+ DONE_EXPLORING = 0,
+ KEEP_EXPLORING = 1,
+};
+
/* t, w, e - match pseudo-code above:
* t - index of current instruction
* w - next instruction
@@ -8070,10 +8090,10 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
int *insn_state = env->cfg.insn_state;
if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
- return 0;
+ return DONE_EXPLORING;
if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
- return 0;
+ return DONE_EXPLORING;
if (w < 0 || w >= env->prog->len) {
verbose_linfo(env, t, "%d: ", t);
@@ -8092,10 +8112,10 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
if (env->cfg.cur_stack >= env->prog->len)
return -E2BIG;
insn_stack[env->cfg.cur_stack++] = w;
- return 1;
+ return KEEP_EXPLORING;
} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
if (loop_ok && env->bpf_capable)
- return 0;
+ return DONE_EXPLORING;
verbose_linfo(env, t, "%d: ", t);
verbose_linfo(env, w, "%d: ", w);
verbose(env, "back-edge from insn %d to %d\n", t, w);
@@ -8107,7 +8127,74 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
verbose(env, "insn state internal bug\n");
return -EFAULT;
}
- return 0;
+ return DONE_EXPLORING;
+}
+
+/* Visits the instruction at index t and returns one of the following:
+ * < 0 - an error occurred
+ * DONE_EXPLORING - the instruction was fully explored
+ * KEEP_EXPLORING - there is still work to be done before it is fully explored
+ */
+static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
+{
+ struct bpf_insn *insns = env->prog->insnsi;
+ int ret;
+
+ /* All non-branch instructions have a single fall-through edge. */
+ if (BPF_CLASS(insns[t].code) != BPF_JMP &&
+ BPF_CLASS(insns[t].code) != BPF_JMP32)
+ return push_insn(t, t + 1, FALLTHROUGH, env, false);
+
+ switch (BPF_OP(insns[t].code)) {
+ case BPF_EXIT:
+ return DONE_EXPLORING;
+
+ case BPF_CALL:
+ ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
+ if (ret)
+ return ret;
+
+ if (t + 1 < insn_cnt)
+ init_explored_state(env, t + 1);
+ if (insns[t].src_reg == BPF_PSEUDO_CALL) {
+ init_explored_state(env, t);
+ ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
+ env, false);
+ }
+ return ret;
+
+ case BPF_JA:
+ if (BPF_SRC(insns[t].code) != BPF_K)
+ return -EINVAL;
+
+ /* unconditional jump with single edge */
+ ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
+ true);
+ if (ret)
+ return ret;
+
+ /* unconditional jmp is not a good pruning point,
+ * but it's marked, since backtracking needs
+ * to record jmp history in is_state_visited().
+ */
+ init_explored_state(env, t + insns[t].off + 1);
+ /* tell verifier to check for equivalent states
+ * after every call and jump
+ */
+ if (t + 1 < insn_cnt)
+ init_explored_state(env, t + 1);
+
+ return ret;
+
+ default:
+ /* conditional jump with two edges */
+ init_explored_state(env, t);
+ ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
+ if (ret)
+ return ret;
+
+ return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
+ }
}
/* non-recursive depth-first-search to detect loops in BPF program
@@ -8115,11 +8202,10 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
*/
static int check_cfg(struct bpf_verifier_env *env)
{
- struct bpf_insn *insns = env->prog->insnsi;
int insn_cnt = env->prog->len;
int *insn_stack, *insn_state;
int ret = 0;
- int i, t;
+ int i;
insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
if (!insn_state)
@@ -8135,92 +8221,32 @@ static int check_cfg(struct bpf_verifier_env *env)
insn_stack[0] = 0; /* 0 is the first instruction */
env->cfg.cur_stack = 1;
-peek_stack:
- if (env->cfg.cur_stack == 0)
- goto check_state;
- t = insn_stack[env->cfg.cur_stack - 1];
-
- if (BPF_CLASS(insns[t].code) == BPF_JMP ||
- BPF_CLASS(insns[t].code) == BPF_JMP32) {
- u8 opcode = BPF_OP(insns[t].code);
-
- if (opcode == BPF_EXIT) {
- goto mark_explored;
- } else if (opcode == BPF_CALL) {
- ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
- if (ret == 1)
- goto peek_stack;
- else if (ret < 0)
- goto err_free;
- if (t + 1 < insn_cnt)
- init_explored_state(env, t + 1);
- if (insns[t].src_reg == BPF_PSEUDO_CALL) {
- init_explored_state(env, t);
- ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
- env, false);
- if (ret == 1)
- goto peek_stack;
- else if (ret < 0)
- goto err_free;
- }
- } else if (opcode == BPF_JA) {
- if (BPF_SRC(insns[t].code) != BPF_K) {
- ret = -EINVAL;
- goto err_free;
- }
- /* unconditional jump with single edge */
- ret = push_insn(t, t + insns[t].off + 1,
- FALLTHROUGH, env, true);
- if (ret == 1)
- goto peek_stack;
- else if (ret < 0)
- goto err_free;
- /* unconditional jmp is not a good pruning point,
- * but it's marked, since backtracking needs
- * to record jmp history in is_state_visited().
- */
- init_explored_state(env, t + insns[t].off + 1);
- /* tell verifier to check for equivalent states
- * after every call and jump
- */
- if (t + 1 < insn_cnt)
- init_explored_state(env, t + 1);
- } else {
- /* conditional jump with two edges */
- init_explored_state(env, t);
- ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
- if (ret == 1)
- goto peek_stack;
- else if (ret < 0)
- goto err_free;
+ while (env->cfg.cur_stack > 0) {
+ int t = insn_stack[env->cfg.cur_stack - 1];
- ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
- if (ret == 1)
- goto peek_stack;
- else if (ret < 0)
- goto err_free;
- }
- } else {
- /* all other non-branch instructions with single
- * fall-through edge
- */
- ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
- if (ret == 1)
- goto peek_stack;
- else if (ret < 0)
+ ret = visit_insn(t, insn_cnt, env);
+ switch (ret) {
+ case DONE_EXPLORING:
+ insn_state[t] = EXPLORED;
+ env->cfg.cur_stack--;
+ break;
+ case KEEP_EXPLORING:
+ break;
+ default:
+ if (ret > 0) {
+ verbose(env, "visit_insn internal bug\n");
+ ret = -EFAULT;
+ }
goto err_free;
+ }
}
-mark_explored:
- insn_state[t] = EXPLORED;
- if (env->cfg.cur_stack-- <= 0) {
+ if (env->cfg.cur_stack < 0) {
verbose(env, "pop stack internal bug\n");
ret = -EFAULT;
goto err_free;
}
- goto peek_stack;
-check_state:
for (i = 0; i < insn_cnt; i++) {
if (insn_state[i] != EXPLORED) {
verbose(env, "unreachable insn %d\n", i);
@@ -9740,6 +9766,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
if (percpu) {
aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
+ aux->btf_var.btf = btf_vmlinux;
aux->btf_var.btf_id = type;
} else if (!btf_type_is_struct(t)) {
const struct btf_type *ret;
@@ -9758,6 +9785,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
aux->btf_var.mem_size = tsize;
} else {
aux->btf_var.reg_type = PTR_TO_BTF_ID;
+ aux->btf_var.btf = btf_vmlinux;
aux->btf_var.btf_id = type;
}
return 0;
@@ -11610,7 +11638,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
bpf_log(log, "Tracing programs must provide btf_id\n");
return -EINVAL;
}
- btf = tgt_prog ? tgt_prog->aux->btf : btf_vmlinux;
+ btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
if (!btf) {
bpf_log(log,
"FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
@@ -11886,7 +11914,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
return ret;
}
- key = bpf_trampoline_compute_key(tgt_prog, btf_id);
+ key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
tr = bpf_trampoline_get(key, &tgt_info);
if (!tr)
return -ENOMEM;
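With type IDs scoped to a BTF object, the trampoline key above has to incorporate which object the ID came from (the target program, or prog->aux->attach_btf), so that equal btf_ids from different objects do not collide. A hedged sketch of how two 32-bit identifiers can be packed into one u64 key; this is illustrative only and not necessarily the exact formula bpf_trampoline_compute_key() uses:

/* Illustrative only: derive a single u64 key from an object identifier
 * plus a 32-bit BTF type ID, so equal type IDs from different BTF
 * objects (or target programs) yield different keys.
 */
#include <stdint.h>

static uint64_t compute_key(uint32_t obj_id, uint32_t btf_id)
{
	return ((uint64_t)obj_id << 32) | btf_id;
}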