summaryrefslogtreecommitdiff
path: root/kernel/bpf
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf')
-rw-r--r--kernel/bpf/Makefile2
-rw-r--r--kernel/bpf/arraymap.c138
-rw-r--r--kernel/bpf/bpf_iter.c85
-rw-r--r--kernel/bpf/btf.c13
-rw-r--r--kernel/bpf/cgroup.c82
-rw-r--r--kernel/bpf/core.c12
-rw-r--r--kernel/bpf/hashtab.c194
-rw-r--r--kernel/bpf/local_storage.c216
-rw-r--r--kernel/bpf/map_iter.c78
-rw-r--r--kernel/bpf/net_namespace.c8
-rw-r--r--kernel/bpf/prog_iter.c107
-rw-r--r--kernel/bpf/stackmap.c183
-rw-r--r--kernel/bpf/syscall.c52
-rw-r--r--kernel/bpf/task_iter.c24
-rw-r--r--kernel/bpf/verifier.c96
15 files changed, 1088 insertions, 202 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 1131a921e1a6..e6eb9c0402da 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -2,7 +2,7 @@
obj-y := core.o
CFLAGS_core.o += $(call cc-disable-warning, override-init)
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index c66e8273fccd..8ff419b632a6 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -487,6 +487,142 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
vma->vm_pgoff + pgoff);
}
+struct bpf_iter_seq_array_map_info {
+ struct bpf_map *map;
+ void *percpu_value_buf;
+ u32 index;
+};
+
+static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct bpf_iter_seq_array_map_info *info = seq->private;
+ struct bpf_map *map = info->map;
+ struct bpf_array *array;
+ u32 index;
+
+ if (info->index >= map->max_entries)
+ return NULL;
+
+ if (*pos == 0)
+ ++*pos;
+ array = container_of(map, struct bpf_array, map);
+ index = info->index & array->index_mask;
+ if (info->percpu_value_buf)
+ return array->pptrs[index];
+ return array->value + array->elem_size * index;
+}
+
+static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_iter_seq_array_map_info *info = seq->private;
+ struct bpf_map *map = info->map;
+ struct bpf_array *array;
+ u32 index;
+
+ ++*pos;
+ ++info->index;
+ if (info->index >= map->max_entries)
+ return NULL;
+
+ array = container_of(map, struct bpf_array, map);
+ index = info->index & array->index_mask;
+ if (info->percpu_value_buf)
+ return array->pptrs[index];
+ return array->value + array->elem_size * index;
+}
+
+static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_seq_array_map_info *info = seq->private;
+ struct bpf_iter__bpf_map_elem ctx = {};
+ struct bpf_map *map = info->map;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ int off = 0, cpu = 0;
+ void __percpu **pptr;
+ u32 size;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, v == NULL);
+ if (!prog)
+ return 0;
+
+ ctx.meta = &meta;
+ ctx.map = info->map;
+ if (v) {
+ ctx.key = &info->index;
+
+ if (!info->percpu_value_buf) {
+ ctx.value = v;
+ } else {
+ pptr = v;
+ size = round_up(map->value_size, 8);
+ for_each_possible_cpu(cpu) {
+ bpf_long_memcpy(info->percpu_value_buf + off,
+ per_cpu_ptr(pptr, cpu),
+ size);
+ off += size;
+ }
+ ctx.value = info->percpu_value_buf;
+ }
+ }
+
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int bpf_array_map_seq_show(struct seq_file *seq, void *v)
+{
+ return __bpf_array_map_seq_show(seq, v);
+}
+
+static void bpf_array_map_seq_stop(struct seq_file *seq, void *v)
+{
+ if (!v)
+ (void)__bpf_array_map_seq_show(seq, NULL);
+}
+
+static int bpf_iter_init_array_map(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+{
+ struct bpf_iter_seq_array_map_info *seq_info = priv_data;
+ struct bpf_map *map = aux->map;
+ void *value_buf;
+ u32 buf_size;
+
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+ buf_size = round_up(map->value_size, 8) * num_possible_cpus();
+ value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
+ if (!value_buf)
+ return -ENOMEM;
+
+ seq_info->percpu_value_buf = value_buf;
+ }
+
+ seq_info->map = map;
+ return 0;
+}
+
+static void bpf_iter_fini_array_map(void *priv_data)
+{
+ struct bpf_iter_seq_array_map_info *seq_info = priv_data;
+
+ kfree(seq_info->percpu_value_buf);
+}
+
+static const struct seq_operations bpf_array_map_seq_ops = {
+ .start = bpf_array_map_seq_start,
+ .next = bpf_array_map_seq_next,
+ .stop = bpf_array_map_seq_stop,
+ .show = bpf_array_map_seq_show,
+};
+
+static const struct bpf_iter_seq_info iter_seq_info = {
+ .seq_ops = &bpf_array_map_seq_ops,
+ .init_seq_private = bpf_iter_init_array_map,
+ .fini_seq_private = bpf_iter_fini_array_map,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
+};
+
static int array_map_btf_id;
const struct bpf_map_ops array_map_ops = {
.map_alloc_check = array_map_alloc_check,
@@ -506,6 +642,7 @@ const struct bpf_map_ops array_map_ops = {
.map_update_batch = generic_map_update_batch,
.map_btf_name = "bpf_array",
.map_btf_id = &array_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
};
static int percpu_array_map_btf_id;
@@ -521,6 +658,7 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_check_btf = array_map_check_btf,
.map_btf_name = "bpf_array",
.map_btf_id = &percpu_array_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
};
static int fd_array_map_alloc_check(union bpf_attr *attr)
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index dd612b80b9fe..363b9cafc2d8 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -14,11 +14,13 @@ struct bpf_iter_target_info {
struct bpf_iter_link {
struct bpf_link link;
+ struct bpf_iter_aux_info aux;
struct bpf_iter_target_info *tinfo;
};
struct bpf_iter_priv_data {
struct bpf_iter_target_info *tinfo;
+ const struct bpf_iter_seq_info *seq_info;
struct bpf_prog *prog;
u64 session_id;
u64 seq_num;
@@ -35,7 +37,8 @@ static DEFINE_MUTEX(link_mutex);
/* incremented on every opened seq_file */
static atomic64_t session_id;
-static int prepare_seq_file(struct file *file, struct bpf_iter_link *link);
+static int prepare_seq_file(struct file *file, struct bpf_iter_link *link,
+ const struct bpf_iter_seq_info *seq_info);
static void bpf_iter_inc_seq_num(struct seq_file *seq)
{
@@ -199,11 +202,25 @@ done:
return copied;
}
+static const struct bpf_iter_seq_info *
+__get_seq_info(struct bpf_iter_link *link)
+{
+ const struct bpf_iter_seq_info *seq_info;
+
+ if (link->aux.map) {
+ seq_info = link->aux.map->ops->iter_seq_info;
+ if (seq_info)
+ return seq_info;
+ }
+
+ return link->tinfo->reg_info->seq_info;
+}
+
static int iter_open(struct inode *inode, struct file *file)
{
struct bpf_iter_link *link = inode->i_private;
- return prepare_seq_file(file, link);
+ return prepare_seq_file(file, link, __get_seq_info(link));
}
static int iter_release(struct inode *inode, struct file *file)
@@ -218,8 +235,8 @@ static int iter_release(struct inode *inode, struct file *file)
iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
target_private);
- if (iter_priv->tinfo->reg_info->fini_seq_private)
- iter_priv->tinfo->reg_info->fini_seq_private(seq->private);
+ if (iter_priv->seq_info->fini_seq_private)
+ iter_priv->seq_info->fini_seq_private(seq->private);
bpf_prog_put(iter_priv->prog);
seq->private = iter_priv;
@@ -318,6 +335,11 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
static void bpf_iter_link_release(struct bpf_link *link)
{
+ struct bpf_iter_link *iter_link =
+ container_of(link, struct bpf_iter_link, link);
+
+ if (iter_link->aux.map)
+ bpf_map_put_with_uref(iter_link->aux.map);
}
static void bpf_iter_link_dealloc(struct bpf_link *link)
@@ -370,14 +392,13 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
struct bpf_link_primer link_primer;
struct bpf_iter_target_info *tinfo;
+ struct bpf_iter_aux_info aux = {};
struct bpf_iter_link *link;
+ u32 prog_btf_id, target_fd;
bool existed = false;
- u32 prog_btf_id;
+ struct bpf_map *map;
int err;
- if (attr->link_create.target_fd || attr->link_create.flags)
- return -EINVAL;
-
prog_btf_id = prog->aux->attach_btf_id;
mutex_lock(&targets_mutex);
list_for_each_entry(tinfo, &targets, list) {
@@ -390,6 +411,13 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
if (!existed)
return -ENOENT;
+ /* Make sure user supplied flags are target expected. */
+ target_fd = attr->link_create.target_fd;
+ if (attr->link_create.flags != tinfo->reg_info->req_linfo)
+ return -EINVAL;
+ if (!attr->link_create.flags && target_fd)
+ return -EINVAL;
+
link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
if (!link)
return -ENOMEM;
@@ -403,21 +431,45 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
return err;
}
+ if (tinfo->reg_info->req_linfo == BPF_ITER_LINK_MAP_FD) {
+ map = bpf_map_get_with_uref(target_fd);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto cleanup_link;
+ }
+
+ aux.map = map;
+ err = tinfo->reg_info->check_target(prog, &aux);
+ if (err) {
+ bpf_map_put_with_uref(map);
+ goto cleanup_link;
+ }
+
+ link->aux.map = map;
+ }
+
return bpf_link_settle(&link_primer);
+
+cleanup_link:
+ bpf_link_cleanup(&link_primer);
+ return err;
}
static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
struct bpf_iter_target_info *tinfo,
+ const struct bpf_iter_seq_info *seq_info,
struct bpf_prog *prog)
{
priv_data->tinfo = tinfo;
+ priv_data->seq_info = seq_info;
priv_data->prog = prog;
priv_data->session_id = atomic64_inc_return(&session_id);
priv_data->seq_num = 0;
priv_data->done_stop = false;
}
-static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
+static int prepare_seq_file(struct file *file, struct bpf_iter_link *link,
+ const struct bpf_iter_seq_info *seq_info)
{
struct bpf_iter_priv_data *priv_data;
struct bpf_iter_target_info *tinfo;
@@ -433,21 +485,21 @@ static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
tinfo = link->tinfo;
total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) +
- tinfo->reg_info->seq_priv_size;
- priv_data = __seq_open_private(file, tinfo->reg_info->seq_ops,
+ seq_info->seq_priv_size;
+ priv_data = __seq_open_private(file, seq_info->seq_ops,
total_priv_dsize);
if (!priv_data) {
err = -ENOMEM;
goto release_prog;
}
- if (tinfo->reg_info->init_seq_private) {
- err = tinfo->reg_info->init_seq_private(priv_data->target_private);
+ if (seq_info->init_seq_private) {
+ err = seq_info->init_seq_private(priv_data->target_private, &link->aux);
if (err)
goto release_seq_file;
}
- init_seq_meta(priv_data, tinfo, prog);
+ init_seq_meta(priv_data, tinfo, seq_info, prog);
seq = file->private_data;
seq->private = priv_data->target_private;
@@ -463,6 +515,7 @@ release_prog:
int bpf_iter_new_fd(struct bpf_link *link)
{
+ struct bpf_iter_link *iter_link;
struct file *file;
unsigned int flags;
int err, fd;
@@ -481,8 +534,8 @@ int bpf_iter_new_fd(struct bpf_link *link)
goto free_fd;
}
- err = prepare_seq_file(file,
- container_of(link, struct bpf_iter_link, link));
+ iter_link = container_of(link, struct bpf_iter_link, link);
+ err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link));
if (err)
goto free_file;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 4ff0de860ab7..91afdd4c82e3 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3806,6 +3806,19 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
btf_kind_str[BTF_INFO_KIND(t->info)]);
return false;
}
+
+ /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */
+ for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
+ const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
+
+ if (ctx_arg_info->offset == off &&
+ (ctx_arg_info->reg_type == PTR_TO_RDONLY_BUF_OR_NULL ||
+ ctx_arg_info->reg_type == PTR_TO_RDWR_BUF_OR_NULL)) {
+ info->reg_type = ctx_arg_info->reg_type;
+ return true;
+ }
+ }
+
if (t->type == 0)
/* This is a pointer to void.
* It is the same as scalar from the verifier safety pov.
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index ac53102e244a..83ff127ef7ae 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -37,17 +37,34 @@ static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[])
}
static int bpf_cgroup_storages_alloc(struct bpf_cgroup_storage *storages[],
- struct bpf_prog *prog)
+ struct bpf_cgroup_storage *new_storages[],
+ enum bpf_attach_type type,
+ struct bpf_prog *prog,
+ struct cgroup *cgrp)
{
enum bpf_cgroup_storage_type stype;
+ struct bpf_cgroup_storage_key key;
+ struct bpf_map *map;
+
+ key.cgroup_inode_id = cgroup_id(cgrp);
+ key.attach_type = type;
for_each_cgroup_storage_type(stype) {
+ map = prog->aux->cgroup_storage[stype];
+ if (!map)
+ continue;
+
+ storages[stype] = cgroup_storage_lookup((void *)map, &key, false);
+ if (storages[stype])
+ continue;
+
storages[stype] = bpf_cgroup_storage_alloc(prog, stype);
if (IS_ERR(storages[stype])) {
- storages[stype] = NULL;
- bpf_cgroup_storages_free(storages);
+ bpf_cgroup_storages_free(new_storages);
return -ENOMEM;
}
+
+ new_storages[stype] = storages[stype];
}
return 0;
@@ -63,7 +80,7 @@ static void bpf_cgroup_storages_assign(struct bpf_cgroup_storage *dst[],
}
static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[],
- struct cgroup* cgrp,
+ struct cgroup *cgrp,
enum bpf_attach_type attach_type)
{
enum bpf_cgroup_storage_type stype;
@@ -72,14 +89,6 @@ static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[],
bpf_cgroup_storage_link(storages[stype], cgrp, attach_type);
}
-static void bpf_cgroup_storages_unlink(struct bpf_cgroup_storage *storages[])
-{
- enum bpf_cgroup_storage_type stype;
-
- for_each_cgroup_storage_type(stype)
- bpf_cgroup_storage_unlink(storages[stype]);
-}
-
/* Called when bpf_cgroup_link is auto-detached from dying cgroup.
* It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. It
* doesn't free link memory, which will eventually be done by bpf_link's
@@ -101,22 +110,23 @@ static void cgroup_bpf_release(struct work_struct *work)
struct cgroup *p, *cgrp = container_of(work, struct cgroup,
bpf.release_work);
struct bpf_prog_array *old_array;
+ struct list_head *storages = &cgrp->bpf.storages;
+ struct bpf_cgroup_storage *storage, *stmp;
+
unsigned int type;
mutex_lock(&cgroup_mutex);
for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
struct list_head *progs = &cgrp->bpf.progs[type];
- struct bpf_prog_list *pl, *tmp;
+ struct bpf_prog_list *pl, *pltmp;
- list_for_each_entry_safe(pl, tmp, progs, node) {
+ list_for_each_entry_safe(pl, pltmp, progs, node) {
list_del(&pl->node);
if (pl->prog)
bpf_prog_put(pl->prog);
if (pl->link)
bpf_cgroup_link_auto_detach(pl->link);
- bpf_cgroup_storages_unlink(pl->storage);
- bpf_cgroup_storages_free(pl->storage);
kfree(pl);
static_branch_dec(&cgroup_bpf_enabled_key);
}
@@ -126,6 +136,11 @@ static void cgroup_bpf_release(struct work_struct *work)
bpf_prog_array_free(old_array);
}
+ list_for_each_entry_safe(storage, stmp, storages, list_cg) {
+ bpf_cgroup_storage_unlink(storage);
+ bpf_cgroup_storage_free(storage);
+ }
+
mutex_unlock(&cgroup_mutex);
for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
@@ -290,6 +305,8 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
for (i = 0; i < NR; i++)
INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
+ INIT_LIST_HEAD(&cgrp->bpf.storages);
+
for (i = 0; i < NR; i++)
if (compute_effective_progs(cgrp, i, &arrays[i]))
goto cleanup;
@@ -422,7 +439,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
struct list_head *progs = &cgrp->bpf.progs[type];
struct bpf_prog *old_prog = NULL;
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
- struct bpf_cgroup_storage *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
+ struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
struct bpf_prog_list *pl;
int err;
@@ -455,17 +472,16 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
if (IS_ERR(pl))
return PTR_ERR(pl);
- if (bpf_cgroup_storages_alloc(storage, prog ? : link->link.prog))
+ if (bpf_cgroup_storages_alloc(storage, new_storage, type,
+ prog ? : link->link.prog, cgrp))
return -ENOMEM;
if (pl) {
old_prog = pl->prog;
- bpf_cgroup_storages_unlink(pl->storage);
- bpf_cgroup_storages_assign(old_storage, pl->storage);
} else {
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
if (!pl) {
- bpf_cgroup_storages_free(storage);
+ bpf_cgroup_storages_free(new_storage);
return -ENOMEM;
}
list_add_tail(&pl->node, progs);
@@ -480,12 +496,11 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
if (err)
goto cleanup;
- bpf_cgroup_storages_free(old_storage);
if (old_prog)
bpf_prog_put(old_prog);
else
static_branch_inc(&cgroup_bpf_enabled_key);
- bpf_cgroup_storages_link(pl->storage, cgrp, type);
+ bpf_cgroup_storages_link(new_storage, cgrp, type);
return 0;
cleanup:
@@ -493,9 +508,7 @@ cleanup:
pl->prog = old_prog;
pl->link = NULL;
}
- bpf_cgroup_storages_free(pl->storage);
- bpf_cgroup_storages_assign(pl->storage, old_storage);
- bpf_cgroup_storages_link(pl->storage, cgrp, type);
+ bpf_cgroup_storages_free(new_storage);
if (!old_prog) {
list_del(&pl->node);
kfree(pl);
@@ -679,8 +692,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
/* now can actually delete it from this cgroup list */
list_del(&pl->node);
- bpf_cgroup_storages_unlink(pl->storage);
- bpf_cgroup_storages_free(pl->storage);
kfree(pl);
if (list_empty(progs))
/* last program was detached, reset flags to zero */
@@ -803,6 +814,7 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
{
struct bpf_cgroup_link *cg_link =
container_of(link, struct bpf_cgroup_link, link);
+ struct cgroup *cg;
/* link might have been auto-detached by dying cgroup already,
* in that case our work is done here
@@ -821,8 +833,12 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
cg_link->type));
+ cg = cg_link->cgroup;
+ cg_link->cgroup = NULL;
+
mutex_unlock(&cgroup_mutex);
- cgroup_put(cg_link->cgroup);
+
+ cgroup_put(cg);
}
static void bpf_cgroup_link_dealloc(struct bpf_link *link)
@@ -833,6 +849,13 @@ static void bpf_cgroup_link_dealloc(struct bpf_link *link)
kfree(cg_link);
}
+static int bpf_cgroup_link_detach(struct bpf_link *link)
+{
+ bpf_cgroup_link_release(link);
+
+ return 0;
+}
+
static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link,
struct seq_file *seq)
{
@@ -872,6 +895,7 @@ static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link,
static const struct bpf_link_ops bpf_cgroup_link_lops = {
.release = bpf_cgroup_link_release,
.dealloc = bpf_cgroup_link_dealloc,
+ .detach = bpf_cgroup_link_detach,
.update_prog = cgroup_bpf_replace,
.show_fdinfo = bpf_cgroup_link_show_fdinfo,
.fill_link_info = bpf_cgroup_link_fill_link_info,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7be02e555ab9..bde93344164d 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2097,24 +2097,12 @@ int bpf_prog_array_copy_info(struct bpf_prog_array *array,
: 0;
}
-static void bpf_free_cgroup_storage(struct bpf_prog_aux *aux)
-{
- enum bpf_cgroup_storage_type stype;
-
- for_each_cgroup_storage_type(stype) {
- if (!aux->cgroup_storage[stype])
- continue;
- bpf_cgroup_storage_release(aux, aux->cgroup_storage[stype]);
- }
-}
-
void __bpf_free_used_maps(struct bpf_prog_aux *aux,
struct bpf_map **used_maps, u32 len)
{
struct bpf_map *map;
u32 i;
- bpf_free_cgroup_storage(aux);
for (i = 0; i < len; i++) {
map = used_maps[i];
if (map->ops->map_poke_untrack)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 7b136953f214..78dfff6a501b 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1618,6 +1618,196 @@ htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
true, false);
}
+struct bpf_iter_seq_hash_map_info {
+ struct bpf_map *map;
+ struct bpf_htab *htab;
+ void *percpu_value_buf; // non-zero means percpu hash
+ unsigned long flags;
+ u32 bucket_id;
+ u32 skip_elems;
+};
+
+static struct htab_elem *
+bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
+ struct htab_elem *prev_elem)
+{
+ const struct bpf_htab *htab = info->htab;
+ unsigned long flags = info->flags;
+ u32 skip_elems = info->skip_elems;
+ u32 bucket_id = info->bucket_id;
+ struct hlist_nulls_head *head;
+ struct hlist_nulls_node *n;
+ struct htab_elem *elem;
+ struct bucket *b;
+ u32 i, count;
+
+ if (bucket_id >= htab->n_buckets)
+ return NULL;
+
+ /* try to find next elem in the same bucket */
+ if (prev_elem) {
+ /* no update/deletion on this bucket, prev_elem should be still valid
+ * and we won't skip elements.
+ */
+ n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node));
+ elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node);
+ if (elem)
+ return elem;
+
+ /* not found, unlock and go to the next bucket */
+ b = &htab->buckets[bucket_id++];
+ htab_unlock_bucket(htab, b, flags);
+ skip_elems = 0;
+ }
+
+ for (i = bucket_id; i < htab->n_buckets; i++) {
+ b = &htab->buckets[i];
+ flags = htab_lock_bucket(htab, b);
+
+ count = 0;
+ head = &b->head;
+ hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
+ if (count >= skip_elems) {
+ info->flags = flags;
+ info->bucket_id = i;
+ info->skip_elems = count;
+ return elem;
+ }
+ count++;
+ }
+
+ htab_unlock_bucket(htab, b, flags);
+ skip_elems = 0;
+ }
+
+ info->bucket_id = i;
+ info->skip_elems = 0;
+ return NULL;
+}
+
+static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct bpf_iter_seq_hash_map_info *info = seq->private;
+ struct htab_elem *elem;
+
+ elem = bpf_hash_map_seq_find_next(info, NULL);
+ if (!elem)
+ return NULL;
+
+ if (*pos == 0)
+ ++*pos;
+ return elem;
+}
+
+static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_iter_seq_hash_map_info *info = seq->private;
+
+ ++*pos;
+ ++info->skip_elems;
+ return bpf_hash_map_seq_find_next(info, v);
+}
+
+static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
+{
+ struct bpf_iter_seq_hash_map_info *info = seq->private;
+ u32 roundup_key_size, roundup_value_size;
+ struct bpf_iter__bpf_map_elem ctx = {};
+ struct bpf_map *map = info->map;
+ struct bpf_iter_meta meta;
+ int ret = 0, off = 0, cpu;
+ struct bpf_prog *prog;
+ void __percpu *pptr;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, elem == NULL);
+ if (prog) {
+ ctx.meta = &meta;
+ ctx.map = info->map;
+ if (elem) {
+ roundup_key_size = round_up(map->key_size, 8);
+ ctx.key = elem->key;
+ if (!info->percpu_value_buf) {
+ ctx.value = elem->key + roundup_key_size;
+ } else {
+ roundup_value_size = round_up(map->value_size, 8);
+ pptr = htab_elem_get_ptr(elem, map->key_size);
+ for_each_possible_cpu(cpu) {
+ bpf_long_memcpy(info->percpu_value_buf + off,
+ per_cpu_ptr(pptr, cpu),
+ roundup_value_size);
+ off += roundup_value_size;
+ }
+ ctx.value = info->percpu_value_buf;
+ }
+ }
+ ret = bpf_iter_run_prog(prog, &ctx);
+ }
+
+ return ret;
+}
+
+static int bpf_hash_map_seq_show(struct seq_file *seq, void *v)
+{
+ return __bpf_hash_map_seq_show(seq, v);
+}
+
+static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_seq_hash_map_info *info = seq->private;
+
+ if (!v)
+ (void)__bpf_hash_map_seq_show(seq, NULL);
+ else
+ htab_unlock_bucket(info->htab,
+ &info->htab->buckets[info->bucket_id],
+ info->flags);
+}
+
+static int bpf_iter_init_hash_map(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+{
+ struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
+ struct bpf_map *map = aux->map;
+ void *value_buf;
+ u32 buf_size;
+
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+ buf_size = round_up(map->value_size, 8) * num_possible_cpus();
+ value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
+ if (!value_buf)
+ return -ENOMEM;
+
+ seq_info->percpu_value_buf = value_buf;
+ }
+
+ seq_info->map = map;
+ seq_info->htab = container_of(map, struct bpf_htab, map);
+ return 0;
+}
+
+static void bpf_iter_fini_hash_map(void *priv_data)
+{
+ struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
+
+ kfree(seq_info->percpu_value_buf);
+}
+
+static const struct seq_operations bpf_hash_map_seq_ops = {
+ .start = bpf_hash_map_seq_start,
+ .next = bpf_hash_map_seq_next,
+ .stop = bpf_hash_map_seq_stop,
+ .show = bpf_hash_map_seq_show,
+};
+
+static const struct bpf_iter_seq_info iter_seq_info = {
+ .seq_ops = &bpf_hash_map_seq_ops,
+ .init_seq_private = bpf_iter_init_hash_map,
+ .fini_seq_private = bpf_iter_fini_hash_map,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
+};
+
static int htab_map_btf_id;
const struct bpf_map_ops htab_map_ops = {
.map_alloc_check = htab_map_alloc_check,
@@ -1632,6 +1822,7 @@ const struct bpf_map_ops htab_map_ops = {
BATCH_OPS(htab),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
};
static int htab_lru_map_btf_id;
@@ -1649,6 +1840,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
BATCH_OPS(htab_lru),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_lru_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
};
/* Called from eBPF program */
@@ -1766,6 +1958,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
BATCH_OPS(htab_percpu),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_percpu_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
};
static int htab_lru_percpu_map_btf_id;
@@ -1781,6 +1974,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
BATCH_OPS(htab_lru_percpu),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_lru_percpu_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
};
static int fd_htab_map_alloc_check(union bpf_attr *attr)
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 51bd5a8cb01b..571bb351ed3b 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -13,6 +13,8 @@ DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STO
#ifdef CONFIG_CGROUP_BPF
+#include "../cgroup/cgroup-internal.h"
+
#define LOCAL_STORAGE_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
@@ -20,7 +22,6 @@ struct bpf_cgroup_storage_map {
struct bpf_map map;
spinlock_t lock;
- struct bpf_prog_aux *aux;
struct rb_root root;
struct list_head list;
};
@@ -30,24 +31,41 @@ static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
return container_of(map, struct bpf_cgroup_storage_map, map);
}
-static int bpf_cgroup_storage_key_cmp(
- const struct bpf_cgroup_storage_key *key1,
- const struct bpf_cgroup_storage_key *key2)
+static bool attach_type_isolated(const struct bpf_map *map)
{
- if (key1->cgroup_inode_id < key2->cgroup_inode_id)
- return -1;
- else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
- return 1;
- else if (key1->attach_type < key2->attach_type)
- return -1;
- else if (key1->attach_type > key2->attach_type)
- return 1;
+ return map->key_size == sizeof(struct bpf_cgroup_storage_key);
+}
+
+static int bpf_cgroup_storage_key_cmp(const struct bpf_cgroup_storage_map *map,
+ const void *_key1, const void *_key2)
+{
+ if (attach_type_isolated(&map->map)) {
+ const struct bpf_cgroup_storage_key *key1 = _key1;
+ const struct bpf_cgroup_storage_key *key2 = _key2;
+
+ if (key1->cgroup_inode_id < key2->cgroup_inode_id)
+ return -1;
+ else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
+ return 1;
+ else if (key1->attach_type < key2->attach_type)
+ return -1;
+ else if (key1->attach_type > key2->attach_type)
+ return 1;
+ } else {
+ const __u64 *cgroup_inode_id1 = _key1;
+ const __u64 *cgroup_inode_id2 = _key2;
+
+ if (*cgroup_inode_id1 < *cgroup_inode_id2)
+ return -1;
+ else if (*cgroup_inode_id1 > *cgroup_inode_id2)
+ return 1;
+ }
return 0;
}
-static struct bpf_cgroup_storage *cgroup_storage_lookup(
- struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
- bool locked)
+struct bpf_cgroup_storage *
+cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
+ void *key, bool locked)
{
struct rb_root *root = &map->root;
struct rb_node *node;
@@ -61,7 +79,7 @@ static struct bpf_cgroup_storage *cgroup_storage_lookup(
storage = container_of(node, struct bpf_cgroup_storage, node);
- switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
+ switch (bpf_cgroup_storage_key_cmp(map, key, &storage->key)) {
case -1:
node = node->rb_left;
break;
@@ -93,7 +111,7 @@ static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
this = container_of(*new, struct bpf_cgroup_storage, node);
parent = *new;
- switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
+ switch (bpf_cgroup_storage_key_cmp(map, &storage->key, &this->key)) {
case -1:
new = &((*new)->rb_left);
break;
@@ -111,10 +129,9 @@ static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
return 0;
}
-static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
+static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *key)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
- struct bpf_cgroup_storage_key *key = _key;
struct bpf_cgroup_storage *storage;
storage = cgroup_storage_lookup(map, key, false);
@@ -124,17 +141,13 @@ static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
return &READ_ONCE(storage->buf)->data[0];
}
-static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
+static int cgroup_storage_update_elem(struct bpf_map *map, void *key,
void *value, u64 flags)
{
- struct bpf_cgroup_storage_key *key = _key;
struct bpf_cgroup_storage *storage;
struct bpf_storage_buffer *new;
- if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST)))
- return -EINVAL;
-
- if (unlikely(flags & BPF_NOEXIST))
+ if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST)))
return -EINVAL;
if (unlikely((flags & BPF_F_LOCK) &&
@@ -167,11 +180,10 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
return 0;
}
-int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
+int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *key,
void *value)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
- struct bpf_cgroup_storage_key *key = _key;
struct bpf_cgroup_storage *storage;
int cpu, off = 0;
u32 size;
@@ -197,11 +209,10 @@ int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
return 0;
}
-int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
+int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *key,
void *value, u64 map_flags)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
- struct bpf_cgroup_storage_key *key = _key;
struct bpf_cgroup_storage *storage;
int cpu, off = 0;
u32 size;
@@ -232,12 +243,10 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
return 0;
}
-static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
+static int cgroup_storage_get_next_key(struct bpf_map *_map, void *key,
void *_next_key)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
- struct bpf_cgroup_storage_key *key = _key;
- struct bpf_cgroup_storage_key *next = _next_key;
struct bpf_cgroup_storage *storage;
spin_lock_bh(&map->lock);
@@ -250,17 +259,23 @@ static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
if (!storage)
goto enoent;
- storage = list_next_entry(storage, list);
+ storage = list_next_entry(storage, list_map);
if (!storage)
goto enoent;
} else {
storage = list_first_entry(&map->list,
- struct bpf_cgroup_storage, list);
+ struct bpf_cgroup_storage, list_map);
}
spin_unlock_bh(&map->lock);
- next->attach_type = storage->key.attach_type;
- next->cgroup_inode_id = storage->key.cgroup_inode_id;
+
+ if (attach_type_isolated(&map->map)) {
+ struct bpf_cgroup_storage_key *next = _next_key;
+ *next = storage->key;
+ } else {
+ __u64 *next = _next_key;
+ *next = storage->key.cgroup_inode_id;
+ }
return 0;
enoent:
@@ -275,7 +290,8 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
struct bpf_map_memory mem;
int ret;
- if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
+ if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) &&
+ attr->key_size != sizeof(__u64))
return ERR_PTR(-EINVAL);
if (attr->value_size == 0)
@@ -318,6 +334,17 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
static void cgroup_storage_map_free(struct bpf_map *_map)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+ struct list_head *storages = &map->list;
+ struct bpf_cgroup_storage *storage, *stmp;
+
+ mutex_lock(&cgroup_mutex);
+
+ list_for_each_entry_safe(storage, stmp, storages, list_map) {
+ bpf_cgroup_storage_unlink(storage);
+ bpf_cgroup_storage_free(storage);
+ }
+
+ mutex_unlock(&cgroup_mutex);
WARN_ON(!RB_EMPTY_ROOT(&map->root));
WARN_ON(!list_empty(&map->list));
@@ -335,49 +362,63 @@ static int cgroup_storage_check_btf(const struct bpf_map *map,
const struct btf_type *key_type,
const struct btf_type *value_type)
{
- struct btf_member *m;
- u32 offset, size;
-
- /* Key is expected to be of struct bpf_cgroup_storage_key type,
- * which is:
- * struct bpf_cgroup_storage_key {
- * __u64 cgroup_inode_id;
- * __u32 attach_type;
- * };
- */
+ if (attach_type_isolated(map)) {
+ struct btf_member *m;
+ u32 offset, size;
+
+ /* Key is expected to be of struct bpf_cgroup_storage_key type,
+ * which is:
+ * struct bpf_cgroup_storage_key {
+ * __u64 cgroup_inode_id;
+ * __u32 attach_type;
+ * };
+ */
+
+ /*
+ * Key_type must be a structure with two fields.
+ */
+ if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ||
+ BTF_INFO_VLEN(key_type->info) != 2)
+ return -EINVAL;
+
+ /*
+ * The first field must be a 64 bit integer at 0 offset.
+ */
+ m = (struct btf_member *)(key_type + 1);
+ size = sizeof_field(struct bpf_cgroup_storage_key, cgroup_inode_id);
+ if (!btf_member_is_reg_int(btf, key_type, m, 0, size))
+ return -EINVAL;
+
+ /*
+ * The second field must be a 32 bit integer at 64 bit offset.
+ */
+ m++;
+ offset = offsetof(struct bpf_cgroup_storage_key, attach_type);
+ size = sizeof_field(struct bpf_cgroup_storage_key, attach_type);
+ if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
+ return -EINVAL;
+ } else {
+ u32 int_data;
- /*
- * Key_type must be a structure with two fields.
- */
- if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ||
- BTF_INFO_VLEN(key_type->info) != 2)
- return -EINVAL;
+ /*
+ * Key is expected to be u64, which stores the cgroup_inode_id
+ */
- /*
- * The first field must be a 64 bit integer at 0 offset.
- */
- m = (struct btf_member *)(key_type + 1);
- size = sizeof_field(struct bpf_cgroup_storage_key, cgroup_inode_id);
- if (!btf_member_is_reg_int(btf, key_type, m, 0, size))
- return -EINVAL;
+ if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
+ return -EINVAL;
- /*
- * The second field must be a 32 bit integer at 64 bit offset.
- */
- m++;
- offset = offsetof(struct bpf_cgroup_storage_key, attach_type);
- size = sizeof_field(struct bpf_cgroup_storage_key, attach_type);
- if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
- return -EINVAL;
+ int_data = *(u32 *)(key_type + 1);
+ if (BTF_INT_BITS(int_data) != 64 || BTF_INT_OFFSET(int_data))
+ return -EINVAL;
+ }
return 0;
}
-static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *_key,
+static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
struct seq_file *m)
{
enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
- struct bpf_cgroup_storage_key *key = _key;
struct bpf_cgroup_storage *storage;
int cpu;
@@ -426,38 +467,13 @@ const struct bpf_map_ops cgroup_storage_map_ops = {
int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *_map)
{
enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
- struct bpf_cgroup_storage_map *map = map_to_storage(_map);
- int ret = -EBUSY;
-
- spin_lock_bh(&map->lock);
- if (map->aux && map->aux != aux)
- goto unlock;
if (aux->cgroup_storage[stype] &&
aux->cgroup_storage[stype] != _map)
- goto unlock;
+ return -EBUSY;
- map->aux = aux;
aux->cgroup_storage[stype] = _map;
- ret = 0;
-unlock:
- spin_unlock_bh(&map->lock);
-
- return ret;
-}
-
-void bpf_cgroup_storage_release(struct bpf_prog_aux *aux, struct bpf_map *_map)
-{
- enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
- struct bpf_cgroup_storage_map *map = map_to_storage(_map);
-
- spin_lock_bh(&map->lock);
- if (map->aux == aux) {
- WARN_ON(aux->cgroup_storage[stype] != _map);
- map->aux = NULL;
- aux->cgroup_storage[stype] = NULL;
- }
- spin_unlock_bh(&map->lock);
+ return 0;
}
static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
@@ -578,7 +594,8 @@ void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
spin_lock_bh(&map->lock);
WARN_ON(cgroup_storage_insert(map, storage));
- list_add(&storage->list, &map->list);
+ list_add(&storage->list_map, &map->list);
+ list_add(&storage->list_cg, &cgroup->bpf.storages);
spin_unlock_bh(&map->lock);
}
@@ -596,7 +613,8 @@ void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
root = &map->root;
rb_erase(&storage->node, root);
- list_del(&storage->list);
+ list_del(&storage->list_map);
+ list_del(&storage->list_cg);
spin_unlock_bh(&map->lock);
}
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index 8a7af11b411f..fbe1f557cb88 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -7,7 +7,7 @@
#include <linux/btf_ids.h>
struct bpf_iter_seq_map_info {
- u32 mid;
+ u32 map_id;
};
static void *bpf_map_seq_start(struct seq_file *seq, loff_t *pos)
@@ -15,27 +15,23 @@ static void *bpf_map_seq_start(struct seq_file *seq, loff_t *pos)
struct bpf_iter_seq_map_info *info = seq->private;
struct bpf_map *map;
- map = bpf_map_get_curr_or_next(&info->mid);
+ map = bpf_map_get_curr_or_next(&info->map_id);
if (!map)
return NULL;
- ++*pos;
+ if (*pos == 0)
+ ++*pos;
return map;
}
static void *bpf_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct bpf_iter_seq_map_info *info = seq->private;
- struct bpf_map *map;
++*pos;
- ++info->mid;
+ ++info->map_id;
bpf_map_put((struct bpf_map *)v);
- map = bpf_map_get_curr_or_next(&info->mid);
- if (!map)
- return NULL;
-
- return map;
+ return bpf_map_get_curr_or_next(&info->map_id);
}
struct bpf_iter__bpf_map {
@@ -85,23 +81,79 @@ static const struct seq_operations bpf_map_seq_ops = {
BTF_ID_LIST(btf_bpf_map_id)
BTF_ID(struct, bpf_map)
-static struct bpf_iter_reg bpf_map_reg_info = {
- .target = "bpf_map",
+static const struct bpf_iter_seq_info bpf_map_seq_info = {
.seq_ops = &bpf_map_seq_ops,
.init_seq_private = NULL,
.fini_seq_private = NULL,
.seq_priv_size = sizeof(struct bpf_iter_seq_map_info),
+};
+
+static struct bpf_iter_reg bpf_map_reg_info = {
+ .target = "bpf_map",
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__bpf_map, map),
PTR_TO_BTF_ID_OR_NULL },
},
+ .seq_info = &bpf_map_seq_info,
+};
+
+static int bpf_iter_check_map(struct bpf_prog *prog,
+ struct bpf_iter_aux_info *aux)
+{
+ u32 key_acc_size, value_acc_size, key_size, value_size;
+ struct bpf_map *map = aux->map;
+ bool is_percpu = false;
+
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+ is_percpu = true;
+ else if (map->map_type != BPF_MAP_TYPE_HASH &&
+ map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+ map->map_type != BPF_MAP_TYPE_ARRAY)
+ return -EINVAL;
+
+ key_acc_size = prog->aux->max_rdonly_access;
+ value_acc_size = prog->aux->max_rdwr_access;
+ key_size = map->key_size;
+ if (!is_percpu)
+ value_size = map->value_size;
+ else
+ value_size = round_up(map->value_size, 8) * num_possible_cpus();
+
+ if (key_acc_size > key_size || value_acc_size > value_size)
+ return -EACCES;
+
+ return 0;
+}
+
+DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
+ struct bpf_map *map, void *key, void *value)
+
+static const struct bpf_iter_reg bpf_map_elem_reg_info = {
+ .target = "bpf_map_elem",
+ .check_target = bpf_iter_check_map,
+ .req_linfo = BPF_ITER_LINK_MAP_FD,
+ .ctx_arg_info_size = 2,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__bpf_map_elem, key),
+ PTR_TO_RDONLY_BUF_OR_NULL },
+ { offsetof(struct bpf_iter__bpf_map_elem, value),
+ PTR_TO_RDWR_BUF_OR_NULL },
+ },
};
static int __init bpf_map_iter_init(void)
{
+ int ret;
+
bpf_map_reg_info.ctx_arg_info[0].btf_id = *btf_bpf_map_id;
- return bpf_iter_reg_target(&bpf_map_reg_info);
+ ret = bpf_iter_reg_target(&bpf_map_reg_info);
+ if (ret)
+ return ret;
+
+ return bpf_iter_reg_target(&bpf_map_elem_reg_info);
}
late_initcall(bpf_map_iter_init);
diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c
index 71405edd667c..542f275bf252 100644
--- a/kernel/bpf/net_namespace.c
+++ b/kernel/bpf/net_namespace.c
@@ -142,9 +142,16 @@ static void bpf_netns_link_release(struct bpf_link *link)
bpf_prog_array_free(old_array);
out_unlock:
+ net_link->net = NULL;
mutex_unlock(&netns_bpf_mutex);
}
+static int bpf_netns_link_detach(struct bpf_link *link)
+{
+ bpf_netns_link_release(link);
+ return 0;
+}
+
static void bpf_netns_link_dealloc(struct bpf_link *link)
{
struct bpf_netns_link *net_link =
@@ -228,6 +235,7 @@ static void bpf_netns_link_show_fdinfo(const struct bpf_link *link,
static const struct bpf_link_ops bpf_netns_link_ops = {
.release = bpf_netns_link_release,
.dealloc = bpf_netns_link_dealloc,
+ .detach = bpf_netns_link_detach,
.update_prog = bpf_netns_link_update_prog,
.fill_link_info = bpf_netns_link_fill_info,
.show_fdinfo = bpf_netns_link_show_fdinfo,
diff --git a/kernel/bpf/prog_iter.c b/kernel/bpf/prog_iter.c
new file mode 100644
index 000000000000..53a73c841c13
--- /dev/null
+++ b/kernel/bpf/prog_iter.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
+#include <linux/fs.h>
+#include <linux/filter.h>
+#include <linux/kernel.h>
+#include <linux/btf_ids.h>
+
+struct bpf_iter_seq_prog_info {
+ u32 prog_id;
+};
+
+static void *bpf_prog_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct bpf_iter_seq_prog_info *info = seq->private;
+ struct bpf_prog *prog;
+
+ prog = bpf_prog_get_curr_or_next(&info->prog_id);
+ if (!prog)
+ return NULL;
+
+ if (*pos == 0)
+ ++*pos;
+ return prog;
+}
+
+static void *bpf_prog_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_iter_seq_prog_info *info = seq->private;
+
+ ++*pos;
+ ++info->prog_id;
+ bpf_prog_put((struct bpf_prog *)v);
+ return bpf_prog_get_curr_or_next(&info->prog_id);
+}
+
+struct bpf_iter__bpf_prog {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct bpf_prog *, prog);
+};
+
+DEFINE_BPF_ITER_FUNC(bpf_prog, struct bpf_iter_meta *meta, struct bpf_prog *prog)
+
+static int __bpf_prog_seq_show(struct seq_file *seq, void *v, bool in_stop)
+{
+ struct bpf_iter__bpf_prog ctx;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ int ret = 0;
+
+ ctx.meta = &meta;
+ ctx.prog = v;
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, in_stop);
+ if (prog)
+ ret = bpf_iter_run_prog(prog, &ctx);
+
+ return ret;
+}
+
+static int bpf_prog_seq_show(struct seq_file *seq, void *v)
+{
+ return __bpf_prog_seq_show(seq, v, false);
+}
+
+static void bpf_prog_seq_stop(struct seq_file *seq, void *v)
+{
+ if (!v)
+ (void)__bpf_prog_seq_show(seq, v, true);
+ else
+ bpf_prog_put((struct bpf_prog *)v);
+}
+
+static const struct seq_operations bpf_prog_seq_ops = {
+ .start = bpf_prog_seq_start,
+ .next = bpf_prog_seq_next,
+ .stop = bpf_prog_seq_stop,
+ .show = bpf_prog_seq_show,
+};
+
+BTF_ID_LIST(btf_bpf_prog_id)
+BTF_ID(struct, bpf_prog)
+
+static const struct bpf_iter_seq_info bpf_prog_seq_info = {
+ .seq_ops = &bpf_prog_seq_ops,
+ .init_seq_private = NULL,
+ .fini_seq_private = NULL,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_prog_info),
+};
+
+static struct bpf_iter_reg bpf_prog_reg_info = {
+ .target = "bpf_prog",
+ .ctx_arg_info_size = 1,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__bpf_prog, prog),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+ .seq_info = &bpf_prog_seq_info,
+};
+
+static int __init bpf_prog_iter_init(void)
+{
+ bpf_prog_reg_info.ctx_arg_info[0].btf_id = *btf_bpf_prog_id;
+ return bpf_iter_reg_target(&bpf_prog_reg_info);
+}
+
+late_initcall(bpf_prog_iter_init);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 48d8e739975f..4fd830a62be2 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
+#include <linux/kernel.h>
#include <linux/stacktrace.h>
#include <linux/perf_event.h>
#include <linux/elf.h>
@@ -387,11 +388,10 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
#endif
}
-BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
- u64, flags)
+static long __bpf_get_stackid(struct bpf_map *map,
+ struct perf_callchain_entry *trace, u64 flags)
{
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
- struct perf_callchain_entry *trace;
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
u32 max_depth = map->value_size / stack_map_data_size(map);
/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
@@ -399,21 +399,9 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
u32 hash, id, trace_nr, trace_len;
bool user = flags & BPF_F_USER_STACK;
- bool kernel = !user;
u64 *ips;
bool hash_matches;
- if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
- BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
- return -EINVAL;
-
- trace = get_perf_callchain(regs, init_nr, kernel, user,
- sysctl_perf_event_max_stack, false, false);
-
- if (unlikely(!trace))
- /* couldn't fetch the stack trace */
- return -EFAULT;
-
/* get_perf_callchain() guarantees that trace->nr >= init_nr
* and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
*/
@@ -478,6 +466,30 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
return id;
}
+BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
+ u64, flags)
+{
+ u32 max_depth = map->value_size / stack_map_data_size(map);
+ /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
+ u32 init_nr = sysctl_perf_event_max_stack - max_depth;
+ bool user = flags & BPF_F_USER_STACK;
+ struct perf_callchain_entry *trace;
+ bool kernel = !user;
+
+ if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+ BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
+ return -EINVAL;
+
+ trace = get_perf_callchain(regs, init_nr, kernel, user,
+ sysctl_perf_event_max_stack, false, false);
+
+ if (unlikely(!trace))
+ /* couldn't fetch the stack trace */
+ return -EFAULT;
+
+ return __bpf_get_stackid(map, trace, flags);
+}
+
const struct bpf_func_proto bpf_get_stackid_proto = {
.func = bpf_get_stackid,
.gpl_only = true,
@@ -487,7 +499,77 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
.arg3_type = ARG_ANYTHING,
};
+static __u64 count_kernel_ip(struct perf_callchain_entry *trace)
+{
+ __u64 nr_kernel = 0;
+
+ while (nr_kernel < trace->nr) {
+ if (trace->ip[nr_kernel] == PERF_CONTEXT_USER)
+ break;
+ nr_kernel++;
+ }
+ return nr_kernel;
+}
+
+BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
+ struct bpf_map *, map, u64, flags)
+{
+ struct perf_event *event = ctx->event;
+ struct perf_callchain_entry *trace;
+ bool kernel, user;
+ __u64 nr_kernel;
+ int ret;
+
+ /* perf_sample_data doesn't have callchain, use bpf_get_stackid */
+ if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
+ return bpf_get_stackid((unsigned long)(ctx->regs),
+ (unsigned long) map, flags, 0, 0);
+
+ if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+ BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
+ return -EINVAL;
+
+ user = flags & BPF_F_USER_STACK;
+ kernel = !user;
+
+ trace = ctx->data->callchain;
+ if (unlikely(!trace))
+ return -EFAULT;
+
+ nr_kernel = count_kernel_ip(trace);
+
+ if (kernel) {
+ __u64 nr = trace->nr;
+
+ trace->nr = nr_kernel;
+ ret = __bpf_get_stackid(map, trace, flags);
+
+ /* restore nr */
+ trace->nr = nr;
+ } else { /* user */
+ u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
+
+ skip += nr_kernel;
+ if (skip > BPF_F_SKIP_FIELD_MASK)
+ return -EFAULT;
+
+ flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
+ ret = __bpf_get_stackid(map, trace, flags);
+ }
+ return ret;
+}
+
+const struct bpf_func_proto bpf_get_stackid_proto_pe = {
+ .func = bpf_get_stackid_pe,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+};
+
static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
+ struct perf_callchain_entry *trace_in,
void *buf, u32 size, u64 flags)
{
u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
@@ -520,7 +602,9 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
else
init_nr = sysctl_perf_event_max_stack - num_elem;
- if (kernel && task)
+ if (trace_in)
+ trace = trace_in;
+ else if (kernel && task)
trace = get_callchain_entry_for_task(task, init_nr);
else
trace = get_perf_callchain(regs, init_nr, kernel, user,
@@ -556,7 +640,7 @@ clear:
BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
u64, flags)
{
- return __bpf_get_stack(regs, NULL, buf, size, flags);
+ return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
}
const struct bpf_func_proto bpf_get_stack_proto = {
@@ -574,7 +658,7 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
{
struct pt_regs *regs = task_pt_regs(task);
- return __bpf_get_stack(regs, task, buf, size, flags);
+ return __bpf_get_stack(regs, task, NULL, buf, size, flags);
}
BTF_ID_LIST(bpf_get_task_stack_btf_ids)
@@ -591,6 +675,69 @@ const struct bpf_func_proto bpf_get_task_stack_proto = {
.btf_id = bpf_get_task_stack_btf_ids,
};
+BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
+ void *, buf, u32, size, u64, flags)
+{
+ struct pt_regs *regs = (struct pt_regs *)(ctx->regs);
+ struct perf_event *event = ctx->event;
+ struct perf_callchain_entry *trace;
+ bool kernel, user;
+ int err = -EINVAL;
+ __u64 nr_kernel;
+
+ if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
+ return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
+
+ if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+ BPF_F_USER_BUILD_ID)))
+ goto clear;
+
+ user = flags & BPF_F_USER_STACK;
+ kernel = !user;
+
+ err = -EFAULT;
+ trace = ctx->data->callchain;
+ if (unlikely(!trace))
+ goto clear;
+
+ nr_kernel = count_kernel_ip(trace);
+
+ if (kernel) {
+ __u64 nr = trace->nr;
+
+ trace->nr = nr_kernel;
+ err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);
+
+ /* restore nr */
+ trace->nr = nr;
+ } else { /* user */
+ u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
+
+ skip += nr_kernel;
+ if (skip > BPF_F_SKIP_FIELD_MASK)
+ goto clear;
+
+ flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
+ err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);
+ }
+ return err;
+
+clear:
+ memset(buf, 0, size);
+ return err;
+
+}
+
+const struct bpf_func_proto bpf_get_stack_proto_pe = {
+ .func = bpf_get_stack_pe,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg4_type = ARG_ANYTHING,
+};
+
/* Called from eBPF program */
static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
{
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d07417d17712..2f343ce15747 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2824,6 +2824,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
return BPF_PROG_TYPE_TRACING;
case BPF_SK_LOOKUP:
return BPF_PROG_TYPE_SK_LOOKUP;
+ case BPF_XDP:
+ return BPF_PROG_TYPE_XDP;
default:
return BPF_PROG_TYPE_UNSPEC;
}
@@ -3044,6 +3046,25 @@ again:
return map;
}
+struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id)
+{
+ struct bpf_prog *prog;
+
+ spin_lock_bh(&prog_idr_lock);
+again:
+ prog = idr_get_next(&prog_idr, id);
+ if (prog) {
+ prog = bpf_prog_inc_not_zero(prog);
+ if (IS_ERR(prog)) {
+ (*id)++;
+ goto again;
+ }
+ }
+ spin_unlock_bh(&prog_idr_lock);
+
+ return prog;
+}
+
#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
struct bpf_prog *bpf_prog_by_id(u32 id)
@@ -3902,6 +3923,11 @@ static int link_create(union bpf_attr *attr)
case BPF_PROG_TYPE_SK_LOOKUP:
ret = netns_bpf_link_create(attr, prog);
break;
+#ifdef CONFIG_NET
+ case BPF_PROG_TYPE_XDP:
+ ret = bpf_xdp_link_attach(attr, prog);
+ break;
+#endif
default:
ret = -EINVAL;
}
@@ -3965,6 +3991,29 @@ out_put_link:
return ret;
}
+#define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd
+
+static int link_detach(union bpf_attr *attr)
+{
+ struct bpf_link *link;
+ int ret;
+
+ if (CHECK_ATTR(BPF_LINK_DETACH))
+ return -EINVAL;
+
+ link = bpf_link_get_from_fd(attr->link_detach.link_fd);
+ if (IS_ERR(link))
+ return PTR_ERR(link);
+
+ if (link->ops->detach)
+ ret = link->ops->detach(link);
+ else
+ ret = -EOPNOTSUPP;
+
+ bpf_link_put(link);
+ return ret;
+}
+
static int bpf_link_inc_not_zero(struct bpf_link *link)
{
return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? 0 : -ENOENT;
@@ -4214,6 +4263,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_ITER_CREATE:
err = bpf_iter_create(&attr);
break;
+ case BPF_LINK_DETACH:
+ err = link_detach(&attr);
+ break;
default:
err = -EINVAL;
break;
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 2feecf095609..232df29793e9 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -51,7 +51,8 @@ static void *task_seq_start(struct seq_file *seq, loff_t *pos)
if (!task)
return NULL;
- ++*pos;
+ if (*pos == 0)
+ ++*pos;
return task;
}
@@ -210,7 +211,8 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
return NULL;
}
- ++*pos;
+ if (*pos == 0)
+ ++*pos;
info->task = task;
info->files = files;
@@ -291,7 +293,7 @@ static void task_file_seq_stop(struct seq_file *seq, void *v)
}
}
-static int init_seq_pidns(void *priv_data)
+static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
{
struct bpf_iter_seq_task_common *common = priv_data;
@@ -317,25 +319,32 @@ BTF_ID_LIST(btf_task_file_ids)
BTF_ID(struct, task_struct)
BTF_ID(struct, file)
-static struct bpf_iter_reg task_reg_info = {
- .target = "task",
+static const struct bpf_iter_seq_info task_seq_info = {
.seq_ops = &task_seq_ops,
.init_seq_private = init_seq_pidns,
.fini_seq_private = fini_seq_pidns,
.seq_priv_size = sizeof(struct bpf_iter_seq_task_info),
+};
+
+static struct bpf_iter_reg task_reg_info = {
+ .target = "task",
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__task, task),
PTR_TO_BTF_ID_OR_NULL },
},
+ .seq_info = &task_seq_info,
};
-static struct bpf_iter_reg task_file_reg_info = {
- .target = "task_file",
+static const struct bpf_iter_seq_info task_file_seq_info = {
.seq_ops = &task_file_seq_ops,
.init_seq_private = init_seq_pidns,
.fini_seq_private = fini_seq_pidns,
.seq_priv_size = sizeof(struct bpf_iter_seq_task_file_info),
+};
+
+static struct bpf_iter_reg task_file_reg_info = {
+ .target = "task_file",
.ctx_arg_info_size = 2,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__task_file, task),
@@ -343,6 +352,7 @@ static struct bpf_iter_reg task_file_reg_info = {
{ offsetof(struct bpf_iter__task_file, file),
PTR_TO_BTF_ID_OR_NULL },
},
+ .seq_info = &task_file_seq_info,
};
static int __init task_iter_init(void)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9a6703bc3f36..b6ccfce3bf4c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -409,7 +409,9 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
type == PTR_TO_SOCK_COMMON_OR_NULL ||
type == PTR_TO_TCP_SOCK_OR_NULL ||
type == PTR_TO_BTF_ID_OR_NULL ||
- type == PTR_TO_MEM_OR_NULL;
+ type == PTR_TO_MEM_OR_NULL ||
+ type == PTR_TO_RDONLY_BUF_OR_NULL ||
+ type == PTR_TO_RDWR_BUF_OR_NULL;
}
static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
@@ -503,6 +505,10 @@ static const char * const reg_type_str[] = {
[PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
[PTR_TO_MEM] = "mem",
[PTR_TO_MEM_OR_NULL] = "mem_or_null",
+ [PTR_TO_RDONLY_BUF] = "rdonly_buf",
+ [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
+ [PTR_TO_RDWR_BUF] = "rdwr_buf",
+ [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
};
static char slot_type_char[] = {
@@ -2173,6 +2179,10 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_XDP_SOCK:
case PTR_TO_BTF_ID:
case PTR_TO_BTF_ID_OR_NULL:
+ case PTR_TO_RDONLY_BUF:
+ case PTR_TO_RDONLY_BUF_OR_NULL:
+ case PTR_TO_RDWR_BUF:
+ case PTR_TO_RDWR_BUF_OR_NULL:
return true;
default:
return false;
@@ -3052,14 +3062,15 @@ int check_ctx_reg(struct bpf_verifier_env *env,
return 0;
}
-static int check_tp_buffer_access(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg,
- int regno, int off, int size)
+static int __check_buffer_access(struct bpf_verifier_env *env,
+ const char *buf_info,
+ const struct bpf_reg_state *reg,
+ int regno, int off, int size)
{
if (off < 0) {
verbose(env,
- "R%d invalid tracepoint buffer access: off=%d, size=%d",
- regno, off, size);
+ "R%d invalid %s buffer access: off=%d, size=%d\n",
+ regno, buf_info, off, size);
return -EACCES;
}
if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
@@ -3067,16 +3078,49 @@ static int check_tp_buffer_access(struct bpf_verifier_env *env,
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose(env,
- "R%d invalid variable buffer offset: off=%d, var_off=%s",
+ "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
regno, off, tn_buf);
return -EACCES;
}
+
+ return 0;
+}
+
+static int check_tp_buffer_access(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg,
+ int regno, int off, int size)
+{
+ int err;
+
+ err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
+ if (err)
+ return err;
+
if (off + size > env->prog->aux->max_tp_access)
env->prog->aux->max_tp_access = off + size;
return 0;
}
+static int check_buffer_access(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg,
+ int regno, int off, int size,
+ bool zero_size_allowed,
+ const char *buf_info,
+ u32 *max_access)
+{
+ int err;
+
+ err = __check_buffer_access(env, buf_info, reg, regno, off, size);
+ if (err)
+ return err;
+
+ if (off + size > *max_access)
+ *max_access = off + size;
+
+ return 0;
+}
+
/* BPF architecture zero extends alu32 ops into 64-bit registesr */
static void zext_32_to_64(struct bpf_reg_state *reg)
{
@@ -3427,6 +3471,23 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
} else if (reg->type == CONST_PTR_TO_MAP) {
err = check_ptr_to_map_access(env, regs, regno, off, size, t,
value_regno);
+ } else if (reg->type == PTR_TO_RDONLY_BUF) {
+ if (t == BPF_WRITE) {
+ verbose(env, "R%d cannot write into %s\n",
+ regno, reg_type_str[reg->type]);
+ return -EACCES;
+ }
+ err = check_buffer_access(env, reg, regno, off, size, false,
+ "rdonly",
+ &env->prog->aux->max_rdonly_access);
+ if (!err && value_regno >= 0)
+ mark_reg_unknown(env, regs, value_regno);
+ } else if (reg->type == PTR_TO_RDWR_BUF) {
+ err = check_buffer_access(env, reg, regno, off, size, false,
+ "rdwr",
+ &env->prog->aux->max_rdwr_access);
+ if (!err && t == BPF_READ && value_regno >= 0)
+ mark_reg_unknown(env, regs, value_regno);
} else {
verbose(env, "R%d invalid mem access '%s'\n", regno,
reg_type_str[reg->type]);
@@ -3668,6 +3729,18 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
return check_mem_region_access(env, regno, reg->off,
access_size, reg->mem_size,
zero_size_allowed);
+ case PTR_TO_RDONLY_BUF:
+ if (meta && meta->raw_mode)
+ return -EACCES;
+ return check_buffer_access(env, reg, regno, reg->off,
+ access_size, zero_size_allowed,
+ "rdonly",
+ &env->prog->aux->max_rdonly_access);
+ case PTR_TO_RDWR_BUF:
+ return check_buffer_access(env, reg, regno, reg->off,
+ access_size, zero_size_allowed,
+ "rdwr",
+ &env->prog->aux->max_rdwr_access);
default: /* scalar_value|ptr_to_stack or invalid ptr */
return check_stack_boundary(env, regno, access_size,
zero_size_allowed, meta);
@@ -3933,6 +4006,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
else if (!type_is_pkt_pointer(type) &&
type != PTR_TO_MAP_VALUE &&
type != PTR_TO_MEM &&
+ type != PTR_TO_RDONLY_BUF &&
+ type != PTR_TO_RDWR_BUF &&
type != expected_type)
goto err_type;
meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
@@ -4887,6 +4962,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
env->prog->has_callchain_buf = true;
}
+ if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
+ env->prog->call_get_stack = true;
+
if (changes_data)
clear_all_pkt_pointers(env);
return 0;
@@ -6806,6 +6884,10 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
reg->type = PTR_TO_BTF_ID;
} else if (reg->type == PTR_TO_MEM_OR_NULL) {
reg->type = PTR_TO_MEM;
+ } else if (reg->type == PTR_TO_RDONLY_BUF_OR_NULL) {
+ reg->type = PTR_TO_RDONLY_BUF;
+ } else if (reg->type == PTR_TO_RDWR_BUF_OR_NULL) {
+ reg->type = PTR_TO_RDWR_BUF;
}
if (is_null) {
/* We don't need id and ref_obj_id from this point