summaryrefslogtreecommitdiff
path: root/kernel/bpf/syscall.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r--kernel/bpf/syscall.c120
1 files changed, 80 insertions, 40 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c420edbfb7c8..9794446bc8c6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -569,7 +569,24 @@ static void bpf_map_release_memcg(struct bpf_map *map)
}
#endif
-int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
+static bool can_alloc_pages(void)
+{
+ return preempt_count() == 0 && !irqs_disabled() &&
+ !IS_ENABLED(CONFIG_PREEMPT_RT);
+}
+
+static struct page *__bpf_alloc_page(int nid)
+{
+ if (!can_alloc_pages())
+ return try_alloc_pages(nid, 0);
+
+ return alloc_pages_node(nid,
+ GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT
+ | __GFP_NOWARN,
+ 0);
+}
+
+int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
unsigned long nr_pages, struct page **pages)
{
unsigned long i, j;
@@ -582,14 +599,14 @@ int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
old_memcg = set_active_memcg(memcg);
#endif
for (i = 0; i < nr_pages; i++) {
- pg = alloc_pages_node(nid, gfp | __GFP_ACCOUNT, 0);
+ pg = __bpf_alloc_page(nid);
if (pg) {
pages[i] = pg;
continue;
}
for (j = 0; j < i; j++)
- __free_page(pages[j]);
+ free_pages_nolock(pages[j], 0);
ret = -ENOMEM;
break;
}
@@ -648,6 +665,7 @@ void btf_record_free(struct btf_record *rec)
case BPF_RB_ROOT:
case BPF_RB_NODE:
case BPF_SPIN_LOCK:
+ case BPF_RES_SPIN_LOCK:
case BPF_TIMER:
case BPF_REFCOUNT:
case BPF_WORKQUEUE:
@@ -700,6 +718,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
case BPF_RB_ROOT:
case BPF_RB_NODE:
case BPF_SPIN_LOCK:
+ case BPF_RES_SPIN_LOCK:
case BPF_TIMER:
case BPF_REFCOUNT:
case BPF_WORKQUEUE:
@@ -777,6 +796,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
switch (fields[i].type) {
case BPF_SPIN_LOCK:
+ case BPF_RES_SPIN_LOCK:
break;
case BPF_TIMER:
bpf_timer_cancel_and_free(field_ptr);
@@ -1035,7 +1055,7 @@ static const struct vm_operations_struct bpf_map_default_vmops = {
static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct bpf_map *map = filp->private_data;
- int err;
+ int err = 0;
if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record))
return -ENOTSUPP;
@@ -1059,24 +1079,33 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
err = -EACCES;
goto out;
}
+ bpf_map_write_active_inc(map);
}
+out:
+ mutex_unlock(&map->freeze_mutex);
+ if (err)
+ return err;
/* set default open/close callbacks */
vma->vm_ops = &bpf_map_default_vmops;
vma->vm_private_data = map;
vm_flags_clear(vma, VM_MAYEXEC);
+ /* If mapping is read-only, then disallow potentially re-mapping with
+ * PROT_WRITE by dropping VM_MAYWRITE flag. This VM_MAYWRITE clearing
+ * means that as far as BPF map's memory-mapped VMAs are concerned,
+ * VM_WRITE and VM_MAYWRITE and equivalent, if one of them is set,
+ * both should be set, so we can forget about VM_MAYWRITE and always
+ * check just VM_WRITE
+ */
if (!(vma->vm_flags & VM_WRITE))
- /* disallow re-mapping with PROT_WRITE */
vm_flags_clear(vma, VM_MAYWRITE);
err = map->ops->map_mmap(map, vma);
- if (err)
- goto out;
+ if (err) {
+ if (vma->vm_flags & VM_WRITE)
+ bpf_map_write_active_dec(map);
+ }
- if (vma->vm_flags & VM_MAYWRITE)
- bpf_map_write_active_inc(map);
-out:
- mutex_unlock(&map->freeze_mutex);
return err;
}
@@ -1203,7 +1232,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
return -EINVAL;
map->record = btf_parse_fields(btf, value_type,
- BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
+ BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR,
map->value_size);
if (!IS_ERR_OR_NULL(map->record)) {
@@ -1222,6 +1251,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
case 0:
continue;
case BPF_SPIN_LOCK:
+ case BPF_RES_SPIN_LOCK:
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY &&
map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
@@ -1306,7 +1336,7 @@ static bool bpf_net_capable(void)
#define BPF_MAP_CREATE_LAST_FIELD map_token_fd
/* called via syscall */
-static int map_create(union bpf_attr *attr)
+static int map_create(union bpf_attr *attr, bool kernel)
{
const struct bpf_map_ops *ops;
struct bpf_token *token = NULL;
@@ -1496,7 +1526,7 @@ static int map_create(union bpf_attr *attr)
attr->btf_vmlinux_value_type_id;
}
- err = security_bpf_map_create(map, attr, token);
+ err = security_bpf_map_create(map, attr, token, kernel);
if (err)
goto free_map_sec;
@@ -1584,11 +1614,8 @@ struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
{
- spin_lock_bh(&map_idr_lock);
- map = __bpf_map_inc_not_zero(map, false);
- spin_unlock_bh(&map_idr_lock);
-
- return map;
+ lockdep_assert(rcu_read_lock_held());
+ return __bpf_map_inc_not_zero(map, false);
}
EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
@@ -1968,8 +1995,6 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
return err;
}
-#define MAP_LOOKUP_RETRIES 3
-
int generic_map_lookup_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
@@ -1979,8 +2004,8 @@ int generic_map_lookup_batch(struct bpf_map *map,
void __user *values = u64_to_user_ptr(attr->batch.values);
void __user *keys = u64_to_user_ptr(attr->batch.keys);
void *buf, *buf_prevkey, *prev_key, *key, *value;
- int err, retry = MAP_LOOKUP_RETRIES;
u32 value_size, cp, max_count;
+ int err;
if (attr->batch.elem_flags & ~BPF_F_LOCK)
return -EINVAL;
@@ -2026,14 +2051,8 @@ int generic_map_lookup_batch(struct bpf_map *map,
err = bpf_map_copy_value(map, key, value,
attr->batch.elem_flags);
- if (err == -ENOENT) {
- if (retry) {
- retry--;
- continue;
- }
- err = -EINTR;
- break;
- }
+ if (err == -ENOENT)
+ goto next_key;
if (err)
goto free_buf;
@@ -2048,12 +2067,12 @@ int generic_map_lookup_batch(struct bpf_map *map,
goto free_buf;
}
+ cp++;
+next_key:
if (!prev_key)
prev_key = buf_prevkey;
swap(prev_key, key);
- retry = MAP_LOOKUP_RETRIES;
- cp++;
cond_resched();
}
@@ -2313,6 +2332,7 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
kvfree(prog->aux->jited_linfo);
kvfree(prog->aux->linfo);
kfree(prog->aux->kfunc_tab);
+ kfree(prog->aux->ctx_arg_info);
if (prog->aux->attach_btf)
btf_put(prog->aux->attach_btf);
@@ -2943,7 +2963,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
if (err < 0)
goto free_prog;
- err = security_bpf_prog_load(prog, attr, token);
+ err = security_bpf_prog_load(prog, attr, token, uattr.is_kernel);
if (err)
goto free_prog_sec;
@@ -4168,7 +4188,8 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
#define BPF_F_ATTACH_MASK_BASE \
(BPF_F_ALLOW_OVERRIDE | \
BPF_F_ALLOW_MULTI | \
- BPF_F_REPLACE)
+ BPF_F_REPLACE | \
+ BPF_F_PREORDER)
#define BPF_F_ATTACH_MASK_MPROG \
(BPF_F_REPLACE | \
@@ -4732,6 +4753,8 @@ static int bpf_prog_get_info_by_fd(struct file *file,
info.recursion_misses = stats.misses;
info.verified_insns = prog->aux->verified_insns;
+ if (prog->aux->btf)
+ info.btf_id = btf_obj_id(prog->aux->btf);
if (!bpf_capable()) {
info.jited_prog_len = 0;
@@ -4878,8 +4901,6 @@ static int bpf_prog_get_info_by_fd(struct file *file,
}
}
- if (prog->aux->btf)
- info.btf_id = btf_obj_id(prog->aux->btf);
info.attach_btf_id = prog->aux->attach_btf_id;
if (attach_btf)
info.attach_btf_obj_id = btf_obj_id(attach_btf);
@@ -5120,15 +5141,34 @@ static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_
return btf_new_fd(attr, uattr, uattr_size);
}
-#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id
+#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD fd_by_id_token_fd
static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
{
+ struct bpf_token *token = NULL;
+
if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
return -EINVAL;
- if (!capable(CAP_SYS_ADMIN))
+ if (attr->open_flags & ~BPF_F_TOKEN_FD)
+ return -EINVAL;
+
+ if (attr->open_flags & BPF_F_TOKEN_FD) {
+ token = bpf_token_get_from_fd(attr->fd_by_id_token_fd);
+ if (IS_ERR(token))
+ return PTR_ERR(token);
+ if (!bpf_token_allow_cmd(token, BPF_BTF_GET_FD_BY_ID)) {
+ bpf_token_put(token);
+ token = NULL;
+ }
+ }
+
+ if (!bpf_token_capable(token, CAP_SYS_ADMIN)) {
+ bpf_token_put(token);
return -EPERM;
+ }
+
+ bpf_token_put(token);
return btf_get_fd_by_id(attr->btf_id);
}
@@ -5767,13 +5807,13 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
if (copy_from_bpfptr(&attr, uattr, size) != 0)
return -EFAULT;
- err = security_bpf(cmd, &attr, size);
+ err = security_bpf(cmd, &attr, size, uattr.is_kernel);
if (err < 0)
return err;
switch (cmd) {
case BPF_MAP_CREATE:
- err = map_create(&attr);
+ err = map_create(&attr, uattr.is_kernel);
break;
case BPF_MAP_LOOKUP_ELEM:
err = map_lookup_elem(&attr);