diff options
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r-- | kernel/bpf/syscall.c | 120 |
1 files changed, 80 insertions, 40 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c420edbfb7c8..9794446bc8c6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -569,7 +569,24 @@ static void bpf_map_release_memcg(struct bpf_map *map) } #endif -int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid, +static bool can_alloc_pages(void) +{ + return preempt_count() == 0 && !irqs_disabled() && + !IS_ENABLED(CONFIG_PREEMPT_RT); +} + +static struct page *__bpf_alloc_page(int nid) +{ + if (!can_alloc_pages()) + return try_alloc_pages(nid, 0); + + return alloc_pages_node(nid, + GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT + | __GFP_NOWARN, + 0); +} + +int bpf_map_alloc_pages(const struct bpf_map *map, int nid, unsigned long nr_pages, struct page **pages) { unsigned long i, j; @@ -582,14 +599,14 @@ int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid, old_memcg = set_active_memcg(memcg); #endif for (i = 0; i < nr_pages; i++) { - pg = alloc_pages_node(nid, gfp | __GFP_ACCOUNT, 0); + pg = __bpf_alloc_page(nid); if (pg) { pages[i] = pg; continue; } for (j = 0; j < i; j++) - __free_page(pages[j]); + free_pages_nolock(pages[j], 0); ret = -ENOMEM; break; } @@ -648,6 +665,7 @@ void btf_record_free(struct btf_record *rec) case BPF_RB_ROOT: case BPF_RB_NODE: case BPF_SPIN_LOCK: + case BPF_RES_SPIN_LOCK: case BPF_TIMER: case BPF_REFCOUNT: case BPF_WORKQUEUE: @@ -700,6 +718,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec) case BPF_RB_ROOT: case BPF_RB_NODE: case BPF_SPIN_LOCK: + case BPF_RES_SPIN_LOCK: case BPF_TIMER: case BPF_REFCOUNT: case BPF_WORKQUEUE: @@ -777,6 +796,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) switch (fields[i].type) { case BPF_SPIN_LOCK: + case BPF_RES_SPIN_LOCK: break; case BPF_TIMER: bpf_timer_cancel_and_free(field_ptr); @@ -1035,7 +1055,7 @@ static const struct vm_operations_struct bpf_map_default_vmops = { static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) { struct bpf_map *map = filp->private_data; - int err; + int err = 0; if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record)) return -ENOTSUPP; @@ -1059,24 +1079,33 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) err = -EACCES; goto out; } + bpf_map_write_active_inc(map); } +out: + mutex_unlock(&map->freeze_mutex); + if (err) + return err; /* set default open/close callbacks */ vma->vm_ops = &bpf_map_default_vmops; vma->vm_private_data = map; vm_flags_clear(vma, VM_MAYEXEC); + /* If mapping is read-only, then disallow potentially re-mapping with + * PROT_WRITE by dropping VM_MAYWRITE flag. This VM_MAYWRITE clearing + * means that as far as BPF map's memory-mapped VMAs are concerned, + * VM_WRITE and VM_MAYWRITE and equivalent, if one of them is set, + * both should be set, so we can forget about VM_MAYWRITE and always + * check just VM_WRITE + */ if (!(vma->vm_flags & VM_WRITE)) - /* disallow re-mapping with PROT_WRITE */ vm_flags_clear(vma, VM_MAYWRITE); err = map->ops->map_mmap(map, vma); - if (err) - goto out; + if (err) { + if (vma->vm_flags & VM_WRITE) + bpf_map_write_active_dec(map); + } - if (vma->vm_flags & VM_MAYWRITE) - bpf_map_write_active_inc(map); -out: - mutex_unlock(&map->freeze_mutex); return err; } @@ -1203,7 +1232,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, return -EINVAL; map->record = btf_parse_fields(btf, value_type, - BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | + BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR, map->value_size); if (!IS_ERR_OR_NULL(map->record)) { @@ -1222,6 +1251,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, case 0: continue; case BPF_SPIN_LOCK: + case BPF_RES_SPIN_LOCK: if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_ARRAY && map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && @@ -1306,7 +1336,7 @@ static bool bpf_net_capable(void) #define BPF_MAP_CREATE_LAST_FIELD map_token_fd /* called via syscall */ -static int map_create(union bpf_attr *attr) +static int map_create(union bpf_attr *attr, bool kernel) { const struct bpf_map_ops *ops; struct bpf_token *token = NULL; @@ -1496,7 +1526,7 @@ static int map_create(union bpf_attr *attr) attr->btf_vmlinux_value_type_id; } - err = security_bpf_map_create(map, attr, token); + err = security_bpf_map_create(map, attr, token, kernel); if (err) goto free_map_sec; @@ -1584,11 +1614,8 @@ struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map) { - spin_lock_bh(&map_idr_lock); - map = __bpf_map_inc_not_zero(map, false); - spin_unlock_bh(&map_idr_lock); - - return map; + lockdep_assert(rcu_read_lock_held()); + return __bpf_map_inc_not_zero(map, false); } EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); @@ -1968,8 +1995,6 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file, return err; } -#define MAP_LOOKUP_RETRIES 3 - int generic_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr) @@ -1979,8 +2004,8 @@ int generic_map_lookup_batch(struct bpf_map *map, void __user *values = u64_to_user_ptr(attr->batch.values); void __user *keys = u64_to_user_ptr(attr->batch.keys); void *buf, *buf_prevkey, *prev_key, *key, *value; - int err, retry = MAP_LOOKUP_RETRIES; u32 value_size, cp, max_count; + int err; if (attr->batch.elem_flags & ~BPF_F_LOCK) return -EINVAL; @@ -2026,14 +2051,8 @@ int generic_map_lookup_batch(struct bpf_map *map, err = bpf_map_copy_value(map, key, value, attr->batch.elem_flags); - if (err == -ENOENT) { - if (retry) { - retry--; - continue; - } - err = -EINTR; - break; - } + if (err == -ENOENT) + goto next_key; if (err) goto free_buf; @@ -2048,12 +2067,12 @@ int generic_map_lookup_batch(struct bpf_map *map, goto free_buf; } + cp++; +next_key: if (!prev_key) prev_key = buf_prevkey; swap(prev_key, key); - retry = MAP_LOOKUP_RETRIES; - cp++; cond_resched(); } @@ -2313,6 +2332,7 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) kvfree(prog->aux->jited_linfo); kvfree(prog->aux->linfo); kfree(prog->aux->kfunc_tab); + kfree(prog->aux->ctx_arg_info); if (prog->aux->attach_btf) btf_put(prog->aux->attach_btf); @@ -2943,7 +2963,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) if (err < 0) goto free_prog; - err = security_bpf_prog_load(prog, attr, token); + err = security_bpf_prog_load(prog, attr, token, uattr.is_kernel); if (err) goto free_prog_sec; @@ -4168,7 +4188,8 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, #define BPF_F_ATTACH_MASK_BASE \ (BPF_F_ALLOW_OVERRIDE | \ BPF_F_ALLOW_MULTI | \ - BPF_F_REPLACE) + BPF_F_REPLACE | \ + BPF_F_PREORDER) #define BPF_F_ATTACH_MASK_MPROG \ (BPF_F_REPLACE | \ @@ -4732,6 +4753,8 @@ static int bpf_prog_get_info_by_fd(struct file *file, info.recursion_misses = stats.misses; info.verified_insns = prog->aux->verified_insns; + if (prog->aux->btf) + info.btf_id = btf_obj_id(prog->aux->btf); if (!bpf_capable()) { info.jited_prog_len = 0; @@ -4878,8 +4901,6 @@ static int bpf_prog_get_info_by_fd(struct file *file, } } - if (prog->aux->btf) - info.btf_id = btf_obj_id(prog->aux->btf); info.attach_btf_id = prog->aux->attach_btf_id; if (attach_btf) info.attach_btf_obj_id = btf_obj_id(attach_btf); @@ -5120,15 +5141,34 @@ static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_ return btf_new_fd(attr, uattr, uattr_size); } -#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id +#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD fd_by_id_token_fd static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) { + struct bpf_token *token = NULL; + if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) return -EINVAL; - if (!capable(CAP_SYS_ADMIN)) + if (attr->open_flags & ~BPF_F_TOKEN_FD) + return -EINVAL; + + if (attr->open_flags & BPF_F_TOKEN_FD) { + token = bpf_token_get_from_fd(attr->fd_by_id_token_fd); + if (IS_ERR(token)) + return PTR_ERR(token); + if (!bpf_token_allow_cmd(token, BPF_BTF_GET_FD_BY_ID)) { + bpf_token_put(token); + token = NULL; + } + } + + if (!bpf_token_capable(token, CAP_SYS_ADMIN)) { + bpf_token_put(token); return -EPERM; + } + + bpf_token_put(token); return btf_get_fd_by_id(attr->btf_id); } @@ -5767,13 +5807,13 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) if (copy_from_bpfptr(&attr, uattr, size) != 0) return -EFAULT; - err = security_bpf(cmd, &attr, size); + err = security_bpf(cmd, &attr, size, uattr.is_kernel); if (err < 0) return err; switch (cmd) { case BPF_MAP_CREATE: - err = map_create(&attr); + err = map_create(&attr, uattr.is_kernel); break; case BPF_MAP_LOOKUP_ELEM: err = map_lookup_elem(&attr); |