diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-11 14:19:23 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-11 14:19:23 -0800 |
commit | 4c72e2b8c42e57f65d8fbfb01329e79d2b450653 (patch) | |
tree | 93dda9fd8adf1c1948c8dd83809aff4f207c7ea5 /io_uring/io_uring.c | |
parent | 01d550f0fcc06c7292f79a6f1453aac122d1d2c8 (diff) | |
parent | 6ff1407e24e6fdfa4a16ba9ba551e3d253a26391 (diff) | |
download | lwn-4c72e2b8c42e57f65d8fbfb01329e79d2b450653.tar.gz lwn-4c72e2b8c42e57f65d8fbfb01329e79d2b450653.zip |
Merge tag 'for-6.8/io_uring-2024-01-08' of git://git.kernel.dk/linux
Pull io_uring updates from Jens Axboe:
"Mostly just come fixes and cleanups, but one feature as well. In
detail:
- Harden the check for handling IOPOLL based on return (Pavel)
- Various minor optimizations (Pavel)
- Drop remnants of SCM_RIGHTS fd passing support, now that it's no
longer supported since 6.7 (me)
- Fix for a case where bytes_done wasn't initialized properly on a
failure condition for read/write requests (me)
- Move the register related code to a separate file (me)
- Add support for returning the provided ring buffer head (me)
- Add support for adding a direct descriptor to the normal file table
(me, Christian Brauner)
- Fix for ensuring pending task_work for a ring with DEFER_TASKRUN is
run even if we timeout waiting (me)"
* tag 'for-6.8/io_uring-2024-01-08' of git://git.kernel.dk/linux:
io_uring: ensure local task_work is run on wait timeout
io_uring/kbuf: add method for returning provided buffer ring head
io_uring/rw: ensure io->bytes_done is always initialized
io_uring: drop any code related to SCM_RIGHTS
io_uring/unix: drop usage of io_uring socket
io_uring/register: move io_uring_register(2) related code to register.c
io_uring/openclose: add support for IORING_OP_FIXED_FD_INSTALL
io_uring/cmd: inline io_uring_cmd_get_task
io_uring/cmd: inline io_uring_cmd_do_in_task_lazy
io_uring: split out cmd api into a separate header
io_uring: optimise ltimeout for inline execution
io_uring: don't check iopoll if request completes
Diffstat (limited to 'io_uring/io_uring.c')
-rw-r--r-- | io_uring/io_uring.c | 663 |
1 files changed, 30 insertions, 633 deletions
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 9839016f82ea..09b6d860deba 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -60,7 +60,6 @@ #include <linux/net.h> #include <net/sock.h> #include <net/af_unix.h> -#include <net/scm.h> #include <linux/anon_inodes.h> #include <linux/sched/mm.h> #include <linux/uaccess.h> @@ -70,6 +69,7 @@ #include <linux/fadvise.h> #include <linux/task_work.h> #include <linux/io_uring.h> +#include <linux/io_uring/cmd.h> #include <linux/audit.h> #include <linux/security.h> #include <asm/shmparam.h> @@ -85,6 +85,7 @@ #include "opdef.h" #include "refs.h" #include "tctx.h" +#include "register.h" #include "sqpoll.h" #include "fdinfo.h" #include "kbuf.h" @@ -103,9 +104,6 @@ #define IORING_MAX_ENTRIES 32768 #define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES) -#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \ - IORING_REGISTER_LAST + IORING_OP_LAST) - #define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \ IOSQE_IO_HARDLINK | IOSQE_ASYNC) @@ -129,11 +127,6 @@ enum { IO_CHECK_CQ_DROPPED_BIT, }; -enum { - IO_EVENTFD_OP_SIGNAL_BIT, - IO_EVENTFD_OP_FREE_BIT, -}; - struct io_defer_entry { struct list_head list; struct io_kiocb *req; @@ -177,19 +170,6 @@ static struct ctl_table kernel_io_uring_disabled_table[] = { }; #endif -struct sock *io_uring_get_socket(struct file *file) -{ -#if defined(CONFIG_UNIX) - if (io_is_uring_fops(file)) { - struct io_ring_ctx *ctx = file->private_data; - - return ctx->ring_sock->sk; - } -#endif - return NULL; -} -EXPORT_SYMBOL(io_uring_get_socket); - static inline void io_submit_flush_completions(struct io_ring_ctx *ctx) { if (!wq_list_empty(&ctx->submit_state.compl_reqs) || @@ -554,8 +534,7 @@ static __cold void io_queue_deferred(struct io_ring_ctx *ctx) } } - -static void io_eventfd_ops(struct rcu_head *rcu) +void io_eventfd_ops(struct rcu_head *rcu) { struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu); int ops = atomic_xchg(&ev_fd->ops, 0); @@ -1898,14 +1877,19 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) io_req_complete_defer(req); else io_req_complete_post(req, issue_flags); - } else if (ret != IOU_ISSUE_SKIP_COMPLETE) - return ret; - /* If the op doesn't have a file, we're not polling for it */ - if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue) - io_iopoll_req_issued(req, issue_flags); + return 0; + } - return 0; + if (ret == IOU_ISSUE_SKIP_COMPLETE) { + ret = 0; + io_arm_ltimeout(req); + + /* If the op doesn't have a file, we're not polling for it */ + if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue) + io_iopoll_req_issued(req, issue_flags); + } + return ret; } int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts) @@ -2076,9 +2060,7 @@ static inline void io_queue_sqe(struct io_kiocb *req) * We async punt it if the file wasn't marked NOWAIT, or if the file * doesn't support non-blocking read/write attempts */ - if (likely(!ret)) - io_arm_ltimeout(req); - else + if (unlikely(ret)) io_queue_async(req, ret); } @@ -2633,8 +2615,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, __set_current_state(TASK_RUNNING); atomic_set(&ctx->cq_wait_nr, 0); - if (ret < 0) - break; /* * Run task_work after scheduling and before io_should_wake(). * If we got woken because of task_work being processed, run it @@ -2644,6 +2624,18 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, if (!llist_empty(&ctx->work_llist)) io_run_local_work(ctx); + /* + * Non-local task_work will be run on exit to userspace, but + * if we're using DEFER_TASKRUN, then we could have waited + * with a timeout for a number of requests. If the timeout + * hits, we could have some requests ready to process. Ensure + * this break is _after_ we have run task_work, to avoid + * deferring running potentially pending requests until the + * next time we wait for events. + */ + if (ret < 0) + break; + check_cq = READ_ONCE(ctx->check_cq); if (unlikely(check_cq)) { /* let the caller flush overflows, retry */ @@ -2831,61 +2823,6 @@ static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries return off; } -static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg, - unsigned int eventfd_async) -{ - struct io_ev_fd *ev_fd; - __s32 __user *fds = arg; - int fd; - - ev_fd = rcu_dereference_protected(ctx->io_ev_fd, - lockdep_is_held(&ctx->uring_lock)); - if (ev_fd) - return -EBUSY; - - if (copy_from_user(&fd, fds, sizeof(*fds))) - return -EFAULT; - - ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL); - if (!ev_fd) - return -ENOMEM; - - ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd); - if (IS_ERR(ev_fd->cq_ev_fd)) { - int ret = PTR_ERR(ev_fd->cq_ev_fd); - kfree(ev_fd); - return ret; - } - - spin_lock(&ctx->completion_lock); - ctx->evfd_last_cq_tail = ctx->cached_cq_tail; - spin_unlock(&ctx->completion_lock); - - ev_fd->eventfd_async = eventfd_async; - ctx->has_evfd = true; - rcu_assign_pointer(ctx->io_ev_fd, ev_fd); - atomic_set(&ev_fd->refs, 1); - atomic_set(&ev_fd->ops, 0); - return 0; -} - -static int io_eventfd_unregister(struct io_ring_ctx *ctx) -{ - struct io_ev_fd *ev_fd; - - ev_fd = rcu_dereference_protected(ctx->io_ev_fd, - lockdep_is_held(&ctx->uring_lock)); - if (ev_fd) { - ctx->has_evfd = false; - rcu_assign_pointer(ctx->io_ev_fd, NULL); - if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_FREE_BIT), &ev_fd->ops)) - call_rcu(&ev_fd->rcu, io_eventfd_ops); - return 0; - } - - return -ENXIO; -} - static void io_req_caches_free(struct io_ring_ctx *ctx) { struct io_kiocb *req; @@ -2938,13 +2875,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) io_rsrc_node_destroy(ctx, ctx->rsrc_node); WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list)); - -#if defined(CONFIG_UNIX) - if (ctx->ring_sock) { - ctx->ring_sock->file = NULL; /* so that iput() is called */ - sock_release(ctx->ring_sock); - } -#endif WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); io_alloc_cache_free(&ctx->rsrc_node_cache, io_rsrc_node_cache_free); @@ -2984,7 +2914,7 @@ static __cold void io_activate_pollwq_cb(struct callback_head *cb) percpu_ref_put(&ctx->refs); } -static __cold void io_activate_pollwq(struct io_ring_ctx *ctx) +__cold void io_activate_pollwq(struct io_ring_ctx *ctx) { spin_lock(&ctx->completion_lock); /* already activated or in progress */ @@ -3043,19 +2973,6 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) return mask; } -static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id) -{ - const struct cred *creds; - - creds = xa_erase(&ctx->personalities, id); - if (creds) { - put_cred(creds); - return 0; - } - - return -EINVAL; -} - struct io_tctx_exit { struct callback_head task_work; struct completion completion; @@ -3866,32 +3783,12 @@ static int io_uring_install_fd(struct file *file) /* * Allocate an anonymous fd, this is what constitutes the application * visible backing of an io_uring instance. The application mmaps this - * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled, - * we have to tie this fd to a socket for file garbage collection purposes. + * fd to gain access to the SQ/CQ ring details. */ static struct file *io_uring_get_file(struct io_ring_ctx *ctx) { - struct file *file; -#if defined(CONFIG_UNIX) - int ret; - - ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, - &ctx->ring_sock); - if (ret) - return ERR_PTR(ret); -#endif - - file = anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, + return anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, O_RDWR | O_CLOEXEC, NULL); -#if defined(CONFIG_UNIX) - if (IS_ERR(file)) { - sock_release(ctx->ring_sock); - ctx->ring_sock = NULL; - } else { - ctx->ring_sock->file = file; - } -#endif - return file; } static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, @@ -4158,506 +4055,6 @@ SYSCALL_DEFINE2(io_uring_setup, u32, entries, return io_uring_setup(entries, params); } -static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg, - unsigned nr_args) -{ - struct io_uring_probe *p; - size_t size; - int i, ret; - - size = struct_size(p, ops, nr_args); - if (size == SIZE_MAX) - return -EOVERFLOW; - p = kzalloc(size, GFP_KERNEL); - if (!p) - return -ENOMEM; - - ret = -EFAULT; - if (copy_from_user(p, arg, size)) - goto out; - ret = -EINVAL; - if (memchr_inv(p, 0, size)) - goto out; - - p->last_op = IORING_OP_LAST - 1; - if (nr_args > IORING_OP_LAST) - nr_args = IORING_OP_LAST; - - for (i = 0; i < nr_args; i++) { - p->ops[i].op = i; - if (!io_issue_defs[i].not_supported) - p->ops[i].flags = IO_URING_OP_SUPPORTED; - } - p->ops_len = i; - - ret = 0; - if (copy_to_user(arg, p, size)) - ret = -EFAULT; -out: - kfree(p); - return ret; -} - -static int io_register_personality(struct io_ring_ctx *ctx) -{ - const struct cred *creds; - u32 id; - int ret; - - creds = get_current_cred(); - - ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds, - XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL); - if (ret < 0) { - put_cred(creds); - return ret; - } - return id; -} - -static __cold int io_register_restrictions(struct io_ring_ctx *ctx, - void __user *arg, unsigned int nr_args) -{ - struct io_uring_restriction *res; - size_t size; - int i, ret; - - /* Restrictions allowed only if rings started disabled */ - if (!(ctx->flags & IORING_SETUP_R_DISABLED)) - return -EBADFD; - - /* We allow only a single restrictions registration */ - if (ctx->restrictions.registered) - return -EBUSY; - - if (!arg || nr_args > IORING_MAX_RESTRICTIONS) - return -EINVAL; - - size = array_size(nr_args, sizeof(*res)); - if (size == SIZE_MAX) - return -EOVERFLOW; - - res = memdup_user(arg, size); - if (IS_ERR(res)) - return PTR_ERR(res); - - ret = 0; - - for (i = 0; i < nr_args; i++) { - switch (res[i].opcode) { - case IORING_RESTRICTION_REGISTER_OP: - if (res[i].register_op >= IORING_REGISTER_LAST) { - ret = -EINVAL; - goto out; - } - - __set_bit(res[i].register_op, - ctx->restrictions.register_op); - break; - case IORING_RESTRICTION_SQE_OP: - if (res[i].sqe_op >= IORING_OP_LAST) { - ret = -EINVAL; - goto out; - } - - __set_bit(res[i].sqe_op, ctx->restrictions.sqe_op); - break; - case IORING_RESTRICTION_SQE_FLAGS_ALLOWED: - ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags; - break; - case IORING_RESTRICTION_SQE_FLAGS_REQUIRED: - ctx->restrictions.sqe_flags_required = res[i].sqe_flags; - break; - default: - ret = -EINVAL; - goto out; - } - } - -out: - /* Reset all restrictions if an error happened */ - if (ret != 0) - memset(&ctx->restrictions, 0, sizeof(ctx->restrictions)); - else - ctx->restrictions.registered = true; - - kfree(res); - return ret; -} - -static int io_register_enable_rings(struct io_ring_ctx *ctx) -{ - if (!(ctx->flags & IORING_SETUP_R_DISABLED)) - return -EBADFD; - - if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task) { - WRITE_ONCE(ctx->submitter_task, get_task_struct(current)); - /* - * Lazy activation attempts would fail if it was polled before - * submitter_task is set. - */ - if (wq_has_sleeper(&ctx->poll_wq)) - io_activate_pollwq(ctx); - } - - if (ctx->restrictions.registered) - ctx->restricted = 1; - - ctx->flags &= ~IORING_SETUP_R_DISABLED; - if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait)) - wake_up(&ctx->sq_data->wait); - return 0; -} - -static __cold int __io_register_iowq_aff(struct io_ring_ctx *ctx, - cpumask_var_t new_mask) -{ - int ret; - - if (!(ctx->flags & IORING_SETUP_SQPOLL)) { - ret = io_wq_cpu_affinity(current->io_uring, new_mask); - } else { - mutex_unlock(&ctx->uring_lock); - ret = io_sqpoll_wq_cpu_affinity(ctx, new_mask); - mutex_lock(&ctx->uring_lock); - } - - return ret; -} - -static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx, - void __user *arg, unsigned len) -{ - cpumask_var_t new_mask; - int ret; - - if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) - return -ENOMEM; - - cpumask_clear(new_mask); - if (len > cpumask_size()) - len = cpumask_size(); - - if (in_compat_syscall()) { - ret = compat_get_bitmap(cpumask_bits(new_mask), - (const compat_ulong_t __user *)arg, - len * 8 /* CHAR_BIT */); - } else { - ret = copy_from_user(new_mask, arg, len); - } - - if (ret) { - free_cpumask_var(new_mask); - return -EFAULT; - } - - ret = __io_register_iowq_aff(ctx, new_mask); - free_cpumask_var(new_mask); - return ret; -} - -static __cold int io_unregister_iowq_aff(struct io_ring_ctx *ctx) -{ - return __io_register_iowq_aff(ctx, NULL); -} - -static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, - void __user *arg) - __must_hold(&ctx->uring_lock) -{ - struct io_tctx_node *node; - struct io_uring_task *tctx = NULL; - struct io_sq_data *sqd = NULL; - __u32 new_count[2]; - int i, ret; - - if (copy_from_user(new_count, arg, sizeof(new_count))) - return -EFAULT; - for (i = 0; i < ARRAY_SIZE(new_count); i++) - if (new_count[i] > INT_MAX) - return -EINVAL; - - if (ctx->flags & IORING_SETUP_SQPOLL) { - sqd = ctx->sq_data; - if (sqd) { - /* - * Observe the correct sqd->lock -> ctx->uring_lock - * ordering. Fine to drop uring_lock here, we hold - * a ref to the ctx. - */ - refcount_inc(&sqd->refs); - mutex_unlock(&ctx->uring_lock); - mutex_lock(&sqd->lock); - mutex_lock(&ctx->uring_lock); - if (sqd->thread) - tctx = sqd->thread->io_uring; - } - } else { - tctx = current->io_uring; - } - - BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits)); - - for (i = 0; i < ARRAY_SIZE(new_count); i++) - if (new_count[i]) - ctx->iowq_limits[i] = new_count[i]; - ctx->iowq_limits_set = true; - - if (tctx && tctx->io_wq) { - ret = io_wq_max_workers(tctx->io_wq, new_count); - if (ret) - goto err; - } else { - memset(new_count, 0, sizeof(new_count)); - } - - if (sqd) { - mutex_unlock(&sqd->lock); - io_put_sq_data(sqd); - } - - if (copy_to_user(arg, new_count, sizeof(new_count))) - return -EFAULT; - - /* that's it for SQPOLL, only the SQPOLL task creates requests */ - if (sqd) - return 0; - - /* now propagate the restriction to all registered users */ - list_for_each_entry(node, &ctx->tctx_list, ctx_node) { - struct io_uring_task *tctx = node->task->io_uring; - - if (WARN_ON_ONCE(!tctx->io_wq)) - continue; - - for (i = 0; i < ARRAY_SIZE(new_count); i++) - new_count[i] = ctx->iowq_limits[i]; - /* ignore errors, it always returns zero anyway */ - (void)io_wq_max_workers(tctx->io_wq, new_count); - } - return 0; -err: - if (sqd) { - mutex_unlock(&sqd->lock); - io_put_sq_data(sqd); - } - return ret; -} - -static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, - void __user *arg, unsigned nr_args) - __releases(ctx->uring_lock) - __acquires(ctx->uring_lock) -{ - int ret; - - /* - * We don't quiesce the refs for register anymore and so it can't be - * dying as we're holding a file ref here. - */ - if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs))) - return -ENXIO; - - if (ctx->submitter_task && ctx->submitter_task != current) - return -EEXIST; - - if (ctx->restricted) { - opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); - if (!test_bit(opcode, ctx->restrictions.register_op)) - return -EACCES; - } - - switch (opcode) { - case IORING_REGISTER_BUFFERS: - ret = -EFAULT; - if (!arg) - break; - ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL); - break; - case IORING_UNREGISTER_BUFFERS: - ret = -EINVAL; - if (arg || nr_args) - break; - ret = io_sqe_buffers_unregister(ctx); - break; - case IORING_REGISTER_FILES: - ret = -EFAULT; - if (!arg) - break; - ret = io_sqe_files_register(ctx, arg, nr_args, NULL); - break; - case IORING_UNREGISTER_FILES: - ret = -EINVAL; - if (arg || nr_args) - break; - ret = io_sqe_files_unregister(ctx); - break; - case IORING_REGISTER_FILES_UPDATE: - ret = io_register_files_update(ctx, arg, nr_args); - break; - case IORING_REGISTER_EVENTFD: - ret = -EINVAL; - if (nr_args != 1) - break; - ret = io_eventfd_register(ctx, arg, 0); - break; - case IORING_REGISTER_EVENTFD_ASYNC: - ret = -EINVAL; - if (nr_args != 1) - break; - ret = io_eventfd_register(ctx, arg, 1); - break; - case IORING_UNREGISTER_EVENTFD: - ret = -EINVAL; - if (arg || nr_args) - break; - ret = io_eventfd_unregister(ctx); - break; - case IORING_REGISTER_PROBE: - ret = -EINVAL; - if (!arg || nr_args > 256) - break; - ret = io_probe(ctx, arg, nr_args); - break; - case IORING_REGISTER_PERSONALITY: - ret = -EINVAL; - if (arg || nr_args) - break; - ret = io_register_personality(ctx); - break; - case IORING_UNREGISTER_PERSONALITY: - ret = -EINVAL; - if (arg) - break; - ret = io_unregister_personality(ctx, nr_args); - break; - case IORING_REGISTER_ENABLE_RINGS: - ret = -EINVAL; - if (arg || nr_args) - break; - ret = io_register_enable_rings(ctx); - break; - case IORING_REGISTER_RESTRICTIONS: - ret = io_register_restrictions(ctx, arg, nr_args); - break; - case IORING_REGISTER_FILES2: - ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE); - break; - case IORING_REGISTER_FILES_UPDATE2: - ret = io_register_rsrc_update(ctx, arg, nr_args, - IORING_RSRC_FILE); - break; - case IORING_REGISTER_BUFFERS2: - ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER); - break; - case IORING_REGISTER_BUFFERS_UPDATE: - ret = io_register_rsrc_update(ctx, arg, nr_args, - IORING_RSRC_BUFFER); - break; - case IORING_REGISTER_IOWQ_AFF: - ret = -EINVAL; - if (!arg || !nr_args) - break; - ret = io_register_iowq_aff(ctx, arg, nr_args); - break; - case IORING_UNREGISTER_IOWQ_AFF: - ret = -EINVAL; - if (arg || nr_args) - break; - ret = io_unregister_iowq_aff(ctx); - break; - case IORING_REGISTER_IOWQ_MAX_WORKERS: - ret = -EINVAL; - if (!arg || nr_args != 2) - break; - ret = io_register_iowq_max_workers(ctx, arg); - break; - case IORING_REGISTER_RING_FDS: - ret = io_ringfd_register(ctx, arg, nr_args); - break; - case IORING_UNREGISTER_RING_FDS: - ret = io_ringfd_unregister(ctx, arg, nr_args); - break; - case IORING_REGISTER_PBUF_RING: - ret = -EINVAL; - if (!arg || nr_args != 1) - break; - ret = io_register_pbuf_ring(ctx, arg); - break; - case IORING_UNREGISTER_PBUF_RING: - ret = -EINVAL; - if (!arg || nr_args != 1) - break; - ret = io_unregister_pbuf_ring(ctx, arg); - break; - case IORING_REGISTER_SYNC_CANCEL: - ret = -EINVAL; - if (!arg || nr_args != 1) - break; - ret = io_sync_cancel(ctx, arg); - break; - case IORING_REGISTER_FILE_ALLOC_RANGE: - ret = -EINVAL; - if (!arg || nr_args) - break; - ret = io_register_file_alloc_range(ctx, arg); - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - -SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, - void __user *, arg, unsigned int, nr_args) -{ - struct io_ring_ctx *ctx; - long ret = -EBADF; - struct file *file; - bool use_registered_ring; - - use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING); - opcode &= ~IORING_REGISTER_USE_REGISTERED_RING; - - if (opcode >= IORING_REGISTER_LAST) - return -EINVAL; - - if (use_registered_ring) { - /* - * Ring fd has been registered via IORING_REGISTER_RING_FDS, we - * need only dereference our task private array to find it. - */ - struct io_uring_task *tctx = current->io_uring; - - if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX)) - return -EINVAL; - fd = array_index_nospec(fd, IO_RINGFD_REG_MAX); - file = tctx->registered_rings[fd]; - if (unlikely(!file)) - return -EBADF; - } else { - file = fget(fd); - if (unlikely(!file)) - return -EBADF; - ret = -EOPNOTSUPP; - if (!io_is_uring_fops(file)) - goto out_fput; - } - - ctx = file->private_data; - - mutex_lock(&ctx->uring_lock); - ret = __io_uring_register(ctx, opcode, arg, nr_args); - mutex_unlock(&ctx->uring_lock); - trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, ret); -out_fput: - if (!use_registered_ring) - fput(file); - return ret; -} - static int __init io_uring_init(void) { #define __BUILD_BUG_VERIFY_OFFSET_SIZE(stype, eoffset, esize, ename) do { \ |