diff options
author | Jens Axboe <axboe@kernel.dk> | 2019-10-03 13:59:56 -0600 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2019-10-29 10:22:44 -0600 |
commit | c3a31e605620c279163c14068a60869ea3fda203 (patch) | |
tree | 3d1f39bf1ab88bc2377c2da7682504b55f686c83 /fs/io_uring.c | |
parent | 08a451739a9b5783f67de51e84cb6d9559bb9dc4 (diff) | |
download | lwn-c3a31e605620c279163c14068a60869ea3fda203.tar.gz lwn-c3a31e605620c279163c14068a60869ea3fda203.zip |
io_uring: add support for IORING_REGISTER_FILES_UPDATE
Allows the application to remove/replace/add files to/from a file set.
Passes in a struct:
struct io_uring_files_update {
__u32 offset;
__s32 *fds;
};
that holds an array of fds, size of array passed in through the usual
nr_args part of the io_uring_register() system call. The logic is as
follows:
1) If ->fds[i] is -1, the existing file at i + ->offset is removed from
the set.
2) If ->fds[i] is a valid fd, the existing file at i + ->offset is
replaced with ->fds[i].
For case #2, is the existing file is currently empty (fd == -1), the
new fd is simply added to the array.
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r-- | fs/io_uring.c | 175 |
1 files changed, 175 insertions, 0 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c index b85e5feb774a..77774abb1074 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3224,6 +3224,178 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, return ret; } +static void io_sqe_file_unregister(struct io_ring_ctx *ctx, int index) +{ +#if defined(CONFIG_UNIX) + struct file *file = ctx->user_files[index]; + struct sock *sock = ctx->ring_sock->sk; + struct sk_buff_head list, *head = &sock->sk_receive_queue; + struct sk_buff *skb; + int i; + + __skb_queue_head_init(&list); + + /* + * Find the skb that holds this file in its SCM_RIGHTS. When found, + * remove this entry and rearrange the file array. + */ + skb = skb_dequeue(head); + while (skb) { + struct scm_fp_list *fp; + + fp = UNIXCB(skb).fp; + for (i = 0; i < fp->count; i++) { + int left; + + if (fp->fp[i] != file) + continue; + + unix_notinflight(fp->user, fp->fp[i]); + left = fp->count - 1 - i; + if (left) { + memmove(&fp->fp[i], &fp->fp[i + 1], + left * sizeof(struct file *)); + } + fp->count--; + if (!fp->count) { + kfree_skb(skb); + skb = NULL; + } else { + __skb_queue_tail(&list, skb); + } + fput(file); + file = NULL; + break; + } + + if (!file) + break; + + __skb_queue_tail(&list, skb); + + skb = skb_dequeue(head); + } + + if (skb_peek(&list)) { + spin_lock_irq(&head->lock); + while ((skb = __skb_dequeue(&list)) != NULL) + __skb_queue_tail(head, skb); + spin_unlock_irq(&head->lock); + } +#else + fput(ctx->user_files[index]); +#endif +} + +static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file, + int index) +{ +#if defined(CONFIG_UNIX) + struct sock *sock = ctx->ring_sock->sk; + struct sk_buff_head *head = &sock->sk_receive_queue; + struct sk_buff *skb; + + /* + * See if we can merge this file into an existing skb SCM_RIGHTS + * file set. If there's no room, fall back to allocating a new skb + * and filling it in. + */ + spin_lock_irq(&head->lock); + skb = skb_peek(head); + if (skb) { + struct scm_fp_list *fpl = UNIXCB(skb).fp; + + if (fpl->count < SCM_MAX_FD) { + __skb_unlink(skb, head); + spin_unlock_irq(&head->lock); + fpl->fp[fpl->count] = get_file(file); + unix_inflight(fpl->user, fpl->fp[fpl->count]); + fpl->count++; + spin_lock_irq(&head->lock); + __skb_queue_head(head, skb); + } else { + skb = NULL; + } + } + spin_unlock_irq(&head->lock); + + if (skb) { + fput(file); + return 0; + } + + return __io_sqe_files_scm(ctx, 1, index); +#else + return 0; +#endif +} + +static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg, + unsigned nr_args) +{ + struct io_uring_files_update up; + __s32 __user *fds; + int fd, i, err; + __u32 done; + + if (!ctx->user_files) + return -ENXIO; + if (!nr_args) + return -EINVAL; + if (copy_from_user(&up, arg, sizeof(up))) + return -EFAULT; + if (check_add_overflow(up.offset, nr_args, &done)) + return -EOVERFLOW; + if (done > ctx->nr_user_files) + return -EINVAL; + + done = 0; + fds = (__s32 __user *) up.fds; + while (nr_args) { + err = 0; + if (copy_from_user(&fd, &fds[done], sizeof(fd))) { + err = -EFAULT; + break; + } + i = array_index_nospec(up.offset, ctx->nr_user_files); + if (ctx->user_files[i]) { + io_sqe_file_unregister(ctx, i); + ctx->user_files[i] = NULL; + } + if (fd != -1) { + struct file *file; + + file = fget(fd); + if (!file) { + err = -EBADF; + break; + } + /* + * Don't allow io_uring instances to be registered. If + * UNIX isn't enabled, then this causes a reference + * cycle and this instance can never get freed. If UNIX + * is enabled we'll handle it just fine, but there's + * still no point in allowing a ring fd as it doesn't + * support regular read/write anyway. + */ + if (file->f_op == &io_uring_fops) { + fput(file); + err = -EBADF; + break; + } + ctx->user_files[i] = file; + err = io_sqe_file_register(ctx, file, i); + if (err) + break; + } + nr_args--; + done++; + up.offset++; + } + + return done ? done : err; +} + static int io_sq_offload_start(struct io_ring_ctx *ctx, struct io_uring_params *p) { @@ -4031,6 +4203,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, break; ret = io_sqe_files_unregister(ctx); break; + case IORING_REGISTER_FILES_UPDATE: + ret = io_sqe_files_update(ctx, arg, nr_args); + break; case IORING_REGISTER_EVENTFD: ret = -EINVAL; if (nr_args != 1) |