author | Jens Axboe <axboe@kernel.dk> | 2020-07-24 12:53:31 -0600 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2020-07-24 12:53:31 -0600 |
commit | 760618f7a8e3b63aa06266efb301719c374e29d4 (patch) | |
tree | 5a446bbbdfe69885572452a52c795521c405418d /fs/io_uring.c | |
parent | 4349f30ecb8068d146a1e57bb12f46e745323b4c (diff) | |
parent | 4ae6dbd683860b9edc254ea8acf5e04b5ae242e5 (diff) | |
Merge branch 'io_uring-5.8' into for-5.9/io_uring
Merge in the io_uring-5.8 fixes, as changes/cleanups to how we do locked
mem accounting require a fixup; only one of the spots is noticed by git,
as the other merges cleanly. The flags fix from io_uring-5.8 also causes
a merge conflict, as do the leak fix for recvmsg, the double poll fix,
and the link failure locking fix.
* io_uring-5.8:
io_uring: fix lockup in io_fail_links()
io_uring: fix ->work corruption with poll_add
io_uring: missed req_init_async() for IOSQE_ASYNC
io_uring: always allow drain/link/hardlink/async sqe flags
io_uring: ensure double poll additions work with both request types
io_uring: fix recvmsg memory leak with buffer selection
io_uring: fix not initialised work->flags
io_uring: fix missing msg_name assignment
io_uring: account user memory freed when exit has been queued
io_uring: fix memleak in io_sqe_files_register()
io_uring: fix memleak in __io_sqe_files_update()
io_uring: export cq overflow status to userspace
Signed-off-by: Jens Axboe <axboe@kernel.dk>
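
One of the merged fixes, "io_uring: export cq overflow status to userspace", surfaces CQ overflow through the SQ ring's flags word instead of leaving applications to guess. As a rough illustration (not part of this commit), a minimal sketch of how userspace might check the new bit is below; it assumes the SQ ring has already been mmap()ed per io_uring_setup(2), that `sq_flags` points at the flags field located at `params.sq_off.flags`, and that the headers/kernel carry this fix so `IORING_SQ_CQ_OVERFLOW` is defined.

```c
#include <linux/io_uring.h>
#include <stdatomic.h>
#include <stdint.h>

/*
 * Sketch only: check whether the kernel has flagged CQ ring overflow.
 * 'sq_flags' is assumed to point at the SQ ring's flags word in the
 * SQ ring mmap (offset params.sq_off.flags from io_uring_setup()).
 */
static int cq_has_overflowed(const _Atomic uint32_t *sq_flags)
{
	/* the kernel updates this word asynchronously; load it atomically */
	uint32_t flags = atomic_load_explicit(sq_flags, memory_order_relaxed);

	return (flags & IORING_SQ_CQ_OVERFLOW) != 0;
}
```

When the bit is set, entering the kernel (for example io_uring_enter() with IORING_ENTER_GETEVENTS) gives it a chance to flush the overflowed completions back into the CQ ring once there is room.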
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r-- | fs/io_uring.c | 106 |
1 file changed, 74 insertions, 32 deletions
```diff
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3ce02a1613cc..ff3851d40df4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -593,6 +593,7 @@ enum {
 
 struct async_poll {
 	struct io_poll_iocb	poll;
+	struct io_poll_iocb	*double_poll;
 	struct io_wq_work	work;
 };
 
@@ -1146,6 +1147,8 @@ static void io_prep_async_work(struct io_kiocb *req)
 {
 	const struct io_op_def *def = &io_op_defs[req->opcode];
 
+	io_req_init_async(req);
+
 	if (req->flags & REQ_F_ISREG) {
 		if (def->hash_reg_file)
 			io_wq_hash_work(&req->work, file_inode(req->file));
@@ -1337,6 +1340,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 	if (cqe) {
 		clear_bit(0, &ctx->sq_check_overflow);
 		clear_bit(0, &ctx->cq_check_overflow);
+		ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
 	}
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 	io_cqring_ev_posted(ctx);
@@ -1374,6 +1378,7 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
 		if (list_empty(&ctx->cq_overflow_list)) {
 			set_bit(0, &ctx->sq_check_overflow);
 			set_bit(0, &ctx->cq_check_overflow);
+			ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
 		}
 		req->flags |= REQ_F_OVERFLOW;
 		refcount_inc(&req->refs);
@@ -3912,6 +3917,7 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (req->flags & REQ_F_NEED_CLEANUP)
 		return 0;
 
+	io->msg.msg.msg_name = &io->msg.addr;
 	io->msg.iov = io->msg.fast_iov;
 	ret = sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
 				  &io->msg.iov);
@@ -4093,6 +4099,7 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
 static int io_recvmsg_copy_hdr(struct io_kiocb *req,
 			       struct io_async_ctx *io)
 {
+	io->msg.msg.msg_name = &io->msg.addr;
 	io->msg.iov = io->msg.fast_iov;
 
 #ifdef CONFIG_COMPAT
@@ -4202,10 +4209,16 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock,
 
 		ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.msg,
 						kmsg->uaddr, flags);
-		if (force_nonblock && ret == -EAGAIN)
-			return io_setup_async_msg(req, kmsg);
+		if (force_nonblock && ret == -EAGAIN) {
+			ret = io_setup_async_msg(req, kmsg);
+			if (ret != -EAGAIN)
+				kfree(kbuf);
+			return ret;
+		}
 		if (ret == -ERESTARTSYS)
 			ret = -EINTR;
+		if (kbuf)
+			kfree(kbuf);
 	}
 
 	if (kmsg && kmsg->iov != kmsg->fast_iov)
@@ -4492,9 +4505,9 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
 	return false;
 }
 
-static void io_poll_remove_double(struct io_kiocb *req)
+static void io_poll_remove_double(struct io_kiocb *req, void *data)
 {
-	struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
+	struct io_poll_iocb *poll = data;
 
 	lockdep_assert_held(&req->ctx->completion_lock);
 
@@ -4514,7 +4527,7 @@ static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 
-	io_poll_remove_double(req);
+	io_poll_remove_double(req, req->io);
 	req->poll.done = true;
 	io_cqring_fill_event(req, error ? error : mangle_poll(mask));
 	io_commit_cqring(ctx);
@@ -4552,21 +4565,21 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
 			       int sync, void *key)
 {
 	struct io_kiocb *req = wait->private;
-	struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
+	struct io_poll_iocb *poll = req->apoll->double_poll;
 	__poll_t mask = key_to_poll(key);
 
 	/* for instances that support it check for an event match first: */
 	if (mask && !(mask & poll->events))
 		return 0;
 
-	if (req->poll.head) {
+	if (poll && poll->head) {
 		bool done;
 
-		spin_lock(&req->poll.head->lock);
-		done = list_empty(&req->poll.wait.entry);
+		spin_lock(&poll->head->lock);
+		done = list_empty(&poll->wait.entry);
 		if (!done)
-			list_del_init(&req->poll.wait.entry);
-		spin_unlock(&req->poll.head->lock);
+			list_del_init(&poll->wait.entry);
+		spin_unlock(&poll->head->lock);
 		if (!done)
 			__io_async_wake(req, poll, mask, io_poll_task_func);
 	}
@@ -4586,7 +4599,8 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
 }
 
 static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
-			    struct wait_queue_head *head)
+			    struct wait_queue_head *head,
+			    struct io_poll_iocb **poll_ptr)
 {
 	struct io_kiocb *req = pt->req;
 
@@ -4597,7 +4611,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 	 */
 	if (unlikely(poll->head)) {
 		/* already have a 2nd entry, fail a third attempt */
-		if (req->io) {
+		if (*poll_ptr) {
 			pt->error = -EINVAL;
 			return;
 		}
@@ -4609,7 +4623,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 		io_init_poll_iocb(poll, req->poll.events, io_poll_double_wake);
 		refcount_inc(&req->refs);
 		poll->wait.private = req;
-		req->io = (void *) poll;
+		*poll_ptr = poll;
 	}
 
 	pt->error = 0;
@@ -4625,8 +4639,9 @@ static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
 				struct poll_table_struct *p)
 {
 	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+	struct async_poll *apoll = pt->req->apoll;
 
-	__io_queue_proc(&pt->req->apoll->poll, pt, head);
+	__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
 }
 
 static void io_async_task_func(struct callback_head *cb)
@@ -4646,6 +4661,7 @@ static void io_async_task_func(struct callback_head *cb)
 	if (hash_hashed(&req->hash_node))
 		hash_del(&req->hash_node);
 
+	io_poll_remove_double(req, apoll->double_poll);
 	spin_unlock_irq(&ctx->completion_lock);
 
 	/* restore ->work in case we need to retry again */
@@ -4657,6 +4673,7 @@ static void io_async_task_func(struct callback_head *cb)
 	else
 		__io_req_task_cancel(req, -ECANCELED);
 
+	kfree(apoll->double_poll);
 	kfree(apoll);
 }
 
@@ -4728,7 +4745,6 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 	struct async_poll *apoll;
 	struct io_poll_table ipt;
 	__poll_t mask, ret;
-	bool had_io;
 
 	if (!req->file || !file_can_poll(req->file))
 		return false;
@@ -4740,11 +4756,11 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 	apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
 	if (unlikely(!apoll))
 		return false;
+	apoll->double_poll = NULL;
 
 	req->flags |= REQ_F_POLLED;
 	if (req->flags & REQ_F_WORK_INITIALIZED)
 		memcpy(&apoll->work, &req->work, sizeof(req->work));
-	had_io = req->io != NULL;
 
 	io_get_req_task(req);
 	req->apoll = apoll;
@@ -4762,13 +4778,11 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
 					io_async_wake);
 	if (ret) {
-		ipt.error = 0;
-		/* only remove double add if we did it here */
-		if (!had_io)
-			io_poll_remove_double(req);
+		io_poll_remove_double(req, apoll->double_poll);
 		spin_unlock_irq(&ctx->completion_lock);
 		if (req->flags & REQ_F_WORK_INITIALIZED)
 			memcpy(&req->work, &apoll->work, sizeof(req->work));
+		kfree(apoll->double_poll);
 		kfree(apoll);
 		return false;
 	}
@@ -4799,11 +4813,13 @@ static bool io_poll_remove_one(struct io_kiocb *req)
 	bool do_complete;
 
 	if (req->opcode == IORING_OP_POLL_ADD) {
-		io_poll_remove_double(req);
+		io_poll_remove_double(req, req->io);
 		do_complete = __io_poll_remove_one(req, &req->poll);
 	} else {
 		struct async_poll *apoll = req->apoll;
 
+		io_poll_remove_double(req, apoll->double_poll);
+
 		/* non-poll requests have submit ref still */
 		do_complete = __io_poll_remove_one(req, &apoll->poll);
 		if (do_complete) {
@@ -4816,6 +4832,7 @@ static bool io_poll_remove_one(struct io_kiocb *req)
 			if (req->flags & REQ_F_WORK_INITIALIZED)
 				memcpy(&req->work, &apoll->work,
 					sizeof(req->work));
+			kfree(apoll->double_poll);
 			kfree(apoll);
 		}
 	}
@@ -4915,7 +4932,7 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 {
 	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
 
-	__io_queue_proc(&pt->req->poll, pt, head);
+	__io_queue_proc(&pt->req->poll, pt, head, (struct io_poll_iocb **) &pt->req->io);
 }
 
 static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -4948,6 +4965,10 @@ static int io_poll_add(struct io_kiocb *req)
 	struct io_poll_table ipt;
 	__poll_t mask;
 
+	/* ->work is in union with hash_node and others */
+	io_req_work_drop_env(req);
+	req->flags &= ~REQ_F_WORK_INITIALIZED;
+
 	INIT_HLIST_NODE(&req->hash_node);
 	INIT_LIST_HEAD(&req->list);
 	ipt.pt._qproc = io_poll_queue_proc;
@@ -5027,7 +5048,9 @@ static int io_timeout_remove_prep(struct io_kiocb *req,
 {
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
-	if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->len)
+	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+		return -EINVAL;
+	if (sqe->ioprio || sqe->buf_index || sqe->len)
 		return -EINVAL;
 
 	req->timeout.addr = READ_ONCE(sqe->addr);
@@ -5203,8 +5226,9 @@ static int io_async_cancel_prep(struct io_kiocb *req,
 {
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
-	if (sqe->flags || sqe->ioprio || sqe->off || sqe->len ||
-	    sqe->cancel_flags)
+	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+		return -EINVAL;
+	if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags)
 		return -EINVAL;
 
 	req->cancel.addr = READ_ONCE(sqe->addr);
@@ -5222,7 +5246,9 @@ static int io_async_cancel(struct io_kiocb *req)
 static int io_files_update_prep(struct io_kiocb *req,
 				const struct io_uring_sqe *sqe)
 {
-	if (sqe->flags || sqe->ioprio || sqe->rw_flags)
+	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+		return -EINVAL;
+	if (sqe->ioprio || sqe->rw_flags)
 		return -EINVAL;
 
 	req->files_update.offset = READ_ONCE(sqe->off);
@@ -5996,6 +6022,7 @@ fail_req:
 		 * Never try inline submit of IOSQE_ASYNC is set, go straight
 		 * to async execution.
 		 */
+		io_req_init_async(req);
 		req->work.flags |= IO_WQ_WORK_CONCURRENT;
 		io_queue_async_work(req);
 	} else {
@@ -6372,9 +6399,9 @@ static int io_sq_thread(void *data)
 			}
 
 			/* Tell userspace we may need a wakeup call */
+			spin_lock_irq(&ctx->completion_lock);
 			ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
-			/* make sure to read SQ tail after writing flags */
-			smp_mb();
+			spin_unlock_irq(&ctx->completion_lock);
 
 			to_submit = io_sqring_entries(ctx);
 			if (!to_submit || ret == -EBUSY) {
@@ -6391,13 +6418,17 @@ static int io_sq_thread(void *data)
 				schedule();
 				finish_wait(&ctx->sqo_wait, &wait);
 
+				spin_lock_irq(&ctx->completion_lock);
 				ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
+				spin_unlock_irq(&ctx->completion_lock);
 				ret = 0;
 				continue;
 			}
 			finish_wait(&ctx->sqo_wait, &wait);
 
+			spin_lock_irq(&ctx->completion_lock);
 			ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
+			spin_unlock_irq(&ctx->completion_lock);
 		}
 
 		mutex_lock(&ctx->uring_lock);
@@ -6982,6 +7013,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 		for (i = 0; i < nr_tables; i++)
 			kfree(ctx->file_data->table[i].files);
 
+		percpu_ref_exit(&ctx->file_data->refs);
 		kfree(ctx->file_data->table);
 		kfree(ctx->file_data);
 		ctx->file_data = NULL;
@@ -7134,8 +7166,10 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 			}
 			table->files[index] = file;
 			err = io_sqe_file_register(ctx, file, i);
-			if (err)
+			if (err) {
+				fput(file);
 				break;
+			}
 		}
 		nr_args--;
 		done++;
@@ -7665,8 +7699,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	io_mem_free(ctx->sq_sqes);
 
 	percpu_ref_exit(&ctx->refs);
-	io_unaccount_mem(ctx, ring_pages(ctx->sq_entries, ctx->cq_entries),
-			 ACCT_LOCKED);
 	free_uid(ctx->user);
 	put_cred(ctx->creds);
 	kfree(ctx->cancel_hash);
@@ -7748,6 +7780,15 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 	io_cqring_overflow_flush(ctx, true);
 	io_iopoll_try_reap_events(ctx);
 	idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
+
+	/*
+	 * Do this upfront, so we won't have a grace period where the ring
+	 * is closed but resources aren't reaped yet. This can cause
+	 * spurious failure in setting up a new ring.
+	 */
+	io_unaccount_mem(ctx, ring_pages(ctx->sq_entries, ctx->cq_entries),
+			 ACCT_LOCKED);
+
 	INIT_WORK(&ctx->exit_work, io_ring_exit_work);
 	queue_work(system_wq, &ctx->exit_work);
 }
@@ -7807,6 +7848,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
 			if (list_empty(&ctx->cq_overflow_list)) {
 				clear_bit(0, &ctx->sq_check_overflow);
 				clear_bit(0, &ctx->cq_check_overflow);
+				ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
 			}
 			spin_unlock_irq(&ctx->completion_lock);
```
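
The io_sq_thread() hunks above set and clear IORING_SQ_NEED_WAKEUP under ctx->completion_lock instead of relying on a bare smp_mb(); the userspace side of the SQPOLL handshake is unchanged. For orientation only, a hedged sketch of that consumer side (not part of this diff) follows; `ring_fd` and `sq_flags` are assumed to come from a prior io_uring_setup()/mmap() sequence.

```c
#include <linux/io_uring.h>
#include <stdatomic.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

/*
 * Sketch only: SQPOLL wakeup handshake as seen from userspace.
 * 'sq_flags' is assumed to point at the SQ ring's flags word in the
 * SQ ring mmap; 'ring_fd' is the fd returned by io_uring_setup().
 */
static void kick_sq_thread_if_needed(int ring_fd, const _Atomic uint32_t *sq_flags)
{
	uint32_t flags = atomic_load_explicit(sq_flags, memory_order_relaxed);

	if (flags & IORING_SQ_NEED_WAKEUP) {
		/* raw syscall; liburing wraps this as io_uring_enter() */
		syscall(__NR_io_uring_enter, ring_fd, 0, 0,
			IORING_ENTER_SQ_WAKEUP, NULL, 0);
	}
}
```

A real submitter would fold this check into its submission path after updating the SQ tail, as liburing does.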