 include/linux/io_uring_types.h |  2 ++
 io_uring/io_uring.c            | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 io_uring/io_uring.h            |  7 +++----
 3 files changed, 64 insertions(+), 5 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 0d94ee191c15..7b5e90520278 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -206,6 +206,7 @@ struct io_ring_ctx {
 		unsigned int		syscall_iopoll: 1;
 		/* all CQEs should be posted only by the submitter task */
 		unsigned int		task_complete: 1;
+		unsigned int		poll_activated: 1;
 
 		enum task_work_notify_mode	notify_method;
 		struct io_rings			*rings;
@@ -357,6 +358,7 @@ struct io_ring_ctx {
 	u32			iowq_limits[2];
 	bool			iowq_limits_set;
 
+	struct callback_head	poll_wq_task_work;
 	struct list_head	defer_list;
 	unsigned		sq_thread_idle;
 	/* protected by ->completion_lock */
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 2ee2bcaeadfd..de71730d9051 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -573,6 +573,8 @@ static void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
 
 void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
 {
+	if (ctx->poll_activated)
+		io_poll_wq_wake(ctx);
 	if (ctx->off_timeout_used)
 		io_flush_timeouts(ctx);
 	if (ctx->drain_active) {
@@ -2782,11 +2784,53 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	kfree(ctx);
 }
 
+static __cold void io_activate_pollwq_cb(struct callback_head *cb)
+{
+	struct io_ring_ctx *ctx = container_of(cb, struct io_ring_ctx,
+					       poll_wq_task_work);
+
+	mutex_lock(&ctx->uring_lock);
+	ctx->poll_activated = true;
+	mutex_unlock(&ctx->uring_lock);
+
+	/*
+	 * Wake ups for some events between start of polling and activation
+	 * might've been lost due to loose synchronisation.
+	 */
+	wake_up_all(&ctx->poll_wq);
+	percpu_ref_put(&ctx->refs);
+}
+
+static __cold void io_activate_pollwq(struct io_ring_ctx *ctx)
+{
+	spin_lock(&ctx->completion_lock);
+	/* already activated or in progress */
+	if (ctx->poll_activated || ctx->poll_wq_task_work.func)
+		goto out;
+	if (WARN_ON_ONCE(!ctx->task_complete))
+		goto out;
+	if (!ctx->submitter_task)
+		goto out;
+	/*
+	 * with ->submitter_task only the submitter task completes requests, we
+	 * only need to sync with it, which is done by injecting a tw
+	 */
+	init_task_work(&ctx->poll_wq_task_work, io_activate_pollwq_cb);
+	percpu_ref_get(&ctx->refs);
+	if (task_work_add(ctx->submitter_task, &ctx->poll_wq_task_work, TWA_SIGNAL))
+		percpu_ref_put(&ctx->refs);
+out:
+	spin_unlock(&ctx->completion_lock);
+}
+
 static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 {
 	struct io_ring_ctx *ctx = file->private_data;
 	__poll_t mask = 0;
 
+	if (unlikely(!ctx->poll_activated))
+		io_activate_pollwq(ctx);
+
 	poll_wait(file, &ctx->poll_wq, wait);
 	/*
 	 * synchronizes with barrier from wq_has_sleeper call in
@@ -3594,6 +3638,13 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
 		ctx->task_complete = true;
 
 	/*
+	 * lazy poll_wq activation relies on ->task_complete for synchronisation
+	 * purposes, see io_activate_pollwq()
+	 */
+	if (!ctx->task_complete)
+		ctx->poll_activated = true;
+
+	/*
 	 * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
 	 * space applications don't need to do io completion events
 	 * polling again, they can rely on io_sq_thread to do polling
@@ -3886,8 +3937,15 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx)
 	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
 		return -EBADFD;
 
-	if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task)
+	if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task) {
 		WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
+		/*
+		 * Lazy activation attempts would fail if it was polled before
+		 * submitter_task is set.
+		 */
+		if (wq_has_sleeper(&ctx->poll_wq))
+			io_activate_pollwq(ctx);
+	}
 
 	if (ctx->restrictions.registered)
 		ctx->restricted = 1;
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index c75bbb94703c..5113e0ddb01d 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -222,7 +222,7 @@ static inline void io_commit_cqring(struct io_ring_ctx *ctx)
 
 static inline void io_poll_wq_wake(struct io_ring_ctx *ctx)
 {
-	if (waitqueue_active(&ctx->poll_wq))
+	if (wq_has_sleeper(&ctx->poll_wq))
 		__wake_up(&ctx->poll_wq, TASK_NORMAL, 0,
 				poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
 }
@@ -230,8 +230,6 @@ static inline void io_poll_wq_wake(struct io_ring_ctx *ctx)
 /* requires smb_mb() prior, see wq_has_sleeper() */
 static inline void __io_cqring_wake(struct io_ring_ctx *ctx)
 {
-	io_poll_wq_wake(ctx);
-
 	/*
 	 * Trigger waitqueue handler on all waiters on our waitqueue. This
 	 * won't necessarily wake up all the tasks, io_should_wake() will make
@@ -316,7 +314,8 @@ static inline void io_req_complete_defer(struct io_kiocb *req)
 
 static inline void io_commit_cqring_flush(struct io_ring_ctx *ctx)
 {
-	if (unlikely(ctx->off_timeout_used || ctx->drain_active || ctx->has_evfd))
+	if (unlikely(ctx->off_timeout_used || ctx->drain_active ||
+		     ctx->has_evfd || ctx->poll_activated))
 		__io_commit_cqring_flush(ctx);
 }
 
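
For illustration, here is a minimal userspace sketch (not part of the patch) of how the lazy path gets exercised. It assumes liburing is installed and a kernel where DEFER_TASKRUN rings complete all requests in the submitter task (i.e. ctx->task_complete is set), so the ring starts with poll_activated == false; error handling is trimmed for brevity:

/* build: cc example.c -luring */
#include <liburing.h>
#include <sys/epoll.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct epoll_event ev = { .events = EPOLLIN };
	int epfd;

	/* SINGLE_ISSUER|DEFER_TASKRUN gives ctx->task_complete, so the
	 * poll_wq starts deactivated */
	if (io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
					  IORING_SETUP_DEFER_TASKRUN))
		return 1;

	epfd = epoll_create1(0);

	/* the first poll of the ring fd hits io_uring_poll(), which calls
	 * io_activate_pollwq() and queues the activation tw on this task */
	epoll_ctl(epfd, EPOLL_CTL_ADD, ring.ring_fd, &ev);

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);
	/* entering the kernel runs the pending task_work, flipping
	 * poll_activated; the NOP's CQE then wakes poll_wq via
	 * __io_commit_cqring_flush() */
	io_uring_submit_and_wait(&ring, 1);

	/* the unconsumed CQE makes the ring fd report readable */
	if (epoll_wait(epfd, &ev, 1, 1000) == 1)
		printf("ring fd is readable\n");

	io_uring_queue_exit(&ring);
	return 0;
}

Rings that never get polled skip the wq_has_sleeper() check in the completion hot path entirely, which is the point of making activation lazy.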
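
The io_register_enable_rings() hunk closes a window this sketch (again illustrative, assuming liburing; the sleep is a crude stand-in for real synchronisation) tries to show: a waiter can block on poll_wq while the ring is still R_DISABLED, i.e. before ->submitter_task exists, so io_activate_pollwq() bails out at the !ctx->submitter_task check. Enabling the ring must then re-check wq_has_sleeper() and activate on the waiter's behalf:

/* build: cc example.c -luring -lpthread */
#include <liburing.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>

static void *poller(void *arg)
{
	struct pollfd pfd = { .fd = *(int *)arg, .events = POLLIN };

	/* sleeps on ctx->poll_wq; activation attempts fail while the
	 * ring is disabled because ->submitter_task is still NULL */
	poll(&pfd, 1, -1);
	return NULL;
}

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	pthread_t thread;

	io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
				      IORING_SETUP_DEFER_TASKRUN |
				      IORING_SETUP_R_DISABLED);

	pthread_create(&thread, NULL, poller, &ring.ring_fd);
	sleep(1);	/* crude: let the poller reach poll_wq first */

	/* sets ->submitter_task to this task; with the patch it also sees
	 * the sleeping poller via wq_has_sleeper() and activates poll_wq */
	io_uring_enable_rings(&ring);

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);
	io_uring_submit_and_wait(&ring, 1);	/* CQE wakes the poller */

	pthread_join(thread, NULL);
	io_uring_queue_exit(&ring);
	return 0;
}

Without the re-check at enable time, the poller above could sleep forever: its earlier activation attempt failed, and nothing would ever wake poll_wq.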