summaryrefslogtreecommitdiff
path: root/io_uring
diff options
context:
space:
mode:
author: Jens Axboe <axboe@kernel.dk> 2024-01-04 12:21:08 -0700
committer: Jens Axboe <axboe@kernel.dk> 2024-01-04 12:21:08 -0700
commit: 6ff1407e24e6fdfa4a16ba9ba551e3d253a26391 (patch)
tree: 5a905e6c11599e8b25b5fd25d86f359fc7907f32 /io_uring
parent: d293b1a89694fc4918d9a4330a71ba2458f9d581 (diff)
downloadlwn-6ff1407e24e6fdfa4a16ba9ba551e3d253a26391.tar.gz
lwn-6ff1407e24e6fdfa4a16ba9ba551e3d253a26391.zip
io_uring: ensure local task_work is run on wait timeout
A previous commit added an earlier break condition here, which is fine if we're using non-local task_work as it'll be run on return to userspace. However, if DEFER_TASKRUN is used, then we could be leaving local task_work that is ready to process in the ctx list until next time that we enter the kernel to wait for events.

Move the break condition to _after_ we have run task_work.

Cc: stable@vger.kernel.org
Fixes: 846072f16eed ("io_uring: mimimise io_cqring_wait_schedule")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'io_uring')
-rw-r--r-- io_uring/io_uring.c | 14
1 file changed, 12 insertions, 2 deletions
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index a9a519fa9926..4afb911fc042 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2615,8 +2615,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
__set_current_state(TASK_RUNNING);
atomic_set(&ctx->cq_wait_nr, 0);
- if (ret < 0)
- break;
/*
* Run task_work after scheduling and before io_should_wake().
* If we got woken because of task_work being processed, run it
@@ -2626,6 +2624,18 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
if (!llist_empty(&ctx->work_llist))
io_run_local_work(ctx);
+ /*
+ * Non-local task_work will be run on exit to userspace, but
+ * if we're using DEFER_TASKRUN, then we could have waited
+ * with a timeout for a number of requests. If the timeout
+ * hits, we could have some requests ready to process. Ensure
+ * this break is _after_ we have run task_work, to avoid
+ * deferring running potentially pending requests until the
+ * next time we wait for events.
+ */
+ if (ret < 0)
+ break;
+
check_cq = READ_ONCE(ctx->check_cq);
if (unlikely(check_cq)) {
/* let the caller flush overflows, retry */