From 944d1444d53f5a213457e5096db370cfd06923d4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Nov 2020 16:48:44 -0700 Subject: io_uring: handle -EOPNOTSUPP on path resolution Any attempt to do path resolution on /proc/self from an async worker will yield -EOPNOTSUPP. We can safely do that resolution from the task itself, and without blocking, so retry it from there. Ideally io_uring would know this upfront and not have to go through the worker thread to find out, but that doesn't currently seem feasible. Signed-off-by: Jens Axboe --- fs/io_uring.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'fs/io_uring.c') diff --git a/fs/io_uring.c b/fs/io_uring.c index c77584de68d7..f05978a74ce1 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -478,6 +478,7 @@ struct io_sr_msg { struct io_open { struct file *file; int dfd; + bool ignore_nonblock; struct filename *filename; struct open_how how; unsigned long nofile; @@ -3796,6 +3797,7 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe return ret; } req->open.nofile = rlimit(RLIMIT_NOFILE); + req->open.ignore_nonblock = false; req->flags |= REQ_F_NEED_CLEANUP; return 0; } @@ -3839,7 +3841,7 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock) struct file *file; int ret; - if (force_nonblock) + if (force_nonblock && !req->open.ignore_nonblock) return -EAGAIN; ret = build_open_flags(&req->open.how, &op); @@ -3854,6 +3856,21 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock) if (IS_ERR(file)) { put_unused_fd(ret); ret = PTR_ERR(file); + /* + * A work-around to ensure that /proc/self works that way + * that it should - if we get -EOPNOTSUPP back, then assume + * that proc_self_get_link() failed us because we're in async + * context. We should be safe to retry this from the task + * itself with force_nonblock == false set, as it should not + * block on lookup. Would be nice to know this upfront and + * avoid the async dance, but doesn't seem feasible. + */ + if (ret == -EOPNOTSUPP && io_wq_current_is_worker()) { + req->open.ignore_nonblock = true; + refcount_inc(&req->refs); + io_req_task_queue(req); + return 0; + } } else { fsnotify_open(file); fd_install(ret, file); -- cgit v1.2.3 From c993df5a688975bf9ce899706ca13d2bc8d6be25 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Nov 2020 07:59:16 -0700 Subject: io_uring: don't double complete failed reissue request Zorro reports that an xfstest test case is failing, and it turns out that for the reissue path we can potentially issue a double completion on the request for the failure path. There's an issue around the retry as well, but for now, at least just make sure that we handle the error path correctly. Cc: stable@vger.kernel.org Fixes: b63534c41e20 ("io_uring: re-issue block requests that failed because of resources") Reported-by: Zorro Lang Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/io_uring.c') diff --git a/fs/io_uring.c b/fs/io_uring.c index f05978a74ce1..b205c1df3f74 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2578,7 +2578,6 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error) } end_req: req_set_fail_links(req); - io_req_complete(req, ret); return false; } #endif -- cgit v1.2.3 From 1e5d770bb8a23dd01e28e92f4fb0b1093c8bdbe6 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 18 Nov 2020 14:56:25 +0000 Subject: io_uring: get an active ref_node from files_data An active ref_node always can be found in ctx->files_data, it's much safer to get it this way instead of poking into files_data->ref_list. Signed-off-by: Pavel Begunkov Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/io_uring.c') diff --git a/fs/io_uring.c b/fs/io_uring.c index b205c1df3f74..5cb194ca4fce 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6974,9 +6974,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) return -ENXIO; spin_lock(&data->lock); - if (!list_empty(&data->ref_list)) - ref_node = list_first_entry(&data->ref_list, - struct fixed_file_ref_node, node); + ref_node = data->node; spin_unlock(&data->lock); if (ref_node) percpu_ref_kill(&ref_node->refs); -- cgit v1.2.3 From e297822b20e7fe683e107aea46e6402adcf99c70 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 18 Nov 2020 14:56:26 +0000 Subject: io_uring: order refnode recycling Don't recycle a refnode until we're done with all requests of nodes ejected before. Signed-off-by: Pavel Begunkov Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) (limited to 'fs/io_uring.c') diff --git a/fs/io_uring.c b/fs/io_uring.c index 5cb194ca4fce..7d4b755ab451 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -205,6 +205,7 @@ struct fixed_file_ref_node { struct list_head file_list; struct fixed_file_data *file_data; struct llist_node llist; + bool done; }; struct fixed_file_data { @@ -7323,10 +7324,6 @@ static void __io_file_put_work(struct fixed_file_ref_node *ref_node) kfree(pfile); } - spin_lock(&file_data->lock); - list_del(&ref_node->node); - spin_unlock(&file_data->lock); - percpu_ref_exit(&ref_node->refs); kfree(ref_node); percpu_ref_put(&file_data->refs); @@ -7353,17 +7350,32 @@ static void io_file_put_work(struct work_struct *work) static void io_file_data_ref_zero(struct percpu_ref *ref) { struct fixed_file_ref_node *ref_node; + struct fixed_file_data *data; struct io_ring_ctx *ctx; - bool first_add; + bool first_add = false; int delay = HZ; ref_node = container_of(ref, struct fixed_file_ref_node, refs); - ctx = ref_node->file_data->ctx; + data = ref_node->file_data; + ctx = data->ctx; + + spin_lock(&data->lock); + ref_node->done = true; + + while (!list_empty(&data->ref_list)) { + ref_node = list_first_entry(&data->ref_list, + struct fixed_file_ref_node, node); + /* recycle ref nodes in order */ + if (!ref_node->done) + break; + list_del(&ref_node->node); + first_add |= llist_add(&ref_node->llist, &ctx->file_put_llist); + } + spin_unlock(&data->lock); - if (percpu_ref_is_dying(&ctx->file_data->refs)) + if (percpu_ref_is_dying(&data->refs)) delay = 0; - first_add = llist_add(&ref_node->llist, &ctx->file_put_llist); if (!delay) mod_delayed_work(system_wq, &ctx->file_put_work, 0); else if (first_add) @@ -7387,6 +7399,7 @@ static struct fixed_file_ref_node *alloc_fixed_file_ref_node( INIT_LIST_HEAD(&ref_node->node); INIT_LIST_HEAD(&ref_node->file_list); ref_node->file_data = ctx->file_data; + ref_node->done = false; return ref_node; } @@ -7482,7 +7495,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, file_data->node = ref_node; spin_lock(&file_data->lock); - list_add(&ref_node->node, &file_data->ref_list); + list_add_tail(&ref_node->node, &file_data->ref_list); spin_unlock(&file_data->lock); percpu_ref_get(&file_data->refs); return ret; @@ -7641,7 +7654,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, if (needs_switch) { percpu_ref_kill(&data->node->refs); spin_lock(&data->lock); - list_add(&ref_node->node, &data->ref_list); + list_add_tail(&ref_node->node, &data->ref_list); data->node = ref_node; spin_unlock(&data->lock); percpu_ref_get(&ctx->file_data->refs); -- cgit v1.2.3