diff options
author | Jens Axboe <axboe@kernel.dk> | 2020-02-04 19:54:55 -0700 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2020-02-04 20:04:18 -0700 |
commit | 2faf852d1be8a4960d328492298da6448cca0279 (patch) | |
tree | ebbae989219154e27a95c4558f201202a5fe0681 /fs/io_uring.c | |
parent | df069d80c8e38c19531c392322e9a16617475c44 (diff) | |
download | lwn-2faf852d1be8a4960d328492298da6448cca0279.tar.gz lwn-2faf852d1be8a4960d328492298da6448cca0279.zip |
io_uring: cleanup fixed file data table references
syzbot reports a use-after-free in io_ring_file_ref_switch() when it
tries to switch back to percpu mode. When we put the final reference to
the table by calling percpu_ref_kill_and_confirm(), we don't want the
zero reference to queue async work for flushing the potentially queued
up items. We currently do a few flush_work(), but they merely paper
around the issue, since the work item may not have been queued yet
depending on the when the percpu-ref callback gets run.
Coming into the file unregister, we know we have the ring quiesced.
io_ring_file_ref_switch() can check for whether or not the ref is dying
or not, and not queue anything async at that point. Once the ref has
been confirmed killed, flush any potential items manually.
Reported-by: syzbot+7caeaea49c2c8a591e3d@syzkaller.appspotmail.com
Fixes: 05f3fb3c5397 ("io_uring: avoid ring quiesce for fixed file set unregister and update")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r-- | fs/io_uring.c | 32 |
1 files changed, 19 insertions, 13 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c index 87f8655656b5..deff11e84094 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -753,6 +753,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, struct io_uring_files_update *ip, unsigned nr_args); static int io_grab_files(struct io_kiocb *req); +static void io_ring_file_ref_flush(struct fixed_file_data *data); static struct kmem_cache *req_cachep; @@ -5261,15 +5262,10 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) if (!data) return -ENXIO; - /* protect against inflight atomic switch, which drops the ref */ - percpu_ref_get(&data->refs); - /* wait for existing switches */ - flush_work(&data->ref_work); percpu_ref_kill_and_confirm(&data->refs, io_file_ref_kill); - wait_for_completion(&data->done); - percpu_ref_put(&data->refs); - /* flush potential new switch */ flush_work(&data->ref_work); + wait_for_completion(&data->done); + io_ring_file_ref_flush(data); percpu_ref_exit(&data->refs); __io_sqe_files_unregister(ctx); @@ -5507,14 +5503,11 @@ struct io_file_put { struct completion *done; }; -static void io_ring_file_ref_switch(struct work_struct *work) +static void io_ring_file_ref_flush(struct fixed_file_data *data) { struct io_file_put *pfile, *tmp; - struct fixed_file_data *data; struct llist_node *node; - data = container_of(work, struct fixed_file_data, ref_work); - while ((node = llist_del_all(&data->put_llist)) != NULL) { llist_for_each_entry_safe(pfile, tmp, node, llist) { io_ring_file_put(data->ctx, pfile->file); @@ -5524,7 +5517,14 @@ static void io_ring_file_ref_switch(struct work_struct *work) kfree(pfile); } } +} + +static void io_ring_file_ref_switch(struct work_struct *work) +{ + struct fixed_file_data *data; + data = container_of(work, struct fixed_file_data, ref_work); + io_ring_file_ref_flush(data); percpu_ref_get(&data->refs); percpu_ref_switch_to_percpu(&data->refs); } @@ -5535,8 +5535,14 @@ static void io_file_data_ref_zero(struct percpu_ref *ref) data = container_of(ref, struct fixed_file_data, refs); - /* we can't safely switch from inside this context, punt to wq */ - queue_work(system_wq, &data->ref_work); + /* + * We can't safely switch from inside this context, punt to wq. If + * the table ref is going away, the table is being unregistered. + * Don't queue up the async work for that case, the caller will + * handle it. + */ + if (!percpu_ref_is_dying(&data->refs)) + queue_work(system_wq, &data->ref_work); } static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, |