summaryrefslogtreecommitdiff
path: root/fs/io_uring.c
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2020-01-16 15:36:52 -0700
committerJens Axboe <axboe@kernel.dk>2020-01-20 17:04:06 -0700
commit66f4af93da5761d2fa05c0dc673a47003cdb9cfe (patch)
tree244089b13c38acd506861d17911e184f00ca2c3e /fs/io_uring.c
parent10fef4bebf979bb705feed087611293d5864adfe (diff)
downloadlwn-66f4af93da5761d2fa05c0dc673a47003cdb9cfe.tar.gz
lwn-66f4af93da5761d2fa05c0dc673a47003cdb9cfe.zip
io_uring: add support for probing opcodes
The application currently has no way of knowing if a given opcode is supported or not without having to try and issue one and see if we get -EINVAL or not. And even this approach is fraught with peril, as maybe we're getting -EINVAL due to some fields being missing, or maybe it's just not that easy to issue that particular command without doing some other leg work in terms of setup first. This adds IORING_REGISTER_PROBE, which fills in a structure with info on what it supported or not. This will work even with sparse opcode fields, which may happen in the future or even today if someone backports specific features to older kernels. Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--fs/io_uring.c53
1 files changed, 51 insertions, 2 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8a645a37b4c7..7715a729271a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -561,6 +561,8 @@ struct io_op_def {
unsigned hash_reg_file : 1;
/* unbound wq insertion if file is a non-regular file */
unsigned unbound_nonreg_file : 1;
+ /* opcode is not supported by this kernel */
+ unsigned not_supported : 1;
};
static const struct io_op_def io_op_defs[] = {
@@ -6566,6 +6568,45 @@ SYSCALL_DEFINE2(io_uring_setup, u32, entries,
return io_uring_setup(entries, params);
}
+static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
+{
+ struct io_uring_probe *p;
+ size_t size;
+ int i, ret;
+
+ size = struct_size(p, ops, nr_args);
+ if (size == SIZE_MAX)
+ return -EOVERFLOW;
+ p = kzalloc(size, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ ret = -EFAULT;
+ if (copy_from_user(p, arg, size))
+ goto out;
+ ret = -EINVAL;
+ if (memchr_inv(p, 0, size))
+ goto out;
+
+ p->last_op = IORING_OP_LAST - 1;
+ if (nr_args > IORING_OP_LAST)
+ nr_args = IORING_OP_LAST;
+
+ for (i = 0; i < nr_args; i++) {
+ p->ops[i].op = i;
+ if (!io_op_defs[i].not_supported)
+ p->ops[i].flags = IO_URING_OP_SUPPORTED;
+ }
+ p->ops_len = i;
+
+ ret = 0;
+ if (copy_to_user(arg, p, size))
+ ret = -EFAULT;
+out:
+ kfree(p);
+ return ret;
+}
+
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
void __user *arg, unsigned nr_args)
__releases(ctx->uring_lock)
@@ -6582,7 +6623,8 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
return -ENXIO;
if (opcode != IORING_UNREGISTER_FILES &&
- opcode != IORING_REGISTER_FILES_UPDATE) {
+ opcode != IORING_REGISTER_FILES_UPDATE &&
+ opcode != IORING_REGISTER_PROBE) {
percpu_ref_kill(&ctx->refs);
/*
@@ -6644,6 +6686,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_eventfd_unregister(ctx);
break;
+ case IORING_REGISTER_PROBE:
+ ret = -EINVAL;
+ if (!arg || nr_args > 256)
+ break;
+ ret = io_probe(ctx, arg, nr_args);
+ break;
default:
ret = -EINVAL;
break;
@@ -6651,7 +6699,8 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
if (opcode != IORING_UNREGISTER_FILES &&
- opcode != IORING_REGISTER_FILES_UPDATE) {
+ opcode != IORING_REGISTER_FILES_UPDATE &&
+ opcode != IORING_REGISTER_PROBE) {
/* bring the ctx back to life */
percpu_ref_reinit(&ctx->refs);
out: