diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-05-07 10:00:09 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-05-07 10:00:09 -0700 |
commit | 03e5cb7b50feb687508946a702febaba24c77f0b (patch) | |
tree | 3003da4195f9aa51814c26e263cdee7da46fb181 | |
parent | fc4354c6e5c21257cf4a50b32f7c11c7d65c55b3 (diff) | |
parent | d2b7fa6174bc4260e496cbf84375c73636914641 (diff) | |
download | lwn-03e5cb7b50feb687508946a702febaba24c77f0b.tar.gz lwn-03e5cb7b50feb687508946a702febaba24c77f0b.zip |
Merge tag 'for-6.4/io_uring-2023-05-07' of git://git.kernel.dk/linux
Pull more io_uring updates from Jens Axboe:
"Nothing major in here, just two different parts:
- A small series from Breno that enables passing the full SQE down
for ->uring_cmd().
This is a prerequisite for enabling full network socket operations.
Queued up a bit late because of some stylistic concerns that got
resolved, would be nice to have this in 6.4-rc1 so the dependent
work will be easier to handle for 6.5.
- Fix for the huge page coalescing, which was a regression introduced
in the 6.3 kernel release (Tobias)"
* tag 'for-6.4/io_uring-2023-05-07' of git://git.kernel.dk/linux:
io_uring: Remove unnecessary BUILD_BUG_ON
io_uring: Pass whole sqe to commands
io_uring: Create a helper to return the SQE size
io_uring/rsrc: check for nonconsecutive pages
-rw-r--r-- | drivers/block/ublk_drv.c | 40 | ||||
-rw-r--r-- | drivers/nvme/host/ioctl.c | 2 | ||||
-rw-r--r-- | include/linux/io_uring.h | 7 | ||||
-rw-r--r-- | io_uring/io_uring.h | 10 | ||||
-rw-r--r-- | io_uring/opdef.c | 2 | ||||
-rw-r--r-- | io_uring/rsrc.c | 7 | ||||
-rw-r--r-- | io_uring/uring_cmd.c | 12 | ||||
-rw-r--r-- | io_uring/uring_cmd.h | 8 |
8 files changed, 47 insertions, 41 deletions
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 72a5cde9a5af..c7331f519750 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1035,7 +1035,7 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma) } static void ublk_commit_completion(struct ublk_device *ub, - struct ublksrv_io_cmd *ub_cmd) + const struct ublksrv_io_cmd *ub_cmd) { u32 qid = ub_cmd->q_id, tag = ub_cmd->tag; struct ublk_queue *ubq = ublk_get_queue(ub, qid); @@ -1292,7 +1292,7 @@ static inline int ublk_check_cmd_op(u32 cmd_op) static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags, - struct ublksrv_io_cmd *ub_cmd) + const struct ublksrv_io_cmd *ub_cmd) { struct ublk_device *ub = cmd->file->private_data; struct ublk_queue *ubq; @@ -1399,17 +1399,17 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) { - struct ublksrv_io_cmd *ub_src = (struct ublksrv_io_cmd *) cmd->cmd; - struct ublksrv_io_cmd ub_cmd; - /* * Not necessary for async retry, but let's keep it simple and always * copy the values to avoid any potential reuse. */ - ub_cmd.q_id = READ_ONCE(ub_src->q_id); - ub_cmd.tag = READ_ONCE(ub_src->tag); - ub_cmd.result = READ_ONCE(ub_src->result); - ub_cmd.addr = READ_ONCE(ub_src->addr); + const struct ublksrv_io_cmd *ub_src = io_uring_sqe_cmd(cmd->sqe); + const struct ublksrv_io_cmd ub_cmd = { + .q_id = READ_ONCE(ub_src->q_id), + .tag = READ_ONCE(ub_src->tag), + .result = READ_ONCE(ub_src->result), + .addr = READ_ONCE(ub_src->addr) + }; return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd); } @@ -1619,7 +1619,7 @@ static struct ublk_device *ublk_get_device_from_id(int idx) static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); int ublksrv_pid = (int)header->data[0]; struct gendisk *disk; int ret = -EINVAL; @@ -1682,7 +1682,7 @@ out_unlock: static int ublk_ctrl_get_queue_affinity(struct ublk_device *ub, struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; cpumask_var_t cpumask; unsigned long queue; @@ -1733,7 +1733,7 @@ static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info) static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; struct ublksrv_ctrl_dev_info info; struct ublk_device *ub; @@ -1910,7 +1910,7 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub) static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); pr_devel("%s: cmd_op %x, dev id %d qid %d data %llx buf %llx len %u\n", __func__, cmd->cmd_op, header->dev_id, header->queue_id, @@ -1929,7 +1929,7 @@ static int ublk_ctrl_stop_dev(struct ublk_device *ub) static int ublk_ctrl_get_dev_info(struct ublk_device *ub, struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr) @@ -1960,7 +1960,7 @@ static void ublk_ctrl_fill_params_devt(struct ublk_device *ub) static int ublk_ctrl_get_params(struct ublk_device *ub, struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; struct ublk_params_header ph; int ret; @@ -1991,7 +1991,7 @@ static int ublk_ctrl_get_params(struct ublk_device *ub, static int ublk_ctrl_set_params(struct ublk_device *ub, struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; struct ublk_params_header ph; int ret = -EFAULT; @@ -2052,7 +2052,7 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) static int ublk_ctrl_start_recovery(struct ublk_device *ub, struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); int ret = -EINVAL; int i; @@ -2094,7 +2094,7 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub, static int ublk_ctrl_end_recovery(struct ublk_device *ub, struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); int ublksrv_pid = (int)header->data[0]; int ret = -EINVAL; @@ -2161,7 +2161,7 @@ exit: static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub, struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)io_uring_sqe_cmd(cmd->sqe); bool unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV; void __user *argp = (void __user *)(unsigned long)header->addr; char *dev_path = NULL; @@ -2240,7 +2240,7 @@ exit: static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); struct ublk_device *ub = NULL; u32 cmd_op = cmd->cmd_op; int ret = -EINVAL; diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index d24ea2e05156..81c5c9e38477 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -552,7 +552,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) { struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); - const struct nvme_uring_cmd *cmd = ioucmd->cmd; + const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe); struct request_queue *q = ns ? ns->queue : ctrl->admin_q; struct nvme_uring_data d; struct nvme_command c; diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 35b9328ca335..3399d979ee1c 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -24,7 +24,7 @@ enum io_uring_cmd_flags { struct io_uring_cmd { struct file *file; - const void *cmd; + const struct io_uring_sqe *sqe; union { /* callback to defer completions to task context */ void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned); @@ -66,6 +66,11 @@ static inline void io_uring_free(struct task_struct *tsk) if (tsk->io_uring) __io_uring_free(tsk); } + +static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) +{ + return sqe->cmd; +} #else static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 25515d69d205..259bf798a390 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -394,4 +394,14 @@ static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res) io_req_task_work_add(req); } +/* + * IORING_SETUP_SQE128 contexts allocate twice the normal SQE size for each + * slot. + */ +static inline size_t uring_sqe_size(struct io_ring_ctx *ctx) +{ + if (ctx->flags & IORING_SETUP_SQE128) + return 2 * sizeof(struct io_uring_sqe); + return sizeof(struct io_uring_sqe); +} #endif diff --git a/io_uring/opdef.c b/io_uring/opdef.c index cca7c5b55208..3b9c6489b8b6 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -627,7 +627,7 @@ const struct io_cold_def io_cold_defs[] = { }, [IORING_OP_URING_CMD] = { .name = "URING_CMD", - .async_size = uring_cmd_pdu_size(1), + .async_size = 2 * sizeof(struct io_uring_sqe), .prep_async = io_uring_cmd_prep_async, }, [IORING_OP_SEND_ZC] = { diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index d4c91393e0d3..d46f72a5ef73 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1116,7 +1116,12 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, if (nr_pages > 1) { folio = page_folio(pages[0]); for (i = 1; i < nr_pages; i++) { - if (page_folio(pages[i]) != folio) { + /* + * Pages must be consecutive and on the same folio for + * this to work + */ + if (page_folio(pages[i]) != folio || + pages[i] != pages[i - 1] + 1) { folio = NULL; break; } diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 5113c9a48583..5e32db48696d 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -69,15 +69,9 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_done); int io_uring_cmd_prep_async(struct io_kiocb *req) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); - size_t cmd_size; - BUILD_BUG_ON(uring_cmd_pdu_size(0) != 16); - BUILD_BUG_ON(uring_cmd_pdu_size(1) != 80); - - cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128); - - memcpy(req->async_data, ioucmd->cmd, cmd_size); - ioucmd->cmd = req->async_data; + memcpy(req->async_data, ioucmd->sqe, uring_sqe_size(req->ctx)); + ioucmd->sqe = req->async_data; return 0; } @@ -103,7 +97,7 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->imu = ctx->user_bufs[index]; io_req_set_rsrc_node(req, ctx, 0); } - ioucmd->cmd = sqe->cmd; + ioucmd->sqe = sqe; ioucmd->cmd_op = READ_ONCE(sqe->cmd_op); return 0; } diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h index 7c6697d13cb2..8117684ec3ca 100644 --- a/io_uring/uring_cmd.h +++ b/io_uring/uring_cmd.h @@ -3,11 +3,3 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags); int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_uring_cmd_prep_async(struct io_kiocb *req); - -/* - * The URING_CMD payload starts at 'cmd' in the first sqe, and continues into - * the following sqe if SQE128 is used. - */ -#define uring_cmd_pdu_size(is_sqe128) \ - ((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) - \ - offsetof(struct io_uring_sqe, cmd)) |