diff options
Diffstat (limited to 'drivers/block/ublk_drv.c')
| -rw-r--r-- | drivers/block/ublk_drv.c | 208 |
1 files changed, 142 insertions, 66 deletions
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6ba2c1dd1d87..c6d18cd8af44 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -60,7 +60,12 @@ | UBLK_F_UNPRIVILEGED_DEV \ | UBLK_F_CMD_IOCTL_ENCODE \ | UBLK_F_USER_COPY \ - | UBLK_F_ZONED) + | UBLK_F_ZONED \ + | UBLK_F_USER_RECOVERY_FAIL_IO) + +#define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \ + | UBLK_F_USER_RECOVERY_REISSUE \ + | UBLK_F_USER_RECOVERY_FAIL_IO) /* All UBLK_PARAM_TYPE_* should be included here */ #define UBLK_PARAM_TYPE_ALL \ @@ -143,6 +148,7 @@ struct ublk_queue { bool force_abort; bool timeout; bool canceling; + bool fail_io; /* copy of dev->state == UBLK_S_DEV_FAIL_IO */ unsigned short nr_io_ready; /* how many ios setup */ spinlock_t cancel_lock; struct ublk_device *dev; @@ -179,8 +185,7 @@ struct ublk_device { unsigned int nr_queues_ready; unsigned int nr_privileged_daemon; - struct work_struct quiesce_work; - struct work_struct stop_work; + struct work_struct nosrv_work; }; /* header of ublk_params */ @@ -664,30 +669,69 @@ static inline char *ublk_queue_cmd_buf(struct ublk_device *ub, int q_id) return ublk_get_queue(ub, q_id)->io_cmd_buf; } +static inline int __ublk_queue_cmd_buf_size(int depth) +{ + return round_up(depth * sizeof(struct ublksrv_io_desc), PAGE_SIZE); +} + static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id) { struct ublk_queue *ubq = ublk_get_queue(ub, q_id); - return round_up(ubq->q_depth * sizeof(struct ublksrv_io_desc), - PAGE_SIZE); + return __ublk_queue_cmd_buf_size(ubq->q_depth); +} + +static int ublk_max_cmd_buf_size(void) +{ + return __ublk_queue_cmd_buf_size(UBLK_MAX_QUEUE_DEPTH); +} + +/* + * Should I/O outstanding to the ublk server when it exits be reissued? + * If not, outstanding I/O will get errors. + */ +static inline bool ublk_nosrv_should_reissue_outstanding(struct ublk_device *ub) +{ + return (ub->dev_info.flags & UBLK_F_USER_RECOVERY) && + (ub->dev_info.flags & UBLK_F_USER_RECOVERY_REISSUE); +} + +/* + * Should I/O issued while there is no ublk server queue? If not, I/O + * issued while there is no ublk server will get errors. + */ +static inline bool ublk_nosrv_dev_should_queue_io(struct ublk_device *ub) +{ + return (ub->dev_info.flags & UBLK_F_USER_RECOVERY) && + !(ub->dev_info.flags & UBLK_F_USER_RECOVERY_FAIL_IO); } -static inline bool ublk_queue_can_use_recovery_reissue( - struct ublk_queue *ubq) +/* + * Same as ublk_nosrv_dev_should_queue_io, but uses a queue-local copy + * of the device flags for smaller cache footprint - better for fast + * paths. + */ +static inline bool ublk_nosrv_should_queue_io(struct ublk_queue *ubq) { return (ubq->flags & UBLK_F_USER_RECOVERY) && - (ubq->flags & UBLK_F_USER_RECOVERY_REISSUE); + !(ubq->flags & UBLK_F_USER_RECOVERY_FAIL_IO); } -static inline bool ublk_queue_can_use_recovery( - struct ublk_queue *ubq) +/* + * Should ublk devices be stopped (i.e. no recovery possible) when the + * ublk server exits? If not, devices can be used again by a future + * incarnation of a ublk server via the start_recovery/end_recovery + * commands. + */ +static inline bool ublk_nosrv_should_stop_dev(struct ublk_device *ub) { - return ubq->flags & UBLK_F_USER_RECOVERY; + return !(ub->dev_info.flags & UBLK_F_USER_RECOVERY); } -static inline bool ublk_can_use_recovery(struct ublk_device *ub) +static inline bool ublk_dev_in_recoverable_state(struct ublk_device *ub) { - return ub->dev_info.flags & UBLK_F_USER_RECOVERY; + return ub->dev_info.state == UBLK_S_DEV_QUIESCED || + ub->dev_info.state == UBLK_S_DEV_FAIL_IO; } static void ublk_free_disk(struct gendisk *disk) @@ -1063,7 +1107,7 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io, { WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE); - if (ublk_queue_can_use_recovery_reissue(ubq)) + if (ublk_nosrv_should_reissue_outstanding(ubq->dev)) blk_mq_requeue_request(req, false); else ublk_put_req_ref(ubq, req); @@ -1091,7 +1135,7 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, struct request *rq) { /* We cannot process this rq so just requeue it. */ - if (ublk_queue_can_use_recovery(ubq)) + if (ublk_nosrv_dev_should_queue_io(ubq->dev)) blk_mq_requeue_request(rq, false); else blk_mq_end_request(rq, BLK_STS_IOERR); @@ -1236,10 +1280,7 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq) struct ublk_device *ub = ubq->dev; if (ublk_abort_requests(ub, ubq)) { - if (ublk_can_use_recovery(ub)) - schedule_work(&ub->quiesce_work); - else - schedule_work(&ub->stop_work); + schedule_work(&ub->nosrv_work); } return BLK_EH_DONE; } @@ -1254,6 +1295,10 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq = bd->rq; blk_status_t res; + if (unlikely(ubq->fail_io)) { + return BLK_STS_TARGET; + } + /* fill iod to slot in io cmd buffer */ res = ublk_setup_iod(ubq, rq); if (unlikely(res != BLK_STS_OK)) @@ -1268,7 +1313,7 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, * Note: force_abort is guaranteed to be seen because it is set * before request queue is unqiuesced. */ - if (ublk_queue_can_use_recovery(ubq) && unlikely(ubq->force_abort)) + if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) return BLK_STS_IOERR; if (unlikely(ubq->canceling)) { @@ -1322,7 +1367,7 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma) { struct ublk_device *ub = filp->private_data; size_t sz = vma->vm_end - vma->vm_start; - unsigned max_sz = UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc); + unsigned max_sz = ublk_max_cmd_buf_size(); unsigned long pfn, end, phys_off = vma->vm_pgoff << PAGE_SHIFT; int q_id, ret = 0; @@ -1489,10 +1534,7 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, ublk_cancel_cmd(ubq, io, issue_flags); if (need_schedule) { - if (ublk_can_use_recovery(ub)) - schedule_work(&ub->quiesce_work); - else - schedule_work(&ub->stop_work); + schedule_work(&ub->nosrv_work); } } @@ -1555,20 +1597,6 @@ static void __ublk_quiesce_dev(struct ublk_device *ub) ub->dev_info.state = UBLK_S_DEV_QUIESCED; } -static void ublk_quiesce_work_fn(struct work_struct *work) -{ - struct ublk_device *ub = - container_of(work, struct ublk_device, quiesce_work); - - mutex_lock(&ub->mutex); - if (ub->dev_info.state != UBLK_S_DEV_LIVE) - goto unlock; - __ublk_quiesce_dev(ub); - unlock: - mutex_unlock(&ub->mutex); - ublk_cancel_dev(ub); -} - static void ublk_unquiesce_dev(struct ublk_device *ub) { int i; @@ -1597,7 +1625,7 @@ static void ublk_stop_dev(struct ublk_device *ub) mutex_lock(&ub->mutex); if (ub->dev_info.state == UBLK_S_DEV_DEAD) goto unlock; - if (ublk_can_use_recovery(ub)) { + if (ublk_nosrv_dev_should_queue_io(ub)) { if (ub->dev_info.state == UBLK_S_DEV_LIVE) __ublk_quiesce_dev(ub); ublk_unquiesce_dev(ub); @@ -1617,6 +1645,37 @@ static void ublk_stop_dev(struct ublk_device *ub) ublk_cancel_dev(ub); } +static void ublk_nosrv_work(struct work_struct *work) +{ + struct ublk_device *ub = + container_of(work, struct ublk_device, nosrv_work); + int i; + + if (ublk_nosrv_should_stop_dev(ub)) { + ublk_stop_dev(ub); + return; + } + + mutex_lock(&ub->mutex); + if (ub->dev_info.state != UBLK_S_DEV_LIVE) + goto unlock; + + if (ublk_nosrv_dev_should_queue_io(ub)) { + __ublk_quiesce_dev(ub); + } else { + blk_mq_quiesce_queue(ub->ub_disk->queue); + ub->dev_info.state = UBLK_S_DEV_FAIL_IO; + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + ublk_get_queue(ub, i)->fail_io = true; + } + blk_mq_unquiesce_queue(ub->ub_disk->queue); + } + + unlock: + mutex_unlock(&ub->mutex); + ublk_cancel_dev(ub); +} + /* device can only be started after all IOs are ready */ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) { @@ -2130,14 +2189,6 @@ static int ublk_add_chdev(struct ublk_device *ub) return ret; } -static void ublk_stop_work_fn(struct work_struct *work) -{ - struct ublk_device *ub = - container_of(work, struct ublk_device, stop_work); - - ublk_stop_dev(ub); -} - /* align max io buffer size with PAGE_SIZE */ static void ublk_align_max_io_size(struct ublk_device *ub) { @@ -2162,8 +2213,7 @@ static int ublk_add_tag_set(struct ublk_device *ub) static void ublk_remove(struct ublk_device *ub) { ublk_stop_dev(ub); - cancel_work_sync(&ub->stop_work); - cancel_work_sync(&ub->quiesce_work); + cancel_work_sync(&ub->nosrv_work); cdev_device_del(&ub->cdev, &ub->cdev_dev); ublk_put_device(ub); ublks_added--; @@ -2229,7 +2279,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) lim.features |= BLK_FEAT_ZONED; lim.max_active_zones = p->max_active_zones; lim.max_open_zones = p->max_open_zones; - lim.max_zone_append_sectors = p->max_zone_append_sectors; + lim.max_hw_zone_append_sectors = p->max_zone_append_sectors; } if (ub->params.basic.attrs & UBLK_ATTR_VOLATILE_CACHE) { @@ -2372,6 +2422,19 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV)) return -EPERM; + /* forbid nonsense combinations of recovery flags */ + switch (info.flags & UBLK_F_ALL_RECOVERY_FLAGS) { + case 0: + case UBLK_F_USER_RECOVERY: + case (UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE): + case (UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO): + break; + default: + pr_warn("%s: invalid recovery flags %llx\n", __func__, + info.flags & UBLK_F_ALL_RECOVERY_FLAGS); + return -EINVAL; + } + /* * unprivileged device can't be trusted, but RECOVERY and * RECOVERY_REISSUE still may hang error handling, so can't @@ -2424,8 +2487,7 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) goto out_unlock; mutex_init(&ub->mutex); spin_lock_init(&ub->lock); - INIT_WORK(&ub->quiesce_work, ublk_quiesce_work_fn); - INIT_WORK(&ub->stop_work, ublk_stop_work_fn); + INIT_WORK(&ub->nosrv_work, ublk_nosrv_work); ret = ublk_alloc_dev_number(ub, header->dev_id); if (ret < 0) @@ -2560,9 +2622,7 @@ static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd) static int ublk_ctrl_stop_dev(struct ublk_device *ub) { ublk_stop_dev(ub); - cancel_work_sync(&ub->stop_work); - cancel_work_sync(&ub->quiesce_work); - + cancel_work_sync(&ub->nosrv_work); return 0; } @@ -2699,7 +2759,7 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub, int i; mutex_lock(&ub->mutex); - if (!ublk_can_use_recovery(ub)) + if (ublk_nosrv_should_stop_dev(ub)) goto out_unlock; if (!ub->nr_queues_ready) goto out_unlock; @@ -2710,14 +2770,18 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub, * and related io_uring ctx is freed so file struct of /dev/ublkcX is * released. * + * and one of the following holds + * * (2) UBLK_S_DEV_QUIESCED is set, which means the quiesce_work: * (a)has quiesced request queue * (b)has requeued every inflight rqs whose io_flags is ACTIVE * (c)has requeued/aborted every inflight rqs whose io_flags is NOT ACTIVE * (d)has completed/camceled all ioucmds owned by ther dying process + * + * (3) UBLK_S_DEV_FAIL_IO is set, which means the queue is not + * quiesced, but all I/O is being immediately errored */ - if (test_bit(UB_STATE_OPEN, &ub->state) || - ub->dev_info.state != UBLK_S_DEV_QUIESCED) { + if (test_bit(UB_STATE_OPEN, &ub->state) || !ublk_dev_in_recoverable_state(ub)) { ret = -EBUSY; goto out_unlock; } @@ -2741,6 +2805,7 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub, const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); int ublksrv_pid = (int)header->data[0]; int ret = -EINVAL; + int i; pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n", __func__, ub->dev_info.nr_hw_queues, header->dev_id); @@ -2752,21 +2817,32 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub, __func__, ub->dev_info.nr_hw_queues, header->dev_id); mutex_lock(&ub->mutex); - if (!ublk_can_use_recovery(ub)) + if (ublk_nosrv_should_stop_dev(ub)) goto out_unlock; - if (ub->dev_info.state != UBLK_S_DEV_QUIESCED) { + if (!ublk_dev_in_recoverable_state(ub)) { ret = -EBUSY; goto out_unlock; } ub->dev_info.ublksrv_pid = ublksrv_pid; pr_devel("%s: new ublksrv_pid %d, dev id %d\n", __func__, ublksrv_pid, header->dev_id); - blk_mq_unquiesce_queue(ub->ub_disk->queue); - pr_devel("%s: queue unquiesced, dev id %d.\n", - __func__, header->dev_id); - blk_mq_kick_requeue_list(ub->ub_disk->queue); - ub->dev_info.state = UBLK_S_DEV_LIVE; + + if (ublk_nosrv_dev_should_queue_io(ub)) { + ub->dev_info.state = UBLK_S_DEV_LIVE; + blk_mq_unquiesce_queue(ub->ub_disk->queue); + pr_devel("%s: queue unquiesced, dev id %d.\n", + __func__, header->dev_id); + blk_mq_kick_requeue_list(ub->ub_disk->queue); + } else { + blk_mq_quiesce_queue(ub->ub_disk->queue); + ub->dev_info.state = UBLK_S_DEV_LIVE; + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + ublk_get_queue(ub, i)->fail_io = false; + } + blk_mq_unquiesce_queue(ub->ub_disk->queue); + } + ret = 0; out_unlock: mutex_unlock(&ub->mutex); |
