Diffstat (limited to 'drivers/block')
-rw-r--r--   drivers/block/drbd/drbd_nl.c         |   6
-rw-r--r--   drivers/block/drbd/drbd_receiver.c   |   4
-rw-r--r--   drivers/block/drbd/drbd_state.c      |   2
-rw-r--r--   drivers/block/loop.c                 |  43
-rw-r--r--   drivers/block/null_blk/main.c        |  31
-rw-r--r--   drivers/block/sunvdc.c               |   2
-rw-r--r--   drivers/block/ublk_drv.c             |  60
-rw-r--r--   drivers/block/virtio_blk.c           | 269
8 files changed, 265 insertions, 152 deletions
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 60757ac31701..f49f2a5282e1 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1615,7 +1615,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 			drbd_send_sync_param(peer_device);
 	}

-	kvfree_rcu(old_disk_conf);
+	kvfree_rcu_mightsleep(old_disk_conf);
 	kfree(old_plan);
 	mod_timer(&device->request_timer, jiffies + HZ);
 	goto success;
@@ -2446,7 +2446,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 	mutex_unlock(&connection->resource->conf_update);
 	mutex_unlock(&connection->data.mutex);

-	kvfree_rcu(old_net_conf);
+	kvfree_rcu_mightsleep(old_net_conf);

 	if (connection->cstate >= C_WF_REPORT_PARAMS) {
 		struct drbd_peer_device *peer_device;
@@ -2860,7 +2860,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 		new_disk_conf->disk_size = (sector_t)rs.resize_size;
 		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
 		mutex_unlock(&device->resource->conf_update);
-		kvfree_rcu(old_disk_conf);
+		kvfree_rcu_mightsleep(old_disk_conf);
 		new_disk_conf = NULL;
 	}

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 757f4692b5bd..e197b2a465d2 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3759,7 +3759,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
 	drbd_info(connection, "peer data-integrity-alg: %s\n",
 		  integrity_alg[0] ? integrity_alg : "(none)");

-	kvfree_rcu(old_net_conf);
+	kvfree_rcu_mightsleep(old_net_conf);
 	return 0;

 disconnect_rcu_unlock:
@@ -4127,7 +4127,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
 		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
 		mutex_unlock(&connection->resource->conf_update);
-		kvfree_rcu(old_disk_conf);
+		kvfree_rcu_mightsleep(old_disk_conf);

 		drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
 			 (unsigned long)p_usize, (unsigned long)my_usize);
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 75d13ea0024f..2aeea295fa28 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -2071,7 +2071,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
 		conn_free_crypto(connection);
 		mutex_unlock(&connection->resource->conf_update);

-		kvfree_rcu(old_conf);
+		kvfree_rcu_mightsleep(old_conf);
 	}

 	if (ns_max.susp_fen) {
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 839373451c2b..bc31bb7072a2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1010,9 +1010,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	/* This is safe, since we have a reference from open(). */
 	__module_get(THIS_MODULE);

-	/* suppress uevents while reconfiguring the device */
-	dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
-
 	/*
 	 * If we don't hold exclusive handle for the device, upgrade to it
 	 * here to avoid changing device under exclusive owner.
@@ -1067,6 +1064,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 		}
 	}

+	/* suppress uevents while reconfiguring the device */
+	dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
+
 	disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
 	set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);

@@ -1109,17 +1109,17 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	if (partscan)
 		clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);

+	/* enable and uncork uevent now that we are done */
+	dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
+
 	loop_global_unlock(lo, is_loop);
 	if (partscan)
 		loop_reread_partitions(lo);
+
 	if (!(mode & FMODE_EXCL))
 		bd_abort_claiming(bdev, loop_configure);

-	error = 0;
-done:
-	/* enable and uncork uevent now that we are done */
-	dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
-	return error;
+	return 0;

 out_unlock:
 	loop_global_unlock(lo, is_loop);
@@ -1130,7 +1130,7 @@ out_putf:
 	fput(file);
 	/* This is safe: open() is still holding a reference. */
 	module_put(THIS_MODULE);
-	goto done;
+	return error;
 }

 static void __loop_clr_fd(struct loop_device *lo, bool release)
@@ -1859,35 +1859,44 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,

 static void loop_handle_cmd(struct loop_cmd *cmd)
 {
+	struct cgroup_subsys_state *cmd_blkcg_css = cmd->blkcg_css;
+	struct cgroup_subsys_state *cmd_memcg_css = cmd->memcg_css;
 	struct request *rq = blk_mq_rq_from_pdu(cmd);
 	const bool write = op_is_write(req_op(rq));
 	struct loop_device *lo = rq->q->queuedata;
 	int ret = 0;
 	struct mem_cgroup *old_memcg = NULL;
+	const bool use_aio = cmd->use_aio;

 	if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
 		ret = -EIO;
 		goto failed;
 	}

-	if (cmd->blkcg_css)
-		kthread_associate_blkcg(cmd->blkcg_css);
-	if (cmd->memcg_css)
+	if (cmd_blkcg_css)
+		kthread_associate_blkcg(cmd_blkcg_css);
+	if (cmd_memcg_css)
 		old_memcg = set_active_memcg(
-				mem_cgroup_from_css(cmd->memcg_css));
+				mem_cgroup_from_css(cmd_memcg_css));

+	/*
+	 * do_req_filebacked() may call blk_mq_complete_request() synchronously
+	 * or asynchronously if using aio. Hence, do not touch 'cmd' after
+	 * do_req_filebacked() has returned unless we are sure that 'cmd' has
+	 * not yet been completed.
+	 */
 	ret = do_req_filebacked(lo, rq);
-	if (cmd->blkcg_css)
+	if (cmd_blkcg_css)
 		kthread_associate_blkcg(NULL);
-	if (cmd->memcg_css) {
+	if (cmd_memcg_css) {
 		set_active_memcg(old_memcg);
-		css_put(cmd->memcg_css);
+		css_put(cmd_memcg_css);
 	}
 failed:
 	/* complete non-aio request */
-	if (!cmd->use_aio || ret) {
+	if (!use_aio || ret) {
 		if (ret == -EOPNOTSUPP)
 			cmd->ret = ret;
 		else
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 4c601ca9552a..9e6b032c8ecc 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1413,8 +1413,7 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
 	case NULL_IRQ_SOFTIRQ:
 		switch (cmd->nq->dev->queue_mode) {
 		case NULL_Q_MQ:
-			if (likely(!blk_should_fake_timeout(cmd->rq->q)))
-				blk_mq_complete_request(cmd->rq);
+			blk_mq_complete_request(cmd->rq);
 			break;
 		case NULL_Q_BIO:
 			/*
@@ -1658,12 +1657,13 @@ static enum blk_eh_timer_return null_timeout_rq(struct request *rq)
 }

 static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
-			 const struct blk_mq_queue_data *bd)
+				  const struct blk_mq_queue_data *bd)
 {
-	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+	struct request *rq = bd->rq;
+	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
 	struct nullb_queue *nq = hctx->driver_data;
-	sector_t nr_sectors = blk_rq_sectors(bd->rq);
-	sector_t sector = blk_rq_pos(bd->rq);
+	sector_t nr_sectors = blk_rq_sectors(rq);
+	sector_t sector = blk_rq_pos(rq);
 	const bool is_poll = hctx->type == HCTX_TYPE_POLL;

 	might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
@@ -1672,14 +1672,15 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
 		hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 		cmd->timer.function = null_cmd_timer_expired;
 	}
-	cmd->rq = bd->rq;
+	cmd->rq = rq;
 	cmd->error = BLK_STS_OK;
 	cmd->nq = nq;
-	cmd->fake_timeout = should_timeout_request(bd->rq);
+	cmd->fake_timeout = should_timeout_request(rq) ||
+		blk_should_fake_timeout(rq->q);

-	blk_mq_start_request(bd->rq);
+	blk_mq_start_request(rq);

-	if (should_requeue_request(bd->rq)) {
+	if (should_requeue_request(rq)) {
 		/*
 		 * Alternate between hitting the core BUSY path, and the
 		 * driver driven requeue path
@@ -1687,22 +1688,20 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
 		nq->requeue_selection++;
 		if (nq->requeue_selection & 1)
 			return BLK_STS_RESOURCE;
-		else {
-			blk_mq_requeue_request(bd->rq, true);
-			return BLK_STS_OK;
-		}
+		blk_mq_requeue_request(rq, true);
+		return BLK_STS_OK;
 	}

 	if (is_poll) {
 		spin_lock(&nq->poll_lock);
-		list_add_tail(&bd->rq->queuelist, &nq->poll_list);
+		list_add_tail(&rq->queuelist, &nq->poll_list);
 		spin_unlock(&nq->poll_lock);
 		return BLK_STS_OK;
 	}
 	if (cmd->fake_timeout)
 		return BLK_STS_OK;

-	return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq));
+	return null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
 }

 static void cleanup_queue(struct nullb_queue *nq)
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index fb855da971ee..9fa821fa76b0 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -972,6 +972,8 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	print_version();

 	hp = mdesc_grab();
+	if (!hp)
+		return -ENODEV;

 	err = -ENODEV;
 	if ((vdev->dev_no << PARTITION_SHIFT) & ~(u64)MINORMASK) {
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index d1d1c8d606c8..604c1a13c76e 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -246,7 +246,7 @@ static int ublk_validate_params(const struct ublk_device *ub)
 	if (ub->params.types & UBLK_PARAM_TYPE_BASIC) {
 		const struct ublk_param_basic *p = &ub->params.basic;

-		if (p->logical_bs_shift > PAGE_SHIFT)
+		if (p->logical_bs_shift > PAGE_SHIFT || p->logical_bs_shift < 9)
 			return -EINVAL;

 		if (p->logical_bs_shift > p->physical_bs_shift)
@@ -715,7 +715,8 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
 	}
 }

-static void ubq_complete_io_cmd(struct ublk_io *io, int res)
+static void ubq_complete_io_cmd(struct ublk_io *io, int res,
+				unsigned issue_flags)
 {
 	/* mark this cmd owned by ublksrv */
 	io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
@@ -727,7 +728,7 @@ static void ubq_complete_io_cmd(struct ublk_io *io, int res)
 	io->flags &= ~UBLK_IO_FLAG_ACTIVE;

 	/* tell ublksrv one io request is coming */
-	io_uring_cmd_done(io->cmd, res, 0);
+	io_uring_cmd_done(io->cmd, res, 0, issue_flags);
 }

 #define UBLK_REQUEUE_DELAY_MS	3
@@ -744,7 +745,8 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
 		mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0);
 }

-static inline void __ublk_rq_task_work(struct request *req)
+static inline void __ublk_rq_task_work(struct request *req,
+				       unsigned issue_flags)
 {
 	struct ublk_queue *ubq = req->mq_hctx->driver_data;
 	int tag = req->tag;
@@ -782,7 +784,7 @@ static inline void __ublk_rq_task_work(struct request *req)
 		pr_devel("%s: need get data. op %d, qid %d tag %d io_flags %x\n",
 				__func__, io->cmd->cmd_op, ubq->q_id,
 				req->tag, io->flags);
-		ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA);
+		ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA, issue_flags);
 		return;
 	}
 	/*
@@ -820,17 +822,18 @@ static inline void __ublk_rq_task_work(struct request *req)
 			mapped_bytes >> 9;
 	}

-	ubq_complete_io_cmd(io, UBLK_IO_RES_OK);
+	ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
 }

-static inline void ublk_forward_io_cmds(struct ublk_queue *ubq)
+static inline void ublk_forward_io_cmds(struct ublk_queue *ubq,
+					unsigned issue_flags)
 {
 	struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
 	struct ublk_rq_data *data, *tmp;

 	io_cmds = llist_reverse_order(io_cmds);
 	llist_for_each_entry_safe(data, tmp, io_cmds, node)
-		__ublk_rq_task_work(blk_mq_rq_from_pdu(data));
+		__ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags);
 }

 static inline void ublk_abort_io_cmds(struct ublk_queue *ubq)
@@ -842,12 +845,12 @@ static inline void ublk_abort_io_cmds(struct ublk_queue *ubq)
 		__ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data));
 }

-static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
+static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
 {
 	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
 	struct ublk_queue *ubq = pdu->ubq;

-	ublk_forward_io_cmds(ubq);
+	ublk_forward_io_cmds(ubq, issue_flags);
 }

 static void ublk_rq_task_work_fn(struct callback_head *work)
@@ -856,8 +859,9 @@ static void ublk_rq_task_work_fn(struct callback_head *work)
 			struct ublk_rq_data, work);
 	struct request *req = blk_mq_rq_from_pdu(data);
 	struct ublk_queue *ubq = req->mq_hctx->driver_data;
+	unsigned issue_flags = IO_URING_F_UNLOCKED;

-	ublk_forward_io_cmds(ubq);
+	ublk_forward_io_cmds(ubq, issue_flags);
 }

 static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
@@ -1111,7 +1115,8 @@ static void ublk_cancel_queue(struct ublk_queue *ubq)
 		struct ublk_io *io = &ubq->ios[i];

 		if (io->flags & UBLK_IO_FLAG_ACTIVE)
-			io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0);
+			io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0,
+						IO_URING_F_UNLOCKED);
 	}

 	/* all io commands are canceled */
@@ -1256,9 +1261,10 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
 	ublk_queue_cmd(ubq, req);
 }

-static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
+			       unsigned int issue_flags,
+			       struct ublksrv_io_cmd *ub_cmd)
 {
-	struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd;
 	struct ublk_device *ub = cmd->file->private_data;
 	struct ublk_queue *ubq;
 	struct ublk_io *io;
@@ -1351,12 +1357,29 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 	return -EIOCBQUEUED;

  out:
-	io_uring_cmd_done(cmd, ret, 0);
+	io_uring_cmd_done(cmd, ret, 0, issue_flags);
 	pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n",
 			__func__, cmd_op, tag, ret, io->flags);
 	return -EIOCBQUEUED;
 }

+static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+	struct ublksrv_io_cmd *ub_src = (struct ublksrv_io_cmd *) cmd->cmd;
+	struct ublksrv_io_cmd ub_cmd;
+
+	/*
+	 * Not necessary for async retry, but let's keep it simple and always
+	 * copy the values to avoid any potential reuse.
+	 */
+	ub_cmd.q_id = READ_ONCE(ub_src->q_id);
+	ub_cmd.tag = READ_ONCE(ub_src->tag);
+	ub_cmd.result = READ_ONCE(ub_src->result);
+	ub_cmd.addr = READ_ONCE(ub_src->addr);
+
+	return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd);
+}
+
 static const struct file_operations ublk_ch_fops = {
 	.owner = THIS_MODULE,
 	.open = ublk_ch_open,
@@ -1602,17 +1625,18 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
 	set_bit(GD_SUPPRESS_PART_SCAN, &disk->state);

 	get_device(&ub->cdev_dev);
+	ub->dev_info.state = UBLK_S_DEV_LIVE;
 	ret = add_disk(disk);
 	if (ret) {
 		/*
 		 * Has to drop the reference since ->free_disk won't be
 		 * called in case of add_disk failure.
 		 */
+		ub->dev_info.state = UBLK_S_DEV_DEAD;
 		ublk_put_device(ub);
 		goto out_put_disk;
 	}
 	set_bit(UB_STATE_USED, &ub->state);
-	ub->dev_info.state = UBLK_S_DEV_LIVE;
 out_put_disk:
 	if (ret)
 		put_disk(disk);
@@ -1946,6 +1970,8 @@ static int ublk_ctrl_set_params(struct ublk_device *ub,
 		/* clear all we don't support yet */
 		ub->params.types &= UBLK_PARAM_TYPE_ALL;
 		ret = ublk_validate_params(ub);
+		if (ret)
+			ub->params.types = 0;
 	}
 	mutex_unlock(&ub->mutex);

@@ -2233,7 +2259,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
 	if (ub)
 		ublk_put_device(ub);
 out:
-	io_uring_cmd_done(cmd, ret, 0);
+	io_uring_cmd_done(cmd, ret, 0, issue_flags);
 	pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n",
 			__func__, ret, cmd->cmd_op, header->dev_id, header->queue_id);
 	return -EIOCBQUEUED;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 2723eede6f21..2b918e28acaa 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -96,16 +96,14 @@ struct virtblk_req {

 	/*
 	 * The zone append command has an extended in header.
-	 * The status field in zone_append_in_hdr must have
-	 * the same offset in virtblk_req as the non-zoned
-	 * status field above.
+	 * The status field in zone_append_in_hdr must always
+	 * be the last byte.
 	 */
 		struct {
+			__virtio64 sector;
 			u8 status;
-			u8 reserved[7];
-			__le64 append_sector;
-		} zone_append_in_hdr;
-	};
+		} zone_append;
+	} in_hdr;

 	size_t in_hdr_len;

@@ -154,7 +152,7 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
 		sgs[num_out + num_in++] = vbr->sg_table.sgl;
 	}

-	sg_init_one(&in_hdr, &vbr->status, vbr->in_hdr_len);
+	sg_init_one(&in_hdr, &vbr->in_hdr.status, vbr->in_hdr_len);
 	sgs[num_out + num_in++] = &in_hdr;

 	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
@@ -242,11 +240,14 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
 					  struct request *req,
 					  struct virtblk_req *vbr)
 {
-	size_t in_hdr_len = sizeof(vbr->status);
+	size_t in_hdr_len = sizeof(vbr->in_hdr.status);
 	bool unmap = false;
 	u32 type;
 	u64 sector = 0;

+	if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) && op_is_zone_mgmt(req_op(req)))
+		return BLK_STS_NOTSUPP;
+
 	/* Set fields for all request types */
 	vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req));

@@ -287,7 +288,7 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
 	case REQ_OP_ZONE_APPEND:
 		type = VIRTIO_BLK_T_ZONE_APPEND;
 		sector = blk_rq_pos(req);
-		in_hdr_len = sizeof(vbr->zone_append_in_hdr);
+		in_hdr_len = sizeof(vbr->in_hdr.zone_append);
 		break;
 	case REQ_OP_ZONE_RESET:
 		type = VIRTIO_BLK_T_ZONE_RESET;
@@ -297,7 +298,10 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
 		type = VIRTIO_BLK_T_ZONE_RESET_ALL;
 		break;
 	case REQ_OP_DRV_IN:
-		/* Out header already filled in, nothing to do */
+		/*
+		 * Out header has already been prepared by the caller (virtblk_get_id()
+		 * or virtblk_submit_zone_report()), nothing to do here.
+		 */
 		return 0;
 	default:
 		WARN_ON_ONCE(1);
@@ -318,16 +322,28 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
 	return 0;
 }

+/*
+ * The status byte is always the last byte of the virtblk request
+ * in-header. This helper fetches its value for all in-header formats
+ * that are currently defined.
+ */
+static inline u8 virtblk_vbr_status(struct virtblk_req *vbr)
+{
+	return *((u8 *)&vbr->in_hdr + vbr->in_hdr_len - 1);
+}
+
 static inline void virtblk_request_done(struct request *req)
 {
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
-	blk_status_t status = virtblk_result(vbr->status);
+	blk_status_t status = virtblk_result(virtblk_vbr_status(vbr));
+	struct virtio_blk *vblk = req->mq_hctx->queue->queuedata;

 	virtblk_unmap_data(req, vbr);
 	virtblk_cleanup_cmd(req);

 	if (req_op(req) == REQ_OP_ZONE_APPEND)
-		req->__sector = le64_to_cpu(vbr->zone_append_in_hdr.append_sector);
+		req->__sector = virtio64_to_cpu(vblk->vdev,
+						vbr->in_hdr.zone_append.sector);

 	blk_mq_end_request(req, status);
 }
@@ -355,7 +371,7 @@ static int virtblk_handle_req(struct virtio_blk_vq *vq,

 		if (likely(!blk_should_fake_timeout(req->q)) &&
 		    !blk_mq_complete_request_remote(req) &&
-		    !blk_mq_add_to_batch(req, iob, vbr->status,
+		    !blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr),
 					 virtblk_complete_batch))
 			virtblk_request_done(req);
 		req_done++;
@@ -550,7 +566,6 @@ static void virtio_queue_rqs(struct request **rqlist)
 #ifdef CONFIG_BLK_DEV_ZONED
 static void *virtblk_alloc_report_buffer(struct virtio_blk *vblk,
 					  unsigned int nr_zones,
-					  unsigned int zone_sectors,
 					  size_t *buflen)
 {
 	struct request_queue *q = vblk->disk->queue;
@@ -558,7 +573,7 @@ static void *virtblk_alloc_report_buffer(struct virtio_blk *vblk,
 	void *buf;

 	nr_zones = min_t(unsigned int, nr_zones,
-			 get_capacity(vblk->disk) >> ilog2(zone_sectors));
+			 get_capacity(vblk->disk) >> ilog2(vblk->zone_sectors));

 	bufsize = sizeof(struct virtio_blk_zone_report) +
 		nr_zones * sizeof(struct virtio_blk_zone_descriptor);
@@ -592,7 +607,7 @@ static int virtblk_submit_zone_report(struct virtio_blk *vblk,
 		return PTR_ERR(req);

 	vbr = blk_mq_rq_to_pdu(req);
-	vbr->in_hdr_len = sizeof(vbr->status);
+	vbr->in_hdr_len = sizeof(vbr->in_hdr.status);
 	vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_ZONE_REPORT);
 	vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, sector);

@@ -601,7 +616,7 @@ static int virtblk_submit_zone_report(struct virtio_blk *vblk,
 		goto out;

 	blk_execute_rq(req, false);
-	err = blk_status_to_errno(virtblk_result(vbr->status));
+	err = blk_status_to_errno(virtblk_result(vbr->in_hdr.status));
 out:
 	blk_mq_free_request(req);
 	return err;
@@ -609,29 +624,72 @@ out:

 static int virtblk_parse_zone(struct virtio_blk *vblk,
				 struct virtio_blk_zone_descriptor *entry,
-				 unsigned int idx, unsigned int zone_sectors,
-				 report_zones_cb cb, void *data)
+				 unsigned int idx, report_zones_cb cb, void *data)
 {
 	struct blk_zone zone = { };

-	if (entry->z_type != VIRTIO_BLK_ZT_SWR &&
-	    entry->z_type != VIRTIO_BLK_ZT_SWP &&
-	    entry->z_type != VIRTIO_BLK_ZT_CONV) {
-		dev_err(&vblk->vdev->dev, "invalid zone type %#x\n",
-			entry->z_type);
-		return -EINVAL;
+	zone.start = virtio64_to_cpu(vblk->vdev, entry->z_start);
+	if (zone.start + vblk->zone_sectors <= get_capacity(vblk->disk))
+		zone.len = vblk->zone_sectors;
+	else
+		zone.len = get_capacity(vblk->disk) - zone.start;
+	zone.capacity = virtio64_to_cpu(vblk->vdev, entry->z_cap);
+	zone.wp = virtio64_to_cpu(vblk->vdev, entry->z_wp);
+
+	switch (entry->z_type) {
+	case VIRTIO_BLK_ZT_SWR:
+		zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
+		break;
+	case VIRTIO_BLK_ZT_SWP:
+		zone.type = BLK_ZONE_TYPE_SEQWRITE_PREF;
+		break;
+	case VIRTIO_BLK_ZT_CONV:
+		zone.type = BLK_ZONE_TYPE_CONVENTIONAL;
+		break;
+	default:
+		dev_err(&vblk->vdev->dev, "zone %llu: invalid type %#x\n",
+			zone.start, entry->z_type);
+		return -EIO;
 	}

-	zone.type = entry->z_type;
-	zone.cond = entry->z_state;
-	zone.len = zone_sectors;
-	zone.capacity = le64_to_cpu(entry->z_cap);
-	zone.start = le64_to_cpu(entry->z_start);
-	if (zone.cond == BLK_ZONE_COND_FULL)
+	switch (entry->z_state) {
+	case VIRTIO_BLK_ZS_EMPTY:
+		zone.cond = BLK_ZONE_COND_EMPTY;
+		break;
+	case VIRTIO_BLK_ZS_CLOSED:
+		zone.cond = BLK_ZONE_COND_CLOSED;
+		break;
+	case VIRTIO_BLK_ZS_FULL:
+		zone.cond = BLK_ZONE_COND_FULL;
 		zone.wp = zone.start + zone.len;
-	else
-		zone.wp = le64_to_cpu(entry->z_wp);
+		break;
+	case VIRTIO_BLK_ZS_EOPEN:
+		zone.cond = BLK_ZONE_COND_EXP_OPEN;
+		break;
+	case VIRTIO_BLK_ZS_IOPEN:
+		zone.cond = BLK_ZONE_COND_IMP_OPEN;
+		break;
+	case VIRTIO_BLK_ZS_NOT_WP:
+		zone.cond = BLK_ZONE_COND_NOT_WP;
+		break;
+	case VIRTIO_BLK_ZS_RDONLY:
+		zone.cond = BLK_ZONE_COND_READONLY;
+		zone.wp = ULONG_MAX;
+		break;
+	case VIRTIO_BLK_ZS_OFFLINE:
+		zone.cond = BLK_ZONE_COND_OFFLINE;
+		zone.wp = ULONG_MAX;
+		break;
+	default:
+		dev_err(&vblk->vdev->dev, "zone %llu: invalid condition %#x\n",
+			zone.start, entry->z_state);
+		return -EIO;
+	}

+	/*
+	 * The callback below checks the validity of the reported
+	 * entry data, no need to further validate it here.
+	 */
 	return cb(&zone, idx, data);
 }

@@ -641,39 +699,47 @@ static int virtblk_report_zones(struct gendisk *disk, sector_t sector,
 {
 	struct virtio_blk *vblk = disk->private_data;
 	struct virtio_blk_zone_report *report;
-	unsigned int zone_sectors = vblk->zone_sectors;
-	unsigned int nz, i;
-	int ret, zone_idx = 0;
+	unsigned long long nz, i;
 	size_t buflen;
+	unsigned int zone_idx = 0;
+	int ret;

 	if (WARN_ON_ONCE(!vblk->zone_sectors))
 		return -EOPNOTSUPP;

-	report = virtblk_alloc_report_buffer(vblk, nr_zones,
-					     zone_sectors, &buflen);
+	report = virtblk_alloc_report_buffer(vblk, nr_zones, &buflen);
 	if (!report)
 		return -ENOMEM;

+	mutex_lock(&vblk->vdev_mutex);
+
+	if (!vblk->vdev) {
+		ret = -ENXIO;
+		goto fail_report;
+	}
+
 	while (zone_idx < nr_zones && sector < get_capacity(vblk->disk)) {
 		memset(report, 0, buflen);

 		ret = virtblk_submit_zone_report(vblk, (char *)report,
						 buflen, sector);
-		if (ret) {
-			if (ret > 0)
-				ret = -EIO;
-			goto out_free;
-		}
-		nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones);
+		if (ret)
+			goto fail_report;
+
+		nz = min_t(u64, virtio64_to_cpu(vblk->vdev, report->nr_zones),
+			   nr_zones);
 		if (!nz)
 			break;
 		for (i = 0; i < nz && zone_idx < nr_zones; i++) {
 			ret = virtblk_parse_zone(vblk, &report->zones[i],
-						 zone_idx, zone_sectors, cb, data);
+						 zone_idx, cb, data);
 			if (ret)
-				goto out_free;
-			sector = le64_to_cpu(report->zones[i].z_start) + zone_sectors;
+				goto fail_report;
+
+			sector = virtio64_to_cpu(vblk->vdev,
						 report->zones[i].z_start) +
+				 vblk->zone_sectors;
 			zone_idx++;
 		}
 	}
@@ -682,7 +748,8 @@ static int virtblk_report_zones(struct gendisk *disk, sector_t sector,
 		ret = zone_idx;
 	else
 		ret = -EINVAL;
-out_free:
+fail_report:
+	mutex_unlock(&vblk->vdev_mutex);
 	kvfree(report);
 	return ret;
 }
@@ -691,20 +758,28 @@ static void virtblk_revalidate_zones(struct virtio_blk *vblk)
 {
 	u8 model;

-	if (!vblk->zone_sectors)
-		return;
-
 	virtio_cread(vblk->vdev, struct virtio_blk_config,
 		     zoned.model, &model);
-	if (!blk_revalidate_disk_zones(vblk->disk, NULL))
-		set_capacity_and_notify(vblk->disk, 0);
+	switch (model) {
+	default:
+		dev_err(&vblk->vdev->dev, "unknown zone model %d\n", model);
+		fallthrough;
+	case VIRTIO_BLK_Z_NONE:
+	case VIRTIO_BLK_Z_HA:
+		disk_set_zoned(vblk->disk, BLK_ZONED_NONE);
+		return;
+	case VIRTIO_BLK_Z_HM:
+		WARN_ON_ONCE(!vblk->zone_sectors);
+		if (!blk_revalidate_disk_zones(vblk->disk, NULL))
+			set_capacity_and_notify(vblk->disk, 0);
+	}
 }

 static int virtblk_probe_zoned_device(struct virtio_device *vdev,
				      struct virtio_blk *vblk,
				      struct request_queue *q)
 {
-	u32 v;
+	u32 v, wg;
 	u8 model;
 	int ret;

@@ -713,16 +788,11 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,

 	switch (model) {
 	case VIRTIO_BLK_Z_NONE:
+	case VIRTIO_BLK_Z_HA:
+		/* Present the host-aware device as non-zoned */
 		return 0;
 	case VIRTIO_BLK_Z_HM:
 		break;
-	case VIRTIO_BLK_Z_HA:
-		/*
-		 * Present the host-aware device as a regular drive.
-		 * TODO It is possible to add an option to make it appear
-		 * in the system as a zoned drive.
-		 */
-		return 0;
 	default:
 		dev_err(&vdev->dev, "unsupported zone model %d\n", model);
 		return -EINVAL;
@@ -735,32 +805,31 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,

 	virtio_cread(vdev, struct virtio_blk_config,
 		     zoned.max_open_zones, &v);
-	disk_set_max_open_zones(vblk->disk, le32_to_cpu(v));
-
-	dev_dbg(&vdev->dev, "max open zones = %u\n", le32_to_cpu(v));
+	disk_set_max_open_zones(vblk->disk, v);
+	dev_dbg(&vdev->dev, "max open zones = %u\n", v);

 	virtio_cread(vdev, struct virtio_blk_config,
 		     zoned.max_active_zones, &v);
-	disk_set_max_active_zones(vblk->disk, le32_to_cpu(v));
-	dev_dbg(&vdev->dev, "max active zones = %u\n", le32_to_cpu(v));
+	disk_set_max_active_zones(vblk->disk, v);
+	dev_dbg(&vdev->dev, "max active zones = %u\n", v);

 	virtio_cread(vdev, struct virtio_blk_config,
-		     zoned.write_granularity, &v);
-	if (!v) {
+		     zoned.write_granularity, &wg);
+	if (!wg) {
 		dev_warn(&vdev->dev, "zero write granularity reported\n");
 		return -ENODEV;
 	}
-	blk_queue_physical_block_size(q, le32_to_cpu(v));
-	blk_queue_io_min(q, le32_to_cpu(v));
+	blk_queue_physical_block_size(q, wg);
+	blk_queue_io_min(q, wg);

-	dev_dbg(&vdev->dev, "write granularity = %u\n", le32_to_cpu(v));
+	dev_dbg(&vdev->dev, "write granularity = %u\n", wg);

 	/*
 	 * virtio ZBD specification doesn't require zones to be a power of
 	 * two sectors in size, but the code in this driver expects that.
 	 */
-	virtio_cread(vdev, struct virtio_blk_config, zoned.zone_sectors, &v);
-	vblk->zone_sectors = le32_to_cpu(v);
+	virtio_cread(vdev, struct virtio_blk_config, zoned.zone_sectors,
+		     &vblk->zone_sectors);
 	if (vblk->zone_sectors == 0 || !is_power_of_2(vblk->zone_sectors)) {
 		dev_err(&vdev->dev,
 			"zoned device with non power of two zone size %u\n",
@@ -783,36 +852,46 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,
 			dev_warn(&vdev->dev, "zero max_append_sectors reported\n");
 			return -ENODEV;
 		}
-		blk_queue_max_zone_append_sectors(q, le32_to_cpu(v));
-		dev_dbg(&vdev->dev, "max append sectors = %u\n", le32_to_cpu(v));
+		if ((v << SECTOR_SHIFT) < wg) {
+			dev_err(&vdev->dev,
+				"write granularity %u exceeds max_append_sectors %u limit\n",
+				wg, v);
+			return -ENODEV;
+		}
+
+		blk_queue_max_zone_append_sectors(q, v);
+		dev_dbg(&vdev->dev, "max append sectors = %u\n", v);
 	}

 	return ret;
 }

-static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev)
-{
-	return virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED);
-}
 #else

 /*
  * Zoned block device support is not configured in this kernel.
- * We only need to define a few symbols to avoid compilation errors.
+ * Host-managed zoned devices can't be supported, but others are
+ * good to go as regular block devices.
  */
 #define virtblk_report_zones		NULL
+
 static inline void virtblk_revalidate_zones(struct virtio_blk *vblk)
 {
 }
+
 static inline int virtblk_probe_zoned_device(struct virtio_device *vdev,
			struct virtio_blk *vblk, struct request_queue *q)
 {
-	return -EOPNOTSUPP;
-}
+	u8 model;

-static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev)
-{
-	return false;
+	virtio_cread(vdev, struct virtio_blk_config, zoned.model, &model);
+	if (model == VIRTIO_BLK_Z_HM) {
+		dev_err(&vdev->dev,
+			"virtio_blk: zoned devices are not supported");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
 }
 #endif /* CONFIG_BLK_DEV_ZONED */

@@ -831,7 +910,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
 		return PTR_ERR(req);

 	vbr = blk_mq_rq_to_pdu(req);
-	vbr->in_hdr_len = sizeof(vbr->status);
+	vbr->in_hdr_len = sizeof(vbr->in_hdr.status);
 	vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID);
 	vbr->out_hdr.sector = 0;

@@ -840,7 +919,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
 		goto out;

 	blk_execute_rq(req, false);
-	err = blk_status_to_errno(virtblk_result(vbr->status));
+	err = blk_status_to_errno(virtblk_result(vbr->in_hdr.status));
 out:
 	blk_mq_free_request(req);
 	return err;
@@ -1498,15 +1577,16 @@ static int virtblk_probe(struct virtio_device *vdev)
 	virtblk_update_capacity(vblk, false);
 	virtio_device_ready(vdev);

-	if (virtblk_has_zoned_feature(vdev)) {
+	/*
+	 * All steps that follow use the VQs therefore they need to be
+	 * placed after the virtio_device_ready() call above.
+	 */
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED)) {
 		err = virtblk_probe_zoned_device(vdev, vblk, q);
 		if (err)
 			goto out_cleanup_disk;
 	}

-	dev_info(&vdev->dev, "blk config size: %zu\n",
-		 sizeof(struct virtio_blk_config));
-
 	err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
 	if (err)
 		goto out_cleanup_disk;
@@ -1607,10 +1687,7 @@ static unsigned int features[] = {
 	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
 	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
 	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
-	VIRTIO_BLK_F_SECURE_ERASE,
-#ifdef CONFIG_BLK_DEV_ZONED
-	VIRTIO_BLK_F_ZONED,
-#endif /* CONFIG_BLK_DEV_ZONED */
+	VIRTIO_BLK_F_SECURE_ERASE, VIRTIO_BLK_F_ZONED,
 };

 static struct virtio_driver virtio_blk = {