From ffe357c868e7796f20bc0eac61f5b952731c0fa8 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 13 Jul 2023 21:29:35 +0200 Subject: nbd: automatically load module on genl access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a module alias to nbd.ko that allows the generic netlink core to automatically load the module when netlink messages for nbd are received. This frees the user from manually having to load the module before using nbd functionality via netlink. If the system policy allows it this can even be used to load the nbd module from containers which would otherwise not have access to the necessary module files to do a normal "modprobe nbd". For example this avoids the following error when using nbd-client: $ nbd-client localhost 10809 /dev/nbd0 ... Error: Couldn't resolve the nbd netlink family, make sure the nbd module is loaded and your nbd driver supports the netlink interface. Signed-off-by: Thomas Weißschuh Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20230713-b4-nbd-genl-v3-1-226cbddba04b@weissschuh.net Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/block') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 8576d696c7a2..a346dbd73543 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -2336,6 +2336,7 @@ static struct genl_family nbd_genl_family __ro_after_init = { .mcgrps = nbd_mcast_grps, .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps), }; +MODULE_ALIAS_GENL_FAMILY(NBD_GENL_FAMILY_NAME); static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply) { -- cgit v1.2.3 From 9d4ed6d46272bb60036a6539aae74eafcbbb3d2d Mon Sep 17 00:00:00 2001 From: Andreas Hindborg Date: Fri, 4 Aug 2023 13:46:08 +0200 Subject: ublk: add helper to check if device supports user copy This will be used by ublk zoned storage support. Signed-off-by: Andreas Hindborg Reviewed-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20230804114610.179530-2-nmi@metaspace.dk Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 1c823750c95a..8d271901efac 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -185,6 +185,11 @@ struct ublk_params_header { __u32 types; }; +static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub) +{ + return ub->dev_info.flags & UBLK_F_USER_COPY; +} + static inline void __ublk_complete_rq(struct request *req); static void ublk_complete_rq(struct kref *ref); @@ -2037,7 +2042,7 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) UBLK_F_URING_CMD_COMP_IN_TASK; /* GET_DATA isn't needed any more with USER_COPY */ - if (ub->dev_info.flags & UBLK_F_USER_COPY) + if (ublk_dev_is_user_copy(ub)) ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA; /* We are not ready to support zero copy */ -- cgit v1.2.3 From 1a6e88b9593b63ccdfe1d84e3f99dd91e4f8d490 Mon Sep 17 00:00:00 2001 From: Andreas Hindborg Date: Fri, 4 Aug 2023 13:46:09 +0200 Subject: ublk: move check for empty address field on command submission In preparation for zoned storage support, move the check for empty `addr` field into the command handler case statement. Note that the check makes no sense for `UBLK_IO_NEED_GET_DATA` because the `addr` field must always be set for this command. Signed-off-by: Andreas Hindborg Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20230804114610.179530-3-nmi@metaspace.dk Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 8d271901efac..0e3847579610 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1419,11 +1419,6 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, ^ (_IOC_NR(cmd_op) == UBLK_IO_NEED_GET_DATA)) goto out; - if (ublk_support_user_copy(ubq) && ub_cmd->addr) { - ret = -EINVAL; - goto out; - } - ret = ublk_check_cmd_op(cmd_op); if (ret) goto out; @@ -1450,6 +1445,10 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, */ if (!ub_cmd->addr && !ublk_need_get_data(ubq)) goto out; + } else if (ub_cmd->addr) { + /* User copy requires addr to be unset */ + ret = -EINVAL; + goto out; } ublk_fill_io_cmd(io, cmd, ub_cmd->addr); @@ -1469,7 +1468,12 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, if (!ub_cmd->addr && (!ublk_need_get_data(ubq) || req_op(req) == REQ_OP_READ)) goto out; + } else if (ub_cmd->addr) { + /* User copy requires addr to be unset */ + ret = -EINVAL; + goto out; } + ublk_fill_io_cmd(io, cmd, ub_cmd->addr); ublk_commit_completion(ub, ub_cmd); break; -- cgit v1.2.3 From 29802d7ca33bc0a75c9da2a143eeed4f9e99fca4 Mon Sep 17 00:00:00 2001 From: Andreas Hindborg Date: Fri, 4 Aug 2023 13:46:10 +0200 Subject: ublk: enable zoned storage support Add zoned storage support to ublk: report_zones and operations: - REQ_OP_ZONE_OPEN - REQ_OP_ZONE_CLOSE - REQ_OP_ZONE_FINISH - REQ_OP_ZONE_RESET - REQ_OP_ZONE_APPEND The zone append feature uses the `addr` field of `struct ublksrv_io_cmd` to communicate ALBA back to the kernel. Therefore ublk must be used with the user copy feature (UBLK_F_USER_COPY) for zoned storage support to be available. Without this feature, ublk will not allow zoned storage support. Signed-off-by: Andreas Hindborg Reviewed-by: Ming Lei Tested-by: Ming Lei Link: https://lore.kernel.org/r/20230804114610.179530-4-nmi@metaspace.dk Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 331 ++++++++++++++++++++++++++++++++++++++++-- include/uapi/linux/ublk_cmd.h | 63 ++++++-- 2 files changed, 370 insertions(+), 24 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 0e3847579610..b60394fe7be6 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -56,16 +56,21 @@ | UBLK_F_USER_RECOVERY_REISSUE \ | UBLK_F_UNPRIVILEGED_DEV \ | UBLK_F_CMD_IOCTL_ENCODE \ - | UBLK_F_USER_COPY) + | UBLK_F_USER_COPY \ + | UBLK_F_ZONED) /* All UBLK_PARAM_TYPE_* should be included here */ -#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | \ - UBLK_PARAM_TYPE_DISCARD | UBLK_PARAM_TYPE_DEVT) +#define UBLK_PARAM_TYPE_ALL \ + (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \ + UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED) struct ublk_rq_data { struct llist_node node; struct kref ref; + __u64 sector; + __u32 operation; + __u32 nr_zones; }; struct ublk_uring_cmd_pdu { @@ -185,11 +190,263 @@ struct ublk_params_header { __u32 types; }; +static inline unsigned int ublk_req_build_flags(struct request *req); +static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq, + int tag); + static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub) { return ub->dev_info.flags & UBLK_F_USER_COPY; } +static inline bool ublk_dev_is_zoned(const struct ublk_device *ub) +{ + return ub->dev_info.flags & UBLK_F_ZONED; +} + +static inline bool ublk_queue_is_zoned(struct ublk_queue *ubq) +{ + return ubq->flags & UBLK_F_ZONED; +} + +#ifdef CONFIG_BLK_DEV_ZONED + +static int ublk_get_nr_zones(const struct ublk_device *ub) +{ + const struct ublk_param_basic *p = &ub->params.basic; + + /* Zone size is a power of 2 */ + return p->dev_sectors >> ilog2(p->chunk_sectors); +} + +static int ublk_revalidate_disk_zones(struct ublk_device *ub) +{ + return blk_revalidate_disk_zones(ub->ub_disk, NULL); +} + +static int ublk_dev_param_zoned_validate(const struct ublk_device *ub) +{ + const struct ublk_param_zoned *p = &ub->params.zoned; + int nr_zones; + + if (!ublk_dev_is_zoned(ub)) + return -EINVAL; + + if (!p->max_zone_append_sectors) + return -EINVAL; + + nr_zones = ublk_get_nr_zones(ub); + + if (p->max_active_zones > nr_zones) + return -EINVAL; + + if (p->max_open_zones > nr_zones) + return -EINVAL; + + return 0; +} + +static int ublk_dev_param_zoned_apply(struct ublk_device *ub) +{ + const struct ublk_param_zoned *p = &ub->params.zoned; + + disk_set_zoned(ub->ub_disk, BLK_ZONED_HM); + blk_queue_required_elevator_features(ub->ub_disk->queue, + ELEVATOR_F_ZBD_SEQ_WRITE); + disk_set_max_active_zones(ub->ub_disk, p->max_active_zones); + disk_set_max_open_zones(ub->ub_disk, p->max_open_zones); + blk_queue_max_zone_append_sectors(ub->ub_disk->queue, p->max_zone_append_sectors); + + ub->ub_disk->nr_zones = ublk_get_nr_zones(ub); + + return 0; +} + +/* Based on virtblk_alloc_report_buffer */ +static void *ublk_alloc_report_buffer(struct ublk_device *ublk, + unsigned int nr_zones, size_t *buflen) +{ + struct request_queue *q = ublk->ub_disk->queue; + size_t bufsize; + void *buf; + + nr_zones = min_t(unsigned int, nr_zones, + ublk->ub_disk->nr_zones); + + bufsize = nr_zones * sizeof(struct blk_zone); + bufsize = + min_t(size_t, bufsize, queue_max_hw_sectors(q) << SECTOR_SHIFT); + + while (bufsize >= sizeof(struct blk_zone)) { + buf = kvmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY); + if (buf) { + *buflen = bufsize; + return buf; + } + bufsize >>= 1; + } + + *buflen = 0; + return NULL; +} + +static int ublk_report_zones(struct gendisk *disk, sector_t sector, + unsigned int nr_zones, report_zones_cb cb, void *data) +{ + struct ublk_device *ub = disk->private_data; + unsigned int zone_size_sectors = disk->queue->limits.chunk_sectors; + unsigned int first_zone = sector >> ilog2(zone_size_sectors); + unsigned int done_zones = 0; + unsigned int max_zones_per_request; + int ret; + struct blk_zone *buffer; + size_t buffer_length; + + nr_zones = min_t(unsigned int, ub->ub_disk->nr_zones - first_zone, + nr_zones); + + buffer = ublk_alloc_report_buffer(ub, nr_zones, &buffer_length); + if (!buffer) + return -ENOMEM; + + max_zones_per_request = buffer_length / sizeof(struct blk_zone); + + while (done_zones < nr_zones) { + unsigned int remaining_zones = nr_zones - done_zones; + unsigned int zones_in_request = + min_t(unsigned int, remaining_zones, max_zones_per_request); + struct request *req; + struct ublk_rq_data *pdu; + blk_status_t status; + + memset(buffer, 0, buffer_length); + + req = blk_mq_alloc_request(disk->queue, REQ_OP_DRV_IN, 0); + if (IS_ERR(req)) { + ret = PTR_ERR(req); + goto out; + } + + pdu = blk_mq_rq_to_pdu(req); + pdu->operation = UBLK_IO_OP_REPORT_ZONES; + pdu->sector = sector; + pdu->nr_zones = zones_in_request; + + ret = blk_rq_map_kern(disk->queue, req, buffer, buffer_length, + GFP_KERNEL); + if (ret) { + blk_mq_free_request(req); + goto out; + } + + status = blk_execute_rq(req, 0); + ret = blk_status_to_errno(status); + blk_mq_free_request(req); + if (ret) + goto out; + + for (unsigned int i = 0; i < zones_in_request; i++) { + struct blk_zone *zone = buffer + i; + + /* A zero length zone means no more zones in this response */ + if (!zone->len) + break; + + ret = cb(zone, i, data); + if (ret) + goto out; + + done_zones++; + sector += zone_size_sectors; + + } + } + + ret = done_zones; + +out: + kvfree(buffer); + return ret; +} + +static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq, + struct request *req) +{ + struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag); + struct ublk_io *io = &ubq->ios[req->tag]; + struct ublk_rq_data *pdu = blk_mq_rq_to_pdu(req); + u32 ublk_op; + + switch (req_op(req)) { + case REQ_OP_ZONE_OPEN: + ublk_op = UBLK_IO_OP_ZONE_OPEN; + break; + case REQ_OP_ZONE_CLOSE: + ublk_op = UBLK_IO_OP_ZONE_CLOSE; + break; + case REQ_OP_ZONE_FINISH: + ublk_op = UBLK_IO_OP_ZONE_FINISH; + break; + case REQ_OP_ZONE_RESET: + ublk_op = UBLK_IO_OP_ZONE_RESET; + break; + case REQ_OP_ZONE_APPEND: + ublk_op = UBLK_IO_OP_ZONE_APPEND; + break; + case REQ_OP_DRV_IN: + ublk_op = pdu->operation; + switch (ublk_op) { + case UBLK_IO_OP_REPORT_ZONES: + iod->op_flags = ublk_op | ublk_req_build_flags(req); + iod->nr_zones = pdu->nr_zones; + iod->start_sector = pdu->sector; + return BLK_STS_OK; + default: + return BLK_STS_IOERR; + } + case REQ_OP_ZONE_RESET_ALL: + case REQ_OP_DRV_OUT: + /* We do not support reset_all and drv_out */ + return BLK_STS_NOTSUPP; + default: + return BLK_STS_IOERR; + } + + iod->op_flags = ublk_op | ublk_req_build_flags(req); + iod->nr_sectors = blk_rq_sectors(req); + iod->start_sector = blk_rq_pos(req); + iod->addr = io->addr; + + return BLK_STS_OK; +} + +#else + +#define ublk_report_zones (NULL) + +static int ublk_dev_param_zoned_validate(const struct ublk_device *ub) +{ + return -EOPNOTSUPP; +} + +static int ublk_dev_param_zoned_apply(struct ublk_device *ub) +{ + return -EOPNOTSUPP; +} + +static int ublk_revalidate_disk_zones(struct ublk_device *ub) +{ + return 0; +} + +static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq, + struct request *req) +{ + return -EOPNOTSUPP; +} + +#endif + static inline void __ublk_complete_rq(struct request *req); static void ublk_complete_rq(struct kref *ref); @@ -286,6 +543,9 @@ static int ublk_validate_params(const struct ublk_device *ub) if (p->max_sectors > (ub->dev_info.max_io_buf_bytes >> 9)) return -EINVAL; + + if (ublk_dev_is_zoned(ub) && !p->chunk_sectors) + return -EINVAL; } else return -EINVAL; @@ -304,6 +564,11 @@ static int ublk_validate_params(const struct ublk_device *ub) if (ub->params.types & UBLK_PARAM_TYPE_DEVT) return -EINVAL; + if (ub->params.types & UBLK_PARAM_TYPE_ZONED) + return ublk_dev_param_zoned_validate(ub); + else if (ublk_dev_is_zoned(ub)) + return -EINVAL; + return 0; } @@ -317,6 +582,9 @@ static int ublk_apply_params(struct ublk_device *ub) if (ub->params.types & UBLK_PARAM_TYPE_DISCARD) ublk_dev_param_discard_apply(ub); + if (ub->params.types & UBLK_PARAM_TYPE_ZONED) + return ublk_dev_param_zoned_apply(ub); + return 0; } @@ -487,6 +755,7 @@ static const struct block_device_operations ub_fops = { .owner = THIS_MODULE, .open = ublk_open, .free_disk = ublk_free_disk, + .report_zones = ublk_report_zones, }; #define UBLK_MAX_PIN_PAGES 32 @@ -601,7 +870,8 @@ static inline bool ublk_need_map_req(const struct request *req) static inline bool ublk_need_unmap_req(const struct request *req) { - return ublk_rq_has_data(req) && req_op(req) == REQ_OP_READ; + return ublk_rq_has_data(req) && + (req_op(req) == REQ_OP_READ || req_op(req) == REQ_OP_DRV_IN); } static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req, @@ -685,8 +955,13 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req) { struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag); struct ublk_io *io = &ubq->ios[req->tag]; + enum req_op op = req_op(req); u32 ublk_op; + if (!ublk_queue_is_zoned(ubq) && + (op_is_zone_mgmt(op) || op == REQ_OP_ZONE_APPEND)) + return -EIO; + switch (req_op(req)) { case REQ_OP_READ: ublk_op = UBLK_IO_OP_READ; @@ -704,6 +979,8 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req) ublk_op = UBLK_IO_OP_WRITE_ZEROES; break; default: + if (ublk_queue_is_zoned(ubq)) + return ublk_setup_iod_zoned(ubq, req); return BLK_STS_IOERR; } @@ -756,7 +1033,8 @@ static inline void __ublk_complete_rq(struct request *req) * * Both the two needn't unmap. */ - if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE) + if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE && + req_op(req) != REQ_OP_DRV_IN) goto exit; /* for READ request, writing data in iod->addr to rq buffers */ @@ -1120,6 +1398,9 @@ static void ublk_commit_completion(struct ublk_device *ub, /* find the io request and complete */ req = blk_mq_tag_to_rq(ub->tag_set.tags[qid], tag); + if (req_op(req) == REQ_OP_ZONE_APPEND) + req->__sector = ub_cmd->zone_append_lba; + if (req && likely(!blk_should_fake_timeout(req->q))) ublk_put_req_ref(ubq, req); } @@ -1468,8 +1749,11 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, if (!ub_cmd->addr && (!ublk_need_get_data(ubq) || req_op(req) == REQ_OP_READ)) goto out; - } else if (ub_cmd->addr) { - /* User copy requires addr to be unset */ + } else if (req_op(req) != REQ_OP_ZONE_APPEND && ub_cmd->addr) { + /* + * User copy requires addr to be unset when command is + * not zone append + */ ret = -EINVAL; goto out; } @@ -1546,11 +1830,14 @@ static inline bool ublk_check_ubuf_dir(const struct request *req, int ubuf_dir) { /* copy ubuf to request pages */ - if (req_op(req) == REQ_OP_READ && ubuf_dir == ITER_SOURCE) + if ((req_op(req) == REQ_OP_READ || req_op(req) == REQ_OP_DRV_IN) && + ubuf_dir == ITER_SOURCE) return true; /* copy request pages to ubuf */ - if (req_op(req) == REQ_OP_WRITE && ubuf_dir == ITER_DEST) + if ((req_op(req) == REQ_OP_WRITE || + req_op(req) == REQ_OP_ZONE_APPEND) && + ubuf_dir == ITER_DEST) return true; return false; @@ -1889,17 +2176,24 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) get_device(&ub->cdev_dev); ub->dev_info.state = UBLK_S_DEV_LIVE; + + if (ublk_dev_is_zoned(ub)) { + ret = ublk_revalidate_disk_zones(ub); + if (ret) + goto out_put_cdev; + } + ret = add_disk(disk); + if (ret) + goto out_put_cdev; + + set_bit(UB_STATE_USED, &ub->state); + +out_put_cdev: if (ret) { - /* - * Has to drop the reference since ->free_disk won't be - * called in case of add_disk failure. - */ ub->dev_info.state = UBLK_S_DEV_DEAD; ublk_put_device(ub); - goto out_put_disk; } - set_bit(UB_STATE_USED, &ub->state); out_put_disk: if (ret) put_disk(disk); @@ -2049,6 +2343,13 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) if (ublk_dev_is_user_copy(ub)) ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA; + /* Zoned storage support requires user copy feature */ + if (ublk_dev_is_zoned(ub) && + (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !ublk_dev_is_user_copy(ub))) { + ret = -EINVAL; + goto out_free_dev_number; + } + /* We are not ready to support zero copy */ ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY; diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 4b8558db90e1..2685e53e4752 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -176,6 +176,12 @@ /* Copy between request and user buffer by pread()/pwrite() */ #define UBLK_F_USER_COPY (1UL << 7) +/* + * User space sets this flag when setting up the device to request zoned storage support. Kernel may + * deny the request by returning an error. + */ +#define UBLK_F_ZONED (1ULL << 8) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 @@ -232,9 +238,26 @@ struct ublksrv_ctrl_dev_info { #define UBLK_IO_OP_READ 0 #define UBLK_IO_OP_WRITE 1 #define UBLK_IO_OP_FLUSH 2 -#define UBLK_IO_OP_DISCARD 3 -#define UBLK_IO_OP_WRITE_SAME 4 -#define UBLK_IO_OP_WRITE_ZEROES 5 +#define UBLK_IO_OP_DISCARD 3 +#define UBLK_IO_OP_WRITE_SAME 4 +#define UBLK_IO_OP_WRITE_ZEROES 5 +#define UBLK_IO_OP_ZONE_OPEN 10 +#define UBLK_IO_OP_ZONE_CLOSE 11 +#define UBLK_IO_OP_ZONE_FINISH 12 +#define UBLK_IO_OP_ZONE_APPEND 13 +#define UBLK_IO_OP_ZONE_RESET 15 +/* + * Construct a zone report. The report request is carried in `struct + * ublksrv_io_desc`. The `start_sector` field must be the first sector of a zone + * and shall indicate the first zone of the report. The `nr_zones` shall + * indicate how many zones should be reported at most. The report shall be + * delivered as a `struct blk_zone` array. To report fewer zones than requested, + * zero the last entry of the returned array. + * + * Related definitions(blk_zone, blk_zone_cond, blk_zone_type, ...) in + * include/uapi/linux/blkzoned.h are part of ublk UAPI. + */ +#define UBLK_IO_OP_REPORT_ZONES 18 #define UBLK_IO_F_FAILFAST_DEV (1U << 8) #define UBLK_IO_F_FAILFAST_TRANSPORT (1U << 9) @@ -255,7 +278,10 @@ struct ublksrv_io_desc { /* op: bit 0-7, flags: bit 8-31 */ __u32 op_flags; - __u32 nr_sectors; + union { + __u32 nr_sectors; + __u32 nr_zones; /* for UBLK_IO_OP_REPORT_ZONES */ + }; /* start sector for this io */ __u64 start_sector; @@ -284,11 +310,21 @@ struct ublksrv_io_cmd { /* io result, it is valid for COMMIT* command only */ __s32 result; - /* - * userspace buffer address in ublksrv daemon process, valid for - * FETCH* command only - */ - __u64 addr; + union { + /* + * userspace buffer address in ublksrv daemon process, valid for + * FETCH* command only + * + * `addr` should not be used when UBLK_F_USER_COPY is enabled, + * because userspace handles data copy by pread()/pwrite() over + * /dev/ublkcN. But in case of UBLK_F_ZONED, this union is + * re-used to pass back the allocated LBA for + * UBLK_IO_OP_ZONE_APPEND which actually depends on + * UBLK_F_USER_COPY + */ + __u64 addr; + __u64 zone_append_lba; + }; }; struct ublk_param_basic { @@ -331,6 +367,13 @@ struct ublk_param_devt { __u32 disk_minor; }; +struct ublk_param_zoned { + __u32 max_open_zones; + __u32 max_active_zones; + __u32 max_zone_append_sectors; + __u8 reserved[20]; +}; + struct ublk_params { /* * Total length of parameters, userspace has to set 'len' for both @@ -342,11 +385,13 @@ struct ublk_params { #define UBLK_PARAM_TYPE_BASIC (1 << 0) #define UBLK_PARAM_TYPE_DISCARD (1 << 1) #define UBLK_PARAM_TYPE_DEVT (1 << 2) +#define UBLK_PARAM_TYPE_ZONED (1 << 3) __u32 types; /* types of parameter included */ struct ublk_param_basic basic; struct ublk_param_discard discard; struct ublk_param_devt devt; + struct ublk_param_zoned zoned; }; #endif -- cgit v1.2.3 From c8659bbb15cd42577a9b16a23b527436b028c8b2 Mon Sep 17 00:00:00 2001 From: Li Zetao Date: Thu, 10 Aug 2023 16:48:36 +0800 Subject: ublk: Fix signedness bug returning warning There are two warnings reported by smatch: drivers/block/ublk_drv.c:445 ublk_setup_iod_zoned() warn: signedness bug returning '(-95)' drivers/block/ublk_drv.c:963 ublk_setup_iod() warn: signedness bug returning '(-5)' The type of "blk_status_t" is either be a u32 or u8, but this two functions return a negative value when not supported or failed. Use the error code of the blk module to fix these warnings. Fixes: 29802d7ca33b ("ublk: enable zoned storage support") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202308100201.TCRhgdvN-lkp@intel.com/ Signed-off-by: Li Zetao Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20230810084836.3535322-1-lizetao1@huawei.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index b60394fe7be6..109a5b17537d 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -442,7 +442,7 @@ static int ublk_revalidate_disk_zones(struct ublk_device *ub) static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq, struct request *req) { - return -EOPNOTSUPP; + return BLK_STS_NOTSUPP; } #endif @@ -960,7 +960,7 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req) if (!ublk_queue_is_zoned(ubq) && (op_is_zone_mgmt(op) || op == REQ_OP_ZONE_APPEND)) - return -EIO; + return BLK_STS_IOERR; switch (req_op(req)) { case REQ_OP_READ: -- cgit v1.2.3 From d21fed50c523d87af6456697ad09378060c4f09a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Aug 2023 16:19:23 +0200 Subject: swim3: mark swim3_init() static This is the module init function, which by definition is used only locally, so mark it static to avoid a warning: drivers/block/swim3.c:1280:5: error: no previous prototype for 'swim3_init' [-Werror=missing-prototypes] Reviewed-by: Jack Wang Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe --- drivers/block/swim3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index dc43a63b3469..c2bc85826358 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1277,7 +1277,7 @@ static struct macio_driver swim3_driver = }; -int swim3_init(void) +static int swim3_init(void) { macio_register_driver(&swim3_driver); return 0; -- cgit v1.2.3 From e24721e441a7c640e4e7b2b63c23c06d9a750880 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 11 Aug 2023 21:52:16 +0800 Subject: ublk: fix 'warn: variable dereferenced before check 'req'' from Smatch The added check of 'req_op(req) == REQ_OP_ZONE_APPEND' should have been done after the request is confirmed as valid. Actually here, the request should always been true, so add one WARN_ON_ONCE(!req), meantime move the zone_append check after checking the request. Cc: Andreas Hindborg Reported-by: Dan Carpenter Fixes: 29802d7ca33b ("ublk: enable zoned storage support") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20230811135216.420404-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 109a5b17537d..e85e075b5bce 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1397,11 +1397,13 @@ static void ublk_commit_completion(struct ublk_device *ub, /* find the io request and complete */ req = blk_mq_tag_to_rq(ub->tag_set.tags[qid], tag); + if (WARN_ON_ONCE(unlikely(!req))) + return; if (req_op(req) == REQ_OP_ZONE_APPEND) req->__sector = ub_cmd->zone_append_lba; - if (req && likely(!blk_should_fake_timeout(req->q))) + if (likely(!blk_should_fake_timeout(req->q))) ublk_put_req_ref(ubq, req); } -- cgit v1.2.3 From 66a6a5d0ec852eaced589da066376e69397cd71e Mon Sep 17 00:00:00 2001 From: Ruan Jinjie Date: Tue, 15 Aug 2023 19:48:14 +0800 Subject: ublk: Switch to memdup_user_nul() helper Use memdup_user_nul() helper instead of open-coding to simplify the code. Signed-off-by: Ruan Jinjie Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20230815114815.1551171-1-ruanjinjie@huawei.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index e85e075b5bce..6ecd728ee5ce 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2742,14 +2742,9 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub, if (header->len < header->dev_path_len) return -EINVAL; - dev_path = kmalloc(header->dev_path_len + 1, GFP_KERNEL); - if (!dev_path) - return -ENOMEM; - - ret = -EFAULT; - if (copy_from_user(dev_path, argp, header->dev_path_len)) - goto exit; - dev_path[header->dev_path_len] = 0; + dev_path = memdup_user_nul(argp, header->dev_path_len); + if (IS_ERR(dev_path)) + return PTR_ERR(dev_path); ret = -EINVAL; switch (_IOC_NR(cmd->cmd_op)) { -- cgit v1.2.3 From f4283bc7e38ac89d0c6c0ae188464d3769bec098 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 16 Aug 2023 10:22:10 +0800 Subject: drivers/rnbd: restore sysfs interface to rnbd-client Commit 137380c0ec40 renamed 'rnbd-client' to 'rnbd_client', this changed sysfs interface to /sys/devices/virtual/rnbd_client/ctl/map_device from /sys/devices/virtual/rnbd-client/ctl/map_device. CC: Ivan Orlov CC: "Md. Haris Iqbal" CC: Jack Wang Fixes: 137380c0ec40 ("block/rnbd: make all 'class' structures const") Signed-off-by: Li Zhijian Acked-by: Jack Wang Link: https://lore.kernel.org/r/20230816022210.2501228-1-lizhijian@fujitsu.com Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c index c36d8b1ceeed..39887556cf95 100644 --- a/drivers/block/rnbd/rnbd-clt-sysfs.c +++ b/drivers/block/rnbd/rnbd-clt-sysfs.c @@ -25,7 +25,7 @@ static struct device *rnbd_dev; static const struct class rnbd_dev_class = { - .name = "rnbd_client", + .name = "rnbd-client", }; static struct kobject *rnbd_devs_kobj; -- cgit v1.2.3 From 851e06297f20bbd85c93bbf09469f2150d1db218 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 10 Aug 2023 20:43:26 +0800 Subject: ublk: zoned: support REQ_OP_ZONE_RESET_ALL There isn't any reason to not support REQ_OP_ZONE_RESET_ALL given everything is actually handled in userspace, not mention it is pretty easy to support RESET_ALL. So enable REQ_OP_ZONE_RESET_ALL and let userspace handle it. Verified by 'tools/zbc_reset_zone -all /dev/ublkb0' in libzbc[1] with libublk-rs based ublk-zoned target prototype[2], follows command line for creating ublk-zoned: cargo run --example zoned -- add -1 1024 # add $dev_id $DEV_SIZE [1] https://github.com/westerndigitalcorporation/libzbc [2] https://github.com/ming1/libublk-rs/tree/zoned.v2 Cc: Niklas Cassel Cc: Damien Le Moal Cc: Andreas Hindborg Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20230810124326.321472-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 7 +++++-- include/uapi/linux/ublk_cmd.h | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6ecd728ee5ce..2d9cb59f8027 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -251,6 +251,7 @@ static int ublk_dev_param_zoned_apply(struct ublk_device *ub) const struct ublk_param_zoned *p = &ub->params.zoned; disk_set_zoned(ub->ub_disk, BLK_ZONED_HM); + blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue); blk_queue_required_elevator_features(ub->ub_disk->queue, ELEVATOR_F_ZBD_SEQ_WRITE); disk_set_max_active_zones(ub->ub_disk, p->max_active_zones); @@ -393,6 +394,9 @@ static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq, case REQ_OP_ZONE_APPEND: ublk_op = UBLK_IO_OP_ZONE_APPEND; break; + case REQ_OP_ZONE_RESET_ALL: + ublk_op = UBLK_IO_OP_ZONE_RESET_ALL; + break; case REQ_OP_DRV_IN: ublk_op = pdu->operation; switch (ublk_op) { @@ -404,9 +408,8 @@ static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq, default: return BLK_STS_IOERR; } - case REQ_OP_ZONE_RESET_ALL: case REQ_OP_DRV_OUT: - /* We do not support reset_all and drv_out */ + /* We do not support drv_out */ return BLK_STS_NOTSUPP; default: return BLK_STS_IOERR; diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 2685e53e4752..b9cfc5c96268 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -245,6 +245,7 @@ struct ublksrv_ctrl_dev_info { #define UBLK_IO_OP_ZONE_CLOSE 11 #define UBLK_IO_OP_ZONE_FINISH 12 #define UBLK_IO_OP_ZONE_APPEND 13 +#define UBLK_IO_OP_ZONE_RESET_ALL 14 #define UBLK_IO_OP_ZONE_RESET 15 /* * Construct a zone report. The report request is carried in `struct -- cgit v1.2.3