From a23634644afc2f7c1bac98776440a1f3b161819e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 May 2024 09:47:59 +0200 Subject: block: take io_opt and io_min into account for max_sectors The soft max_sectors limit is normally capped by the hardware limits and an arbitrary upper limit enforced by the kernel, but can be modified by the user. A few drivers want to increase this limit (nbd, rbd) or adjust it up or down based on hardware capabilities (sd). Change blk_validate_limits to default max_sectors to the optimal I/O size, or upgrade it to the preferred minimal I/O size if that is larger than the kernel default if no optimal I/O size is provided based on the logic in the SD driver. This keeps the existing kernel default for drivers that do not provide an io_opt or very big io_min value, but picks a much more useful default for those who provide these hints, and allows to remove the hacks to set the user max_sectors limit in nbd, rbd and sd. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Acked-by: Ilya Dryomov Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240531074837.1648501-5-hch@lst.de Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index f6c822c9cbd2..3dff9150ce11 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3593,7 +3593,7 @@ static int sd_revalidate_disk(struct gendisk *disk) struct request_queue *q = sdkp->disk->queue; sector_t old_capacity = sdkp->capacity; unsigned char *buffer; - unsigned int dev_max, rw_max; + unsigned int dev_max; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_revalidate_disk\n")); @@ -3675,34 +3675,15 @@ static int sd_revalidate_disk(struct gendisk *disk) else blk_queue_io_min(sdkp->disk->queue, 0); - if (sd_validate_opt_xfer_size(sdkp, dev_max)) { - q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks); - rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks); - } else { - q->limits.io_opt = 0; - rw_max = min_not_zero(logical_to_sectors(sdp, dev_max), - (sector_t)BLK_DEF_MAX_SECTORS_CAP); - } - /* * Limit default to SCSI host optimal sector limit if set. There may be * an impact on performance for when the size of a request exceeds this * host limit. */ - rw_max = min_not_zero(rw_max, sdp->host->opt_sectors); - - /* Do not exceed controller limit */ - rw_max = min(rw_max, queue_max_hw_sectors(q)); - - /* - * Only update max_sectors if previously unset or if the current value - * exceeds the capabilities of the hardware. - */ - if (sdkp->first_scan || - q->limits.max_sectors > q->limits.max_dev_sectors || - q->limits.max_sectors > q->limits.max_hw_sectors) { - q->limits.max_sectors = rw_max; - q->limits.max_user_sectors = rw_max; + q->limits.io_opt = sdp->host->opt_sectors << SECTOR_SHIFT; + if (sd_validate_opt_xfer_size(sdkp, dev_max)) { + q->limits.io_opt = min_not_zero(q->limits.io_opt, + logical_to_bytes(sdp, sdkp->opt_xfer_blocks)); } sdkp->first_scan = 0; -- cgit v1.2.3 From b3491b0db165c0cbe25874da66d97652c03db654 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 May 2024 09:48:00 +0200 Subject: sd: simplify the ZBC case in provisioning_mode_store Don't reset the discard settings to no-op over and over when a user writes to the provisioning attribute as that is already the default mode for ZBC devices. In hindsight we should have made writing to the attribute fail for ZBC devices, but the code has probably been around for far too long to change this now. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240531074837.1648501-6-hch@lst.de Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3dff9150ce11..83aa17fea39d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -461,14 +461,13 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr, if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (sd_is_zoned(sdkp)) { - sd_config_discard(sdkp, SD_LBP_DISABLE); - return count; - } - if (sdp->type != TYPE_DISK) return -EINVAL; + /* ignore the provisioning mode for ZBC devices */ + if (sd_is_zoned(sdkp)) + return count; + mode = sysfs_match_string(lbp_mode, buf); if (mode < 0) return -EINVAL; -- cgit v1.2.3 From b0dadb86a90bd5a7b723f9d3a6cf69da9b596496 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 May 2024 09:48:01 +0200 Subject: sd: add a sd_disable_discard helper Add helper to disable discard when it is not supported and use it instead of sd_config_discard in the I/O completion handler. This avoids touching more fields than required in the I/O completion handler and prepares for converting sd to use the atomic queue limits API. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240531074837.1648501-7-hch@lst.de Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 83aa17fea39d..f07d90474e68 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -821,6 +821,12 @@ static unsigned char sd_setup_protect_cmnd(struct scsi_cmnd *scmd, return protect; } +static void sd_disable_discard(struct scsi_disk *sdkp) +{ + sdkp->provisioning_mode = SD_LBP_DISABLE; + blk_queue_max_discard_sectors(sdkp->disk->queue, 0); +} + static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) { struct request_queue *q = sdkp->disk->queue; @@ -2245,12 +2251,12 @@ static int sd_done(struct scsi_cmnd *SCpnt) case 0x24: /* INVALID FIELD IN CDB */ switch (SCpnt->cmnd[0]) { case UNMAP: - sd_config_discard(sdkp, SD_LBP_DISABLE); + sd_disable_discard(sdkp); break; case WRITE_SAME_16: case WRITE_SAME: if (SCpnt->cmnd[1] & 8) { /* UNMAP */ - sd_config_discard(sdkp, SD_LBP_DISABLE); + sd_disable_discard(sdkp); } else { sdkp->device->no_write_same = 1; sd_config_write_same(sdkp); -- cgit v1.2.3 From 9972b8ce0d4ba373901bdd1e15e4de58fcd7f662 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 May 2024 09:48:02 +0200 Subject: sd: add a sd_disable_write_same helper Add helper to disable WRITE SAME when it is not supported and use it instead of sd_config_write_same in the I/O completion handler. This avoids touching more fields than required in the I/O completion handler and prepares for converting sd to use the atomic queue limits API. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240531074837.1648501-8-hch@lst.de Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index f07d90474e68..70211d0b1876 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1004,6 +1004,13 @@ static blk_status_t sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) return sd_setup_write_same10_cmnd(cmd, false); } +static void sd_disable_write_same(struct scsi_disk *sdkp) +{ + sdkp->device->no_write_same = 1; + sdkp->max_ws_blocks = 0; + blk_queue_max_write_zeroes_sectors(sdkp->disk->queue, 0); +} + static void sd_config_write_same(struct scsi_disk *sdkp) { struct request_queue *q = sdkp->disk->queue; @@ -2258,8 +2265,7 @@ static int sd_done(struct scsi_cmnd *SCpnt) if (SCpnt->cmnd[1] & 8) { /* UNMAP */ sd_disable_discard(sdkp); } else { - sdkp->device->no_write_same = 1; - sd_config_write_same(sdkp); + sd_disable_write_same(sdkp); req->rq_flags |= RQF_QUIET; } break; -- cgit v1.2.3 From d15b9bd42cd3b2077812d4bf32f532a9bd5c4914 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 May 2024 09:48:03 +0200 Subject: sd: simplify the disable case in sd_config_discard Fall through to the main call to blk_queue_max_discard_sectors given that max_blocks has been initialized to zero above instead of duplicating the call. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240531074837.1648501-9-hch@lst.de Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 70211d0b1876..0dbc6eb7a7ca 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -844,8 +844,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) case SD_LBP_FULL: case SD_LBP_DISABLE: - blk_queue_max_discard_sectors(q, 0); - return; + break; case SD_LBP_UNMAP: max_blocks = min_not_zero(sdkp->max_unmap_blocks, -- cgit v1.2.3 From f1e8185fc12c699c3abf4f39b1ff5d7793da3a95 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 May 2024 09:48:04 +0200 Subject: sd: factor out a sd_discard_mode helper Split the logic to pick the right discard mode into a little helper to prepare for further changes. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240531074837.1648501-10-hch@lst.de Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 0dbc6eb7a7ca..39eddfac09ef 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3201,6 +3201,25 @@ static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer) return; } +static unsigned int sd_discard_mode(struct scsi_disk *sdkp) +{ + if (!sdkp->lbpvpd) { + /* LBP VPD page not provided */ + if (sdkp->max_unmap_blocks) + return SD_LBP_UNMAP; + return SD_LBP_WS16; + } + + /* LBP VPD page tells us what to use */ + if (sdkp->lbpu && sdkp->max_unmap_blocks) + return SD_LBP_UNMAP; + if (sdkp->lbpws) + return SD_LBP_WS16; + if (sdkp->lbpws10) + return SD_LBP_WS10; + return SD_LBP_DISABLE; +} + /** * sd_read_block_limits - Query disk device for preferred I/O sizes. * @sdkp: disk to query @@ -3239,23 +3258,7 @@ static void sd_read_block_limits(struct scsi_disk *sdkp) sdkp->unmap_alignment = get_unaligned_be32(&vpd->data[32]) & ~(1 << 31); - if (!sdkp->lbpvpd) { /* LBP VPD page not provided */ - - if (sdkp->max_unmap_blocks) - sd_config_discard(sdkp, SD_LBP_UNMAP); - else - sd_config_discard(sdkp, SD_LBP_WS16); - - } else { /* LBP VPD page tells us what to use */ - if (sdkp->lbpu && sdkp->max_unmap_blocks) - sd_config_discard(sdkp, SD_LBP_UNMAP); - else if (sdkp->lbpws) - sd_config_discard(sdkp, SD_LBP_WS16); - else if (sdkp->lbpws10) - sd_config_discard(sdkp, SD_LBP_WS10); - else - sd_config_discard(sdkp, SD_LBP_DISABLE); - } + sd_config_discard(sdkp, sd_discard_mode(sdkp)); } out: -- cgit v1.2.3 From 9c1d339a1bf45f4d3a2e77bbf24b0ec51f02551c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 May 2024 09:48:05 +0200 Subject: sd: cleanup zoned queue limits initialization Consolidate setting zone-related queue limits in sd_zbc_read_zones instead of splitting them between sd_zbc_revalidate_zones and sd_zbc_read_zones, and move the early_zone_information initialization in sd_zbc_read_zones above setting up the queue limits. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240531074837.1648501-11-hch@lst.de Signed-off-by: Jens Axboe --- drivers/scsi/sd_zbc.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 806036e48abe..1c24c844e8d1 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -565,12 +565,6 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) sdkp->zone_info.zone_blocks = zone_blocks; sdkp->zone_info.nr_zones = nr_zones; - blk_queue_chunk_sectors(q, - logical_to_sectors(sdkp->device, zone_blocks)); - - /* Enable block layer zone append emulation */ - blk_queue_max_zone_append_sectors(q, 0); - flags = memalloc_noio_save(); ret = blk_revalidate_disk_zones(disk); memalloc_noio_restore(flags); @@ -625,6 +619,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]) if (ret != 0) goto err; + nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks); + sdkp->early_zone_info.nr_zones = nr_zones; + sdkp->early_zone_info.zone_blocks = zone_blocks; + /* The drive satisfies the kernel restrictions: set it up */ blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); if (sdkp->zones_max_open == U32_MAX) @@ -632,10 +630,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]) else disk_set_max_open_zones(disk, sdkp->zones_max_open); disk_set_max_active_zones(disk, 0); - nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks); - - sdkp->early_zone_info.nr_zones = nr_zones; - sdkp->early_zone_info.zone_blocks = zone_blocks; + blk_queue_chunk_sectors(q, + logical_to_sectors(sdkp->device, zone_blocks)); + /* Enable block layer zone append emulation */ + blk_queue_max_zone_append_sectors(q, 0); return 0; -- cgit v1.2.3 From 804e498e0496d889090f32f812b5ce1a4f2aa63e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 May 2024 09:48:06 +0200 Subject: sd: convert to the atomic queue limits API Assign all queue limits through a local queue_limits variable and queue_limits_commit_update so that we can't race updating them from multiple places, and freeze the queue when updating them so that in-progress I/O submissions don't see half-updated limits. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: John Garry Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240531074837.1648501-12-hch@lst.de Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 130 ++++++++++++++++++++++++++++---------------------- drivers/scsi/sd.h | 6 ++- drivers/scsi/sd_zbc.c | 15 +++--- 3 files changed, 85 insertions(+), 66 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 39eddfac09ef..049071b56819 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -101,12 +101,13 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_ZBC); #define SD_MINORS 16 -static void sd_config_discard(struct scsi_disk *, unsigned int); -static void sd_config_write_same(struct scsi_disk *); +static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim, + unsigned int mode); +static void sd_config_write_same(struct scsi_disk *sdkp, + struct queue_limits *lim); static int sd_revalidate_disk(struct gendisk *); static void sd_unlock_native_capacity(struct gendisk *disk); static void sd_shutdown(struct device *); -static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer); static void scsi_disk_release(struct device *cdev); static DEFINE_IDA(sd_index_ida); @@ -456,7 +457,8 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr, { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; - int mode; + struct queue_limits lim; + int mode, err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -472,8 +474,13 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr, if (mode < 0) return -EINVAL; - sd_config_discard(sdkp, mode); - + lim = queue_limits_start_update(sdkp->disk->queue); + sd_config_discard(sdkp, &lim, mode); + blk_mq_freeze_queue(sdkp->disk->queue); + err = queue_limits_commit_update(sdkp->disk->queue, &lim); + blk_mq_unfreeze_queue(sdkp->disk->queue); + if (err) + return err; return count; } static DEVICE_ATTR_RW(provisioning_mode); @@ -556,6 +563,7 @@ max_write_same_blocks_store(struct device *dev, struct device_attribute *attr, { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; + struct queue_limits lim; unsigned long max; int err; @@ -577,8 +585,13 @@ max_write_same_blocks_store(struct device *dev, struct device_attribute *attr, sdkp->max_ws_blocks = max; } - sd_config_write_same(sdkp); - + lim = queue_limits_start_update(sdkp->disk->queue); + sd_config_write_same(sdkp, &lim); + blk_mq_freeze_queue(sdkp->disk->queue); + err = queue_limits_commit_update(sdkp->disk->queue, &lim); + blk_mq_unfreeze_queue(sdkp->disk->queue); + if (err) + return err; return count; } static DEVICE_ATTR_RW(max_write_same_blocks); @@ -827,17 +840,15 @@ static void sd_disable_discard(struct scsi_disk *sdkp) blk_queue_max_discard_sectors(sdkp->disk->queue, 0); } -static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) +static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim, + unsigned int mode) { - struct request_queue *q = sdkp->disk->queue; unsigned int logical_block_size = sdkp->device->sector_size; unsigned int max_blocks = 0; - q->limits.discard_alignment = - sdkp->unmap_alignment * logical_block_size; - q->limits.discard_granularity = - max(sdkp->physical_block_size, - sdkp->unmap_granularity * logical_block_size); + lim->discard_alignment = sdkp->unmap_alignment * logical_block_size; + lim->discard_granularity = max(sdkp->physical_block_size, + sdkp->unmap_granularity * logical_block_size); sdkp->provisioning_mode = mode; switch (mode) { @@ -875,7 +886,8 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) break; } - blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9)); + lim->max_hw_discard_sectors = max_blocks * + (logical_block_size >> SECTOR_SHIFT); } static void *sd_set_special_bvec(struct request *rq, unsigned int data_len) @@ -1010,9 +1022,9 @@ static void sd_disable_write_same(struct scsi_disk *sdkp) blk_queue_max_write_zeroes_sectors(sdkp->disk->queue, 0); } -static void sd_config_write_same(struct scsi_disk *sdkp) +static void sd_config_write_same(struct scsi_disk *sdkp, + struct queue_limits *lim) { - struct request_queue *q = sdkp->disk->queue; unsigned int logical_block_size = sdkp->device->sector_size; if (sdkp->device->no_write_same) { @@ -1066,8 +1078,8 @@ static void sd_config_write_same(struct scsi_disk *sdkp) } out: - blk_queue_max_write_zeroes_sectors(q, sdkp->max_ws_blocks * - (logical_block_size >> 9)); + lim->max_write_zeroes_sectors = + sdkp->max_ws_blocks * (logical_block_size >> SECTOR_SHIFT); } static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd) @@ -2523,7 +2535,7 @@ static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp, #define READ_CAPACITY_RETRIES_ON_RESET 10 static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, - unsigned char *buffer) + struct queue_limits *lim, unsigned char *buffer) { unsigned char cmd[16]; struct scsi_sense_hdr sshdr; @@ -2597,7 +2609,7 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, /* Lowest aligned logical block */ alignment = ((buffer[14] & 0x3f) << 8 | buffer[15]) * sector_size; - blk_queue_alignment_offset(sdp->request_queue, alignment); + lim->alignment_offset = alignment; if (alignment && sdkp->first_scan) sd_printk(KERN_NOTICE, sdkp, "physical block alignment offset: %u\n", alignment); @@ -2608,7 +2620,7 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, if (buffer[14] & 0x40) /* LBPRZ */ sdkp->lbprz = 1; - sd_config_discard(sdkp, SD_LBP_WS16); + sd_config_discard(sdkp, lim, SD_LBP_WS16); } sdkp->capacity = lba + 1; @@ -2711,13 +2723,14 @@ static int sd_try_rc16_first(struct scsi_device *sdp) * read disk capacity */ static void -sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer) +sd_read_capacity(struct scsi_disk *sdkp, struct queue_limits *lim, + unsigned char *buffer) { int sector_size; struct scsi_device *sdp = sdkp->device; if (sd_try_rc16_first(sdp)) { - sector_size = read_capacity_16(sdkp, sdp, buffer); + sector_size = read_capacity_16(sdkp, sdp, lim, buffer); if (sector_size == -EOVERFLOW) goto got_data; if (sector_size == -ENODEV) @@ -2737,7 +2750,7 @@ sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer) int old_sector_size = sector_size; sd_printk(KERN_NOTICE, sdkp, "Very big device. " "Trying to use READ CAPACITY(16).\n"); - sector_size = read_capacity_16(sdkp, sdp, buffer); + sector_size = read_capacity_16(sdkp, sdp, lim, buffer); if (sector_size < 0) { sd_printk(KERN_NOTICE, sdkp, "Using 0xffffffff as device size\n"); @@ -2796,9 +2809,8 @@ got_data: */ sector_size = 512; } - blk_queue_logical_block_size(sdp->request_queue, sector_size); - blk_queue_physical_block_size(sdp->request_queue, - sdkp->physical_block_size); + lim->logical_block_size = sector_size; + lim->physical_block_size = sdkp->physical_block_size; sdkp->device->sector_size = sector_size; if (sdkp->capacity > 0xffffffff) @@ -3220,11 +3232,11 @@ static unsigned int sd_discard_mode(struct scsi_disk *sdkp) return SD_LBP_DISABLE; } -/** - * sd_read_block_limits - Query disk device for preferred I/O sizes. - * @sdkp: disk to query +/* + * Query disk device for preferred I/O sizes. */ -static void sd_read_block_limits(struct scsi_disk *sdkp) +static void sd_read_block_limits(struct scsi_disk *sdkp, + struct queue_limits *lim) { struct scsi_vpd *vpd; @@ -3258,7 +3270,7 @@ static void sd_read_block_limits(struct scsi_disk *sdkp) sdkp->unmap_alignment = get_unaligned_be32(&vpd->data[32]) & ~(1 << 31); - sd_config_discard(sdkp, sd_discard_mode(sdkp)); + sd_config_discard(sdkp, lim, sd_discard_mode(sdkp)); } out: @@ -3277,11 +3289,9 @@ static void sd_read_block_limits_ext(struct scsi_disk *sdkp) rcu_read_unlock(); } -/** - * sd_read_block_characteristics - Query block dev. characteristics - * @sdkp: disk to query - */ -static void sd_read_block_characteristics(struct scsi_disk *sdkp) +/* Query block device characteristics */ +static void sd_read_block_characteristics(struct scsi_disk *sdkp, + struct queue_limits *lim) { struct request_queue *q = sdkp->disk->queue; struct scsi_vpd *vpd; @@ -3307,29 +3317,26 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) #ifdef CONFIG_BLK_DEV_ZONED /* sd_probe rejects ZBD devices early otherwise */ if (sdkp->device->type == TYPE_ZBC) { - /* - * Host-managed. - */ - disk_set_zoned(sdkp->disk); + lim->zoned = true; /* * Per ZBC and ZAC specifications, writes in sequential write * required zones of host-managed devices must be aligned to * the device physical block size. */ - blk_queue_zone_write_granularity(q, sdkp->physical_block_size); + lim->zone_write_granularity = sdkp->physical_block_size; } else { /* * Host-aware devices are treated as conventional. */ - WARN_ON_ONCE(blk_queue_is_zoned(q)); + lim->zoned = false; } #endif /* CONFIG_BLK_DEV_ZONED */ if (!sdkp->first_scan) return; - if (blk_queue_is_zoned(q)) + if (lim->zoned) sd_printk(KERN_NOTICE, sdkp, "Host-managed zoned block device\n"); else if (sdkp->zoned == 1) sd_printk(KERN_NOTICE, sdkp, "Host-aware SMR disk used as regular disk\n"); @@ -3605,8 +3612,10 @@ static int sd_revalidate_disk(struct gendisk *disk) struct scsi_device *sdp = sdkp->device; struct request_queue *q = sdkp->disk->queue; sector_t old_capacity = sdkp->capacity; + struct queue_limits lim; unsigned char *buffer; unsigned int dev_max; + int err; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_revalidate_disk\n")); @@ -3627,12 +3636,14 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_spinup_disk(sdkp); + lim = queue_limits_start_update(sdkp->disk->queue); + /* * Without media there is no reason to ask; moreover, some devices * react badly if we do. */ if (sdkp->media_present) { - sd_read_capacity(sdkp, buffer); + sd_read_capacity(sdkp, &lim, buffer); /* * Some USB/UAS devices return generic values for mode pages * until the media has been accessed. Trigger a READ operation @@ -3651,10 +3662,10 @@ static int sd_revalidate_disk(struct gendisk *disk) if (scsi_device_supports_vpd(sdp)) { sd_read_block_provisioning(sdkp); - sd_read_block_limits(sdkp); + sd_read_block_limits(sdkp, &lim); sd_read_block_limits_ext(sdkp); - sd_read_block_characteristics(sdkp); - sd_zbc_read_zones(sdkp, buffer); + sd_read_block_characteristics(sdkp, &lim); + sd_zbc_read_zones(sdkp, &lim, buffer); sd_read_cpr(sdkp); } @@ -3680,31 +3691,36 @@ static int sd_revalidate_disk(struct gendisk *disk) /* Some devices report a maximum block count for READ/WRITE requests. */ dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks); - q->limits.max_dev_sectors = logical_to_sectors(sdp, dev_max); + lim.max_dev_sectors = logical_to_sectors(sdp, dev_max); if (sd_validate_min_xfer_size(sdkp)) - blk_queue_io_min(sdkp->disk->queue, - logical_to_bytes(sdp, sdkp->min_xfer_blocks)); + lim.io_min = logical_to_bytes(sdp, sdkp->min_xfer_blocks); else - blk_queue_io_min(sdkp->disk->queue, 0); + lim.io_min = 0; /* * Limit default to SCSI host optimal sector limit if set. There may be * an impact on performance for when the size of a request exceeds this * host limit. */ - q->limits.io_opt = sdp->host->opt_sectors << SECTOR_SHIFT; + lim.io_opt = sdp->host->opt_sectors << SECTOR_SHIFT; if (sd_validate_opt_xfer_size(sdkp, dev_max)) { - q->limits.io_opt = min_not_zero(q->limits.io_opt, + lim.io_opt = min_not_zero(lim.io_opt, logical_to_bytes(sdp, sdkp->opt_xfer_blocks)); } sdkp->first_scan = 0; set_capacity_and_notify(disk, logical_to_sectors(sdp, sdkp->capacity)); - sd_config_write_same(sdkp); + sd_config_write_same(sdkp, &lim); kfree(buffer); + blk_mq_freeze_queue(sdkp->disk->queue); + err = queue_limits_commit_update(sdkp->disk->queue, &lim); + blk_mq_unfreeze_queue(sdkp->disk->queue); + if (err) + return err; + /* * For a zoned drive, revalidating the zones can be done only once * the gendisk capacity is set. So if this fails, set back the gendisk diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 49dd600bfa48..b4170b17bad4 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -239,7 +239,8 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp) #ifdef CONFIG_BLK_DEV_ZONED -int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]); +int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, + u8 buf[SD_BUF_SIZE]); int sd_zbc_revalidate_zones(struct scsi_disk *sdkp); blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, unsigned char op, bool all); @@ -250,7 +251,8 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, #else /* CONFIG_BLK_DEV_ZONED */ -static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]) +static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, + struct queue_limits *lim, u8 buf[SD_BUF_SIZE]) { return 0; } diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 1c24c844e8d1..f685838d9ed2 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -582,13 +582,15 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) /** * sd_zbc_read_zones - Read zone information and update the request queue * @sdkp: SCSI disk pointer. + * @lim: queue limits to read into * @buf: 512 byte buffer used for storing SCSI command output. * * Read zone information and update the request queue zone characteristics and * also the zoned device information in *sdkp. Called by sd_revalidate_disk() * before the gendisk capacity has been set. */ -int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]) +int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, + u8 buf[SD_BUF_SIZE]) { struct gendisk *disk = sdkp->disk; struct request_queue *q = disk->queue; @@ -626,14 +628,13 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]) /* The drive satisfies the kernel restrictions: set it up */ blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); if (sdkp->zones_max_open == U32_MAX) - disk_set_max_open_zones(disk, 0); + lim->max_open_zones = 0; else - disk_set_max_open_zones(disk, sdkp->zones_max_open); - disk_set_max_active_zones(disk, 0); - blk_queue_chunk_sectors(q, - logical_to_sectors(sdkp->device, zone_blocks)); + lim->max_open_zones = sdkp->zones_max_open; + lim->max_active_zones = 0; + lim->chunk_sectors = logical_to_sectors(sdkp->device, zone_blocks); /* Enable block layer zone append emulation */ - blk_queue_max_zone_append_sectors(q, 0); + lim->max_zone_append_sectors = 0; return 0; -- cgit v1.2.3 From 969f17e10f5b732c05186ee0126c8a08166d0cda Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 May 2024 09:48:07 +0200 Subject: sr: convert to the atomic queue limits API Assign all queue limits through a local queue_limits variable and queue_limits_commit_update so that we can't race updating them from multiple places, and free the queue when updating them so that in-progress I/O submissions don't see half-updated limits. Also use the chance to clean up variable names to standard ones. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240531074837.1648501-13-hch@lst.de Signed-off-by: Jens Axboe --- drivers/scsi/sr.c | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 7ab000942b97..3f491019103e 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -111,7 +111,7 @@ static struct lock_class_key sr_bio_compl_lkclass; static int sr_open(struct cdrom_device_info *, int); static void sr_release(struct cdrom_device_info *); -static void get_sectorsize(struct scsi_cd *); +static int get_sectorsize(struct scsi_cd *); static int get_capabilities(struct scsi_cd *); static unsigned int sr_check_events(struct cdrom_device_info *cdi, @@ -473,15 +473,15 @@ static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt) return BLK_STS_IOERR; } -static void sr_revalidate_disk(struct scsi_cd *cd) +static int sr_revalidate_disk(struct scsi_cd *cd) { struct scsi_sense_hdr sshdr; /* if the unit is not ready, nothing more to do */ if (scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr)) - return; + return 0; sr_cd_check(&cd->cdi); - get_sectorsize(cd); + return get_sectorsize(cd); } static int sr_block_open(struct gendisk *disk, blk_mode_t mode) @@ -494,13 +494,16 @@ static int sr_block_open(struct gendisk *disk, blk_mode_t mode) return -ENXIO; scsi_autopm_get_device(sdev); - if (disk_check_media_change(disk)) - sr_revalidate_disk(cd); + if (disk_check_media_change(disk)) { + ret = sr_revalidate_disk(cd); + if (ret) + goto out; + } mutex_lock(&cd->lock); ret = cdrom_open(&cd->cdi, mode); mutex_unlock(&cd->lock); - +out: scsi_autopm_put_device(sdev); if (ret) scsi_device_put(cd->device); @@ -685,7 +688,9 @@ static int sr_probe(struct device *dev) blk_pm_runtime_init(sdev->request_queue, dev); dev_set_drvdata(dev, cd); - sr_revalidate_disk(cd); + error = sr_revalidate_disk(cd); + if (error) + goto unregister_cdrom; error = device_add_disk(&sdev->sdev_gendev, disk, NULL); if (error) @@ -714,13 +719,14 @@ fail: } -static void get_sectorsize(struct scsi_cd *cd) +static int get_sectorsize(struct scsi_cd *cd) { + struct request_queue *q = cd->device->request_queue; static const u8 cmd[10] = { READ_CAPACITY }; unsigned char buffer[8] = { }; - int the_result; + struct queue_limits lim; + int err; int sector_size; - struct request_queue *queue; struct scsi_failure failure_defs[] = { { .result = SCMD_FAILURE_RESULT_ANY, @@ -736,10 +742,10 @@ static void get_sectorsize(struct scsi_cd *cd) }; /* Do the command and wait.. */ - the_result = scsi_execute_cmd(cd->device, cmd, REQ_OP_DRV_IN, buffer, + err = scsi_execute_cmd(cd->device, cmd, REQ_OP_DRV_IN, buffer, sizeof(buffer), SR_TIMEOUT, MAX_RETRIES, &exec_args); - if (the_result) { + if (err) { cd->capacity = 0x1fffff; sector_size = 2048; /* A guess, just in case */ } else { @@ -789,10 +795,12 @@ static void get_sectorsize(struct scsi_cd *cd) set_capacity(cd->disk, cd->capacity); } - queue = cd->device->request_queue; - blk_queue_logical_block_size(queue, sector_size); - - return; + lim = queue_limits_start_update(q); + lim.logical_block_size = sector_size; + blk_mq_freeze_queue(q); + err = queue_limits_commit_update(q, &lim); + blk_mq_unfreeze_queue(q); + return err; } static int get_capabilities(struct scsi_cd *cd) -- cgit v1.2.3 From 73e3715ed14844067c5c598e72777641004a7f60 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 May 2024 09:48:09 +0200 Subject: block: add special APIs for run-time disabling of discard and friends A few drivers optimistically try to support discard, write zeroes and secure erase and disable the features from the I/O completion handler if the hardware can't support them. This disable can't be done using the atomic queue limits API because the I/O completion handlers can't take sleeping locks or freeze the queue. Keep the existing clearing of the relevant field to zero, but replace the old blk_queue_max_* APIs with new disable APIs that force the value to 0. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Reviewed-by: John Garry Reviewed-by: Nitesh Shetty Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240531074837.1648501-15-hch@lst.de Signed-off-by: Jens Axboe --- arch/um/drivers/ubd_kern.c | 4 ++-- block/blk-settings.c | 41 ----------------------------------------- drivers/block/xen-blkfront.c | 4 ++-- drivers/scsi/sd.c | 4 ++-- include/linux/blkdev.h | 28 ++++++++++++++++++++++------ 5 files changed, 28 insertions(+), 53 deletions(-) (limited to 'drivers/scsi') diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 093c87879d08..cdcb75a68989 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -451,9 +451,9 @@ static void ubd_end_request(struct io_thread_req *io_req) { if (io_req->error == BLK_STS_NOTSUPP) { if (req_op(io_req->req) == REQ_OP_DISCARD) - blk_queue_max_discard_sectors(io_req->req->q, 0); + blk_queue_disable_discard(io_req->req->q); else if (req_op(io_req->req) == REQ_OP_WRITE_ZEROES) - blk_queue_max_write_zeroes_sectors(io_req->req->q, 0); + blk_queue_disable_write_zeroes(io_req->req->q); } blk_mq_end_request(io_req->req, io_req->error); kfree(io_req); diff --git a/block/blk-settings.c b/block/blk-settings.c index 0b038729608f..996f247fc98e 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -293,47 +293,6 @@ int queue_limits_set(struct request_queue *q, struct queue_limits *lim) } EXPORT_SYMBOL_GPL(queue_limits_set); -/** - * blk_queue_max_discard_sectors - set max sectors for a single discard - * @q: the request queue for the device - * @max_discard_sectors: maximum number of sectors to discard - **/ -void blk_queue_max_discard_sectors(struct request_queue *q, - unsigned int max_discard_sectors) -{ - struct queue_limits *lim = &q->limits; - - lim->max_hw_discard_sectors = max_discard_sectors; - lim->max_discard_sectors = - min(max_discard_sectors, lim->max_user_discard_sectors); -} -EXPORT_SYMBOL(blk_queue_max_discard_sectors); - -/** - * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase - * @q: the request queue for the device - * @max_sectors: maximum number of sectors to secure_erase - **/ -void blk_queue_max_secure_erase_sectors(struct request_queue *q, - unsigned int max_sectors) -{ - q->limits.max_secure_erase_sectors = max_sectors; -} -EXPORT_SYMBOL(blk_queue_max_secure_erase_sectors); - -/** - * blk_queue_max_write_zeroes_sectors - set max sectors for a single - * write zeroes - * @q: the request queue for the device - * @max_write_zeroes_sectors: maximum number of sectors to write per command - **/ -void blk_queue_max_write_zeroes_sectors(struct request_queue *q, - unsigned int max_write_zeroes_sectors) -{ - q->limits.max_write_zeroes_sectors = max_write_zeroes_sectors; -} -EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors); - void disk_update_readahead(struct gendisk *disk) { blk_apply_bdi_limits(disk->bdi, &disk->queue->limits); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index fd7c0ff2139c..9b4ec3e4908c 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1605,8 +1605,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) blkif_req(req)->error = BLK_STS_NOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; - blk_queue_max_discard_sectors(rq, 0); - blk_queue_max_secure_erase_sectors(rq, 0); + blk_queue_disable_discard(rq); + blk_queue_disable_secure_erase(rq); } break; case BLKIF_OP_FLUSH_DISKCACHE: diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 049071b56819..d957e29b17a9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -837,7 +837,7 @@ static unsigned char sd_setup_protect_cmnd(struct scsi_cmnd *scmd, static void sd_disable_discard(struct scsi_disk *sdkp) { sdkp->provisioning_mode = SD_LBP_DISABLE; - blk_queue_max_discard_sectors(sdkp->disk->queue, 0); + blk_queue_disable_discard(sdkp->disk->queue); } static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim, @@ -1019,7 +1019,7 @@ static void sd_disable_write_same(struct scsi_disk *sdkp) { sdkp->device->no_write_same = 1; sdkp->max_ws_blocks = 0; - blk_queue_max_write_zeroes_sectors(sdkp->disk->queue, 0); + blk_queue_disable_write_zeroes(sdkp->disk->queue); } static void sd_config_write_same(struct scsi_disk *sdkp, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ad995e7a7698..ac8e0cb2353a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -912,15 +912,31 @@ static inline void queue_limits_cancel_update(struct request_queue *q) mutex_unlock(&q->limits_lock); } +/* + * These helpers are for drivers that have sloppy feature negotiation and might + * have to disable DISCARD, WRITE_ZEROES or SECURE_DISCARD from the I/O + * completion handler when the device returned an indicator that the respective + * feature is not actually supported. They are racy and the driver needs to + * cope with that. Try to avoid this scheme if you can. + */ +static inline void blk_queue_disable_discard(struct request_queue *q) +{ + q->limits.max_discard_sectors = 0; +} + +static inline void blk_queue_disable_secure_erase(struct request_queue *q) +{ + q->limits.max_secure_erase_sectors = 0; +} + +static inline void blk_queue_disable_write_zeroes(struct request_queue *q) +{ + q->limits.max_write_zeroes_sectors = 0; +} + /* * Access functions for manipulating queue properties */ -void blk_queue_max_secure_erase_sectors(struct request_queue *q, - unsigned int max_sectors); -extern void blk_queue_max_discard_sectors(struct request_queue *q, - unsigned int max_discard_sectors); -extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, - unsigned int max_write_same_sectors); void disk_update_readahead(struct gendisk *disk); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); -- cgit v1.2.3 From e9f5f44ad3725335d9c559c3c22cd3726152a7b1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2024 10:48:15 +0200 Subject: block: remove the blk_integrity_profile structure Block layer integrity configuration is a bit complex right now, as it indirects through operation vectors for a simple two-dimensional configuration: a) the checksum type of none, ip checksum, crc, crc64 b) the presence or absence of a reference tag Remove the integrity profile, and instead add a separate csum_type flag which replaces the existing ip-checksum field and a new flag that indicates the presence of the reference tag. This removes up to two layers of indirect calls, remove the need to offload the no-op verification of non-PI metadata to a workqueue and generally simplifies the code. The downside is that block/t10-pi.c now has to be built into the kernel when CONFIG_BLK_DEV_INTEGRITY is supported. Given that both nvme and SCSI require t10-pi.ko, it is loaded for all usual configurations that enabled CONFIG_BLK_DEV_INTEGRITY already, though. Signed-off-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240613084839.1044015-6-hch@lst.de Signed-off-by: Jens Axboe --- block/Kconfig | 8 +- block/Makefile | 3 +- block/bio-integrity.c | 32 ++--- block/blk-integrity.c | 66 +++++----- block/blk-mq.c | 13 +- block/blk.h | 8 ++ block/t10-pi.c | 241 +++++++++++++----------------------- drivers/md/dm-crypt.c | 2 +- drivers/nvme/host/Kconfig | 1 - drivers/nvme/host/core.c | 17 ++- drivers/nvme/target/Kconfig | 1 - drivers/nvme/target/io-cmd-bdev.c | 16 +-- drivers/scsi/Kconfig | 1 - drivers/scsi/sd_dif.c | 19 ++- drivers/target/target_core_iblock.c | 49 ++++---- include/linux/blk-integrity.h | 35 ++---- include/linux/blkdev.h | 9 +- include/linux/t10-pi.h | 8 -- 18 files changed, 215 insertions(+), 314 deletions(-) (limited to 'drivers/scsi') diff --git a/block/Kconfig b/block/Kconfig index dc12af58dbae..5b623b876d3b 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -62,6 +62,8 @@ config BLK_DEV_BSGLIB config BLK_DEV_INTEGRITY bool "Block layer data integrity support" + select CRC_T10DIF + select CRC64_ROCKSOFT help Some storage devices allow extra information to be stored/retrieved to help protect the data. The block layer @@ -72,12 +74,6 @@ config BLK_DEV_INTEGRITY T10/SCSI Data Integrity Field or the T13/ATA External Path Protection. If in doubt, say N. -config BLK_DEV_INTEGRITY_T10 - tristate - depends on BLK_DEV_INTEGRITY - select CRC_T10DIF - select CRC64_ROCKSOFT - config BLK_DEV_WRITE_MOUNTED bool "Allow writing to mounted block devices" default y diff --git a/block/Makefile b/block/Makefile index 168150b9c510..ddfd21c1a9ff 100644 --- a/block/Makefile +++ b/block/Makefile @@ -26,8 +26,7 @@ obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o obj-$(CONFIG_IOSCHED_BFQ) += bfq.o -obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o -obj-$(CONFIG_BLK_DEV_INTEGRITY_T10) += t10-pi.o +obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o diff --git a/block/bio-integrity.c b/block/bio-integrity.c index af7f71d16114..31dbc2853f92 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -378,10 +378,9 @@ EXPORT_SYMBOL_GPL(bio_integrity_map_user); * bio_integrity_process - Process integrity metadata for a bio * @bio: bio to generate/verify integrity metadata for * @proc_iter: iterator to process - * @proc_fn: Pointer to the relevant processing function */ static blk_status_t bio_integrity_process(struct bio *bio, - struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn) + struct bvec_iter *proc_iter) { struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); struct blk_integrity_iter iter; @@ -392,17 +391,18 @@ static blk_status_t bio_integrity_process(struct bio *bio, iter.disk_name = bio->bi_bdev->bd_disk->disk_name; iter.interval = 1 << bi->interval_exp; - iter.tuple_size = bi->tuple_size; iter.seed = proc_iter->bi_sector; iter.prot_buf = bvec_virt(bip->bip_vec); - iter.pi_offset = bi->pi_offset; __bio_for_each_segment(bv, bio, bviter, *proc_iter) { void *kaddr = bvec_kmap_local(&bv); iter.data_buf = kaddr; iter.data_size = bv.bv_len; - ret = proc_fn(&iter); + if (bio_data_dir(bio) == WRITE) + blk_integrity_generate(&iter, bi); + else + ret = blk_integrity_verify(&iter, bi); kunmap_local(kaddr); if (ret) @@ -448,12 +448,10 @@ bool bio_integrity_prep(struct bio *bio) return true; if (bio_data_dir(bio) == READ) { - if (!bi->profile->verify_fn || - !(bi->flags & BLK_INTEGRITY_VERIFY)) + if (!(bi->flags & BLK_INTEGRITY_VERIFY)) return true; } else { - if (!bi->profile->generate_fn || - !(bi->flags & BLK_INTEGRITY_GENERATE)) + if (!(bi->flags & BLK_INTEGRITY_GENERATE)) return true; /* @@ -488,7 +486,7 @@ bool bio_integrity_prep(struct bio *bio) bip->bip_flags |= BIP_BLOCK_INTEGRITY; bip_set_seed(bip, bio->bi_iter.bi_sector); - if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM) + if (bi->csum_type == BLK_INTEGRITY_CSUM_IP) bip->bip_flags |= BIP_IP_CHECKSUM; /* Map it */ @@ -511,12 +509,10 @@ bool bio_integrity_prep(struct bio *bio) } /* Auto-generate integrity metadata if this is a write */ - if (bio_data_dir(bio) == WRITE) { - bio_integrity_process(bio, &bio->bi_iter, - bi->profile->generate_fn); - } else { + if (bio_data_dir(bio) == WRITE) + bio_integrity_process(bio, &bio->bi_iter); + else bip->bio_iter = bio->bi_iter; - } return true; err_end_io: @@ -539,15 +535,13 @@ static void bio_integrity_verify_fn(struct work_struct *work) struct bio_integrity_payload *bip = container_of(work, struct bio_integrity_payload, bip_work); struct bio *bio = bip->bip_bio; - struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); /* * At the moment verify is called bio's iterator was advanced * during split and completion, we need to rewind iterator to * it's original position. */ - bio->bi_status = bio_integrity_process(bio, &bip->bio_iter, - bi->profile->verify_fn); + bio->bi_status = bio_integrity_process(bio, &bip->bio_iter); bio_integrity_free(bio); bio_endio(bio); } @@ -569,7 +563,7 @@ bool __bio_integrity_endio(struct bio *bio) struct bio_integrity_payload *bip = bio_integrity(bio); if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && - (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->profile->verify_fn) { + (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->csum_type) { INIT_WORK(&bip->bip_work, bio_integrity_verify_fn); queue_work(kintegrityd_wq, &bip->bip_work); return false; diff --git a/block/blk-integrity.c b/block/blk-integrity.c index ccbeb6dfa87a..17d37badfbb8 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -123,10 +123,10 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) struct blk_integrity *b1 = &gd1->queue->integrity; struct blk_integrity *b2 = &gd2->queue->integrity; - if (!b1->profile && !b2->profile) + if (!b1->tuple_size && !b2->tuple_size) return 0; - if (!b1->profile || !b2->profile) + if (!b1->tuple_size || !b2->tuple_size) return -1; if (b1->interval_exp != b2->interval_exp) { @@ -150,10 +150,13 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) return -1; } - if (b1->profile != b2->profile) { + if (b1->csum_type != b2->csum_type || + (b1->flags & BLK_INTEGRITY_REF_TAG) != + (b2->flags & BLK_INTEGRITY_REF_TAG)) { pr_err("%s: %s/%s type %s != %s\n", __func__, gd1->disk_name, gd2->disk_name, - b1->profile->name, b2->profile->name); + blk_integrity_profile_name(b1), + blk_integrity_profile_name(b2)); return -1; } @@ -217,14 +220,37 @@ static inline struct blk_integrity *dev_to_bi(struct device *dev) return &dev_to_disk(dev)->queue->integrity; } +const char *blk_integrity_profile_name(struct blk_integrity *bi) +{ + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_IP: + if (bi->flags & BLK_INTEGRITY_REF_TAG) + return "T10-DIF-TYPE1-IP"; + return "T10-DIF-TYPE3-IP"; + case BLK_INTEGRITY_CSUM_CRC: + if (bi->flags & BLK_INTEGRITY_REF_TAG) + return "T10-DIF-TYPE1-CRC"; + return "T10-DIF-TYPE3-CRC"; + case BLK_INTEGRITY_CSUM_CRC64: + if (bi->flags & BLK_INTEGRITY_REF_TAG) + return "EXT-DIF-TYPE1-CRC64"; + return "EXT-DIF-TYPE3-CRC64"; + case BLK_INTEGRITY_CSUM_NONE: + break; + } + + return "nop"; +} +EXPORT_SYMBOL_GPL(blk_integrity_profile_name); + static ssize_t format_show(struct device *dev, struct device_attribute *attr, char *page) { struct blk_integrity *bi = dev_to_bi(dev); - if (bi->profile && bi->profile->name) - return sysfs_emit(page, "%s\n", bi->profile->name); - return sysfs_emit(page, "none\n"); + if (!bi->tuple_size) + return sysfs_emit(page, "none\n"); + return sysfs_emit(page, "%s\n", blk_integrity_profile_name(bi)); } static ssize_t tag_size_show(struct device *dev, struct device_attribute *attr, @@ -326,28 +352,6 @@ const struct attribute_group blk_integrity_attr_group = { .attrs = integrity_attrs, }; -static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter) -{ - return BLK_STS_OK; -} - -static void blk_integrity_nop_prepare(struct request *rq) -{ -} - -static void blk_integrity_nop_complete(struct request *rq, - unsigned int nr_bytes) -{ -} - -static const struct blk_integrity_profile nop_profile = { - .name = "nop", - .generate_fn = blk_integrity_nop_fn, - .verify_fn = blk_integrity_nop_fn, - .prepare_fn = blk_integrity_nop_prepare, - .complete_fn = blk_integrity_nop_complete, -}; - /** * blk_integrity_register - Register a gendisk as being integrity-capable * @disk: struct gendisk pointer to make integrity-aware @@ -363,11 +367,11 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template { struct blk_integrity *bi = &disk->queue->integrity; + bi->csum_type = template->csum_type; bi->flags = BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE | template->flags; bi->interval_exp = template->interval_exp ? : ilog2(queue_logical_block_size(disk->queue)); - bi->profile = template->profile ? template->profile : &nop_profile; bi->tuple_size = template->tuple_size; bi->tag_size = template->tag_size; bi->pi_offset = template->pi_offset; @@ -394,7 +398,7 @@ void blk_integrity_unregister(struct gendisk *disk) { struct blk_integrity *bi = &disk->queue->integrity; - if (!bi->profile) + if (!bi->tuple_size) return; /* ensure all bios are off the integrity workqueue */ diff --git a/block/blk-mq.c b/block/blk-mq.c index 3b4df8e5ac9e..0d4cd39c3d25 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -804,10 +804,8 @@ static void blk_complete_request(struct request *req) if (!bio) return; -#ifdef CONFIG_BLK_DEV_INTEGRITY if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ) - req->q->integrity.profile->complete_fn(req, total_bytes); -#endif + blk_integrity_complete(req, total_bytes); /* * Upper layers may call blk_crypto_evict_key() anytime after the last @@ -875,11 +873,9 @@ bool blk_update_request(struct request *req, blk_status_t error, if (!req->bio) return false; -#ifdef CONFIG_BLK_DEV_INTEGRITY if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ && error == BLK_STS_OK) - req->q->integrity.profile->complete_fn(req, nr_bytes); -#endif + blk_integrity_complete(req, nr_bytes); /* * Upper layers may call blk_crypto_evict_key() anytime after the last @@ -1264,10 +1260,9 @@ void blk_mq_start_request(struct request *rq) WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT); rq->mq_hctx->tags->rqs[rq->tag] = rq; -#ifdef CONFIG_BLK_DEV_INTEGRITY if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE) - q->integrity.profile->prepare_fn(rq); -#endif + blk_integrity_prepare(rq); + if (rq->bio && rq->bio->bi_opf & REQ_POLLED) WRITE_ONCE(rq->bio->bi_cookie, rq->mq_hctx->queue_num); } diff --git a/block/blk.h b/block/blk.h index 189bc25beb50..79e8d5d4fe0c 100644 --- a/block/blk.h +++ b/block/blk.h @@ -9,6 +9,7 @@ #include #include "blk-crypto-internal.h" +struct blk_integrity_iter; struct elevator_type; /* Max future timer expiry for timeouts */ @@ -673,4 +674,11 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops, struct file *bdev_file); int bdev_permission(dev_t dev, blk_mode_t mode, void *holder); +void blk_integrity_generate(struct blk_integrity_iter *iter, + struct blk_integrity *bi); +blk_status_t blk_integrity_verify(struct blk_integrity_iter *iter, + struct blk_integrity *bi); +void blk_integrity_prepare(struct request *rq); +void blk_integrity_complete(struct request *rq, unsigned int nr_bytes); + #endif /* BLK_INTERNAL_H */ diff --git a/block/t10-pi.c b/block/t10-pi.c index f4cc91156da1..dadecf621497 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -11,17 +11,14 @@ #include #include #include +#include "blk.h" -typedef __be16 (csum_fn) (__be16, void *, unsigned int); - -static __be16 t10_pi_crc_fn(__be16 crc, void *data, unsigned int len) -{ - return cpu_to_be16(crc_t10dif_update(be16_to_cpu(crc), data, len)); -} - -static __be16 t10_pi_ip_fn(__be16 csum, void *data, unsigned int len) +static __be16 t10_pi_csum(__be16 csum, void *data, unsigned int len, + unsigned char csum_type) { - return (__force __be16)ip_compute_csum(data, len); + if (csum_type == BLK_INTEGRITY_CSUM_IP) + return (__force __be16)ip_compute_csum(data, len); + return cpu_to_be16(crc_t10dif_update(be16_to_cpu(csum), data, len)); } /* @@ -29,48 +26,44 @@ static __be16 t10_pi_ip_fn(__be16 csum, void *data, unsigned int len) * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref * tag. */ -static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter, - csum_fn *fn, enum t10_dif_type type) +static void t10_pi_generate(struct blk_integrity_iter *iter, + struct blk_integrity *bi) { - u8 offset = iter->pi_offset; + u8 offset = bi->pi_offset; unsigned int i; for (i = 0 ; i < iter->data_size ; i += iter->interval) { struct t10_pi_tuple *pi = iter->prot_buf + offset; - pi->guard_tag = fn(0, iter->data_buf, iter->interval); + pi->guard_tag = t10_pi_csum(0, iter->data_buf, iter->interval, + bi->csum_type); if (offset) - pi->guard_tag = fn(pi->guard_tag, iter->prot_buf, - offset); + pi->guard_tag = t10_pi_csum(pi->guard_tag, + iter->prot_buf, offset, bi->csum_type); pi->app_tag = 0; - if (type == T10_PI_TYPE1_PROTECTION) + if (bi->flags & BLK_INTEGRITY_REF_TAG) pi->ref_tag = cpu_to_be32(lower_32_bits(iter->seed)); else pi->ref_tag = 0; iter->data_buf += iter->interval; - iter->prot_buf += iter->tuple_size; + iter->prot_buf += bi->tuple_size; iter->seed++; } - - return BLK_STS_OK; } static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, - csum_fn *fn, enum t10_dif_type type) + struct blk_integrity *bi) { - u8 offset = iter->pi_offset; + u8 offset = bi->pi_offset; unsigned int i; - BUG_ON(type == T10_PI_TYPE0_PROTECTION); - for (i = 0 ; i < iter->data_size ; i += iter->interval) { struct t10_pi_tuple *pi = iter->prot_buf + offset; __be16 csum; - if (type == T10_PI_TYPE1_PROTECTION || - type == T10_PI_TYPE2_PROTECTION) { + if (bi->flags & BLK_INTEGRITY_REF_TAG) { if (pi->app_tag == T10_PI_APP_ESCAPE) goto next; @@ -82,15 +75,17 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, iter->seed, be32_to_cpu(pi->ref_tag)); return BLK_STS_PROTECTION; } - } else if (type == T10_PI_TYPE3_PROTECTION) { + } else { if (pi->app_tag == T10_PI_APP_ESCAPE && pi->ref_tag == T10_PI_REF_ESCAPE) goto next; } - csum = fn(0, iter->data_buf, iter->interval); + csum = t10_pi_csum(0, iter->data_buf, iter->interval, + bi->csum_type); if (offset) - csum = fn(csum, iter->prot_buf, offset); + csum = t10_pi_csum(csum, iter->prot_buf, offset, + bi->csum_type); if (pi->guard_tag != csum) { pr_err("%s: guard tag error at sector %llu " \ @@ -102,33 +97,13 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, next: iter->data_buf += iter->interval; - iter->prot_buf += iter->tuple_size; + iter->prot_buf += bi->tuple_size; iter->seed++; } return BLK_STS_OK; } -static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter) -{ - return t10_pi_generate(iter, t10_pi_crc_fn, T10_PI_TYPE1_PROTECTION); -} - -static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter) -{ - return t10_pi_generate(iter, t10_pi_ip_fn, T10_PI_TYPE1_PROTECTION); -} - -static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter) -{ - return t10_pi_verify(iter, t10_pi_crc_fn, T10_PI_TYPE1_PROTECTION); -} - -static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter) -{ - return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE1_PROTECTION); -} - /** * t10_pi_type1_prepare - prepare PI prior submitting request to device * @rq: request with PI that should be prepared @@ -225,81 +200,15 @@ static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes) } } -static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter) -{ - return t10_pi_generate(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION); -} - -static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter) -{ - return t10_pi_generate(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION); -} - -static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter) -{ - return t10_pi_verify(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION); -} - -static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter) -{ - return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION); -} - -/* Type 3 does not have a reference tag so no remapping is required. */ -static void t10_pi_type3_prepare(struct request *rq) -{ -} - -/* Type 3 does not have a reference tag so no remapping is required. */ -static void t10_pi_type3_complete(struct request *rq, unsigned int nr_bytes) -{ -} - -const struct blk_integrity_profile t10_pi_type1_crc = { - .name = "T10-DIF-TYPE1-CRC", - .generate_fn = t10_pi_type1_generate_crc, - .verify_fn = t10_pi_type1_verify_crc, - .prepare_fn = t10_pi_type1_prepare, - .complete_fn = t10_pi_type1_complete, -}; -EXPORT_SYMBOL(t10_pi_type1_crc); - -const struct blk_integrity_profile t10_pi_type1_ip = { - .name = "T10-DIF-TYPE1-IP", - .generate_fn = t10_pi_type1_generate_ip, - .verify_fn = t10_pi_type1_verify_ip, - .prepare_fn = t10_pi_type1_prepare, - .complete_fn = t10_pi_type1_complete, -}; -EXPORT_SYMBOL(t10_pi_type1_ip); - -const struct blk_integrity_profile t10_pi_type3_crc = { - .name = "T10-DIF-TYPE3-CRC", - .generate_fn = t10_pi_type3_generate_crc, - .verify_fn = t10_pi_type3_verify_crc, - .prepare_fn = t10_pi_type3_prepare, - .complete_fn = t10_pi_type3_complete, -}; -EXPORT_SYMBOL(t10_pi_type3_crc); - -const struct blk_integrity_profile t10_pi_type3_ip = { - .name = "T10-DIF-TYPE3-IP", - .generate_fn = t10_pi_type3_generate_ip, - .verify_fn = t10_pi_type3_verify_ip, - .prepare_fn = t10_pi_type3_prepare, - .complete_fn = t10_pi_type3_complete, -}; -EXPORT_SYMBOL(t10_pi_type3_ip); - static __be64 ext_pi_crc64(u64 crc, void *data, unsigned int len) { return cpu_to_be64(crc64_rocksoft_update(crc, data, len)); } -static blk_status_t ext_pi_crc64_generate(struct blk_integrity_iter *iter, - enum t10_dif_type type) +static void ext_pi_crc64_generate(struct blk_integrity_iter *iter, + struct blk_integrity *bi) { - u8 offset = iter->pi_offset; + u8 offset = bi->pi_offset; unsigned int i; for (i = 0 ; i < iter->data_size ; i += iter->interval) { @@ -311,17 +220,15 @@ static blk_status_t ext_pi_crc64_generate(struct blk_integrity_iter *iter, iter->prot_buf, offset); pi->app_tag = 0; - if (type == T10_PI_TYPE1_PROTECTION) + if (bi->flags & BLK_INTEGRITY_REF_TAG) put_unaligned_be48(iter->seed, pi->ref_tag); else put_unaligned_be48(0ULL, pi->ref_tag); iter->data_buf += iter->interval; - iter->prot_buf += iter->tuple_size; + iter->prot_buf += bi->tuple_size; iter->seed++; } - - return BLK_STS_OK; } static bool ext_pi_ref_escape(u8 *ref_tag) @@ -332,9 +239,9 @@ static bool ext_pi_ref_escape(u8 *ref_tag) } static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter, - enum t10_dif_type type) + struct blk_integrity *bi) { - u8 offset = iter->pi_offset; + u8 offset = bi->pi_offset; unsigned int i; for (i = 0; i < iter->data_size; i += iter->interval) { @@ -342,7 +249,7 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter, u64 ref, seed; __be64 csum; - if (type == T10_PI_TYPE1_PROTECTION) { + if (bi->flags & BLK_INTEGRITY_REF_TAG) { if (pi->app_tag == T10_PI_APP_ESCAPE) goto next; @@ -353,7 +260,7 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter, iter->disk_name, seed, ref); return BLK_STS_PROTECTION; } - } else if (type == T10_PI_TYPE3_PROTECTION) { + } else { if (pi->app_tag == T10_PI_APP_ESCAPE && ext_pi_ref_escape(pi->ref_tag)) goto next; @@ -374,23 +281,13 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter, next: iter->data_buf += iter->interval; - iter->prot_buf += iter->tuple_size; + iter->prot_buf += bi->tuple_size; iter->seed++; } return BLK_STS_OK; } -static blk_status_t ext_pi_type1_verify_crc64(struct blk_integrity_iter *iter) -{ - return ext_pi_crc64_verify(iter, T10_PI_TYPE1_PROTECTION); -} - -static blk_status_t ext_pi_type1_generate_crc64(struct blk_integrity_iter *iter) -{ - return ext_pi_crc64_generate(iter, T10_PI_TYPE1_PROTECTION); -} - static void ext_pi_type1_prepare(struct request *rq) { struct blk_integrity *bi = &rq->q->integrity; @@ -467,33 +364,61 @@ static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes) } } -static blk_status_t ext_pi_type3_verify_crc64(struct blk_integrity_iter *iter) +void blk_integrity_generate(struct blk_integrity_iter *iter, + struct blk_integrity *bi) { - return ext_pi_crc64_verify(iter, T10_PI_TYPE3_PROTECTION); + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_CRC64: + ext_pi_crc64_generate(iter, bi); + break; + case BLK_INTEGRITY_CSUM_CRC: + case BLK_INTEGRITY_CSUM_IP: + t10_pi_generate(iter, bi); + break; + default: + break; + } } -static blk_status_t ext_pi_type3_generate_crc64(struct blk_integrity_iter *iter) +blk_status_t blk_integrity_verify(struct blk_integrity_iter *iter, + struct blk_integrity *bi) { - return ext_pi_crc64_generate(iter, T10_PI_TYPE3_PROTECTION); + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_CRC64: + return ext_pi_crc64_verify(iter, bi); + case BLK_INTEGRITY_CSUM_CRC: + case BLK_INTEGRITY_CSUM_IP: + return t10_pi_verify(iter, bi); + default: + return BLK_STS_OK; + } } -const struct blk_integrity_profile ext_pi_type1_crc64 = { - .name = "EXT-DIF-TYPE1-CRC64", - .generate_fn = ext_pi_type1_generate_crc64, - .verify_fn = ext_pi_type1_verify_crc64, - .prepare_fn = ext_pi_type1_prepare, - .complete_fn = ext_pi_type1_complete, -}; -EXPORT_SYMBOL_GPL(ext_pi_type1_crc64); - -const struct blk_integrity_profile ext_pi_type3_crc64 = { - .name = "EXT-DIF-TYPE3-CRC64", - .generate_fn = ext_pi_type3_generate_crc64, - .verify_fn = ext_pi_type3_verify_crc64, - .prepare_fn = t10_pi_type3_prepare, - .complete_fn = t10_pi_type3_complete, -}; -EXPORT_SYMBOL_GPL(ext_pi_type3_crc64); +void blk_integrity_prepare(struct request *rq) +{ + struct blk_integrity *bi = &rq->q->integrity; + + if (!(bi->flags & BLK_INTEGRITY_REF_TAG)) + return; + + if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC64) + ext_pi_type1_prepare(rq); + else + t10_pi_type1_prepare(rq); +} + +void blk_integrity_complete(struct request *rq, unsigned int nr_bytes) +{ + struct blk_integrity *bi = &rq->q->integrity; + + if (!(bi->flags & BLK_INTEGRITY_REF_TAG)) + return; + + if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC64) + ext_pi_type1_complete(rq, nr_bytes); + else + t10_pi_type1_complete(rq, nr_bytes); +} MODULE_DESCRIPTION("T10 Protection Information module"); MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 1dfc462f29cd..6c013ceb0e5f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1177,7 +1177,7 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti) struct mapped_device *md = dm_table_get_md(ti->table); /* We require an underlying device with non-PI metadata */ - if (!bi || strcmp(bi->profile->name, "nop")) { + if (!bi || bi->csum_type != BLK_INTEGRITY_CSUM_NONE) { ti->error = "Integrity profile not supported."; return -EINVAL; } diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index b309c8be720f..a3caef75aa0a 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config NVME_CORE tristate - select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY config BLK_DEV_NVME tristate "NVM Express block device" diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f5d150c62955..14bac248cde4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1744,17 +1744,16 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) case NVME_NS_DPS_PI_TYPE3: switch (head->guard_type) { case NVME_NVM_NS_16B_GUARD: - integrity.profile = &t10_pi_type3_crc; + integrity.csum_type = BLK_INTEGRITY_CSUM_CRC; integrity.tag_size = sizeof(u16) + sizeof(u32); integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; break; case NVME_NVM_NS_64B_GUARD: - integrity.profile = &ext_pi_type3_crc64; + integrity.csum_type = BLK_INTEGRITY_CSUM_CRC64; integrity.tag_size = sizeof(u16) + 6; integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; break; default: - integrity.profile = NULL; break; } break; @@ -1762,22 +1761,22 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) case NVME_NS_DPS_PI_TYPE2: switch (head->guard_type) { case NVME_NVM_NS_16B_GUARD: - integrity.profile = &t10_pi_type1_crc; + integrity.csum_type = BLK_INTEGRITY_CSUM_CRC; integrity.tag_size = sizeof(u16); - integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE | + BLK_INTEGRITY_REF_TAG; break; case NVME_NVM_NS_64B_GUARD: - integrity.profile = &ext_pi_type1_crc64; + integrity.csum_type = BLK_INTEGRITY_CSUM_CRC64; integrity.tag_size = sizeof(u16); - integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE | + BLK_INTEGRITY_REF_TAG; break; default: - integrity.profile = NULL; break; } break; default: - integrity.profile = NULL; break; } diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 872dd1a0acd8..c42aec41cc7b 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -6,7 +6,6 @@ config NVME_TARGET depends on CONFIGFS_FS select NVME_KEYRING if NVME_TARGET_TCP_TLS select KEYS if NVME_TARGET_TCP_TLS - select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY select SGL_ALLOC help This enabled target side support for the NVMe protocol, that is diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 6426aac2634a..b628bc5ee998 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -61,15 +61,17 @@ static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns) { struct blk_integrity *bi = bdev_get_integrity(ns->bdev); - if (bi) { + if (!bi) + return; + + if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC) { ns->metadata_size = bi->tuple_size; - if (bi->profile == &t10_pi_type1_crc) + if (bi->flags & BLK_INTEGRITY_REF_TAG) ns->pi_type = NVME_NS_DPS_PI_TYPE1; - else if (bi->profile == &t10_pi_type3_crc) - ns->pi_type = NVME_NS_DPS_PI_TYPE3; else - /* Unsupported metadata type */ - ns->metadata_size = 0; + ns->pi_type = NVME_NS_DPS_PI_TYPE3; + } else { + ns->metadata_size = 0; } } @@ -102,7 +104,7 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns) ns->pi_type = 0; ns->metadata_size = 0; - if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10)) + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) nvmet_bdev_ns_enable_integrity(ns); if (bdev_is_zoned(ns->bdev)) { diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 065db86d6021..37c24ffea65c 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -82,7 +82,6 @@ comment "SCSI support type (disk, tape, CD-ROM)" config BLK_DEV_SD tristate "SCSI disk support" depends on SCSI - select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY help If you want to use SCSI hard disks, Fibre Channel disks, Serial ATA (SATA) or Parallel ATA (PATA) hard disks, diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c index 1df847b5f747..6f0921c7db78 100644 --- a/drivers/scsi/sd_dif.c +++ b/drivers/scsi/sd_dif.c @@ -47,18 +47,13 @@ void sd_dif_config_host(struct scsi_disk *sdkp) memset(&bi, 0, sizeof(bi)); /* Enable DMA of protection information */ - if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP) { - if (type == T10_PI_TYPE3_PROTECTION) - bi.profile = &t10_pi_type3_ip; - else - bi.profile = &t10_pi_type1_ip; + if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP) + bi.csum_type = BLK_INTEGRITY_CSUM_IP; + else + bi.csum_type = BLK_INTEGRITY_CSUM_CRC; - bi.flags |= BLK_INTEGRITY_IP_CHECKSUM; - } else - if (type == T10_PI_TYPE3_PROTECTION) - bi.profile = &t10_pi_type3_crc; - else - bi.profile = &t10_pi_type1_crc; + if (type != T10_PI_TYPE3_PROTECTION) + bi.flags |= BLK_INTEGRITY_REF_TAG; bi.tuple_size = sizeof(struct t10_pi_tuple); @@ -76,7 +71,7 @@ void sd_dif_config_host(struct scsi_disk *sdkp) sd_first_printk(KERN_NOTICE, sdkp, "Enabling DIX %s, application tag size %u bytes\n", - bi.profile->name, bi.tag_size); + blk_integrity_profile_name(&bi), bi.tag_size); out: blk_integrity_register(disk, &bi); } diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 7f6ca8177845..a3e09adc4e76 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -148,35 +148,38 @@ static int iblock_configure_device(struct se_device *dev) dev->dev_attrib.is_nonrot = 1; bi = bdev_get_integrity(bd); - if (bi) { - struct bio_set *bs = &ib_dev->ibd_bio_set; - - if (!strcmp(bi->profile->name, "T10-DIF-TYPE3-IP") || - !strcmp(bi->profile->name, "T10-DIF-TYPE1-IP")) { - pr_err("IBLOCK export of blk_integrity: %s not" - " supported\n", bi->profile->name); - ret = -ENOSYS; - goto out_blkdev_put; - } + if (!bi) + return 0; - if (!strcmp(bi->profile->name, "T10-DIF-TYPE3-CRC")) { - dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE3_PROT; - } else if (!strcmp(bi->profile->name, "T10-DIF-TYPE1-CRC")) { + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_IP: + pr_err("IBLOCK export of blk_integrity: %s not supported\n", + blk_integrity_profile_name(bi)); + ret = -ENOSYS; + goto out_blkdev_put; + case BLK_INTEGRITY_CSUM_CRC: + if (bi->flags & BLK_INTEGRITY_REF_TAG) dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE1_PROT; - } + else + dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE3_PROT; + break; + default: + break; + } - if (dev->dev_attrib.pi_prot_type) { - if (bioset_integrity_create(bs, IBLOCK_BIO_POOL_SIZE) < 0) { - pr_err("Unable to allocate bioset for PI\n"); - ret = -ENOMEM; - goto out_blkdev_put; - } - pr_debug("IBLOCK setup BIP bs->bio_integrity_pool: %p\n", - &bs->bio_integrity_pool); + if (dev->dev_attrib.pi_prot_type) { + struct bio_set *bs = &ib_dev->ibd_bio_set; + + if (bioset_integrity_create(bs, IBLOCK_BIO_POOL_SIZE) < 0) { + pr_err("Unable to allocate bioset for PI\n"); + ret = -ENOMEM; + goto out_blkdev_put; } - dev->dev_attrib.hw_pi_prot_type = dev->dev_attrib.pi_prot_type; + pr_debug("IBLOCK setup BIP bs->bio_integrity_pool: %p\n", + &bs->bio_integrity_pool); } + dev->dev_attrib.hw_pi_prot_type = dev->dev_attrib.pi_prot_type; return 0; out_blkdev_put: diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 7428cb43952d..56ce1ae35580 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -10,7 +10,7 @@ enum blk_integrity_flags { BLK_INTEGRITY_VERIFY = 1 << 0, BLK_INTEGRITY_GENERATE = 1 << 1, BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, - BLK_INTEGRITY_IP_CHECKSUM = 1 << 3, + BLK_INTEGRITY_REF_TAG = 1 << 3, }; struct blk_integrity_iter { @@ -19,22 +19,10 @@ struct blk_integrity_iter { sector_t seed; unsigned int data_size; unsigned short interval; - unsigned char tuple_size; - unsigned char pi_offset; const char *disk_name; }; -typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *); -typedef void (integrity_prepare_fn) (struct request *); -typedef void (integrity_complete_fn) (struct request *, unsigned int); - -struct blk_integrity_profile { - integrity_processing_fn *generate_fn; - integrity_processing_fn *verify_fn; - integrity_prepare_fn *prepare_fn; - integrity_complete_fn *complete_fn; - const char *name; -}; +const char *blk_integrity_profile_name(struct blk_integrity *bi); #ifdef CONFIG_BLK_DEV_INTEGRITY void blk_integrity_register(struct gendisk *, struct blk_integrity *); @@ -44,14 +32,17 @@ int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, struct scatterlist *); int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); -static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) +static inline bool +blk_integrity_queue_supports_integrity(struct request_queue *q) { - struct blk_integrity *bi = &disk->queue->integrity; + return q->integrity.tuple_size; +} - if (!bi->profile) +static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) +{ + if (!blk_integrity_queue_supports_integrity(disk->queue)) return NULL; - - return bi; + return &disk->queue->integrity; } static inline struct blk_integrity * @@ -60,12 +51,6 @@ bdev_get_integrity(struct block_device *bdev) return blk_get_integrity(bdev->bd_disk); } -static inline bool -blk_integrity_queue_supports_integrity(struct request_queue *q) -{ - return q->integrity.profile; -} - static inline unsigned short queue_max_integrity_segments(const struct request_queue *q) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ac8e0cb2353a..bdd33388e1ce 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -105,9 +105,16 @@ enum { struct disk_events; struct badblocks; +enum blk_integrity_checksum { + BLK_INTEGRITY_CSUM_NONE = 0, + BLK_INTEGRITY_CSUM_IP = 1, + BLK_INTEGRITY_CSUM_CRC = 2, + BLK_INTEGRITY_CSUM_CRC64 = 3, +} __packed ; + struct blk_integrity { - const struct blk_integrity_profile *profile; unsigned char flags; + enum blk_integrity_checksum csum_type; unsigned char tuple_size; unsigned char pi_offset; unsigned char interval_exp; diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index 248f4ac95642..d2bafb76badf 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -48,11 +48,6 @@ static inline u32 t10_pi_ref_tag(struct request *rq) return blk_rq_pos(rq) >> (shift - SECTOR_SHIFT) & 0xffffffff; } -extern const struct blk_integrity_profile t10_pi_type1_crc; -extern const struct blk_integrity_profile t10_pi_type1_ip; -extern const struct blk_integrity_profile t10_pi_type3_crc; -extern const struct blk_integrity_profile t10_pi_type3_ip; - struct crc64_pi_tuple { __be64 guard_tag; __be16 app_tag; @@ -79,7 +74,4 @@ static inline u64 ext_pi_ref_tag(struct request *rq) return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT)); } -extern const struct blk_integrity_profile ext_pi_type1_crc64; -extern const struct blk_integrity_profile ext_pi_type3_crc64; - #endif -- cgit v1.2.3 From c6e56cf6b2e79a463af21286ba951714ed20828c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2024 10:48:22 +0200 Subject: block: move integrity information into queue_limits Move the integrity information into the queue limits so that it can be set atomically with other queue limits, and that the sysfs changes to the read_verify and write_generate flags are properly synchronized. This also allows to provide a more useful helper to stack the integrity fields, although it still is separate from the main stacking function as not all stackable devices want to inherit the integrity settings. Even with that it greatly simplifies the code in md and dm. Note that the integrity field is moved as-is into the queue limits. While there are good arguments for removing the separate blk_integrity structure, this would cause a lot of churn and might better be done at a later time if desired. However the integrity field in the queue_limits structure is now unconditional so that various ifdefs can be avoided or replaced with IS_ENABLED(). Given that tiny size of it that seems like a worthwhile trade off. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240613084839.1044015-13-hch@lst.de Signed-off-by: Jens Axboe --- Documentation/block/data-integrity.rst | 49 +--------- block/blk-integrity.c | 124 +++---------------------- block/blk-settings.c | 118 ++++++++++++++++++++++-- block/t10-pi.c | 12 +-- drivers/md/dm-core.h | 1 - drivers/md/dm-integrity.c | 27 +++--- drivers/md/dm-table.c | 161 ++++++--------------------------- drivers/md/md.c | 72 ++++----------- drivers/md/md.h | 5 +- drivers/md/raid0.c | 7 +- drivers/md/raid1.c | 10 +- drivers/md/raid10.c | 10 +- drivers/md/raid5.c | 2 +- drivers/nvdimm/btt.c | 13 +-- drivers/nvme/host/core.c | 70 +++++++------- drivers/scsi/sd.c | 8 +- drivers/scsi/sd.h | 12 +-- drivers/scsi/sd_dif.c | 34 +++---- include/linux/blk-integrity.h | 27 +++--- include/linux/blkdev.h | 12 +-- include/linux/t10-pi.h | 12 +-- 21 files changed, 289 insertions(+), 497 deletions(-) (limited to 'drivers/scsi') diff --git a/Documentation/block/data-integrity.rst b/Documentation/block/data-integrity.rst index 6a760c0eb192..99905e880a0e 100644 --- a/Documentation/block/data-integrity.rst +++ b/Documentation/block/data-integrity.rst @@ -153,18 +153,11 @@ bio_free() will automatically free the bip. 4.2 Block Device ---------------- -Because the format of the protection data is tied to the physical -disk, each block device has been extended with a block integrity -profile (struct blk_integrity). This optional profile is registered -with the block layer using blk_integrity_register(). - -The profile contains callback functions for generating and verifying -the protection data, as well as getting and setting application tags. -The profile also contains a few constants to aid in completing, -merging and splitting the integrity metadata. +Block devices can set up the integrity information in the integrity +sub-struture of the queue_limits structure. Layered block devices will need to pick a profile that's appropriate -for all subdevices. blk_integrity_compare() can help with that. DM +for all subdevices. queue_limits_stack_integrity() can help with that. DM and MD linear, RAID0 and RAID1 are currently supported. RAID4/5/6 will require extra work due to the application tag. @@ -250,42 +243,6 @@ will require extra work due to the application tag. integrity upon completion. -5.4 Registering A Block Device As Capable Of Exchanging Integrity Metadata --------------------------------------------------------------------------- - - To enable integrity exchange on a block device the gendisk must be - registered as capable: - - `int blk_integrity_register(gendisk, blk_integrity);` - - The blk_integrity struct is a template and should contain the - following:: - - static struct blk_integrity my_profile = { - .name = "STANDARDSBODY-TYPE-VARIANT-CSUM", - .generate_fn = my_generate_fn, - .verify_fn = my_verify_fn, - .tuple_size = sizeof(struct my_tuple_size), - .tag_size = , - }; - - 'name' is a text string which will be visible in sysfs. This is - part of the userland API so chose it carefully and never change - it. The format is standards body-type-variant. - E.g. T10-DIF-TYPE1-IP or T13-EPP-0-CRC. - - 'generate_fn' generates appropriate integrity metadata (for WRITE). - - 'verify_fn' verifies that the data buffer matches the integrity - metadata. - - 'tuple_size' must be set to match the size of the integrity - metadata per sector. I.e. 8 for DIF and EPP. - - 'tag_size' must be set to identify how many bytes of tag space - are available per hardware sector. For DIF this is either 2 or - 0 depending on the value of the Control Mode Page ATO bit. - ---------------------------------------------------------------------- 2007-12-24 Martin K. Petersen diff --git a/block/blk-integrity.c b/block/blk-integrity.c index b37b8855eed1..05a48689a424 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -107,63 +107,6 @@ new_segment: } EXPORT_SYMBOL(blk_rq_map_integrity_sg); -/** - * blk_integrity_compare - Compare integrity profile of two disks - * @gd1: Disk to compare - * @gd2: Disk to compare - * - * Description: Meta-devices like DM and MD need to verify that all - * sub-devices use the same integrity format before advertising to - * upper layers that they can send/receive integrity metadata. This - * function can be used to check whether two gendisk devices have - * compatible integrity formats. - */ -int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) -{ - struct blk_integrity *b1 = &gd1->queue->integrity; - struct blk_integrity *b2 = &gd2->queue->integrity; - - if (!b1->tuple_size && !b2->tuple_size) - return 0; - - if (!b1->tuple_size || !b2->tuple_size) - return -1; - - if (b1->interval_exp != b2->interval_exp) { - pr_err("%s: %s/%s protection interval %u != %u\n", - __func__, gd1->disk_name, gd2->disk_name, - 1 << b1->interval_exp, 1 << b2->interval_exp); - return -1; - } - - if (b1->tuple_size != b2->tuple_size) { - pr_err("%s: %s/%s tuple sz %u != %u\n", __func__, - gd1->disk_name, gd2->disk_name, - b1->tuple_size, b2->tuple_size); - return -1; - } - - if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) { - pr_err("%s: %s/%s tag sz %u != %u\n", __func__, - gd1->disk_name, gd2->disk_name, - b1->tag_size, b2->tag_size); - return -1; - } - - if (b1->csum_type != b2->csum_type || - (b1->flags & BLK_INTEGRITY_REF_TAG) != - (b2->flags & BLK_INTEGRITY_REF_TAG)) { - pr_err("%s: %s/%s type %s != %s\n", __func__, - gd1->disk_name, gd2->disk_name, - blk_integrity_profile_name(b1), - blk_integrity_profile_name(b2)); - return -1; - } - - return 0; -} -EXPORT_SYMBOL(blk_integrity_compare); - bool blk_integrity_merge_rq(struct request_queue *q, struct request *req, struct request *next) { @@ -217,7 +160,7 @@ bool blk_integrity_merge_bio(struct request_queue *q, struct request *req, static inline struct blk_integrity *dev_to_bi(struct device *dev) { - return &dev_to_disk(dev)->queue->integrity; + return &dev_to_disk(dev)->queue->limits.integrity; } const char *blk_integrity_profile_name(struct blk_integrity *bi) @@ -246,7 +189,8 @@ EXPORT_SYMBOL_GPL(blk_integrity_profile_name); static ssize_t flag_store(struct device *dev, struct device_attribute *attr, const char *page, size_t count, unsigned char flag) { - struct blk_integrity *bi = dev_to_bi(dev); + struct request_queue *q = dev_to_disk(dev)->queue; + struct queue_limits lim; unsigned long val; int err; @@ -254,11 +198,18 @@ static ssize_t flag_store(struct device *dev, struct device_attribute *attr, if (err) return err; - /* the flags are inverted vs the values in the sysfs files */ + /* note that the flags are inverted vs the values in the sysfs files */ + lim = queue_limits_start_update(q); if (val) - bi->flags &= ~flag; + lim.integrity.flags &= ~flag; else - bi->flags |= flag; + lim.integrity.flags |= flag; + + blk_mq_freeze_queue(q); + err = queue_limits_commit_update(q, &lim); + blk_mq_unfreeze_queue(q); + if (err) + return err; return count; } @@ -355,52 +306,3 @@ const struct attribute_group blk_integrity_attr_group = { .name = "integrity", .attrs = integrity_attrs, }; - -/** - * blk_integrity_register - Register a gendisk as being integrity-capable - * @disk: struct gendisk pointer to make integrity-aware - * @template: block integrity profile to register - * - * Description: When a device needs to advertise itself as being able to - * send/receive integrity metadata it must use this function to register - * the capability with the block layer. The template is a blk_integrity - * struct with values appropriate for the underlying hardware. See - * Documentation/block/data-integrity.rst. - */ -void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) -{ - struct blk_integrity *bi = &disk->queue->integrity; - - bi->csum_type = template->csum_type; - bi->flags = template->flags; - bi->interval_exp = template->interval_exp ? : - ilog2(queue_logical_block_size(disk->queue)); - bi->tuple_size = template->tuple_size; - bi->tag_size = template->tag_size; - bi->pi_offset = template->pi_offset; - -#ifdef CONFIG_BLK_INLINE_ENCRYPTION - if (disk->queue->crypto_profile) { - pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n"); - disk->queue->crypto_profile = NULL; - } -#endif -} -EXPORT_SYMBOL(blk_integrity_register); - -/** - * blk_integrity_unregister - Unregister block integrity profile - * @disk: disk whose integrity profile to unregister - * - * Description: This function unregisters the integrity capability from - * a block device. - */ -void blk_integrity_unregister(struct gendisk *disk) -{ - struct blk_integrity *bi = &disk->queue->integrity; - - if (!bi->tuple_size) - return; - memset(bi, 0, sizeof(*bi)); -} -EXPORT_SYMBOL(blk_integrity_unregister); diff --git a/block/blk-settings.c b/block/blk-settings.c index 996f247fc98e..f11c8676eb4c 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -97,6 +97,36 @@ static int blk_validate_zoned_limits(struct queue_limits *lim) return 0; } +static int blk_validate_integrity_limits(struct queue_limits *lim) +{ + struct blk_integrity *bi = &lim->integrity; + + if (!bi->tuple_size) { + if (bi->csum_type != BLK_INTEGRITY_CSUM_NONE || + bi->tag_size || ((bi->flags & BLK_INTEGRITY_REF_TAG))) { + pr_warn("invalid PI settings.\n"); + return -EINVAL; + } + return 0; + } + + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) { + pr_warn("integrity support disabled.\n"); + return -EINVAL; + } + + if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE && + (bi->flags & BLK_INTEGRITY_REF_TAG)) { + pr_warn("ref tag not support without checksum.\n"); + return -EINVAL; + } + + if (!bi->interval_exp) + bi->interval_exp = ilog2(lim->logical_block_size); + + return 0; +} + /* * Check that the limits in lim are valid, initialize defaults for unset * values, and cap values based on others where needed. @@ -105,6 +135,7 @@ static int blk_validate_limits(struct queue_limits *lim) { unsigned int max_hw_sectors; unsigned int logical_block_sectors; + int err; /* * Unless otherwise specified, default to 512 byte logical blocks and a @@ -230,6 +261,9 @@ static int blk_validate_limits(struct queue_limits *lim) lim->misaligned = 0; } + err = blk_validate_integrity_limits(lim); + if (err) + return err; return blk_validate_zoned_limits(lim); } @@ -263,13 +297,24 @@ int queue_limits_commit_update(struct request_queue *q, struct queue_limits *lim) __releases(q->limits_lock) { - int error = blk_validate_limits(lim); + int error; - if (!error) { - q->limits = *lim; - if (q->disk) - blk_apply_bdi_limits(q->disk->bdi, lim); + error = blk_validate_limits(lim); + if (error) + goto out_unlock; + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + if (q->crypto_profile && lim->integrity.tag_size) { + pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together.\n"); + error = -EINVAL; + goto out_unlock; } +#endif + + q->limits = *lim; + if (q->disk) + blk_apply_bdi_limits(q->disk->bdi, lim); +out_unlock: mutex_unlock(&q->limits_lock); return error; } @@ -575,6 +620,67 @@ void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, } EXPORT_SYMBOL_GPL(queue_limits_stack_bdev); +/** + * queue_limits_stack_integrity - stack integrity profile + * @t: target queue limits + * @b: base queue limits + * + * Check if the integrity profile in the @b can be stacked into the + * target @t. Stacking is possible if either: + * + * a) does not have any integrity information stacked into it yet + * b) the integrity profile in @b is identical to the one in @t + * + * If @b can be stacked into @t, return %true. Else return %false and clear the + * integrity information in @t. + */ +bool queue_limits_stack_integrity(struct queue_limits *t, + struct queue_limits *b) +{ + struct blk_integrity *ti = &t->integrity; + struct blk_integrity *bi = &b->integrity; + + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) + return true; + + if (!ti->tuple_size) { + /* inherit the settings from the first underlying device */ + if (!(ti->flags & BLK_INTEGRITY_STACKED)) { + ti->flags = BLK_INTEGRITY_DEVICE_CAPABLE | + (bi->flags & BLK_INTEGRITY_REF_TAG); + ti->csum_type = bi->csum_type; + ti->tuple_size = bi->tuple_size; + ti->pi_offset = bi->pi_offset; + ti->interval_exp = bi->interval_exp; + ti->tag_size = bi->tag_size; + goto done; + } + if (!bi->tuple_size) + goto done; + } + + if (ti->tuple_size != bi->tuple_size) + goto incompatible; + if (ti->interval_exp != bi->interval_exp) + goto incompatible; + if (ti->tag_size != bi->tag_size) + goto incompatible; + if (ti->csum_type != bi->csum_type) + goto incompatible; + if ((ti->flags & BLK_INTEGRITY_REF_TAG) != + (bi->flags & BLK_INTEGRITY_REF_TAG)) + goto incompatible; + +done: + ti->flags |= BLK_INTEGRITY_STACKED; + return true; + +incompatible: + memset(ti, 0, sizeof(*ti)); + return false; +} +EXPORT_SYMBOL_GPL(queue_limits_stack_integrity); + /** * blk_queue_update_dma_pad - update pad mask * @q: the request queue for the device diff --git a/block/t10-pi.c b/block/t10-pi.c index dadecf621497..cd7fa60d63ff 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -116,7 +116,7 @@ next: */ static void t10_pi_type1_prepare(struct request *rq) { - struct blk_integrity *bi = &rq->q->integrity; + struct blk_integrity *bi = &rq->q->limits.integrity; const int tuple_sz = bi->tuple_size; u32 ref_tag = t10_pi_ref_tag(rq); u8 offset = bi->pi_offset; @@ -167,7 +167,7 @@ static void t10_pi_type1_prepare(struct request *rq) */ static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes) { - struct blk_integrity *bi = &rq->q->integrity; + struct blk_integrity *bi = &rq->q->limits.integrity; unsigned intervals = nr_bytes >> bi->interval_exp; const int tuple_sz = bi->tuple_size; u32 ref_tag = t10_pi_ref_tag(rq); @@ -290,7 +290,7 @@ next: static void ext_pi_type1_prepare(struct request *rq) { - struct blk_integrity *bi = &rq->q->integrity; + struct blk_integrity *bi = &rq->q->limits.integrity; const int tuple_sz = bi->tuple_size; u64 ref_tag = ext_pi_ref_tag(rq); u8 offset = bi->pi_offset; @@ -330,7 +330,7 @@ static void ext_pi_type1_prepare(struct request *rq) static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes) { - struct blk_integrity *bi = &rq->q->integrity; + struct blk_integrity *bi = &rq->q->limits.integrity; unsigned intervals = nr_bytes >> bi->interval_exp; const int tuple_sz = bi->tuple_size; u64 ref_tag = ext_pi_ref_tag(rq); @@ -396,7 +396,7 @@ blk_status_t blk_integrity_verify(struct blk_integrity_iter *iter, void blk_integrity_prepare(struct request *rq) { - struct blk_integrity *bi = &rq->q->integrity; + struct blk_integrity *bi = &rq->q->limits.integrity; if (!(bi->flags & BLK_INTEGRITY_REF_TAG)) return; @@ -409,7 +409,7 @@ void blk_integrity_prepare(struct request *rq) void blk_integrity_complete(struct request *rq, unsigned int nr_bytes) { - struct blk_integrity *bi = &rq->q->integrity; + struct blk_integrity *bi = &rq->q->limits.integrity; if (!(bi->flags & BLK_INTEGRITY_REF_TAG)) return; diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 08700bfc3e23..14a44c0f8286 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -206,7 +206,6 @@ struct dm_table { bool integrity_supported:1; bool singleton:1; - unsigned integrity_added:1; /* * Indicates the rw permissions for the new logical device. This diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index c1cc27541673..2a89f8eb4713 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3475,6 +3475,17 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim limits->dma_alignment = limits->logical_block_size - 1; limits->discard_granularity = ic->sectors_per_block << SECTOR_SHIFT; } + + if (!ic->internal_hash) { + struct blk_integrity *bi = &limits->integrity; + + memset(bi, 0, sizeof(*bi)); + bi->tuple_size = ic->tag_size; + bi->tag_size = bi->tuple_size; + bi->interval_exp = + ic->sb->log2_sectors_per_block + SECTOR_SHIFT; + } + limits->max_integrity_segments = USHRT_MAX; } @@ -3631,19 +3642,6 @@ try_smaller_buffer: return 0; } -static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic) -{ - struct gendisk *disk = dm_disk(dm_table_get_md(ti->table)); - struct blk_integrity bi; - - memset(&bi, 0, sizeof(bi)); - bi.tuple_size = ic->tag_size; - bi.tag_size = bi.tuple_size; - bi.interval_exp = ic->sb->log2_sectors_per_block + SECTOR_SHIFT; - - blk_integrity_register(disk, &bi); -} - static void dm_integrity_free_page_list(struct page_list *pl) { unsigned int i; @@ -4629,9 +4627,6 @@ try_smaller_buffer: } } - if (!ic->internal_hash) - dm_integrity_set(ti, ic); - ti->num_flush_bios = 1; ti->flush_supported = true; if (ic->discard) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index b2d5246cff21..fd789eeb62d9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -425,6 +425,13 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, q->limits.logical_block_size, q->limits.alignment_offset, (unsigned long long) start << SECTOR_SHIFT); + + /* + * Only stack the integrity profile if the target doesn't have native + * integrity support. + */ + if (!dm_target_has_integrity(ti->type)) + queue_limits_stack_integrity_bdev(limits, bdev); return 0; } @@ -702,9 +709,6 @@ int dm_table_add_target(struct dm_table *t, const char *type, t->immutable_target_type = ti->type; } - if (dm_target_has_integrity(ti->type)) - t->integrity_added = 1; - ti->table = t; ti->begin = start; ti->len = len; @@ -1119,99 +1123,6 @@ static int dm_table_build_index(struct dm_table *t) return r; } -static bool integrity_profile_exists(struct gendisk *disk) -{ - return !!blk_get_integrity(disk); -} - -/* - * Get a disk whose integrity profile reflects the table's profile. - * Returns NULL if integrity support was inconsistent or unavailable. - */ -static struct gendisk *dm_table_get_integrity_disk(struct dm_table *t) -{ - struct list_head *devices = dm_table_get_devices(t); - struct dm_dev_internal *dd = NULL; - struct gendisk *prev_disk = NULL, *template_disk = NULL; - - for (unsigned int i = 0; i < t->num_targets; i++) { - struct dm_target *ti = dm_table_get_target(t, i); - - if (!dm_target_passes_integrity(ti->type)) - goto no_integrity; - } - - list_for_each_entry(dd, devices, list) { - template_disk = dd->dm_dev->bdev->bd_disk; - if (!integrity_profile_exists(template_disk)) - goto no_integrity; - else if (prev_disk && - blk_integrity_compare(prev_disk, template_disk) < 0) - goto no_integrity; - prev_disk = template_disk; - } - - return template_disk; - -no_integrity: - if (prev_disk) - DMWARN("%s: integrity not set: %s and %s profile mismatch", - dm_device_name(t->md), - prev_disk->disk_name, - template_disk->disk_name); - return NULL; -} - -/* - * Register the mapped device for blk_integrity support if the - * underlying devices have an integrity profile. But all devices may - * not have matching profiles (checking all devices isn't reliable - * during table load because this table may use other DM device(s) which - * must be resumed before they will have an initialized integity - * profile). Consequently, stacked DM devices force a 2 stage integrity - * profile validation: First pass during table load, final pass during - * resume. - */ -static int dm_table_register_integrity(struct dm_table *t) -{ - struct mapped_device *md = t->md; - struct gendisk *template_disk = NULL; - - /* If target handles integrity itself do not register it here. */ - if (t->integrity_added) - return 0; - - template_disk = dm_table_get_integrity_disk(t); - if (!template_disk) - return 0; - - if (!integrity_profile_exists(dm_disk(md))) { - t->integrity_supported = true; - /* - * Register integrity profile during table load; we can do - * this because the final profile must match during resume. - */ - blk_integrity_register(dm_disk(md), - blk_get_integrity(template_disk)); - return 0; - } - - /* - * If DM device already has an initialized integrity - * profile the new profile should not conflict. - */ - if (blk_integrity_compare(dm_disk(md), template_disk) < 0) { - DMERR("%s: conflict with existing integrity profile: %s profile mismatch", - dm_device_name(t->md), - template_disk->disk_name); - return 1; - } - - /* Preserve existing integrity profile */ - t->integrity_supported = true; - return 0; -} - #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct dm_crypto_profile { @@ -1423,12 +1334,6 @@ int dm_table_complete(struct dm_table *t) return r; } - r = dm_table_register_integrity(t); - if (r) { - DMERR("could not register integrity profile."); - return r; - } - r = dm_table_construct_crypto_profile(t); if (r) { DMERR("could not construct crypto profile."); @@ -1688,6 +1593,14 @@ int dm_calculate_queue_limits(struct dm_table *t, blk_set_stacking_limits(limits); + t->integrity_supported = true; + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); + + if (!dm_target_passes_integrity(ti->type)) + t->integrity_supported = false; + } + for (unsigned int i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); @@ -1738,6 +1651,18 @@ combine_limits: dm_device_name(t->md), (unsigned long long) ti->begin, (unsigned long long) ti->len); + + if (t->integrity_supported || + dm_target_has_integrity(ti->type)) { + if (!queue_limits_stack_integrity(limits, &ti_limits)) { + DMWARN("%s: adding target device (start sect %llu len %llu) " + "disabled integrity support due to incompatibility", + dm_device_name(t->md), + (unsigned long long) ti->begin, + (unsigned long long) ti->len); + t->integrity_supported = false; + } + } } /* @@ -1761,36 +1686,6 @@ combine_limits: return validate_hardware_logical_block_alignment(t, limits); } -/* - * Verify that all devices have an integrity profile that matches the - * DM device's registered integrity profile. If the profiles don't - * match then unregister the DM device's integrity profile. - */ -static void dm_table_verify_integrity(struct dm_table *t) -{ - struct gendisk *template_disk = NULL; - - if (t->integrity_added) - return; - - if (t->integrity_supported) { - /* - * Verify that the original integrity profile - * matches all the devices in this table. - */ - template_disk = dm_table_get_integrity_disk(t); - if (template_disk && - blk_integrity_compare(dm_disk(t->md), template_disk) >= 0) - return; - } - - if (integrity_profile_exists(dm_disk(t->md))) { - DMWARN("%s: unable to establish an integrity profile", - dm_device_name(t->md)); - blk_integrity_unregister(dm_disk(t->md)); - } -} - static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -2004,8 +1899,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - dm_table_verify_integrity(t); - /* * Some devices don't use blk_integrity but still want stable pages * because they do their own checksumming. diff --git a/drivers/md/md.c b/drivers/md/md.c index aff9118ff697..67ece2cd725f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2410,36 +2410,10 @@ static LIST_HEAD(pending_raid_disks); */ int md_integrity_register(struct mddev *mddev) { - struct md_rdev *rdev, *reference = NULL; - if (list_empty(&mddev->disks)) return 0; /* nothing to do */ - if (mddev_is_dm(mddev) || blk_get_integrity(mddev->gendisk)) - return 0; /* shouldn't register, or already is */ - rdev_for_each(rdev, mddev) { - /* skip spares and non-functional disks */ - if (test_bit(Faulty, &rdev->flags)) - continue; - if (rdev->raid_disk < 0) - continue; - if (!reference) { - /* Use the first rdev as the reference */ - reference = rdev; - continue; - } - /* does this rdev's profile match the reference profile? */ - if (blk_integrity_compare(reference->bdev->bd_disk, - rdev->bdev->bd_disk) < 0) - return -EINVAL; - } - if (!reference || !bdev_get_integrity(reference->bdev)) - return 0; - /* - * All component devices are integrity capable and have matching - * profiles, register the common profile for the md device. - */ - blk_integrity_register(mddev->gendisk, - bdev_get_integrity(reference->bdev)); + if (mddev_is_dm(mddev) || !blk_get_integrity(mddev->gendisk)) + return 0; /* shouldn't register */ pr_debug("md: data integrity enabled on %s\n", mdname(mddev)); if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) || @@ -2459,32 +2433,6 @@ int md_integrity_register(struct mddev *mddev) } EXPORT_SYMBOL(md_integrity_register); -/* - * Attempt to add an rdev, but only if it is consistent with the current - * integrity profile - */ -int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev) -{ - struct blk_integrity *bi_mddev; - - if (mddev_is_dm(mddev)) - return 0; - - bi_mddev = blk_get_integrity(mddev->gendisk); - - if (!bi_mddev) /* nothing to do */ - return 0; - - if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) { - pr_err("%s: incompatible integrity profile for %pg\n", - mdname(mddev), rdev->bdev); - return -ENXIO; - } - - return 0; -} -EXPORT_SYMBOL(md_integrity_add_rdev); - static bool rdev_read_only(struct md_rdev *rdev) { return bdev_read_only(rdev->bdev) || @@ -5755,14 +5703,20 @@ static const struct kobj_type md_ktype = { int mdp_major = 0; /* stack the limit for all rdevs into lim */ -void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim) +int mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim, + unsigned int flags) { struct md_rdev *rdev; rdev_for_each(rdev, mddev) { queue_limits_stack_bdev(lim, rdev->bdev, rdev->data_offset, mddev->gendisk->disk_name); + if ((flags & MDDEV_STACK_INTEGRITY) && + !queue_limits_stack_integrity_bdev(lim, rdev->bdev)) + return -EINVAL; } + + return 0; } EXPORT_SYMBOL_GPL(mddev_stack_rdev_limits); @@ -5777,6 +5731,14 @@ int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev) lim = queue_limits_start_update(mddev->gendisk->queue); queue_limits_stack_bdev(&lim, rdev->bdev, rdev->data_offset, mddev->gendisk->disk_name); + + if (!queue_limits_stack_integrity_bdev(&lim, rdev->bdev)) { + pr_err("%s: incompatible integrity profile for %pg\n", + mdname(mddev), rdev->bdev); + queue_limits_cancel_update(mddev->gendisk->queue); + return -ENXIO; + } + return queue_limits_commit_update(mddev->gendisk->queue, &lim); } EXPORT_SYMBOL_GPL(mddev_stack_new_rdev); diff --git a/drivers/md/md.h b/drivers/md/md.h index ca085ecad504..6733b0b0abf9 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -809,7 +809,6 @@ extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev); extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors); extern int md_check_no_bitmap(struct mddev *mddev); extern int md_integrity_register(struct mddev *mddev); -extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev); extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); extern int mddev_init(struct mddev *mddev); @@ -908,7 +907,9 @@ void md_autostart_arrays(int part); int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info); int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info); int do_md_run(struct mddev *mddev); -void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim); +#define MDDEV_STACK_INTEGRITY (1u << 0) +int mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim, + unsigned int flags); int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev); void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 81c01347cd24..62634e2a33bd 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -377,13 +377,18 @@ static void raid0_free(struct mddev *mddev, void *priv) static int raid0_set_limits(struct mddev *mddev) { struct queue_limits lim; + int err; blk_set_stacking_limits(&lim); lim.max_hw_sectors = mddev->chunk_sectors; lim.max_write_zeroes_sectors = mddev->chunk_sectors; lim.io_min = mddev->chunk_sectors << 9; lim.io_opt = lim.io_min * mddev->raid_disks; - mddev_stack_rdev_limits(mddev, &lim); + err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); + if (err) { + queue_limits_cancel_update(mddev->gendisk->queue); + return err; + } return queue_limits_set(mddev->gendisk->queue, &lim); } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 1f321826ef02..779cad62f6f8 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1907,9 +1907,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) if (mddev->recovery_disabled == conf->recovery_disabled) return -EBUSY; - if (md_integrity_add_rdev(rdev, mddev)) - return -ENXIO; - if (rdev->raid_disk >= 0) first = last = rdev->raid_disk; @@ -3197,10 +3194,15 @@ static struct r1conf *setup_conf(struct mddev *mddev) static int raid1_set_limits(struct mddev *mddev) { struct queue_limits lim; + int err; blk_set_stacking_limits(&lim); lim.max_write_zeroes_sectors = 0; - mddev_stack_rdev_limits(mddev, &lim); + err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); + if (err) { + queue_limits_cancel_update(mddev->gendisk->queue); + return err; + } return queue_limits_set(mddev->gendisk->queue, &lim); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index a4556d2e46bf..5f6885b53b69 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2083,9 +2083,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1)) return -EINVAL; - if (md_integrity_add_rdev(rdev, mddev)) - return -ENXIO; - if (rdev->raid_disk >= 0) first = last = rdev->raid_disk; @@ -3980,12 +3977,17 @@ static int raid10_set_queue_limits(struct mddev *mddev) { struct r10conf *conf = mddev->private; struct queue_limits lim; + int err; blk_set_stacking_limits(&lim); lim.max_write_zeroes_sectors = 0; lim.io_min = mddev->chunk_sectors << 9; lim.io_opt = lim.io_min * raid10_nr_stripes(conf); - mddev_stack_rdev_limits(mddev, &lim); + err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); + if (err) { + queue_limits_cancel_update(mddev->gendisk->queue); + return err; + } return queue_limits_set(mddev->gendisk->queue, &lim); } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2bd1ce9b3922..675c68fa6c64 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7708,7 +7708,7 @@ static int raid5_set_limits(struct mddev *mddev) lim.raid_partial_stripes_expensive = 1; lim.discard_granularity = stripe; lim.max_write_zeroes_sectors = 0; - mddev_stack_rdev_limits(mddev, &lim); + mddev_stack_rdev_limits(mddev, &lim, 0); rdev_for_each(rdev, mddev) queue_limits_stack_bdev(&lim, rdev->bdev, rdev->new_data_offset, mddev->gendisk->disk_name); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 1e5aedaf8c7b..c5f8451b494d 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1504,6 +1504,11 @@ static int btt_blk_init(struct btt *btt) }; int rc; + if (btt_meta_size(btt) && IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) { + lim.integrity.tuple_size = btt_meta_size(btt); + lim.integrity.tag_size = btt_meta_size(btt); + } + btt->btt_disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(btt->btt_disk)) return PTR_ERR(btt->btt_disk); @@ -1516,14 +1521,6 @@ static int btt_blk_init(struct btt *btt) blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, btt->btt_disk->queue); - if (btt_meta_size(btt) && IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) { - struct blk_integrity bi = { - .tuple_size = btt_meta_size(btt), - .tag_size = btt_meta_size(btt), - }; - blk_integrity_register(btt->btt_disk, &bi); - } - set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); rc = device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL); if (rc) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 14bac248cde4..5a673fa5cb26 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1723,11 +1723,12 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) +static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head, + struct queue_limits *lim) { - struct blk_integrity integrity = { }; + struct blk_integrity *bi = &lim->integrity; - blk_integrity_unregister(disk); + memset(bi, 0, sizeof(*bi)); if (!head->ms) return true; @@ -1744,14 +1745,14 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) case NVME_NS_DPS_PI_TYPE3: switch (head->guard_type) { case NVME_NVM_NS_16B_GUARD: - integrity.csum_type = BLK_INTEGRITY_CSUM_CRC; - integrity.tag_size = sizeof(u16) + sizeof(u32); - integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + bi->csum_type = BLK_INTEGRITY_CSUM_CRC; + bi->tag_size = sizeof(u16) + sizeof(u32); + bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE; break; case NVME_NVM_NS_64B_GUARD: - integrity.csum_type = BLK_INTEGRITY_CSUM_CRC64; - integrity.tag_size = sizeof(u16) + 6; - integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + bi->csum_type = BLK_INTEGRITY_CSUM_CRC64; + bi->tag_size = sizeof(u16) + 6; + bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE; break; default: break; @@ -1761,16 +1762,16 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) case NVME_NS_DPS_PI_TYPE2: switch (head->guard_type) { case NVME_NVM_NS_16B_GUARD: - integrity.csum_type = BLK_INTEGRITY_CSUM_CRC; - integrity.tag_size = sizeof(u16); - integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE | - BLK_INTEGRITY_REF_TAG; + bi->csum_type = BLK_INTEGRITY_CSUM_CRC; + bi->tag_size = sizeof(u16); + bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE | + BLK_INTEGRITY_REF_TAG; break; case NVME_NVM_NS_64B_GUARD: - integrity.csum_type = BLK_INTEGRITY_CSUM_CRC64; - integrity.tag_size = sizeof(u16); - integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE | - BLK_INTEGRITY_REF_TAG; + bi->csum_type = BLK_INTEGRITY_CSUM_CRC64; + bi->tag_size = sizeof(u16); + bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE | + BLK_INTEGRITY_REF_TAG; break; default: break; @@ -1780,9 +1781,8 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) break; } - integrity.tuple_size = head->ms; - integrity.pi_offset = head->pi_offset; - blk_integrity_register(disk, &integrity); + bi->tuple_size = head->ms; + bi->pi_offset = head->pi_offset; return true; } @@ -2105,11 +2105,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && ns->head->ids.csi == NVME_CSI_ZNS) nvme_update_zone_info(ns, &lim, &zi); - ret = queue_limits_commit_update(ns->disk->queue, &lim); - if (ret) { - blk_mq_unfreeze_queue(ns->disk->queue); - goto out; - } /* * Register a metadata profile for PI, or the plain non-integrity NVMe @@ -2117,9 +2112,15 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, * I/O to namespaces with metadata except when the namespace supports * PI, as it can strip/insert in that case. */ - if (!nvme_init_integrity(ns->disk, ns->head)) + if (!nvme_init_integrity(ns->disk, ns->head, &lim)) capacity = 0; + ret = queue_limits_commit_update(ns->disk->queue, &lim); + if (ret) { + blk_mq_unfreeze_queue(ns->disk->queue); + goto out; + } + set_capacity_and_notify(ns->disk, capacity); /* @@ -2191,14 +2192,6 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) struct queue_limits lim; blk_mq_freeze_queue(ns->head->disk->queue); - if (unsupported) - ns->head->disk->flags |= GENHD_FL_HIDDEN; - else - nvme_init_integrity(ns->head->disk, ns->head); - set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk)); - set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); - nvme_mpath_revalidate_paths(ns); - /* * queue_limits mixes values that are the hardware limitations * for bio splitting with what is the device configuration. @@ -2221,7 +2214,16 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) lim.io_opt = ns_lim->io_opt; queue_limits_stack_bdev(&lim, ns->disk->part0, 0, ns->head->disk->disk_name); + if (unsupported) + ns->head->disk->flags |= GENHD_FL_HIDDEN; + else + nvme_init_integrity(ns->head->disk, ns->head, &lim); ret = queue_limits_commit_update(ns->head->disk->queue, &lim); + + set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk)); + set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); + nvme_mpath_revalidate_paths(ns); + blk_mq_unfreeze_queue(ns->head->disk->queue); } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index d957e29b17a9..e01393ed4207 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2482,11 +2482,13 @@ static int sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer return 0; } -static void sd_config_protection(struct scsi_disk *sdkp) +static void sd_config_protection(struct scsi_disk *sdkp, + struct queue_limits *lim) { struct scsi_device *sdp = sdkp->device; - sd_dif_config_host(sdkp); + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) + sd_dif_config_host(sdkp, lim); if (!sdkp->protection_type) return; @@ -3677,7 +3679,7 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_app_tag_own(sdkp, buffer); sd_read_write_same(sdkp, buffer); sd_read_security(sdkp, buffer); - sd_config_protection(sdkp); + sd_config_protection(sdkp, &lim); } /* diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index b4170b17bad4..726f1613f6cb 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -220,17 +220,7 @@ static inline sector_t sectors_to_logical(struct scsi_device *sdev, sector_t sec return sector >> (ilog2(sdev->sector_size) - 9); } -#ifdef CONFIG_BLK_DEV_INTEGRITY - -extern void sd_dif_config_host(struct scsi_disk *); - -#else /* CONFIG_BLK_DEV_INTEGRITY */ - -static inline void sd_dif_config_host(struct scsi_disk *disk) -{ -} - -#endif /* CONFIG_BLK_DEV_INTEGRITY */ +void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim); static inline int sd_is_zoned(struct scsi_disk *sdkp) { diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c index 6f0921c7db78..ae6ce6f5d622 100644 --- a/drivers/scsi/sd_dif.c +++ b/drivers/scsi/sd_dif.c @@ -24,14 +24,15 @@ /* * Configure exchange of protection information between OS and HBA. */ -void sd_dif_config_host(struct scsi_disk *sdkp) +void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim) { struct scsi_device *sdp = sdkp->device; - struct gendisk *disk = sdkp->disk; u8 type = sdkp->protection_type; - struct blk_integrity bi; + struct blk_integrity *bi = &lim->integrity; int dif, dix; + memset(bi, 0, sizeof(*bi)); + dif = scsi_host_dif_capable(sdp->host, type); dix = scsi_host_dix_capable(sdp->host, type); @@ -39,40 +40,33 @@ void sd_dif_config_host(struct scsi_disk *sdkp) dif = 0; dix = 1; } - if (!dix) { - blk_integrity_unregister(disk); + if (!dix) return; - } - - memset(&bi, 0, sizeof(bi)); /* Enable DMA of protection information */ if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP) - bi.csum_type = BLK_INTEGRITY_CSUM_IP; + bi->csum_type = BLK_INTEGRITY_CSUM_IP; else - bi.csum_type = BLK_INTEGRITY_CSUM_CRC; + bi->csum_type = BLK_INTEGRITY_CSUM_CRC; if (type != T10_PI_TYPE3_PROTECTION) - bi.flags |= BLK_INTEGRITY_REF_TAG; + bi->flags |= BLK_INTEGRITY_REF_TAG; - bi.tuple_size = sizeof(struct t10_pi_tuple); + bi->tuple_size = sizeof(struct t10_pi_tuple); if (dif && type) { - bi.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE; if (!sdkp->ATO) - goto out; + return; if (type == T10_PI_TYPE3_PROTECTION) - bi.tag_size = sizeof(u16) + sizeof(u32); + bi->tag_size = sizeof(u16) + sizeof(u32); else - bi.tag_size = sizeof(u16); + bi->tag_size = sizeof(u16); } sd_first_printk(KERN_NOTICE, sdkp, "Enabling DIX %s, application tag size %u bytes\n", - blk_integrity_profile_name(&bi), bi.tag_size); -out: - blk_integrity_register(disk, &bi); + blk_integrity_profile_name(bi), bi->tag_size); } - diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index bafa01d4e7f9..d201140d77a3 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -11,6 +11,7 @@ enum blk_integrity_flags { BLK_INTEGRITY_NOGENERATE = 1 << 1, BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, BLK_INTEGRITY_REF_TAG = 1 << 3, + BLK_INTEGRITY_STACKED = 1 << 4, }; struct blk_integrity_iter { @@ -23,11 +24,15 @@ struct blk_integrity_iter { }; const char *blk_integrity_profile_name(struct blk_integrity *bi); +bool queue_limits_stack_integrity(struct queue_limits *t, + struct queue_limits *b); +static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t, + struct block_device *bdev) +{ + return queue_limits_stack_integrity(t, &bdev->bd_disk->queue->limits); +} #ifdef CONFIG_BLK_DEV_INTEGRITY -void blk_integrity_register(struct gendisk *, struct blk_integrity *); -void blk_integrity_unregister(struct gendisk *); -int blk_integrity_compare(struct gendisk *, struct gendisk *); int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, struct scatterlist *); int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); @@ -35,14 +40,14 @@ int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); static inline bool blk_integrity_queue_supports_integrity(struct request_queue *q) { - return q->integrity.tuple_size; + return q->limits.integrity.tuple_size; } static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) { if (!blk_integrity_queue_supports_integrity(disk->queue)) return NULL; - return &disk->queue->integrity; + return &disk->queue->limits.integrity; } static inline struct blk_integrity * @@ -119,17 +124,6 @@ blk_integrity_queue_supports_integrity(struct request_queue *q) { return false; } -static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b) -{ - return 0; -} -static inline void blk_integrity_register(struct gendisk *d, - struct blk_integrity *b) -{ -} -static inline void blk_integrity_unregister(struct gendisk *d) -{ -} static inline unsigned short queue_max_integrity_segments(const struct request_queue *q) { @@ -157,4 +151,5 @@ static inline struct bio_vec *rq_integrity_vec(struct request *rq) return NULL; } #endif /* CONFIG_BLK_DEV_INTEGRITY */ + #endif /* _LINUX_BLK_INTEGRITY_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f9089750919c..0c247a716885 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -334,6 +334,8 @@ struct queue_limits { * due to possible offsets. */ unsigned int dma_alignment; + + struct blk_integrity integrity; }; typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, @@ -419,10 +421,6 @@ struct request_queue { struct queue_limits limits; -#ifdef CONFIG_BLK_DEV_INTEGRITY - struct blk_integrity integrity; -#endif /* CONFIG_BLK_DEV_INTEGRITY */ - #ifdef CONFIG_PM struct device *dev; enum rpm_status rpm_status; @@ -1300,11 +1298,9 @@ static inline bool bdev_stable_writes(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); -#ifdef CONFIG_BLK_DEV_INTEGRITY - /* BLK_INTEGRITY_CSUM_NONE is not available in blkdev.h */ - if (q->integrity.csum_type != 0) + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && + q->limits.integrity.csum_type != BLK_INTEGRITY_CSUM_NONE) return true; -#endif return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags); } diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index d2bafb76badf..1773610010eb 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -39,12 +39,8 @@ struct t10_pi_tuple { static inline u32 t10_pi_ref_tag(struct request *rq) { - unsigned int shift = ilog2(queue_logical_block_size(rq->q)); + unsigned int shift = rq->q->limits.integrity.interval_exp; -#ifdef CONFIG_BLK_DEV_INTEGRITY - if (rq->q->integrity.interval_exp) - shift = rq->q->integrity.interval_exp; -#endif return blk_rq_pos(rq) >> (shift - SECTOR_SHIFT) & 0xffffffff; } @@ -65,12 +61,8 @@ static inline u64 lower_48_bits(u64 n) static inline u64 ext_pi_ref_tag(struct request *rq) { - unsigned int shift = ilog2(queue_logical_block_size(rq->q)); + unsigned int shift = rq->q->limits.integrity.interval_exp; -#ifdef CONFIG_BLK_DEV_INTEGRITY - if (rq->q->integrity.interval_exp) - shift = rq->q->integrity.interval_exp; -#endif return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT)); } -- cgit v1.2.3 From be60e7700e6df1e16a2f60f45bece08e6140a46d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:29 +0200 Subject: sd: remove sd_is_zoned Since commit 7437bb73f087 ("block: remove support for the host aware zone model"), only ZBC devices expose a zoned access model. sd_is_zoned is used to check for that and thus return false for host aware devices. Replace the helper with the simple open coded TYPE_ZBC check to fix this. Fixes: 7437bb73f087 ("block: remove support for the host aware zone model") Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20240617060532.127975-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 6 +----- drivers/scsi/sd.h | 5 ----- drivers/scsi/sd_zbc.c | 13 ++++--------- 3 files changed, 5 insertions(+), 19 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index e01393ed4207..664523048ce8 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -466,10 +466,6 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr, if (sdp->type != TYPE_DISK) return -EINVAL; - /* ignore the provisioning mode for ZBC devices */ - if (sd_is_zoned(sdkp)) - return count; - mode = sysfs_match_string(lbp_mode, buf); if (mode < 0) return -EINVAL; @@ -2288,7 +2284,7 @@ static int sd_done(struct scsi_cmnd *SCpnt) } out: - if (sd_is_zoned(sdkp)) + if (sdkp->device->type == TYPE_ZBC) good_bytes = sd_zbc_complete(SCpnt, good_bytes, &sshdr); SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, SCpnt, diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 726f1613f6cb..7603b3c67b23 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -222,11 +222,6 @@ static inline sector_t sectors_to_logical(struct scsi_device *sdev, sector_t sec void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim); -static inline int sd_is_zoned(struct scsi_disk *sdkp) -{ - return sdkp->zoned == 1 || sdkp->device->type == TYPE_ZBC; -} - #ifdef CONFIG_BLK_DEV_ZONED int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index f685838d9ed2..8cc9c0250179 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -232,7 +232,7 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, int zone_idx = 0; int ret; - if (!sd_is_zoned(sdkp)) + if (sdkp->device->type != TYPE_ZBC) /* Not a zoned device */ return -EOPNOTSUPP; @@ -300,7 +300,7 @@ static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd) struct scsi_disk *sdkp = scsi_disk(rq->q->disk); sector_t sector = blk_rq_pos(rq); - if (!sd_is_zoned(sdkp)) + if (sdkp->device->type != TYPE_ZBC) /* Not a zoned device */ return BLK_STS_IOERR; @@ -521,7 +521,7 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf, static void sd_zbc_print_zones(struct scsi_disk *sdkp) { - if (!sd_is_zoned(sdkp) || !sdkp->capacity) + if (sdkp->device->type != TYPE_ZBC || !sdkp->capacity) return; if (sdkp->capacity & (sdkp->zone_info.zone_blocks - 1)) @@ -598,13 +598,8 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, u32 zone_blocks = 0; int ret; - if (!sd_is_zoned(sdkp)) { - /* - * Device managed or normal SCSI disk, no special handling - * required. - */ + if (sdkp->device->type != TYPE_ZBC) return 0; - } /* READ16/WRITE16/SYNC16 is mandatory for ZBC devices */ sdkp->device->use_16_for_rw = 1; -- cgit v1.2.3 From 308ad58af49d6c4c3b7a36b98972cc9db4d7b36a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:30 +0200 Subject: sd: move zone limits setup out of sd_read_block_characteristics Move a bit of code that sets up the zone flag and the write granularity into sd_zbc_read_zones to be with the rest of the zoned limits. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 21 +-------------------- drivers/scsi/sd_zbc.c | 9 +++++++++ 2 files changed, 10 insertions(+), 20 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 664523048ce8..66f7d1e3429c 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3312,29 +3312,10 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp, blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); } - -#ifdef CONFIG_BLK_DEV_ZONED /* sd_probe rejects ZBD devices early otherwise */ - if (sdkp->device->type == TYPE_ZBC) { - lim->zoned = true; - - /* - * Per ZBC and ZAC specifications, writes in sequential write - * required zones of host-managed devices must be aligned to - * the device physical block size. - */ - lim->zone_write_granularity = sdkp->physical_block_size; - } else { - /* - * Host-aware devices are treated as conventional. - */ - lim->zoned = false; - } -#endif /* CONFIG_BLK_DEV_ZONED */ - if (!sdkp->first_scan) return; - if (lim->zoned) + if (sdkp->device->type == TYPE_ZBC) sd_printk(KERN_NOTICE, sdkp, "Host-managed zoned block device\n"); else if (sdkp->zoned == 1) sd_printk(KERN_NOTICE, sdkp, "Host-aware SMR disk used as regular disk\n"); diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 8cc9c0250179..360ec9804995 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -601,6 +601,15 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, if (sdkp->device->type != TYPE_ZBC) return 0; + lim->zoned = true; + + /* + * Per ZBC and ZAC specifications, writes in sequential write required + * zones of host-managed devices must be aligned to the device physical + * block size. + */ + lim->zone_write_granularity = sdkp->physical_block_size; + /* READ16/WRITE16/SYNC16 is mandatory for ZBC devices */ sdkp->device->use_16_for_rw = 1; sdkp->device->use_10_for_rw = 0; -- cgit v1.2.3 From 1122c0c1cc71f740fa4d5f14f239194e06a1d5e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:40 +0200 Subject: block: move cache control settings out of queue->flags Move the cache control settings into the queue_limits so that the flags can be set atomically with the device queue frozen. Add new features and flags field for the driver set flags, and internal (usually sysfs-controlled) flags in the block layer. Note that we'll eventually remove enough field from queue_limits to bring it back to the previous size. The disable flag is inverted compared to the previous meaning, which means it now survives a rescan, similar to the max_sectors and max_discard_sectors user limits. The FLUSH and FUA flags are now inherited by blk_stack_limits, which simplified the code in dm a lot, but also causes a slight behavior change in that dm-switch and dm-unstripe now advertise a write cache despite setting num_flush_bios to 0. The I/O path will handle this gracefully, but as far as I can tell the lack of num_flush_bios and thus flush support is a pre-existing data integrity bug in those targets that really needs fixing, after which a non-zero num_flush_bios should be required in dm for targets that map to underlying devices. Signed-off-by: Christoph Hellwig Acked-by: Ulf Hansson Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-14-hch@lst.de Signed-off-by: Jens Axboe --- Documentation/block/writeback_cache_control.rst | 67 ++++++++++++++----------- arch/um/drivers/ubd_kern.c | 2 +- block/blk-core.c | 2 +- block/blk-flush.c | 9 ++-- block/blk-mq-debugfs.c | 2 - block/blk-settings.c | 29 ++--------- block/blk-sysfs.c | 29 +++++++---- block/blk-wbt.c | 4 +- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/loop.c | 9 ++-- drivers/block/nbd.c | 14 +++--- drivers/block/null_blk/main.c | 12 +++-- drivers/block/ps3disk.c | 7 +-- drivers/block/rnbd/rnbd-clt.c | 10 ++-- drivers/block/ublk_drv.c | 8 ++- drivers/block/virtio_blk.c | 20 ++++++-- drivers/block/xen-blkfront.c | 8 ++- drivers/md/bcache/super.c | 7 +-- drivers/md/dm-table.c | 39 +++----------- drivers/md/md.c | 8 +-- drivers/mmc/core/block.c | 42 +++++++--------- drivers/mmc/core/queue.c | 12 +++-- drivers/mmc/core/queue.h | 3 +- drivers/mtd/mtd_blkdevs.c | 5 +- drivers/nvdimm/pmem.c | 4 +- drivers/nvme/host/core.c | 7 ++- drivers/nvme/host/multipath.c | 6 --- drivers/scsi/sd.c | 28 +++++++---- include/linux/blkdev.h | 38 +++++++++++--- 29 files changed, 227 insertions(+), 206 deletions(-) (limited to 'drivers/scsi') diff --git a/Documentation/block/writeback_cache_control.rst b/Documentation/block/writeback_cache_control.rst index b208488d0aae..c575e08beda8 100644 --- a/Documentation/block/writeback_cache_control.rst +++ b/Documentation/block/writeback_cache_control.rst @@ -46,41 +46,50 @@ worry if the underlying devices need any explicit cache flushing and how the Forced Unit Access is implemented. The REQ_PREFLUSH and REQ_FUA flags may both be set on a single bio. +Feature settings for block drivers +---------------------------------- -Implementation details for bio based block drivers --------------------------------------------------------------- +For devices that do not support volatile write caches there is no driver +support required, the block layer completes empty REQ_PREFLUSH requests before +entering the driver and strips off the REQ_PREFLUSH and REQ_FUA bits from +requests that have a payload. -These drivers will always see the REQ_PREFLUSH and REQ_FUA bits as they sit -directly below the submit_bio interface. For remapping drivers the REQ_FUA -bits need to be propagated to underlying devices, and a global flush needs -to be implemented for bios with the REQ_PREFLUSH bit set. For real device -drivers that do not have a volatile cache the REQ_PREFLUSH and REQ_FUA bits -on non-empty bios can simply be ignored, and REQ_PREFLUSH requests without -data can be completed successfully without doing any work. Drivers for -devices with volatile caches need to implement the support for these -flags themselves without any help from the block layer. +For devices with volatile write caches the driver needs to tell the block layer +that it supports flushing caches by setting the + BLK_FEAT_WRITE_CACHE -Implementation details for request_fn based block drivers ---------------------------------------------------------- +flag in the queue_limits feature field. For devices that also support the FUA +bit the block layer needs to be told to pass on the REQ_FUA bit by also setting +the -For devices that do not support volatile write caches there is no driver -support required, the block layer completes empty REQ_PREFLUSH requests before -entering the driver and strips off the REQ_PREFLUSH and REQ_FUA bits from -requests that have a payload. For devices with volatile write caches the -driver needs to tell the block layer that it supports flushing caches by -doing:: + BLK_FEAT_FUA + +flag in the features field of the queue_limits structure. + +Implementation details for bio based block drivers +-------------------------------------------------- + +For bio based drivers the REQ_PREFLUSH and REQ_FUA bit are simplify passed on +to the driver if the drivers sets the BLK_FEAT_WRITE_CACHE flag and the drivers +needs to handle them. + +*NOTE*: The REQ_FUA bit also gets passed on when the BLK_FEAT_FUA flags is +_not_ set. Any bio based driver that sets BLK_FEAT_WRITE_CACHE also needs to +handle REQ_FUA. - blk_queue_write_cache(sdkp->disk->queue, true, false); +For remapping drivers the REQ_FUA bits need to be propagated to underlying +devices, and a global flush needs to be implemented for bios with the +REQ_PREFLUSH bit set. -and handle empty REQ_OP_FLUSH requests in its prep_fn/request_fn. Note that -REQ_PREFLUSH requests with a payload are automatically turned into a sequence -of an empty REQ_OP_FLUSH request followed by the actual write by the block -layer. For devices that also support the FUA bit the block layer needs -to be told to pass through the REQ_FUA bit using:: +Implementation details for blk-mq drivers +----------------------------------------- - blk_queue_write_cache(sdkp->disk->queue, true, true); +When the BLK_FEAT_WRITE_CACHE flag is set, REQ_OP_WRITE | REQ_PREFLUSH requests +with a payload are automatically turned into a sequence of a REQ_OP_FLUSH +request followed by the actual write by the block layer. -and the driver must handle write requests that have the REQ_FUA bit set -in prep_fn/request_fn. If the FUA bit is not natively supported the block -layer turns it into an empty REQ_OP_FLUSH request after the actual write. +When the BLK_FEAT_FUA flags is set, the REQ_FUA bit simplify passed on for the +REQ_OP_WRITE request, else a REQ_OP_FLUSH request is sent by the block layer +after the completion of the write request for bio submissions with the REQ_FUA +bit set. diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index cdcb75a68989..19e01691ea0e 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -835,6 +835,7 @@ static int ubd_add(int n, char **error_out) struct queue_limits lim = { .max_segments = MAX_SG, .seg_boundary_mask = PAGE_SIZE - 1, + .features = BLK_FEAT_WRITE_CACHE, }; struct gendisk *disk; int err = 0; @@ -882,7 +883,6 @@ static int ubd_add(int n, char **error_out) } blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - blk_queue_write_cache(disk->queue, true, false); disk->major = UBD_MAJOR; disk->first_minor = n << UBD_SHIFT; disk->minors = 1 << UBD_SHIFT; diff --git a/block/blk-core.c b/block/blk-core.c index 82c3ae22d76d..2b45a4df9a1a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -782,7 +782,7 @@ void submit_bio_noacct(struct bio *bio) if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_WRITE && bio_op(bio) != REQ_OP_ZONE_APPEND)) goto end_io; - if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { + if (!bdev_write_cache(bdev)) { bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); if (!bio_sectors(bio)) { status = BLK_STS_OK; diff --git a/block/blk-flush.c b/block/blk-flush.c index 2234f8b3fc05..30b9d5033a2b 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -381,8 +381,8 @@ static void blk_rq_init_flush(struct request *rq) bool blk_insert_flush(struct request *rq) { struct request_queue *q = rq->q; - unsigned long fflags = q->queue_flags; /* may change, cache */ struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx); + bool supports_fua = q->limits.features & BLK_FEAT_FUA; unsigned int policy = 0; /* FLUSH/FUA request must never be merged */ @@ -394,11 +394,10 @@ bool blk_insert_flush(struct request *rq) /* * Check which flushes we need to sequence for this operation. */ - if (fflags & (1UL << QUEUE_FLAG_WC)) { + if (blk_queue_write_cache(q)) { if (rq->cmd_flags & REQ_PREFLUSH) policy |= REQ_FSEQ_PREFLUSH; - if (!(fflags & (1UL << QUEUE_FLAG_FUA)) && - (rq->cmd_flags & REQ_FUA)) + if ((rq->cmd_flags & REQ_FUA) && !supports_fua) policy |= REQ_FSEQ_POSTFLUSH; } @@ -407,7 +406,7 @@ bool blk_insert_flush(struct request *rq) * REQ_PREFLUSH and FUA for the driver. */ rq->cmd_flags &= ~REQ_PREFLUSH; - if (!(fflags & (1UL << QUEUE_FLAG_FUA))) + if (!supports_fua) rq->cmd_flags &= ~REQ_FUA; /* diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 770c0c2b72fa..e8b9db7c30c4 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -93,8 +93,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(INIT_DONE), QUEUE_FLAG_NAME(STABLE_WRITES), QUEUE_FLAG_NAME(POLL), - QUEUE_FLAG_NAME(WC), - QUEUE_FLAG_NAME(FUA), QUEUE_FLAG_NAME(DAX), QUEUE_FLAG_NAME(STATS), QUEUE_FLAG_NAME(REGISTERED), diff --git a/block/blk-settings.c b/block/blk-settings.c index f11c8676eb4c..536ee202fcdc 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -261,6 +261,9 @@ static int blk_validate_limits(struct queue_limits *lim) lim->misaligned = 0; } + if (!(lim->features & BLK_FEAT_WRITE_CACHE)) + lim->features &= ~BLK_FEAT_FUA; + err = blk_validate_integrity_limits(lim); if (err) return err; @@ -454,6 +457,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, { unsigned int top, bottom, alignment, ret = 0; + t->features |= (b->features & BLK_FEAT_INHERIT_MASK); + t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_user_sectors = min_not_zero(t->max_user_sectors, b->max_user_sectors); @@ -711,30 +716,6 @@ void blk_set_queue_depth(struct request_queue *q, unsigned int depth) } EXPORT_SYMBOL(blk_set_queue_depth); -/** - * blk_queue_write_cache - configure queue's write cache - * @q: the request queue for the device - * @wc: write back cache on or off - * @fua: device supports FUA writes, if true - * - * Tell the block layer about the write cache of @q. - */ -void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua) -{ - if (wc) { - blk_queue_flag_set(QUEUE_FLAG_HW_WC, q); - blk_queue_flag_set(QUEUE_FLAG_WC, q); - } else { - blk_queue_flag_clear(QUEUE_FLAG_HW_WC, q); - blk_queue_flag_clear(QUEUE_FLAG_WC, q); - } - if (fua) - blk_queue_flag_set(QUEUE_FLAG_FUA, q); - else - blk_queue_flag_clear(QUEUE_FLAG_FUA, q); -} -EXPORT_SYMBOL_GPL(blk_queue_write_cache); - int bdev_alignment_offset(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 5c787965b7d0..4f524c1d5e08 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -423,32 +423,41 @@ static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page, static ssize_t queue_wc_show(struct request_queue *q, char *page) { - if (test_bit(QUEUE_FLAG_WC, &q->queue_flags)) - return sprintf(page, "write back\n"); - - return sprintf(page, "write through\n"); + if (q->limits.features & BLK_FLAGS_WRITE_CACHE_DISABLED) + return sprintf(page, "write through\n"); + return sprintf(page, "write back\n"); } static ssize_t queue_wc_store(struct request_queue *q, const char *page, size_t count) { + struct queue_limits lim; + bool disable; + int err; + if (!strncmp(page, "write back", 10)) { - if (!test_bit(QUEUE_FLAG_HW_WC, &q->queue_flags)) - return -EINVAL; - blk_queue_flag_set(QUEUE_FLAG_WC, q); + disable = false; } else if (!strncmp(page, "write through", 13) || - !strncmp(page, "none", 4)) { - blk_queue_flag_clear(QUEUE_FLAG_WC, q); + !strncmp(page, "none", 4)) { + disable = true; } else { return -EINVAL; } + lim = queue_limits_start_update(q); + if (disable) + lim.flags |= BLK_FLAGS_WRITE_CACHE_DISABLED; + else + lim.flags &= ~BLK_FLAGS_WRITE_CACHE_DISABLED; + err = queue_limits_commit_update(q, &lim); + if (err) + return err; return count; } static ssize_t queue_fua_show(struct request_queue *q, char *page) { - return sprintf(page, "%u\n", test_bit(QUEUE_FLAG_FUA, &q->queue_flags)); + return sprintf(page, "%u\n", !!(q->limits.features & BLK_FEAT_FUA)); } static ssize_t queue_dax_show(struct request_queue *q, char *page) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 64472134dd26..1a5e4b049ecd 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -206,8 +206,8 @@ static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw, */ if (wb_acct & WBT_DISCARD) limit = rwb->wb_background; - else if (test_bit(QUEUE_FLAG_WC, &rwb->rqos.disk->queue->queue_flags) && - !wb_recent_wait(rwb)) + else if (blk_queue_write_cache(rwb->rqos.disk->queue) && + !wb_recent_wait(rwb)) limit = 0; else limit = rwb->wb_normal; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 113b441d4d36..bf42a46781fa 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2697,6 +2697,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig * connect. */ .max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8, + .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA, }; device = minor_to_device(minor); @@ -2736,7 +2737,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig disk->private_data = device; blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); - blk_queue_write_cache(disk->queue, true, true); device->md_io.page = alloc_page(GFP_KERNEL); if (!device->md_io.page) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 8991de8fb1bb..08d0fc7f17b7 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -985,6 +985,9 @@ static int loop_reconfigure_limits(struct loop_device *lo, unsigned short bsize) lim.logical_block_size = bsize; lim.physical_block_size = bsize; lim.io_min = bsize; + lim.features &= ~BLK_FEAT_WRITE_CACHE; + if (file->f_op->fsync && !(lo->lo_flags & LO_FLAGS_READ_ONLY)) + lim.features |= BLK_FEAT_WRITE_CACHE; if (!backing_bdev || bdev_nonrot(backing_bdev)) blk_queue_flag_set(QUEUE_FLAG_NONROT, lo->lo_queue); else @@ -1078,9 +1081,6 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, lo->old_gfp_mask = mapping_gfp_mask(mapping); mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); - if (!(lo->lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) - blk_queue_write_cache(lo->lo_queue, true, false); - error = loop_reconfigure_limits(lo, config->block_size); if (WARN_ON_ONCE(error)) goto out_unlock; @@ -1131,9 +1131,6 @@ static void __loop_clr_fd(struct loop_device *lo, bool release) struct file *filp; gfp_t gfp = lo->old_gfp_mask; - if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags)) - blk_queue_write_cache(lo->lo_queue, false, false); - /* * Freeze the request queue when unbinding on a live file descriptor and * thus an open device. When called from ->release we are guaranteed diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 44b8c671921e..cb1c86a6a3fb 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -342,12 +342,14 @@ static int __nbd_set_size(struct nbd_device *nbd, loff_t bytesize, lim.max_hw_discard_sectors = UINT_MAX; else lim.max_hw_discard_sectors = 0; - if (!(nbd->config->flags & NBD_FLAG_SEND_FLUSH)) - blk_queue_write_cache(nbd->disk->queue, false, false); - else if (nbd->config->flags & NBD_FLAG_SEND_FUA) - blk_queue_write_cache(nbd->disk->queue, true, true); - else - blk_queue_write_cache(nbd->disk->queue, true, false); + if (!(nbd->config->flags & NBD_FLAG_SEND_FLUSH)) { + lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA); + } else if (nbd->config->flags & NBD_FLAG_SEND_FUA) { + lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA; + } else { + lim.features |= BLK_FEAT_WRITE_CACHE; + lim.features &= ~BLK_FEAT_FUA; + } lim.logical_block_size = blksize; lim.physical_block_size = blksize; error = queue_limits_commit_update(nbd->disk->queue, &lim); diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 75f189e42f88..21f9d256e884 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1928,6 +1928,13 @@ static int null_add_dev(struct nullb_device *dev) goto out_cleanup_tags; } + if (dev->cache_size > 0) { + set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags); + lim.features |= BLK_FEAT_WRITE_CACHE; + if (dev->fua) + lim.features |= BLK_FEAT_FUA; + } + nullb->disk = blk_mq_alloc_disk(nullb->tag_set, &lim, nullb); if (IS_ERR(nullb->disk)) { rv = PTR_ERR(nullb->disk); @@ -1940,11 +1947,6 @@ static int null_add_dev(struct nullb_device *dev) nullb_setup_bwtimer(nullb); } - if (dev->cache_size > 0) { - set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags); - blk_queue_write_cache(nullb->q, true, dev->fua); - } - nullb->q->queuedata = nullb; blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index b810ac0a5c4b..8b73cf459b59 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -388,9 +388,8 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) .max_segments = -1, .max_segment_size = dev->bounce_size, .dma_alignment = dev->blk_size - 1, + .features = BLK_FEAT_WRITE_CACHE, }; - - struct request_queue *queue; struct gendisk *gendisk; if (dev->blk_size < 512) { @@ -447,10 +446,6 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) goto fail_free_tag_set; } - queue = gendisk->queue; - - blk_queue_write_cache(queue, true, false); - priv->gendisk = gendisk; gendisk->major = ps3disk_major; gendisk->first_minor = devidx * PS3DISK_MINORS; diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index b7ffe03c6160..02c4b1731827 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1389,6 +1389,12 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev, le32_to_cpu(rsp->max_discard_sectors); } + if (rsp->cache_policy & RNBD_WRITEBACK) { + lim.features |= BLK_FEAT_WRITE_CACHE; + if (rsp->cache_policy & RNBD_FUA) + lim.features |= BLK_FEAT_FUA; + } + dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, &lim, dev); if (IS_ERR(dev->gd)) return PTR_ERR(dev->gd); @@ -1397,10 +1403,6 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev, blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue); blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue); - blk_queue_write_cache(dev->queue, - !!(rsp->cache_policy & RNBD_WRITEBACK), - !!(rsp->cache_policy & RNBD_FUA)); - return rnbd_clt_setup_gen_disk(dev, rsp, idx); } diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 4e159948c912..e45c65c1848d 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -487,8 +487,6 @@ static void ublk_dev_param_basic_apply(struct ublk_device *ub) struct request_queue *q = ub->ub_disk->queue; const struct ublk_param_basic *p = &ub->params.basic; - blk_queue_write_cache(q, p->attrs & UBLK_ATTR_VOLATILE_CACHE, - p->attrs & UBLK_ATTR_FUA); if (p->attrs & UBLK_ATTR_ROTATIONAL) blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); else @@ -2210,6 +2208,12 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) lim.max_zone_append_sectors = p->max_zone_append_sectors; } + if (ub->params.basic.attrs & UBLK_ATTR_VOLATILE_CACHE) { + lim.features |= BLK_FEAT_WRITE_CACHE; + if (ub->params.basic.attrs & UBLK_ATTR_FUA) + lim.features |= BLK_FEAT_FUA; + } + if (wait_for_completion_interruptible(&ub->completion) != 0) return -EINTR; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 378b241911ca..b1a3c2935285 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1100,6 +1100,7 @@ cache_type_store(struct device *dev, struct device_attribute *attr, struct gendisk *disk = dev_to_disk(dev); struct virtio_blk *vblk = disk->private_data; struct virtio_device *vdev = vblk->vdev; + struct queue_limits lim; int i; BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE)); @@ -1108,7 +1109,17 @@ cache_type_store(struct device *dev, struct device_attribute *attr, return i; virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i); - blk_queue_write_cache(disk->queue, virtblk_get_cache_mode(vdev), false); + + lim = queue_limits_start_update(disk->queue); + if (virtblk_get_cache_mode(vdev)) + lim.features |= BLK_FEAT_WRITE_CACHE; + else + lim.features &= ~BLK_FEAT_WRITE_CACHE; + blk_mq_freeze_queue(disk->queue); + i = queue_limits_commit_update(disk->queue, &lim); + blk_mq_unfreeze_queue(disk->queue); + if (i) + return i; return count; } @@ -1504,6 +1515,9 @@ static int virtblk_probe(struct virtio_device *vdev) if (err) goto out_free_tags; + if (virtblk_get_cache_mode(vdev)) + lim.features |= BLK_FEAT_WRITE_CACHE; + vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, &lim, vblk); if (IS_ERR(vblk->disk)) { err = PTR_ERR(vblk->disk); @@ -1519,10 +1533,6 @@ static int virtblk_probe(struct virtio_device *vdev) vblk->disk->fops = &virtblk_fops; vblk->index = index; - /* configure queue flush support */ - blk_queue_write_cache(vblk->disk->queue, virtblk_get_cache_mode(vdev), - false); - /* If disk is read-only in the host, the guest should obey */ if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) set_disk_ro(vblk->disk, 1); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 851b03844edd..9aafce3e5987 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -959,6 +959,12 @@ static void blkif_set_queue_limits(const struct blkfront_info *info, lim->max_secure_erase_sectors = UINT_MAX; } + if (info->feature_flush) { + lim->features |= BLK_FEAT_WRITE_CACHE; + if (info->feature_fua) + lim->features |= BLK_FEAT_FUA; + } + /* Hard sector size and max sectors impersonate the equiv. hardware. */ lim->logical_block_size = info->sector_size; lim->physical_block_size = info->physical_sector_size; @@ -987,8 +993,6 @@ static const char *flush_info(struct blkfront_info *info) static void xlvbd_flush(struct blkfront_info *info) { - blk_queue_write_cache(info->rq, info->feature_flush ? true : false, - info->feature_fua ? true : false); pr_info("blkfront: %s: %s %s %s %s %s %s %s\n", info->gd->disk_name, flush_info(info), "persistent grants:", info->feature_persistent ? diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 4d11fc664cb0..cb6595c8b551 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -897,7 +897,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, sector_t sectors, struct block_device *cached_bdev, const struct block_device_operations *ops) { - struct request_queue *q; const size_t max_stripes = min_t(size_t, INT_MAX, SIZE_MAX / sizeof(atomic_t)); struct queue_limits lim = { @@ -909,6 +908,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, .io_min = block_size, .logical_block_size = block_size, .physical_block_size = block_size, + .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA, }; uint64_t n; int idx; @@ -975,12 +975,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, d->disk->fops = ops; d->disk->private_data = d; - q = d->disk->queue; - blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue); - - blk_queue_write_cache(q, true, true); - return 0; out_bioset_exit: diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index fd789eeb62d9..03abdae64682 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1686,34 +1686,16 @@ combine_limits: return validate_hardware_logical_block_alignment(t, limits); } -static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - unsigned long flush = (unsigned long) data; - struct request_queue *q = bdev_get_queue(dev->bdev); - - return (q->queue_flags & flush); -} - -static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush) +/* + * Check if a target requires flush support even if none of the underlying + * devices need it (e.g. to persist target-specific metadata). + */ +static bool dm_table_supports_flush(struct dm_table *t) { - /* - * Require at least one underlying device to support flushes. - * t->devices includes internal dm devices such as mirror logs - * so we need to use iterate_devices here, which targets - * supporting flushes must provide. - */ for (unsigned int i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); - if (!ti->num_flush_bios) - continue; - - if (ti->flush_supported) - return true; - - if (ti->type->iterate_devices && - ti->type->iterate_devices(ti, device_flush_capable, (void *) flush)) + if (ti->num_flush_bios && ti->flush_supported) return true; } @@ -1855,7 +1837,6 @@ static int device_requires_stable_pages(struct dm_target *ti, int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { - bool wc = false, fua = false; int r; if (dm_table_supports_nowait(t)) @@ -1876,12 +1857,8 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (!dm_table_supports_secure_erase(t)) limits->max_secure_erase_sectors = 0; - if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) { - wc = true; - if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_FUA))) - fua = true; - } - blk_queue_write_cache(q, wc, fua); + if (dm_table_supports_flush(t)) + limits->features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA; if (dm_table_supports_dax(t, device_not_dax_capable)) { blk_queue_flag_set(QUEUE_FLAG_DAX, q); diff --git a/drivers/md/md.c b/drivers/md/md.c index 67ece2cd725f..2f4c5d1755d8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5785,7 +5785,10 @@ struct mddev *md_alloc(dev_t dev, char *name) int partitioned; int shift; int unit; - int error ; + int error; + struct queue_limits lim = { + .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA, + }; /* * Wait for any previous instance of this device to be completely @@ -5825,7 +5828,7 @@ struct mddev *md_alloc(dev_t dev, char *name) */ mddev->hold_active = UNTIL_STOP; - disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(disk)) { error = PTR_ERR(disk); goto out_free_mddev; @@ -5843,7 +5846,6 @@ struct mddev *md_alloc(dev_t dev, char *name) disk->fops = &md_fops; disk->private_data = mddev; - blk_queue_write_cache(disk->queue, true, true); disk->events |= DISK_EVENT_MEDIA_CHANGE; mddev->gendisk = disk; error = add_disk(disk); diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 367509b5b646..2c9963248fcb 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2466,8 +2466,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, struct mmc_blk_data *md; int devidx, ret; char cap_str[10]; - bool cache_enabled = false; - bool fua_enabled = false; + unsigned int features = 0; devidx = ida_alloc_max(&mmc_blk_ida, max_devices - 1, GFP_KERNEL); if (devidx < 0) { @@ -2499,7 +2498,24 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, */ md->read_only = mmc_blk_readonly(card); - md->disk = mmc_init_queue(&md->queue, card); + if (mmc_host_cmd23(card->host)) { + if ((mmc_card_mmc(card) && + card->csd.mmca_vsn >= CSD_SPEC_VER_3) || + (mmc_card_sd(card) && + card->scr.cmds & SD_SCR_CMD23_SUPPORT)) + md->flags |= MMC_BLK_CMD23; + } + + if (md->flags & MMC_BLK_CMD23 && + ((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) || + card->ext_csd.rel_sectors)) { + md->flags |= MMC_BLK_REL_WR; + features |= (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA); + } else if (mmc_cache_enabled(card->host)) { + features |= BLK_FEAT_WRITE_CACHE; + } + + md->disk = mmc_init_queue(&md->queue, card, features); if (IS_ERR(md->disk)) { ret = PTR_ERR(md->disk); goto err_kfree; @@ -2539,26 +2555,6 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, set_capacity(md->disk, size); - if (mmc_host_cmd23(card->host)) { - if ((mmc_card_mmc(card) && - card->csd.mmca_vsn >= CSD_SPEC_VER_3) || - (mmc_card_sd(card) && - card->scr.cmds & SD_SCR_CMD23_SUPPORT)) - md->flags |= MMC_BLK_CMD23; - } - - if (md->flags & MMC_BLK_CMD23 && - ((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) || - card->ext_csd.rel_sectors)) { - md->flags |= MMC_BLK_REL_WR; - fua_enabled = true; - cache_enabled = true; - } - if (mmc_cache_enabled(card->host)) - cache_enabled = true; - - blk_queue_write_cache(md->queue.queue, cache_enabled, fua_enabled); - string_get_size((u64)size, 512, STRING_UNITS_2, cap_str, sizeof(cap_str)); pr_info("%s: %s %s %s%s\n", diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 241cdc2b2a2a..97ff993d3157 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -344,10 +344,12 @@ static const struct blk_mq_ops mmc_mq_ops = { }; static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq, - struct mmc_card *card) + struct mmc_card *card, unsigned int features) { struct mmc_host *host = card->host; - struct queue_limits lim = { }; + struct queue_limits lim = { + .features = features, + }; struct gendisk *disk; if (mmc_can_erase(card)) @@ -413,10 +415,12 @@ static inline bool mmc_merge_capable(struct mmc_host *host) * mmc_init_queue - initialise a queue structure. * @mq: mmc queue * @card: mmc card to attach this queue + * @features: block layer features (BLK_FEAT_*) * * Initialise a MMC card request queue. */ -struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card) +struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, + unsigned int features) { struct mmc_host *host = card->host; struct gendisk *disk; @@ -460,7 +464,7 @@ struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card) return ERR_PTR(ret); - disk = mmc_alloc_disk(mq, card); + disk = mmc_alloc_disk(mq, card, features); if (IS_ERR(disk)) blk_mq_free_tag_set(&mq->tag_set); return disk; diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index 9ade3bcbb714..1498840a4ea0 100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -94,7 +94,8 @@ struct mmc_queue { struct work_struct complete_work; }; -struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card); +struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, + unsigned int features); extern void mmc_cleanup_queue(struct mmc_queue *); extern void mmc_queue_suspend(struct mmc_queue *); extern void mmc_queue_resume(struct mmc_queue *); diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 3caa0717d46c..1b9f57f231e8 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -336,6 +336,8 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) lim.logical_block_size = tr->blksize; if (tr->discard) lim.max_hw_discard_sectors = UINT_MAX; + if (tr->flush) + lim.features |= BLK_FEAT_WRITE_CACHE; /* Create gendisk */ gd = blk_mq_alloc_disk(new->tag_set, &lim, new); @@ -373,9 +375,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) spin_lock_init(&new->queue_lock); INIT_LIST_HEAD(&new->rq_list); - if (tr->flush) - blk_queue_write_cache(new->rq, true, false); - blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 598fe2e89bda..aff818469c11 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -455,6 +455,7 @@ static int pmem_attach_disk(struct device *dev, .logical_block_size = pmem_sector_size(ndns), .physical_block_size = PAGE_SIZE, .max_hw_sectors = UINT_MAX, + .features = BLK_FEAT_WRITE_CACHE, }; int nid = dev_to_node(dev), fua; struct resource *res = &nsio->res; @@ -495,6 +496,8 @@ static int pmem_attach_disk(struct device *dev, dev_warn(dev, "unable to guarantee persistence of writes\n"); fua = 0; } + if (fua) + lim.features |= BLK_FEAT_FUA; if (!devm_request_mem_region(dev, res->start, resource_size(res), dev_name(&ndns->dev))) { @@ -543,7 +546,6 @@ static int pmem_attach_disk(struct device *dev, } pmem->virt_addr = addr; - blk_queue_write_cache(q, true, fua); blk_queue_flag_set(QUEUE_FLAG_NONROT, q); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, q); if (pmem->pfn_flags & PFN_MAP) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5a673fa5cb26..9fc5e36fe2e5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2056,7 +2056,6 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns, static int nvme_update_ns_info_block(struct nvme_ns *ns, struct nvme_ns_info *info) { - bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT; struct queue_limits lim; struct nvme_id_ns_nvm *nvm = NULL; struct nvme_zone_info zi = {}; @@ -2106,6 +2105,11 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, ns->head->ids.csi == NVME_CSI_ZNS) nvme_update_zone_info(ns, &lim, &zi); + if (ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT) + lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA; + else + lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA); + /* * Register a metadata profile for PI, or the plain non-integrity NVMe * metadata masquerading as Type 0 if supported, otherwise reject block @@ -2132,7 +2136,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3))) ns->head->features |= NVME_NS_DEAC; set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); - blk_queue_write_cache(ns->disk->queue, vwc, vwc); set_bit(NVME_NS_READY, &ns->flags); blk_mq_unfreeze_queue(ns->disk->queue); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 12c59db02539..3d0e23a0a4dd 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -521,7 +521,6 @@ static void nvme_requeue_work(struct work_struct *work) int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) { struct queue_limits lim; - bool vwc = false; mutex_init(&head->lock); bio_list_init(&head->requeue_list); @@ -562,11 +561,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) if (ctrl->tagset->nr_maps > HCTX_TYPE_POLL && ctrl->tagset->map[HCTX_TYPE_POLL].nr_queues) blk_queue_flag_set(QUEUE_FLAG_POLL, head->disk->queue); - - /* we need to propagate up the VMC settings */ - if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) - vwc = true; - blk_queue_write_cache(head->disk->queue, vwc, vwc); return 0; } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 66f7d1e3429c..d8ee4a4d4a62 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -120,17 +120,18 @@ static const char *sd_cache_types[] = { "write back, no read (daft)" }; -static void sd_set_flush_flag(struct scsi_disk *sdkp) +static void sd_set_flush_flag(struct scsi_disk *sdkp, + struct queue_limits *lim) { - bool wc = false, fua = false; - if (sdkp->WCE) { - wc = true; + lim->features |= BLK_FEAT_WRITE_CACHE; if (sdkp->DPOFUA) - fua = true; + lim->features |= BLK_FEAT_FUA; + else + lim->features &= ~BLK_FEAT_FUA; + } else { + lim->features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA); } - - blk_queue_write_cache(sdkp->disk->queue, wc, fua); } static ssize_t @@ -168,9 +169,18 @@ cache_type_store(struct device *dev, struct device_attribute *attr, wce = (ct & 0x02) && !sdkp->write_prot ? 1 : 0; if (sdkp->cache_override) { + struct queue_limits lim; + sdkp->WCE = wce; sdkp->RCD = rcd; - sd_set_flush_flag(sdkp); + + lim = queue_limits_start_update(sdkp->disk->queue); + sd_set_flush_flag(sdkp, &lim); + blk_mq_freeze_queue(sdkp->disk->queue); + ret = queue_limits_commit_update(sdkp->disk->queue, &lim); + blk_mq_unfreeze_queue(sdkp->disk->queue); + if (ret) + return ret; return count; } @@ -3663,7 +3673,7 @@ static int sd_revalidate_disk(struct gendisk *disk) * We now have all cache related info, determine how we deal * with flush requests. */ - sd_set_flush_flag(sdkp); + sd_set_flush_flag(sdkp, &lim); /* Initial block count limit based on CDB TRANSFER LENGTH field size. */ dev_max = sdp->use_16_for_rw ? SD_MAX_XFER_BLOCKS : SD_DEF_XFER_BLOCKS; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0c247a716885..acdfe5122faa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -282,6 +282,28 @@ static inline bool blk_op_is_passthrough(blk_opf_t op) return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; } +/* flags set by the driver in queue_limits.features */ +enum { + /* supports a volatile write cache */ + BLK_FEAT_WRITE_CACHE = (1u << 0), + + /* supports passing on the FUA bit */ + BLK_FEAT_FUA = (1u << 1), +}; + +/* + * Flags automatically inherited when stacking limits. + */ +#define BLK_FEAT_INHERIT_MASK \ + (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA) + + +/* internal flags in queue_limits.flags */ +enum { + /* do not send FLUSH or FUA command despite advertised write cache */ + BLK_FLAGS_WRITE_CACHE_DISABLED = (1u << 31), +}; + /* * BLK_BOUNCE_NONE: never bounce (default) * BLK_BOUNCE_HIGH: bounce all highmem pages @@ -292,6 +314,8 @@ enum blk_bounce { }; struct queue_limits { + unsigned int features; + unsigned int flags; enum blk_bounce bounce; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; @@ -536,12 +560,9 @@ struct request_queue { #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ -#define QUEUE_FLAG_HW_WC 13 /* Write back caching supported */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ -#define QUEUE_FLAG_WC 17 /* Write back caching */ -#define QUEUE_FLAG_FUA 18 /* device supports FUA writes */ #define QUEUE_FLAG_DAX 19 /* device supports DAX */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ @@ -951,7 +972,6 @@ void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, sector_t offset, const char *pfx); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); -extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); struct blk_independent_access_ranges * disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges); @@ -1304,14 +1324,20 @@ static inline bool bdev_stable_writes(struct block_device *bdev) return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags); } +static inline bool blk_queue_write_cache(struct request_queue *q) +{ + return (q->limits.features & BLK_FEAT_WRITE_CACHE) && + !(q->limits.flags & BLK_FLAGS_WRITE_CACHE_DISABLED); +} + static inline bool bdev_write_cache(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_WC, &bdev_get_queue(bdev)->queue_flags); + return blk_queue_write_cache(bdev_get_queue(bdev)); } static inline bool bdev_fua(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_FUA, &bdev_get_queue(bdev)->queue_flags); + return bdev_get_queue(bdev)->limits.features & BLK_FEAT_FUA; } static inline bool bdev_nowait(struct block_device *bdev) -- cgit v1.2.3 From bd4a633b6f7c3c6b6ebc1a07317643270e751a94 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:41 +0200 Subject: block: move the nonrot flag to queue_limits Move the nonrot flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Use the chance to switch to defaulting to non-rotational and require the driver to opt into rotational, which matches the polarity of the sysfs interface. For the z2ram, ps3vram, 2x memstick, ubiblock and dcssblk the new rotational flag is not set as they clearly are not rotational despite this being a behavior change. There are some other drivers that unconditionally set the rotational flag to keep the existing behavior as they arguably can be used on rotational devices even if that is probably not their main use today (e.g. virtio_blk and drbd). The flag is automatically inherited in blk_stack_limits matching the existing behavior in dm and md. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-15-hch@lst.de Signed-off-by: Jens Axboe --- arch/m68k/emu/nfblock.c | 1 + arch/um/drivers/ubd_kern.c | 1 - arch/xtensa/platforms/iss/simdisk.c | 5 ++++- block/blk-mq-debugfs.c | 1 - block/blk-sysfs.c | 39 ++++++++++++++++++++++++++++++++++--- drivers/block/amiflop.c | 5 ++++- drivers/block/aoe/aoeblk.c | 1 + drivers/block/ataflop.c | 5 ++++- drivers/block/brd.c | 2 -- drivers/block/drbd/drbd_main.c | 3 ++- drivers/block/floppy.c | 3 ++- drivers/block/loop.c | 8 +++----- drivers/block/mtip32xx/mtip32xx.c | 1 - drivers/block/n64cart.c | 2 -- drivers/block/nbd.c | 5 ----- drivers/block/null_blk/main.c | 1 - drivers/block/pktcdvd.c | 1 + drivers/block/ps3disk.c | 3 ++- drivers/block/rbd.c | 3 --- drivers/block/rnbd/rnbd-clt.c | 4 ---- drivers/block/sunvdc.c | 1 + drivers/block/swim.c | 5 ++++- drivers/block/swim3.c | 5 ++++- drivers/block/ublk_drv.c | 9 +++------ drivers/block/virtio_blk.c | 4 +++- drivers/block/xen-blkfront.c | 1 - drivers/block/zram/zram_drv.c | 2 -- drivers/cdrom/gdrom.c | 1 + drivers/md/bcache/super.c | 2 -- drivers/md/dm-table.c | 12 ------------ drivers/md/md.c | 13 ------------- drivers/mmc/core/queue.c | 1 - drivers/mtd/mtd_blkdevs.c | 1 - drivers/nvdimm/btt.c | 1 - drivers/nvdimm/pmem.c | 1 - drivers/nvme/host/core.c | 1 - drivers/nvme/host/multipath.c | 1 - drivers/s390/block/dasd_genhd.c | 1 - drivers/s390/block/scm_blk.c | 1 - drivers/scsi/sd.c | 4 ++-- include/linux/blkdev.h | 10 +++++----- 41 files changed, 83 insertions(+), 88 deletions(-) (limited to 'drivers/scsi') diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index 642fb80c5c4e..8eea7ef91151 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -98,6 +98,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize) { struct queue_limits lim = { .logical_block_size = bsize, + .features = BLK_FEAT_ROTATIONAL, }; struct nfhd_device *dev; int dev_id = id - NFHD_DEV_OFFSET; diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 19e01691ea0e..9f1e76ddda5a 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -882,7 +882,6 @@ static int ubd_add(int n, char **error_out) goto out_cleanup_tags; } - blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); disk->major = UBD_MAJOR; disk->first_minor = n << UBD_SHIFT; disk->minors = 1 << UBD_SHIFT; diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index defc67909a9c..d6d2b533a574 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -263,6 +263,9 @@ static const struct proc_ops simdisk_proc_ops = { static int __init simdisk_setup(struct simdisk *dev, int which, struct proc_dir_entry *procdir) { + struct queue_limits lim = { + .features = BLK_FEAT_ROTATIONAL, + }; char tmp[2] = { '0' + which, 0 }; int err; @@ -271,7 +274,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which, spin_lock_init(&dev->lock); dev->users = 0; - dev->gd = blk_alloc_disk(NULL, NUMA_NO_NODE); + dev->gd = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(dev->gd)) { err = PTR_ERR(dev->gd); goto out; diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index e8b9db7c30c4..4d0e62ec88f0 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -84,7 +84,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(NOMERGES), QUEUE_FLAG_NAME(SAME_COMP), QUEUE_FLAG_NAME(FAIL_IO), - QUEUE_FLAG_NAME(NONROT), QUEUE_FLAG_NAME(IO_STAT), QUEUE_FLAG_NAME(NOXMERGES), QUEUE_FLAG_NAME(ADD_RANDOM), diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 4f524c1d5e08..637ed3bbbfb4 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -263,6 +263,39 @@ static ssize_t queue_dma_alignment_show(struct request_queue *q, char *page) return queue_var_show(queue_dma_alignment(q), page); } +static ssize_t queue_feature_store(struct request_queue *q, const char *page, + size_t count, unsigned int feature) +{ + struct queue_limits lim; + unsigned long val; + ssize_t ret; + + ret = queue_var_store(&val, page, count); + if (ret < 0) + return ret; + + lim = queue_limits_start_update(q); + if (val) + lim.features |= feature; + else + lim.features &= ~feature; + ret = queue_limits_commit_update(q, &lim); + if (ret) + return ret; + return count; +} + +#define QUEUE_SYSFS_FEATURE(_name, _feature) \ +static ssize_t queue_##_name##_show(struct request_queue *q, char *page) \ +{ \ + return sprintf(page, "%u\n", !!(q->limits.features & _feature)); \ +} \ +static ssize_t queue_##_name##_store(struct request_queue *q, \ + const char *page, size_t count) \ +{ \ + return queue_feature_store(q, page, count, _feature); \ +} + #define QUEUE_SYSFS_BIT_FNS(name, flag, neg) \ static ssize_t \ queue_##name##_show(struct request_queue *q, char *page) \ @@ -289,7 +322,7 @@ queue_##name##_store(struct request_queue *q, const char *page, size_t count) \ return ret; \ } -QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1); +QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL) QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0); QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0); QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0); @@ -526,7 +559,7 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = { .show = queue_logical_block_size_show, }; -QUEUE_RW_ENTRY(queue_nonrot, "rotational"); +QUEUE_RW_ENTRY(queue_rotational, "rotational"); QUEUE_RW_ENTRY(queue_iostats, "iostats"); QUEUE_RW_ENTRY(queue_random, "add_random"); QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes"); @@ -624,7 +657,7 @@ static struct attribute *queue_attrs[] = { &queue_write_zeroes_max_entry.attr, &queue_zone_append_max_entry.attr, &queue_zone_write_granularity_entry.attr, - &queue_nonrot_entry.attr, + &queue_rotational_entry.attr, &queue_zoned_entry.attr, &queue_nr_zones_entry.attr, &queue_max_open_zones_entry.attr, diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index a25414228e47..ff45701f7a5e 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1776,10 +1776,13 @@ static const struct blk_mq_ops amiflop_mq_ops = { static int fd_alloc_disk(int drive, int system) { + struct queue_limits lim = { + .features = BLK_FEAT_ROTATIONAL, + }; struct gendisk *disk; int err; - disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL); + disk = blk_mq_alloc_disk(&unit[drive].tag_set, &lim, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index b6dac8cee70f..2028795ec61c 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -337,6 +337,7 @@ aoeblk_gdalloc(void *vp) struct queue_limits lim = { .max_hw_sectors = aoe_maxsectors, .io_opt = SZ_2M, + .features = BLK_FEAT_ROTATIONAL, }; ulong flags; int late = 0; diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index cacc4ba942a8..4ee10a742bdb 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1992,9 +1992,12 @@ static const struct blk_mq_ops ataflop_mq_ops = { static int ataflop_alloc_disk(unsigned int drive, unsigned int type) { + struct queue_limits lim = { + .features = BLK_FEAT_ROTATIONAL, + }; struct gendisk *disk; - disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL); + disk = blk_mq_alloc_disk(&unit[drive].tag_set, &lim, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 558d8e670566..b25dc463b5e3 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -366,8 +366,6 @@ static int brd_alloc(int i) strscpy(disk->disk_name, buf, DISK_NAME_LEN); set_capacity(disk, rd_size * 2); - /* Tell the block layer that this is not a rotational device */ - blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue); blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue); err = add_disk(disk); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index bf42a46781fa..2ef29a478075 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2697,7 +2697,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig * connect. */ .max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8, - .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA, + .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | + BLK_FEAT_ROTATIONAL, }; device = minor_to_device(minor); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 25c9d85667f1..6d7f7df97c3a 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4516,7 +4516,8 @@ static bool floppy_available(int drive) static int floppy_alloc_disk(unsigned int drive, unsigned int type) { struct queue_limits lim = { - .max_hw_sectors = 64, + .max_hw_sectors = 64, + .features = BLK_FEAT_ROTATIONAL, }; struct gendisk *disk; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 08d0fc7f17b7..86b5d956dc4e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -985,13 +985,11 @@ static int loop_reconfigure_limits(struct loop_device *lo, unsigned short bsize) lim.logical_block_size = bsize; lim.physical_block_size = bsize; lim.io_min = bsize; - lim.features &= ~BLK_FEAT_WRITE_CACHE; + lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_ROTATIONAL); if (file->f_op->fsync && !(lo->lo_flags & LO_FLAGS_READ_ONLY)) lim.features |= BLK_FEAT_WRITE_CACHE; - if (!backing_bdev || bdev_nonrot(backing_bdev)) - blk_queue_flag_set(QUEUE_FLAG_NONROT, lo->lo_queue); - else - blk_queue_flag_clear(QUEUE_FLAG_NONROT, lo->lo_queue); + if (backing_bdev && !bdev_nonrot(backing_bdev)) + lim.features |= BLK_FEAT_ROTATIONAL; loop_config_discard(lo, &lim); return queue_limits_commit_update(lo->lo_queue, &lim); } diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 43a187609ef7..1dbbf72659d5 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3485,7 +3485,6 @@ skip_create_disk: goto start_service_thread; /* Set device limits. */ - blk_queue_flag_set(QUEUE_FLAG_NONROT, dd->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue); dma_set_max_seg_size(&dd->pdev->dev, 0x400000); diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index 27b2187e7a6d..b9fdeff31caf 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -150,8 +150,6 @@ static int __init n64cart_probe(struct platform_device *pdev) set_capacity(disk, size >> SECTOR_SHIFT); set_disk_ro(disk, 1); - blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - err = add_disk(disk); if (err) goto out_cleanup_disk; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index cb1c86a6a3fb..6cddf5baffe0 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1867,11 +1867,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) goto out_err_disk; } - /* - * Tell the block layer that we are not a rotational device - */ - blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - mutex_init(&nbd->config_lock); refcount_set(&nbd->config_refs, 0); /* diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 21f9d256e884..83a4ebe4763a 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1948,7 +1948,6 @@ static int null_add_dev(struct nullb_device *dev) } nullb->q->queuedata = nullb; - blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); rv = ida_alloc(&nullb_indexes, GFP_KERNEL); if (rv < 0) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 8a2ce8070010..7cece5884b9c 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2622,6 +2622,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) struct queue_limits lim = { .max_hw_sectors = PACKET_MAX_SECTORS, .logical_block_size = CD_FRAMESIZE, + .features = BLK_FEAT_ROTATIONAL, }; int idx; int ret = -ENOMEM; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 8b73cf459b59..ff45ed766469 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -388,7 +388,8 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) .max_segments = -1, .max_segment_size = dev->bounce_size, .dma_alignment = dev->blk_size - 1, - .features = BLK_FEAT_WRITE_CACHE, + .features = BLK_FEAT_WRITE_CACHE | + BLK_FEAT_ROTATIONAL, }; struct gendisk *gendisk; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 22ad704f81d8..ec1f1c7d4275 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4997,9 +4997,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) disk->fops = &rbd_bd_ops; disk->private_data = rbd_dev; - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ - if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 02c4b1731827..4918b0f68b46 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1352,10 +1352,6 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, if (dev->access_mode == RNBD_ACCESS_RO) set_disk_ro(dev->gd, true); - /* - * Network device does not need rotational - */ - blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue); err = add_disk(dev->gd); if (err) put_disk(dev->gd); diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 5286cb8e0824..2d38331ee667 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -791,6 +791,7 @@ static int probe_disk(struct vdc_port *port) .seg_boundary_mask = PAGE_SIZE - 1, .max_segment_size = PAGE_SIZE, .max_segments = port->ring_cookies, + .features = BLK_FEAT_ROTATIONAL, }; struct request_queue *q; struct gendisk *g; diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 6731678f3a41..126f151c4f2c 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -787,6 +787,9 @@ static void swim_cleanup_floppy_disk(struct floppy_state *fs) static int swim_floppy_init(struct swim_priv *swd) { + struct queue_limits lim = { + .features = BLK_FEAT_ROTATIONAL, + }; int err; int drive; struct swim __iomem *base = swd->base; @@ -820,7 +823,7 @@ static int swim_floppy_init(struct swim_priv *swd) goto exit_put_disks; swd->unit[drive].disk = - blk_mq_alloc_disk(&swd->unit[drive].tag_set, NULL, + blk_mq_alloc_disk(&swd->unit[drive].tag_set, &lim, &swd->unit[drive]); if (IS_ERR(swd->unit[drive].disk)) { blk_mq_free_tag_set(&swd->unit[drive].tag_set); diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index a04756ac778e..90be1017f7bf 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1189,6 +1189,9 @@ static int swim3_add_device(struct macio_dev *mdev, int index) static int swim3_attach(struct macio_dev *mdev, const struct of_device_id *match) { + struct queue_limits lim = { + .features = BLK_FEAT_ROTATIONAL, + }; struct floppy_state *fs; struct gendisk *disk; int rc; @@ -1210,7 +1213,7 @@ static int swim3_attach(struct macio_dev *mdev, if (rc) goto out_unregister; - disk = blk_mq_alloc_disk(&fs->tag_set, NULL, fs); + disk = blk_mq_alloc_disk(&fs->tag_set, &lim, fs); if (IS_ERR(disk)) { rc = PTR_ERR(disk); goto out_free_tag_set; diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index e45c65c1848d..4fcde0999358 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -484,14 +484,8 @@ static inline unsigned ublk_pos_to_tag(loff_t pos) static void ublk_dev_param_basic_apply(struct ublk_device *ub) { - struct request_queue *q = ub->ub_disk->queue; const struct ublk_param_basic *p = &ub->params.basic; - if (p->attrs & UBLK_ATTR_ROTATIONAL) - blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); - else - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - if (p->attrs & UBLK_ATTR_READ_ONLY) set_disk_ro(ub->ub_disk, true); @@ -2214,6 +2208,9 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) lim.features |= BLK_FEAT_FUA; } + if (ub->params.basic.attrs & UBLK_ATTR_ROTATIONAL) + lim.features |= BLK_FEAT_ROTATIONAL; + if (wait_for_completion_interruptible(&ub->completion) != 0) return -EINTR; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index b1a3c2935285..13a2f24f1766 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1451,7 +1451,9 @@ static int virtblk_read_limits(struct virtio_blk *vblk, static int virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; - struct queue_limits lim = { }; + struct queue_limits lim = { + .features = BLK_FEAT_ROTATIONAL, + }; int err, index; unsigned int queue_depth; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 9aafce3e5987..fa3a2ba52545 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1146,7 +1146,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, err = PTR_ERR(gd); goto out_free_tag_set; } - blk_queue_flag_set(QUEUE_FLAG_VIRT, gd->queue); strcpy(gd->disk_name, DEV_NAME); ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 3acd7006ad2c..aad840fc7e18 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2245,8 +2245,6 @@ static int zram_add(void) /* Actual capacity set using sysfs (/sys/block/zram/disksize */ set_capacity(zram->disk, 0); - /* zram devices sort of resembles non-rotational disks */ - blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue); blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); ret = device_add_disk(NULL, zram->disk, zram_disk_groups); diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index eefdd422ad8e..71cfe7a85913 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -744,6 +744,7 @@ static int probe_gdrom(struct platform_device *devptr) .max_segments = 1, /* set a large max size to get most from DMA */ .max_segment_size = 0x40000, + .features = BLK_FEAT_ROTATIONAL, }; int err; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index cb6595c8b551..baa364eedd00 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -974,8 +974,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, d->disk->minors = BCACHE_MINORS; d->disk->fops = ops; d->disk->private_data = d; - - blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue); return 0; out_bioset_exit: diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 03abdae64682..c062af329709 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1716,12 +1716,6 @@ static int device_dax_write_cache_enabled(struct dm_target *ti, return false; } -static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - return !bdev_nonrot(dev->bdev); -} - static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1870,12 +1864,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL)) dax_write_cache(t->md->dax_dev, true); - /* Ensure that all underlying devices are non-rotational. */ - if (dm_table_any_dev_attr(t, device_is_rotational, NULL)) - blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); - else - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - /* * Some devices don't use blk_integrity but still want stable pages * because they do their own checksumming. diff --git a/drivers/md/md.c b/drivers/md/md.c index 2f4c5d1755d8..c23423c51fb7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6151,20 +6151,7 @@ int md_run(struct mddev *mddev) if (!mddev_is_dm(mddev)) { struct request_queue *q = mddev->gendisk->queue; - bool nonrot = true; - rdev_for_each(rdev, mddev) { - if (rdev->raid_disk >= 0 && !bdev_nonrot(rdev->bdev)) { - nonrot = false; - break; - } - } - if (mddev->degraded) - nonrot = false; - if (nonrot) - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - else - blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); blk_queue_flag_set(QUEUE_FLAG_IO_STAT, q); /* Set the NOWAIT flags if all underlying devices support it */ diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 97ff993d3157..b4f62fa84586 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -387,7 +387,6 @@ static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq, blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue); blk_queue_rq_timeout(mq->queue, 60 * HZ); - blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue); dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue)); diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 1b9f57f231e8..bf8369ce7ddf 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -375,7 +375,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) spin_lock_init(&new->queue_lock); INIT_LIST_HEAD(&new->rq_list); - blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq); gd->queue = new->rq; diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index c5f8451b494d..e474afa8e9f6 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1518,7 +1518,6 @@ static int btt_blk_init(struct btt *btt) btt->btt_disk->fops = &btt_fops; btt->btt_disk->private_data = btt; - blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, btt->btt_disk->queue); set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index aff818469c11..501cf226df01 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -546,7 +546,6 @@ static int pmem_attach_disk(struct device *dev, } pmem->virt_addr = addr; - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, q); if (pmem->pfn_flags & PFN_MAP) blk_queue_flag_set(QUEUE_FLAG_DAX, q); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9fc5e36fe2e5..0d753fe71f35 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3744,7 +3744,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) if (ctrl->opts && ctrl->opts->data_digest) blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue); - blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue); if (ctrl->ops->supports_pci_p2pdma && ctrl->ops->supports_pci_p2pdma(ctrl)) blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 3d0e23a0a4dd..58c13304e558 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -549,7 +549,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) sprintf(head->disk->disk_name, "nvme%dn%d", ctrl->subsys->instance, head->instance); - blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue); blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue); blk_queue_flag_set(QUEUE_FLAG_IO_STAT, head->disk->queue); /* diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 4533dd055ca8..1aa426b1dedd 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -68,7 +68,6 @@ int dasd_gendisk_alloc(struct dasd_block *block) blk_mq_free_tag_set(&block->tag_set); return PTR_ERR(gdp); } - blk_queue_flag_set(QUEUE_FLAG_NONROT, gdp->queue); /* Initialize gendisk structure. */ gdp->major = DASD_MAJOR; diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 1d456a5a3bfb..2e2309fa9a0b 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -475,7 +475,6 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) goto out_tag; } rq = bdev->rq = bdev->gendisk->queue; - blk_queue_flag_set(QUEUE_FLAG_NONROT, rq); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq); bdev->gendisk->private_data = scmdev; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index d8ee4a4d4a62..a42c3c45e868 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3318,7 +3318,7 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp, rcu_read_unlock(); if (rot == 1) { - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); + lim->features &= ~BLK_FEAT_ROTATIONAL; blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); } @@ -3646,7 +3646,7 @@ static int sd_revalidate_disk(struct gendisk *disk) * cause this to be updated correctly and any device which * doesn't support it should be treated as rotational. */ - blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); + lim.features |= BLK_FEAT_ROTATIONAL; blk_queue_flag_set(QUEUE_FLAG_ADD_RANDOM, q); if (scsi_device_supports_vpd(sdp)) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index acdfe5122faa..988e3248cffe 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -289,14 +289,16 @@ enum { /* supports passing on the FUA bit */ BLK_FEAT_FUA = (1u << 1), + + /* rotational device (hard drive or floppy) */ + BLK_FEAT_ROTATIONAL = (1u << 2), }; /* * Flags automatically inherited when stacking limits. */ #define BLK_FEAT_INHERIT_MASK \ - (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA) - + (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL) /* internal flags in queue_limits.flags */ enum { @@ -553,8 +555,6 @@ struct request_queue { #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ -#define QUEUE_FLAG_NONROT 6 /* non-rotational device (SSD) */ -#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ @@ -589,7 +589,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) -#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) +#define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_zone_resetall(q) \ -- cgit v1.2.3 From 39a9f1c334f9f27b3b3e6d0005c10ed667268346 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:42 +0200 Subject: block: move the add_random flag to queue_limits Move the add_random flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Note that this also removes code from dm to clear the flag based on the underlying devices, which can't be reached as dm devices will always start out without the flag set. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-16-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 - block/blk-sysfs.c | 6 +++--- drivers/block/mtip32xx/mtip32xx.c | 1 - drivers/md/dm-table.c | 18 ------------------ drivers/mmc/core/queue.c | 2 -- drivers/mtd/mtd_blkdevs.c | 3 --- drivers/s390/block/scm_blk.c | 4 ---- drivers/scsi/scsi_lib.c | 3 +-- drivers/scsi/sd.c | 11 +++-------- include/linux/blkdev.h | 5 +++-- 10 files changed, 10 insertions(+), 44 deletions(-) (limited to 'drivers/scsi') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 4d0e62ec88f0..6b7edb50bfd3 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -86,7 +86,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(FAIL_IO), QUEUE_FLAG_NAME(IO_STAT), QUEUE_FLAG_NAME(NOXMERGES), - QUEUE_FLAG_NAME(ADD_RANDOM), QUEUE_FLAG_NAME(SYNCHRONOUS), QUEUE_FLAG_NAME(SAME_FORCE), QUEUE_FLAG_NAME(INIT_DONE), diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 637ed3bbbfb4..9174aca3b855 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -323,7 +323,7 @@ queue_##name##_store(struct request_queue *q, const char *page, size_t count) \ } QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL) -QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0); +QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM) QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0); QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0); #undef QUEUE_SYSFS_BIT_FNS @@ -561,7 +561,7 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = { QUEUE_RW_ENTRY(queue_rotational, "rotational"); QUEUE_RW_ENTRY(queue_iostats, "iostats"); -QUEUE_RW_ENTRY(queue_random, "add_random"); +QUEUE_RW_ENTRY(queue_add_random, "add_random"); QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes"); #ifdef CONFIG_BLK_WBT @@ -665,7 +665,7 @@ static struct attribute *queue_attrs[] = { &queue_nomerges_entry.attr, &queue_iostats_entry.attr, &queue_stable_writes_entry.attr, - &queue_random_entry.attr, + &queue_add_random_entry.attr, &queue_poll_entry.attr, &queue_wc_entry.attr, &queue_fua_entry.attr, diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 1dbbf72659d5..c6ef0546ffc9 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3485,7 +3485,6 @@ skip_create_disk: goto start_service_thread; /* Set device limits. */ - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue); dma_set_max_seg_size(&dd->pdev->dev, 0x400000); /* Set the capacity of the device in 512 byte sectors. */ diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index c062af329709..0a3838e45aff 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1716,14 +1716,6 @@ static int device_dax_write_cache_enabled(struct dm_target *ti, return false; } -static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - struct request_queue *q = bdev_get_queue(dev->bdev); - - return !blk_queue_add_random(q); -} - static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1876,16 +1868,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); - /* - * Determine whether or not this queue's I/O timings contribute - * to the entropy pool, Only request-based targets use this. - * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not - * have it set. - */ - if (blk_queue_add_random(q) && - dm_table_any_dev_attr(t, device_is_not_random, NULL)) - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); - /* * For a zoned target, setup the zones related queue attributes * and resources necessary for zone append emulation if necessary. diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index b4f62fa84586..da00904d4a3c 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -387,8 +387,6 @@ static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq, blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue); blk_queue_rq_timeout(mq->queue, 60 * HZ); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue); - dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue)); INIT_WORK(&mq->recovery_work, mmc_mq_recovery_handler); diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index bf8369ce7ddf..47ead84407cd 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -374,9 +374,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) /* Create the request queue */ spin_lock_init(&new->queue_lock); INIT_LIST_HEAD(&new->rq_list); - - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq); - gd->queue = new->rq; if (new->readonly) diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 2e2309fa9a0b..3fcfe029db1b 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -439,7 +439,6 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) .logical_block_size = 1 << 12, }; unsigned int devindex; - struct request_queue *rq; int len, ret; lim.max_segments = min(scmdev->nr_max_block, @@ -474,9 +473,6 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) ret = PTR_ERR(bdev->gendisk); goto out_tag; } - rq = bdev->rq = bdev->gendisk->queue; - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq); - bdev->gendisk->private_data = scmdev; bdev->gendisk->fops = &scm_blk_devops; bdev->gendisk->major = scm_major; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ec39acc986d6..54f771ec8cfb 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -631,8 +631,7 @@ static bool scsi_end_request(struct request *req, blk_status_t error, if (blk_update_request(req, error, bytes)) return true; - // XXX: - if (blk_queue_add_random(q)) + if (q->limits.features & BLK_FEAT_ADD_RANDOM) add_disk_randomness(req->q->disk); WARN_ON_ONCE(!blk_rq_is_passthrough(req) && diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index a42c3c45e868..a27f1c7f1b61 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3301,7 +3301,6 @@ static void sd_read_block_limits_ext(struct scsi_disk *sdkp) static void sd_read_block_characteristics(struct scsi_disk *sdkp, struct queue_limits *lim) { - struct request_queue *q = sdkp->disk->queue; struct scsi_vpd *vpd; u16 rot; @@ -3317,10 +3316,8 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp, sdkp->zoned = (vpd->data[8] >> 4) & 3; rcu_read_unlock(); - if (rot == 1) { - lim->features &= ~BLK_FEAT_ROTATIONAL; - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); - } + if (rot == 1) + lim->features &= ~(BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM); if (!sdkp->first_scan) return; @@ -3599,7 +3596,6 @@ static int sd_revalidate_disk(struct gendisk *disk) { struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdp = sdkp->device; - struct request_queue *q = sdkp->disk->queue; sector_t old_capacity = sdkp->capacity; struct queue_limits lim; unsigned char *buffer; @@ -3646,8 +3642,7 @@ static int sd_revalidate_disk(struct gendisk *disk) * cause this to be updated correctly and any device which * doesn't support it should be treated as rotational. */ - lim.features |= BLK_FEAT_ROTATIONAL; - blk_queue_flag_set(QUEUE_FLAG_ADD_RANDOM, q); + lim.features |= (BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM); if (scsi_device_supports_vpd(sdp)) { sd_read_block_provisioning(sdkp); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 988e3248cffe..cf1bbf566b2b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -292,6 +292,9 @@ enum { /* rotational device (hard drive or floppy) */ BLK_FEAT_ROTATIONAL = (1u << 2), + + /* contributes to the random number pool */ + BLK_FEAT_ADD_RANDOM = (1u << 3), }; /* @@ -557,7 +560,6 @@ struct request_queue { #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ #define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ -#define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ @@ -591,7 +593,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) -#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_zone_resetall(q) \ test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) -- cgit v1.2.3 From 1a02f3a73f8c670eddeb44bf52a75ae7f67cfc11 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:44 +0200 Subject: block: move the stable_writes flag to queue_limits Move the stable_writes flag into the queue_limits feature field so that it can be set atomically with the queue frozen. The flag is now inherited by blk_stack_limits, which greatly simplifies the code in dm, and fixed md which previously did not pass on the flag set on lower devices. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-18-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 - block/blk-sysfs.c | 29 +---------------------------- drivers/block/drbd/drbd_main.c | 5 ++--- drivers/block/rbd.c | 9 +++------ drivers/block/zram/zram_drv.c | 2 +- drivers/md/dm-table.c | 19 ------------------- drivers/md/raid5.c | 6 ++++-- drivers/mmc/core/queue.c | 5 +++-- drivers/nvme/host/core.c | 9 +++++---- drivers/nvme/host/multipath.c | 4 ---- drivers/scsi/iscsi_tcp.c | 8 ++++---- include/linux/blkdev.h | 9 ++++++--- 12 files changed, 29 insertions(+), 77 deletions(-) (limited to 'drivers/scsi') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index cbe99444ed1a..eb73f1d348e5 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -88,7 +88,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(SYNCHRONOUS), QUEUE_FLAG_NAME(SAME_FORCE), QUEUE_FLAG_NAME(INIT_DONE), - QUEUE_FLAG_NAME(STABLE_WRITES), QUEUE_FLAG_NAME(POLL), QUEUE_FLAG_NAME(DAX), QUEUE_FLAG_NAME(STATS), diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 6f58530fb3c0..cde525724831 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -296,37 +296,10 @@ static ssize_t queue_##_name##_store(struct request_queue *q, \ return queue_feature_store(q, page, count, _feature); \ } -#define QUEUE_SYSFS_BIT_FNS(name, flag, neg) \ -static ssize_t \ -queue_##name##_show(struct request_queue *q, char *page) \ -{ \ - int bit; \ - bit = test_bit(QUEUE_FLAG_##flag, &q->queue_flags); \ - return queue_var_show(neg ? !bit : bit, page); \ -} \ -static ssize_t \ -queue_##name##_store(struct request_queue *q, const char *page, size_t count) \ -{ \ - unsigned long val; \ - ssize_t ret; \ - ret = queue_var_store(&val, page, count); \ - if (ret < 0) \ - return ret; \ - if (neg) \ - val = !val; \ - \ - if (val) \ - blk_queue_flag_set(QUEUE_FLAG_##flag, q); \ - else \ - blk_queue_flag_clear(QUEUE_FLAG_##flag, q); \ - return ret; \ -} - QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL) QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM) QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT) -QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0); -#undef QUEUE_SYSFS_BIT_FNS +QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES); static ssize_t queue_zoned_show(struct request_queue *q, char *page) { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2ef29a478075..f92673f05c7a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2698,7 +2698,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig */ .max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8, .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | - BLK_FEAT_ROTATIONAL, + BLK_FEAT_ROTATIONAL | + BLK_FEAT_STABLE_WRITES, }; device = minor_to_device(minor); @@ -2737,8 +2738,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig sprintf(disk->disk_name, "drbd%d", minor); disk->private_data = device; - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); - device->md_io.page = alloc_page(GFP_KERNEL); if (!device->md_io.page) goto out_no_io_page; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ec1f1c7d4275..008e850555f4 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4949,7 +4949,6 @@ static const struct blk_mq_ops rbd_mq_ops = { static int rbd_init_disk(struct rbd_device *rbd_dev) { struct gendisk *disk; - struct request_queue *q; unsigned int objset_bytes = rbd_dev->layout.object_size * rbd_dev->layout.stripe_count; struct queue_limits lim = { @@ -4979,12 +4978,14 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) lim.max_write_zeroes_sectors = objset_bytes >> SECTOR_SHIFT; } + if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) + lim.features |= BLK_FEAT_STABLE_WRITES; + disk = blk_mq_alloc_disk(&rbd_dev->tag_set, &lim, rbd_dev); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_tag_set; } - q = disk->queue; snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", rbd_dev->dev_id); @@ -4996,10 +4997,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) disk->minors = RBD_MINORS_PER_MAJOR; disk->fops = &rbd_bd_ops; disk->private_data = rbd_dev; - - if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); - rbd_dev->disk = disk; return 0; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index aad840fc7e18..f8f1b5b54795 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2208,6 +2208,7 @@ static int zram_add(void) #if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE .max_write_zeroes_sectors = UINT_MAX, #endif + .features = BLK_FEAT_STABLE_WRITES, }; struct zram *zram; int ret, device_id; @@ -2246,7 +2247,6 @@ static int zram_add(void) /* Actual capacity set using sysfs (/sys/block/zram/disksize */ set_capacity(zram->disk, 0); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue); - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); ret = device_add_disk(NULL, zram->disk, zram_disk_groups); if (ret) goto out_cleanup_disk; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 5d5431e531ae..aaf379cb15d9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1819,13 +1819,6 @@ static bool dm_table_supports_secure_erase(struct dm_table *t) return true; } -static int device_requires_stable_pages(struct dm_target *ti, - struct dm_dev *dev, sector_t start, - sector_t len, void *data) -{ - return bdev_stable_writes(dev->bdev); -} - int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1862,18 +1855,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL)) dax_write_cache(t->md->dax_dev, true); - /* - * Some devices don't use blk_integrity but still want stable pages - * because they do their own checksumming. - * If any underlying device requires stable pages, a table must require - * them as well. Only targets that support iterate_devices are considered: - * don't want error, zero, etc to require stable pages. - */ - if (dm_table_any_dev_attr(t, device_requires_stable_pages, NULL)) - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); - else - blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); - /* * For a zoned target, setup the zones related queue attributes * and resources necessary for zone append emulation if necessary. diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 675c68fa6c64..e875763d6991 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7082,12 +7082,14 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len) err = -ENODEV; else if (new != conf->skip_copy) { struct request_queue *q = mddev->gendisk->queue; + struct queue_limits lim = queue_limits_start_update(q); conf->skip_copy = new; if (new) - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); + lim.features |= BLK_FEAT_STABLE_WRITES; else - blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); + lim.features &= ~BLK_FEAT_STABLE_WRITES; + err = queue_limits_commit_update(q, &lim); } mddev_unlock_and_resume(mddev); return err ?: len; diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index da00904d4a3c..d0b3ca8a11f0 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -378,13 +378,14 @@ static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq, lim.max_segments = host->max_segs; } + if (mmc_host_is_spi(host) && host->use_spi_crc) + lim.features |= BLK_FEAT_STABLE_WRITES; + disk = blk_mq_alloc_disk(&mq->tag_set, &lim, mq); if (IS_ERR(disk)) return disk; mq->queue = disk->queue; - if (mmc_host_is_spi(host) && host->use_spi_crc) - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue); blk_queue_rq_timeout(mq->queue, 60 * HZ); dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue)); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0d753fe71f35..5ecf762d7c88 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3724,6 +3724,7 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns) static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) { + struct queue_limits lim = { }; struct nvme_ns *ns; struct gendisk *disk; int node = ctrl->numa_node; @@ -3732,7 +3733,10 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) if (!ns) return; - disk = blk_mq_alloc_disk(ctrl->tagset, NULL, ns); + if (ctrl->opts && ctrl->opts->data_digest) + lim.features |= BLK_FEAT_STABLE_WRITES; + + disk = blk_mq_alloc_disk(ctrl->tagset, &lim, ns); if (IS_ERR(disk)) goto out_free_ns; disk->fops = &nvme_bdev_ops; @@ -3741,9 +3745,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) ns->disk = disk; ns->queue = disk->queue; - if (ctrl->opts && ctrl->opts->data_digest) - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue); - if (ctrl->ops->supports_pci_p2pdma && ctrl->ops->supports_pci_p2pdma(ctrl)) blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index eea727cfa9e6..173796f2ddea 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -868,10 +868,6 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid) nvme_mpath_set_live(ns); } - if (test_bit(QUEUE_FLAG_STABLE_WRITES, &ns->queue->queue_flags) && - ns->head->disk) - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, - ns->head->disk->queue); #ifdef CONFIG_BLK_DEV_ZONED if (blk_queue_is_zoned(ns->queue) && ns->head->disk) ns->head->disk->nr_zones = ns->disk->nr_zones; diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 60688f18fac6..c708e1059638 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -1057,15 +1057,15 @@ static umode_t iscsi_sw_tcp_attr_is_visible(int param_type, int param) return 0; } -static int iscsi_sw_tcp_slave_configure(struct scsi_device *sdev) +static int iscsi_sw_tcp_device_configure(struct scsi_device *sdev, + struct queue_limits *lim) { struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(sdev->host); struct iscsi_session *session = tcp_sw_host->session; struct iscsi_conn *conn = session->leadconn; if (conn->datadgst_en) - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, - sdev->request_queue); + lim->features |= BLK_FEAT_STABLE_WRITES; return 0; } @@ -1083,7 +1083,7 @@ static const struct scsi_host_template iscsi_sw_tcp_sht = { .eh_device_reset_handler= iscsi_eh_device_reset, .eh_target_reset_handler = iscsi_eh_recover_target, .dma_boundary = PAGE_SIZE - 1, - .slave_configure = iscsi_sw_tcp_slave_configure, + .device_configure = iscsi_sw_tcp_device_configure, .proc_name = "iscsi_tcp", .this_id = -1, .track_queue_depth = 1, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5fafb2f95fd1..8936eb6ba609 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -298,13 +298,17 @@ enum { /* do disk/partitions IO accounting */ BLK_FEAT_IO_STAT = (1u << 4), + + /* don't modify data until writeback is done */ + BLK_FEAT_STABLE_WRITES = (1u << 5), }; /* * Flags automatically inherited when stacking limits. */ #define BLK_FEAT_INHERIT_MASK \ - (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL) + (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ + BLK_FEAT_STABLE_WRITES) /* internal flags in queue_limits.flags */ enum { @@ -565,7 +569,6 @@ struct request_queue { #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ -#define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ #define QUEUE_FLAG_DAX 19 /* device supports DAX */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ @@ -1323,7 +1326,7 @@ static inline bool bdev_stable_writes(struct block_device *bdev) if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && q->limits.integrity.csum_type != BLK_INTEGRITY_CSUM_NONE) return true; - return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags); + return q->limits.features & BLK_FEAT_STABLE_WRITES; } static inline bool blk_queue_write_cache(struct request_queue *q) -- cgit v1.2.3 From b1fc937a55f5735b98d9dceae5bb6ba262501f56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:49 +0200 Subject: block: move the zoned flag into the features field Move the zoned flags into the features field to reclaim a little bit of space. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-23-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 5 ++--- drivers/block/null_blk/zoned.c | 2 +- drivers/block/ublk_drv.c | 2 +- drivers/block/virtio_blk.c | 5 +++-- drivers/md/dm-table.c | 11 ++++++----- drivers/md/dm-zone.c | 2 +- drivers/md/dm-zoned-target.c | 2 +- drivers/nvme/host/zns.c | 2 +- drivers/scsi/sd_zbc.c | 2 +- include/linux/blkdev.h | 9 ++++++--- 10 files changed, 23 insertions(+), 19 deletions(-) (limited to 'drivers/scsi') diff --git a/block/blk-settings.c b/block/blk-settings.c index 026ba68d8298..96e07f24bd9a 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -68,7 +68,7 @@ static void blk_apply_bdi_limits(struct backing_dev_info *bdi, static int blk_validate_zoned_limits(struct queue_limits *lim) { - if (!lim->zoned) { + if (!(lim->features & BLK_FEAT_ZONED)) { if (WARN_ON_ONCE(lim->max_open_zones) || WARN_ON_ONCE(lim->max_active_zones) || WARN_ON_ONCE(lim->zone_write_granularity) || @@ -602,8 +602,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->max_secure_erase_sectors); t->zone_write_granularity = max(t->zone_write_granularity, b->zone_write_granularity); - t->zoned = max(t->zoned, b->zoned); - if (!t->zoned) { + if (!(t->features & BLK_FEAT_ZONED)) { t->zone_write_granularity = 0; t->max_zone_append_sectors = 0; } diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index f118d304f310..ca8e739e76b9 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -158,7 +158,7 @@ int null_init_zoned_dev(struct nullb_device *dev, sector += dev->zone_size_sects; } - lim->zoned = true; + lim->features |= BLK_FEAT_ZONED; lim->chunk_sectors = dev->zone_size_sects; lim->max_zone_append_sectors = dev->zone_append_max_sectors; lim->max_open_zones = dev->zone_max_open; diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 4fcde0999358..69c16018cbb1 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2196,7 +2196,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) return -EOPNOTSUPP; - lim.zoned = true; + lim.features |= BLK_FEAT_ZONED; lim.max_active_zones = p->max_active_zones; lim.max_open_zones = p->max_open_zones; lim.max_zone_append_sectors = p->max_zone_append_sectors; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 13a2f24f1766..cea45b296f8b 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -728,7 +728,7 @@ static int virtblk_read_zoned_limits(struct virtio_blk *vblk, dev_dbg(&vdev->dev, "probing host-managed zoned device\n"); - lim->zoned = true; + lim->features |= BLK_FEAT_ZONED; virtio_cread(vdev, struct virtio_blk_config, zoned.max_open_zones, &v); @@ -1546,7 +1546,8 @@ static int virtblk_probe(struct virtio_device *vdev) * All steps that follow use the VQs therefore they need to be * placed after the virtio_device_ready() call above. */ - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && lim.zoned) { + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && + (lim.features & BLK_FEAT_ZONED)) { blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue); err = blk_revalidate_disk_zones(vblk->disk); if (err) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index ca1f136575cf..df6313c3fe6b 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1605,12 +1605,12 @@ int dm_calculate_queue_limits(struct dm_table *t, ti->type->iterate_devices(ti, dm_set_device_limits, &ti_limits); - if (!zoned && ti_limits.zoned) { + if (!zoned && (ti_limits.features & BLK_FEAT_ZONED)) { /* * After stacking all limits, validate all devices * in table support this zoned model and zone sectors. */ - zoned = ti_limits.zoned; + zoned = (ti_limits.features & BLK_FEAT_ZONED); zone_sectors = ti_limits.chunk_sectors; } @@ -1658,12 +1658,12 @@ combine_limits: * zoned model on host-managed zoned block devices. * BUT... */ - if (limits->zoned) { + if (limits->features & BLK_FEAT_ZONED) { /* * ...IF the above limits stacking determined a zoned model * validate that all of the table's devices conform to it. */ - zoned = limits->zoned; + zoned = limits->features & BLK_FEAT_ZONED; zone_sectors = limits->chunk_sectors; } if (validate_hardware_zoned(t, zoned, zone_sectors)) @@ -1834,7 +1834,8 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, * For a zoned target, setup the zones related queue attributes * and resources necessary for zone append emulation if necessary. */ - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && limits->zoned) { + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && + (limits->features & limits->features & BLK_FEAT_ZONED)) { r = dm_set_zones_restrictions(t, q, limits); if (r) return r; diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index 5d66d916730e..88d313229b43 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -263,7 +263,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, if (nr_conv_zones >= ret) { lim->max_open_zones = 0; lim->max_active_zones = 0; - lim->zoned = false; + lim->features &= ~BLK_FEAT_ZONED; clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); disk->nr_zones = 0; return 0; diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 12236e6f46f3..cd0ee144973f 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -1009,7 +1009,7 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits) limits->max_sectors = chunk_sectors; /* We are exposing a drive-managed zoned block device */ - limits->zoned = false; + limits->features &= ~BLK_FEAT_ZONED; } /* diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 77aa0f440a6d..06f2417aa50d 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -108,7 +108,7 @@ free_data: void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim, struct nvme_zone_info *zi) { - lim->zoned = 1; + lim->features |= BLK_FEAT_ZONED; lim->max_open_zones = zi->max_open_zones; lim->max_active_zones = zi->max_active_zones; lim->max_zone_append_sectors = ns->ctrl->max_zone_append; diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 360ec9804995..d3f84665946e 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -601,7 +601,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, if (sdkp->device->type != TYPE_ZBC) return 0; - lim->zoned = true; + lim->features |= BLK_FEAT_ZONED; /* * Per ZBC and ZAC specifications, writes in sequential write required diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cd27b66cbacc..bdc30c1fb1b5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -313,6 +313,9 @@ enum { /* supports I/O polling */ BLK_FEAT_POLL = (1u << 9), + + /* is a zoned device */ + BLK_FEAT_ZONED = (1u << 10), }; /* @@ -320,7 +323,7 @@ enum { */ #define BLK_FEAT_INHERIT_MASK \ (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ - BLK_FEAT_STABLE_WRITES) + BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED) /* internal flags in queue_limits.flags */ enum { @@ -372,7 +375,6 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; - bool zoned; unsigned int max_open_zones; unsigned int max_active_zones; @@ -654,7 +656,8 @@ static inline enum rpm_status queue_rpm_status(struct request_queue *q) static inline bool blk_queue_is_zoned(struct request_queue *q) { - return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && q->limits.zoned; + return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && + (q->limits.features & BLK_FEAT_ZONED); } #ifdef CONFIG_BLK_DEV_ZONED -- cgit v1.2.3 From a52758a39768f441e468a41da6c15a59d6d6011a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:50 +0200 Subject: block: move the zone_resetall flag to queue_limits Move the zone_resetall flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-24-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 - drivers/block/null_blk/zoned.c | 3 +-- drivers/block/ublk_drv.c | 4 +--- drivers/block/virtio_blk.c | 3 +-- drivers/nvme/host/zns.c | 3 +-- drivers/scsi/sd_zbc.c | 5 +---- include/linux/blkdev.h | 6 ++++-- 7 files changed, 9 insertions(+), 16 deletions(-) (limited to 'drivers/scsi') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 3a2152791384..f2fd72f4414a 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -91,7 +91,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(REGISTERED), QUEUE_FLAG_NAME(QUIESCED), QUEUE_FLAG_NAME(PCI_P2PDMA), - QUEUE_FLAG_NAME(ZONE_RESETALL), QUEUE_FLAG_NAME(RQ_ALLOC_TIME), QUEUE_FLAG_NAME(HCTX_ACTIVE), QUEUE_FLAG_NAME(SQ_SCHED), diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index ca8e739e76b9..b42c00f13132 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -158,7 +158,7 @@ int null_init_zoned_dev(struct nullb_device *dev, sector += dev->zone_size_sects; } - lim->features |= BLK_FEAT_ZONED; + lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL; lim->chunk_sectors = dev->zone_size_sects; lim->max_zone_append_sectors = dev->zone_append_max_sectors; lim->max_open_zones = dev->zone_max_open; @@ -171,7 +171,6 @@ int null_register_zoned_dev(struct nullb *nullb) struct request_queue *q = nullb->q; struct gendisk *disk = nullb->disk; - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); disk->nr_zones = bdev_nr_zones(disk->part0); pr_info("%s: using %s zone append\n", diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 69c16018cbb1..4fdff13fc23b 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -248,8 +248,6 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub) static void ublk_dev_param_zoned_apply(struct ublk_device *ub) { - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue); - ub->ub_disk->nr_zones = ublk_get_nr_zones(ub); } @@ -2196,7 +2194,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) return -EOPNOTSUPP; - lim.features |= BLK_FEAT_ZONED; + lim.features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL; lim.max_active_zones = p->max_active_zones; lim.max_open_zones = p->max_open_zones; lim.max_zone_append_sectors = p->max_zone_append_sectors; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index cea45b296f8b..6c64a67ab9c9 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -728,7 +728,7 @@ static int virtblk_read_zoned_limits(struct virtio_blk *vblk, dev_dbg(&vdev->dev, "probing host-managed zoned device\n"); - lim->features |= BLK_FEAT_ZONED; + lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL; virtio_cread(vdev, struct virtio_blk_config, zoned.max_open_zones, &v); @@ -1548,7 +1548,6 @@ static int virtblk_probe(struct virtio_device *vdev) */ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && (lim.features & BLK_FEAT_ZONED)) { - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue); err = blk_revalidate_disk_zones(vblk->disk); if (err) goto out_cleanup_disk; diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 06f2417aa50d..99bb89c2495a 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -108,13 +108,12 @@ free_data: void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim, struct nvme_zone_info *zi) { - lim->features |= BLK_FEAT_ZONED; + lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL; lim->max_open_zones = zi->max_open_zones; lim->max_active_zones = zi->max_active_zones; lim->max_zone_append_sectors = ns->ctrl->max_zone_append; lim->chunk_sectors = ns->head->zsze = nvme_lba_to_sect(ns->head, zi->zone_size); - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue); } static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns, diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index d3f84665946e..f7067afac79c 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -592,8 +592,6 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, u8 buf[SD_BUF_SIZE]) { - struct gendisk *disk = sdkp->disk; - struct request_queue *q = disk->queue; unsigned int nr_zones; u32 zone_blocks = 0; int ret; @@ -601,7 +599,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, if (sdkp->device->type != TYPE_ZBC) return 0; - lim->features |= BLK_FEAT_ZONED; + lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL; /* * Per ZBC and ZAC specifications, writes in sequential write required @@ -630,7 +628,6 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, sdkp->early_zone_info.zone_blocks = zone_blocks; /* The drive satisfies the kernel restrictions: set it up */ - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); if (sdkp->zones_max_open == U32_MAX) lim->max_open_zones = 0; else diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bdc30c1fb1b5..1077cb8d8fd8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -316,6 +316,9 @@ enum { /* is a zoned device */ BLK_FEAT_ZONED = (1u << 10), + + /* supports Zone Reset All */ + BLK_FEAT_ZONE_RESETALL = (1u << 11), }; /* @@ -586,7 +589,6 @@ struct request_queue { #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ #define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ -#define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ @@ -607,7 +609,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) #define blk_queue_zone_resetall(q) \ - test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) + ((q)->limits.features & BLK_FEAT_ZONE_RESETALL) #define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) #define blk_queue_pci_p2pdma(q) \ test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) -- cgit v1.2.3 From 339d3948c07b4aa2940aeb874294a7d6782cec16 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:53 +0200 Subject: block: move the bounce flag into the features field Move the bounce flag into the features field to reclaim a little bit of space. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-27-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 1 - block/blk.h | 2 +- drivers/scsi/scsi_lib.c | 2 +- include/linux/blkdev.h | 6 ++++-- 4 files changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/scsi') diff --git a/block/blk-settings.c b/block/blk-settings.c index 96e07f24bd9a..d0e9096f93ca 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -479,7 +479,6 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->max_write_zeroes_sectors); t->max_zone_append_sectors = min(queue_limits_max_zone_append_sectors(t), queue_limits_max_zone_append_sectors(b)); - t->bounce = max(t->bounce, b->bounce); t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask); diff --git a/block/blk.h b/block/blk.h index 79e8d5d4fe0c..fa32f7fad5d7 100644 --- a/block/blk.h +++ b/block/blk.h @@ -394,7 +394,7 @@ struct bio *__blk_queue_bounce(struct bio *bio, struct request_queue *q); static inline bool blk_queue_may_bounce(struct request_queue *q) { return IS_ENABLED(CONFIG_BOUNCE) && - q->limits.bounce == BLK_BOUNCE_HIGH && + (q->limits.features & BLK_FEAT_BOUNCE_HIGH) && max_low_pfn >= max_pfn; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 54f771ec8cfb..e2f7bfb2b9e4 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1986,7 +1986,7 @@ void scsi_init_limits(struct Scsi_Host *shost, struct queue_limits *lim) shost->dma_alignment, dma_get_cache_alignment() - 1); if (shost->no_highmem) - lim->bounce = BLK_BOUNCE_HIGH; + lim->features |= BLK_FEAT_BOUNCE_HIGH; dma_set_seg_boundary(dev, shost->dma_boundary); dma_set_max_seg_size(dev, shost->max_segment_size); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c433ebf6f20..e96ba7b97288 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -325,6 +325,9 @@ enum { /* skip this queue in blk_mq_(un)quiesce_tagset */ BLK_FEAT_SKIP_TAGSET_QUIESCE = (1u << 13), + + /* bounce all highmem pages */ + BLK_FEAT_BOUNCE_HIGH = (1u << 14), }; /* @@ -332,7 +335,7 @@ enum { */ #define BLK_FEAT_INHERIT_MASK \ (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ - BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED) + BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH) /* internal flags in queue_limits.flags */ enum { @@ -352,7 +355,6 @@ enum blk_bounce { struct queue_limits { unsigned int features; unsigned int flags; - enum blk_bounce bounce; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; -- cgit v1.2.3 From bf4ae8f2e6407a779c0368eb0f3e047a8333be17 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 20 Jun 2024 12:53:57 +0000 Subject: scsi: sd: Atomic write support Support is divided into two main areas: - reading VPD pages and setting sdev request_queue limits - support WRITE ATOMIC (16) command and tracing The relevant block limits VPD page need to be read to allow the block layer request_queue atomic write limits to be set. These VPD page limits are described in sbc4r22 section 6.6.4 - Block limits VPD page. There are five limits of interest: - MAXIMUM ATOMIC TRANSFER LENGTH - ATOMIC ALIGNMENT - ATOMIC TRANSFER LENGTH GRANULARITY - MAXIMUM ATOMIC TRANSFER LENGTH WITH BOUNDARY - MAXIMUM ATOMIC BOUNDARY SIZE MAXIMUM ATOMIC TRANSFER LENGTH is the maximum length for a WRITE ATOMIC (16) command. It will not be greater than the device MAXIMUM TRANSFER LENGTH. ATOMIC ALIGNMENT and ATOMIC TRANSFER LENGTH GRANULARITY are the minimum alignment and length values for an atomic write in terms of logical blocks. Unlike NVMe, SCSI does not specify an LBA space boundary, but does specify a per-IO boundary granularity. The maximum boundary size is specified in MAXIMUM ATOMIC BOUNDARY SIZE. When used, this boundary value is set in the WRITE ATOMIC (16) ATOMIC BOUNDARY field - layout for the WRITE_ATOMIC_16 command can be found in sbc4r22 section 5.48. This boundary value is the granularity size at which the device may atomically write the data. A value of zero in WRITE ATOMIC (16) ATOMIC BOUNDARY field means that all data must be atomically written together. MAXIMUM ATOMIC TRANSFER LENGTH WITH BOUNDARY is the maximum atomic write length if a non-zero boundary value is set. For atomic write support, the WRITE ATOMIC (16) boundary is not of much interest, as the block layer expects each request submitted to be executed atomically. However, the SCSI spec does leave itself open to a quirky scenario where MAXIMUM ATOMIC TRANSFER LENGTH is zero, yet MAXIMUM ATOMIC TRANSFER LENGTH WITH BOUNDARY and MAXIMUM ATOMIC BOUNDARY SIZE are both non-zero. This case will be supported. To set the block layer request_queue atomic write capabilities, sanitize the VPD page limits and set limits as follows: - atomic_write_unit_min is derived from granularity and alignment values. If no granularity value is not set, use physical block size - atomic_write_unit_max is derived from MAXIMUM ATOMIC TRANSFER LENGTH. In the scenario where MAXIMUM ATOMIC TRANSFER LENGTH is zero and boundary limits are non-zero, use MAXIMUM ATOMIC BOUNDARY SIZE for atomic_write_unit_max. New flag scsi_disk.use_atomic_write_boundary is set for this scenario. - atomic_write_boundary_bytes is set to zero always SCSI also supports a WRITE ATOMIC (32) command, which is for type 2 protection enabled. This is not going to be supported now, so check for T10_PI_TYPE2_PROTECTION when setting any request_queue limits. To handle an atomic write request, add support for WRITE ATOMIC (16) command in handler sd_setup_atomic_cmnd(). Flag use_atomic_write_boundary is checked here for encoding ATOMIC BOUNDARY field. Trace info is also added for WRITE_ATOMIC_16 command. Reviewed-by: Martin K. Petersen Signed-off-by: John Garry Acked-by: Darrick J. Wong Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20240620125359.2684798-9-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- drivers/scsi/scsi_trace.c | 22 +++++++++++ drivers/scsi/sd.c | 93 ++++++++++++++++++++++++++++++++++++++++++++- drivers/scsi/sd.h | 8 ++++ include/scsi/scsi_proto.h | 1 + include/trace/events/scsi.h | 1 + 5 files changed, 124 insertions(+), 1 deletion(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/scsi_trace.c b/drivers/scsi/scsi_trace.c index 41a950075913..3e47c4472a80 100644 --- a/drivers/scsi/scsi_trace.c +++ b/drivers/scsi/scsi_trace.c @@ -325,6 +325,26 @@ out: return ret; } +static const char * +scsi_trace_atomic_write16_out(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = trace_seq_buffer_ptr(p); + unsigned int boundary_size; + unsigned int nr_blocks; + sector_t lba; + + lba = get_unaligned_be64(&cdb[2]); + boundary_size = get_unaligned_be16(&cdb[10]); + nr_blocks = get_unaligned_be16(&cdb[12]); + + trace_seq_printf(p, "lba=%llu txlen=%u boundary_size=%u", + lba, nr_blocks, boundary_size); + + trace_seq_putc(p, 0); + + return ret; +} + static const char * scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len) { @@ -385,6 +405,8 @@ scsi_trace_parse_cdb(struct trace_seq *p, unsigned char *cdb, int len) return scsi_trace_zbc_in(p, cdb, len); case ZBC_OUT: return scsi_trace_zbc_out(p, cdb, len); + case WRITE_ATOMIC_16: + return scsi_trace_atomic_write16_out(p, cdb, len); default: return scsi_trace_misc(p, cdb, len); } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index a27f1c7f1b61..525f48c97f5e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -939,6 +939,64 @@ static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) return scsi_alloc_sgtables(cmd); } +static void sd_config_atomic(struct scsi_disk *sdkp, struct queue_limits *lim) +{ + unsigned int logical_block_size = sdkp->device->sector_size, + physical_block_size_sectors, max_atomic, unit_min, unit_max; + + if ((!sdkp->max_atomic && !sdkp->max_atomic_with_boundary) || + sdkp->protection_type == T10_PI_TYPE2_PROTECTION) + return; + + physical_block_size_sectors = sdkp->physical_block_size / + sdkp->device->sector_size; + + unit_min = rounddown_pow_of_two(sdkp->atomic_granularity ? + sdkp->atomic_granularity : + physical_block_size_sectors); + + /* + * Only use atomic boundary when we have the odd scenario of + * sdkp->max_atomic == 0, which the spec does permit. + */ + if (sdkp->max_atomic) { + max_atomic = sdkp->max_atomic; + unit_max = rounddown_pow_of_two(sdkp->max_atomic); + sdkp->use_atomic_write_boundary = 0; + } else { + max_atomic = sdkp->max_atomic_with_boundary; + unit_max = rounddown_pow_of_two(sdkp->max_atomic_boundary); + sdkp->use_atomic_write_boundary = 1; + } + + /* + * Ensure compliance with granularity and alignment. For now, keep it + * simple and just don't support atomic writes for values mismatched + * with max_{boundary}atomic, physical block size, and + * atomic_granularity itself. + * + * We're really being distrustful by checking unit_max also... + */ + if (sdkp->atomic_granularity > 1) { + if (unit_min > 1 && unit_min % sdkp->atomic_granularity) + return; + if (unit_max > 1 && unit_max % sdkp->atomic_granularity) + return; + } + + if (sdkp->atomic_alignment > 1) { + if (unit_min > 1 && unit_min % sdkp->atomic_alignment) + return; + if (unit_max > 1 && unit_max % sdkp->atomic_alignment) + return; + } + + lim->atomic_write_hw_max = max_atomic * logical_block_size; + lim->atomic_write_hw_boundary = 0; + lim->atomic_write_hw_unit_min = unit_min * logical_block_size; + lim->atomic_write_hw_unit_max = unit_max * logical_block_size; +} + static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap) { @@ -1237,6 +1295,26 @@ static int sd_cdl_dld(struct scsi_disk *sdkp, struct scsi_cmnd *scmd) return (hint - IOPRIO_HINT_DEV_DURATION_LIMIT_1) + 1; } +static blk_status_t sd_setup_atomic_cmnd(struct scsi_cmnd *cmd, + sector_t lba, unsigned int nr_blocks, + bool boundary, unsigned char flags) +{ + cmd->cmd_len = 16; + cmd->cmnd[0] = WRITE_ATOMIC_16; + cmd->cmnd[1] = flags; + put_unaligned_be64(lba, &cmd->cmnd[2]); + put_unaligned_be16(nr_blocks, &cmd->cmnd[12]); + if (boundary) + put_unaligned_be16(nr_blocks, &cmd->cmnd[10]); + else + put_unaligned_be16(0, &cmd->cmnd[10]); + put_unaligned_be16(nr_blocks, &cmd->cmnd[12]); + cmd->cmnd[14] = 0; + cmd->cmnd[15] = 0; + + return BLK_STS_OK; +} + static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) { struct request *rq = scsi_cmd_to_rq(cmd); @@ -1302,6 +1380,10 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) { ret = sd_setup_rw32_cmnd(cmd, write, lba, nr_blocks, protect | fua, dld); + } else if (rq->cmd_flags & REQ_ATOMIC && write) { + ret = sd_setup_atomic_cmnd(cmd, lba, nr_blocks, + sdkp->use_atomic_write_boundary, + protect | fua); } else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) { ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks, protect | fua, dld); @@ -3264,7 +3346,7 @@ static void sd_read_block_limits(struct scsi_disk *sdkp, sdkp->max_ws_blocks = (u32)get_unaligned_be64(&vpd->data[36]); if (!sdkp->lbpme) - goto out; + goto config_atomic; lba_count = get_unaligned_be32(&vpd->data[20]); desc_count = get_unaligned_be32(&vpd->data[24]); @@ -3279,6 +3361,15 @@ static void sd_read_block_limits(struct scsi_disk *sdkp, get_unaligned_be32(&vpd->data[32]) & ~(1 << 31); sd_config_discard(sdkp, lim, sd_discard_mode(sdkp)); + +config_atomic: + sdkp->max_atomic = get_unaligned_be32(&vpd->data[44]); + sdkp->atomic_alignment = get_unaligned_be32(&vpd->data[48]); + sdkp->atomic_granularity = get_unaligned_be32(&vpd->data[52]); + sdkp->max_atomic_with_boundary = get_unaligned_be32(&vpd->data[56]); + sdkp->max_atomic_boundary = get_unaligned_be32(&vpd->data[60]); + + sd_config_atomic(sdkp, lim); } out: diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 7603b3c67b23..36382eca941c 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -115,6 +115,13 @@ struct scsi_disk { u32 max_unmap_blocks; u32 unmap_granularity; u32 unmap_alignment; + + u32 max_atomic; + u32 atomic_alignment; + u32 atomic_granularity; + u32 max_atomic_with_boundary; + u32 max_atomic_boundary; + u32 index; unsigned int physical_block_size; unsigned int max_medium_access_timeouts; @@ -148,6 +155,7 @@ struct scsi_disk { unsigned security : 1; unsigned ignore_medium_access_errors : 1; unsigned rscs : 1; /* reduced stream control support */ + unsigned use_atomic_write_boundary : 1; }; #define to_scsi_disk(obj) container_of(obj, struct scsi_disk, disk_dev) diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h index 843106e1109f..70e1262b2e20 100644 --- a/include/scsi/scsi_proto.h +++ b/include/scsi/scsi_proto.h @@ -120,6 +120,7 @@ #define WRITE_SAME_16 0x93 #define ZBC_OUT 0x94 #define ZBC_IN 0x95 +#define WRITE_ATOMIC_16 0x9c #define SERVICE_ACTION_BIDIRECTIONAL 0x9d #define SERVICE_ACTION_IN_16 0x9e #define SERVICE_ACTION_OUT_16 0x9f diff --git a/include/trace/events/scsi.h b/include/trace/events/scsi.h index 8e2d9b1b0e77..05f1945ed204 100644 --- a/include/trace/events/scsi.h +++ b/include/trace/events/scsi.h @@ -102,6 +102,7 @@ scsi_opcode_name(WRITE_32), \ scsi_opcode_name(WRITE_SAME_32), \ scsi_opcode_name(ATA_16), \ + scsi_opcode_name(WRITE_ATOMIC_16), \ scsi_opcode_name(ATA_12)) #define scsi_hostbyte_name(result) { result, #result } -- cgit v1.2.3 From 84f3a3c01d70efba736bc42155cf32722067b327 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 20 Jun 2024 12:53:58 +0000 Subject: scsi: scsi_debug: Atomic write support Add initial support for atomic writes. As is standard method, feed device properties via modules param, those being: - atomic_max_size_blks - atomic_alignment_blks - atomic_granularity_blks - atomic_max_size_with_boundary_blks - atomic_max_boundary_blks These just match sbc4r22 section 6.6.4 - Block limits VPD page. We just support ATOMIC WRITE (16). The major change in the driver is how we lock the device for RW accesses. Currently the driver uses a per-device lock for accessing device metadata and "media" data (calls to do_device_access()) atomically for the duration of the whole read/write command. This should not suit verifying atomic writes. Reason being that currently all reads/writes are atomic, so using atomic writes does not prove anything. Change device access model to basis that regular writes only atomic on a per-sector basis, while reads and atomic writes are fully atomic. As mentioned, since accessing metadata and device media is atomic, continue to have regular writes involving metadata - like discard or PI - as atomic. We can improve this later. Currently we only support model where overlapping going reads or writes wait for current access to complete before commencing an atomic write. This is described in 4.29.3.2 section of the SBC. However, we simplify, things and wait for all accesses to complete (when issuing an atomic write). Reviewed-by: Martin K. Petersen Signed-off-by: John Garry Acked-by: Darrick J. Wong Reviewed-by: Darrick J. Wong Reviewed-by: Luis Chamberlain Link: https://lore.kernel.org/r/20240620125359.2684798-10-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- drivers/scsi/scsi_debug.c | 588 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 454 insertions(+), 134 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index acf0592d63da..6b8397b3a55f 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -69,6 +69,8 @@ static const char *sdebug_version_date = "20210520"; /* Additional Sense Code (ASC) */ #define NO_ADDITIONAL_SENSE 0x0 +#define OVERLAP_ATOMIC_COMMAND_ASC 0x0 +#define OVERLAP_ATOMIC_COMMAND_ASCQ 0x23 #define LOGICAL_UNIT_NOT_READY 0x4 #define LOGICAL_UNIT_COMMUNICATION_FAILURE 0x8 #define UNRECOVERED_READ_ERR 0x11 @@ -103,6 +105,7 @@ static const char *sdebug_version_date = "20210520"; #define READ_BOUNDARY_ASCQ 0x7 #define ATTEMPT_ACCESS_GAP 0x9 #define INSUFF_ZONE_ASCQ 0xe +/* see drivers/scsi/sense_codes.h */ /* Additional Sense Code Qualifier (ASCQ) */ #define ACK_NAK_TO 0x3 @@ -152,6 +155,12 @@ static const char *sdebug_version_date = "20210520"; #define DEF_VIRTUAL_GB 0 #define DEF_VPD_USE_HOSTNO 1 #define DEF_WRITESAME_LENGTH 0xFFFF +#define DEF_ATOMIC_WR 0 +#define DEF_ATOMIC_WR_MAX_LENGTH 8192 +#define DEF_ATOMIC_WR_ALIGN 2 +#define DEF_ATOMIC_WR_GRAN 2 +#define DEF_ATOMIC_WR_MAX_LENGTH_BNDRY (DEF_ATOMIC_WR_MAX_LENGTH) +#define DEF_ATOMIC_WR_MAX_BNDRY 128 #define DEF_STRICT 0 #define DEF_STATISTICS false #define DEF_SUBMIT_QUEUES 1 @@ -374,7 +383,9 @@ struct sdebug_host_info { /* There is an xarray of pointers to this struct's objects, one per host */ struct sdeb_store_info { - rwlock_t macc_lck; /* for atomic media access on this store */ + rwlock_t macc_data_lck; /* for media data access on this store */ + rwlock_t macc_meta_lck; /* for atomic media meta access on this store */ + rwlock_t macc_sector_lck; /* per-sector media data access on this store */ u8 *storep; /* user data storage (ram) */ struct t10_pi_tuple *dif_storep; /* protection info */ void *map_storep; /* provisioning map */ @@ -398,12 +409,20 @@ struct sdebug_defer { enum sdeb_defer_type defer_t; }; +struct sdebug_device_access_info { + bool atomic_write; + u64 lba; + u32 num; + struct scsi_cmnd *self; +}; + struct sdebug_queued_cmd { /* corresponding bit set in in_use_bm[] in owning struct sdebug_queue * instance indicates this slot is in use. */ struct sdebug_defer sd_dp; struct scsi_cmnd *scmd; + struct sdebug_device_access_info *i; }; struct sdebug_scsi_cmd { @@ -463,7 +482,8 @@ enum sdeb_opcode_index { SDEB_I_PRE_FETCH = 29, /* 10, 16 */ SDEB_I_ZONE_OUT = 30, /* 0x94+SA; includes no data xfer */ SDEB_I_ZONE_IN = 31, /* 0x95+SA; all have data-in */ - SDEB_I_LAST_ELEM_P1 = 32, /* keep this last (previous + 1) */ + SDEB_I_ATOMIC_WRITE_16 = 32, + SDEB_I_LAST_ELEM_P1 = 33, /* keep this last (previous + 1) */ }; @@ -497,7 +517,8 @@ static const unsigned char opcode_ind_arr[256] = { 0, 0, 0, SDEB_I_VERIFY, SDEB_I_PRE_FETCH, SDEB_I_SYNC_CACHE, 0, SDEB_I_WRITE_SAME, SDEB_I_ZONE_OUT, SDEB_I_ZONE_IN, 0, 0, - 0, 0, 0, 0, 0, 0, SDEB_I_SERV_ACT_IN_16, SDEB_I_SERV_ACT_OUT_16, + 0, 0, 0, 0, + SDEB_I_ATOMIC_WRITE_16, 0, SDEB_I_SERV_ACT_IN_16, SDEB_I_SERV_ACT_OUT_16, /* 0xa0; 0xa0->0xbf: 12 byte cdbs */ SDEB_I_REPORT_LUNS, SDEB_I_ATA_PT, 0, SDEB_I_MAINT_IN, SDEB_I_MAINT_OUT, 0, 0, 0, @@ -547,6 +568,7 @@ static int resp_write_buffer(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_sync_cache(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_pre_fetch(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_report_zones(struct scsi_cmnd *, struct sdebug_dev_info *); +static int resp_atomic_write(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_open_zone(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_close_zone(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_finish_zone(struct scsi_cmnd *, struct sdebug_dev_info *); @@ -788,6 +810,11 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEM_P1 + 1] = { resp_report_zones, zone_in_iarr, /* ZONE_IN(16), REPORT ZONES) */ {16, 0x0 /* SA */, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xbf, 0xc7} }, +/* 31 */ + {0, 0x0, 0x0, F_D_OUT | FF_MEDIA_IO, + resp_atomic_write, NULL, /* ATOMIC WRITE 16 */ + {16, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} }, /* sentinel */ {0xff, 0, 0, 0, NULL, NULL, /* terminating element */ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, @@ -835,6 +862,13 @@ static unsigned int sdebug_unmap_granularity = DEF_UNMAP_GRANULARITY; static unsigned int sdebug_unmap_max_blocks = DEF_UNMAP_MAX_BLOCKS; static unsigned int sdebug_unmap_max_desc = DEF_UNMAP_MAX_DESC; static unsigned int sdebug_write_same_length = DEF_WRITESAME_LENGTH; +static unsigned int sdebug_atomic_wr = DEF_ATOMIC_WR; +static unsigned int sdebug_atomic_wr_max_length = DEF_ATOMIC_WR_MAX_LENGTH; +static unsigned int sdebug_atomic_wr_align = DEF_ATOMIC_WR_ALIGN; +static unsigned int sdebug_atomic_wr_gran = DEF_ATOMIC_WR_GRAN; +static unsigned int sdebug_atomic_wr_max_length_bndry = + DEF_ATOMIC_WR_MAX_LENGTH_BNDRY; +static unsigned int sdebug_atomic_wr_max_bndry = DEF_ATOMIC_WR_MAX_BNDRY; static int sdebug_uuid_ctl = DEF_UUID_CTL; static bool sdebug_random = DEF_RANDOM; static bool sdebug_per_host_store = DEF_PER_HOST_STORE; @@ -1188,6 +1222,11 @@ static inline bool scsi_debug_lbp(void) (sdebug_lbpu || sdebug_lbpws || sdebug_lbpws10); } +static inline bool scsi_debug_atomic_write(void) +{ + return sdebug_fake_rw == 0 && sdebug_atomic_wr; +} + static void *lba2fake_store(struct sdeb_store_info *sip, unsigned long long lba) { @@ -1815,6 +1854,14 @@ static int inquiry_vpd_b0(unsigned char *arr) /* Maximum WRITE SAME Length */ put_unaligned_be64(sdebug_write_same_length, &arr[32]); + if (sdebug_atomic_wr) { + put_unaligned_be32(sdebug_atomic_wr_max_length, &arr[40]); + put_unaligned_be32(sdebug_atomic_wr_align, &arr[44]); + put_unaligned_be32(sdebug_atomic_wr_gran, &arr[48]); + put_unaligned_be32(sdebug_atomic_wr_max_length_bndry, &arr[52]); + put_unaligned_be32(sdebug_atomic_wr_max_bndry, &arr[56]); + } + return 0x3c; /* Mandatory page length for Logical Block Provisioning */ } @@ -3377,16 +3424,238 @@ static inline struct sdeb_store_info *devip2sip(struct sdebug_dev_info *devip, return xa_load(per_store_ap, devip->sdbg_host->si_idx); } +static inline void +sdeb_read_lock(rwlock_t *lock) +{ + if (sdebug_no_rwlock) + __acquire(lock); + else + read_lock(lock); +} + +static inline void +sdeb_read_unlock(rwlock_t *lock) +{ + if (sdebug_no_rwlock) + __release(lock); + else + read_unlock(lock); +} + +static inline void +sdeb_write_lock(rwlock_t *lock) +{ + if (sdebug_no_rwlock) + __acquire(lock); + else + write_lock(lock); +} + +static inline void +sdeb_write_unlock(rwlock_t *lock) +{ + if (sdebug_no_rwlock) + __release(lock); + else + write_unlock(lock); +} + +static inline void +sdeb_data_read_lock(struct sdeb_store_info *sip) +{ + BUG_ON(!sip); + + sdeb_read_lock(&sip->macc_data_lck); +} + +static inline void +sdeb_data_read_unlock(struct sdeb_store_info *sip) +{ + BUG_ON(!sip); + + sdeb_read_unlock(&sip->macc_data_lck); +} + +static inline void +sdeb_data_write_lock(struct sdeb_store_info *sip) +{ + BUG_ON(!sip); + + sdeb_write_lock(&sip->macc_data_lck); +} + +static inline void +sdeb_data_write_unlock(struct sdeb_store_info *sip) +{ + BUG_ON(!sip); + + sdeb_write_unlock(&sip->macc_data_lck); +} + +static inline void +sdeb_data_sector_read_lock(struct sdeb_store_info *sip) +{ + BUG_ON(!sip); + + sdeb_read_lock(&sip->macc_sector_lck); +} + +static inline void +sdeb_data_sector_read_unlock(struct sdeb_store_info *sip) +{ + BUG_ON(!sip); + + sdeb_read_unlock(&sip->macc_sector_lck); +} + +static inline void +sdeb_data_sector_write_lock(struct sdeb_store_info *sip) +{ + BUG_ON(!sip); + + sdeb_write_lock(&sip->macc_sector_lck); +} + +static inline void +sdeb_data_sector_write_unlock(struct sdeb_store_info *sip) +{ + BUG_ON(!sip); + + sdeb_write_unlock(&sip->macc_sector_lck); +} + +/* + * Atomic locking: + * We simplify the atomic model to allow only 1x atomic write and many non- + * atomic reads or writes for all LBAs. + + * A RW lock has a similar bahaviour: + * Only 1x writer and many readers. + + * So use a RW lock for per-device read and write locking: + * An atomic access grabs the lock as a writer and non-atomic grabs the lock + * as a reader. + */ + +static inline void +sdeb_data_lock(struct sdeb_store_info *sip, bool atomic) +{ + if (atomic) + sdeb_data_write_lock(sip); + else + sdeb_data_read_lock(sip); +} + +static inline void +sdeb_data_unlock(struct sdeb_store_info *sip, bool atomic) +{ + if (atomic) + sdeb_data_write_unlock(sip); + else + sdeb_data_read_unlock(sip); +} + +/* Allow many reads but only 1x write per sector */ +static inline void +sdeb_data_sector_lock(struct sdeb_store_info *sip, bool do_write) +{ + if (do_write) + sdeb_data_sector_write_lock(sip); + else + sdeb_data_sector_read_lock(sip); +} + +static inline void +sdeb_data_sector_unlock(struct sdeb_store_info *sip, bool do_write) +{ + if (do_write) + sdeb_data_sector_write_unlock(sip); + else + sdeb_data_sector_read_unlock(sip); +} + +static inline void +sdeb_meta_read_lock(struct sdeb_store_info *sip) +{ + if (sdebug_no_rwlock) { + if (sip) + __acquire(&sip->macc_meta_lck); + else + __acquire(&sdeb_fake_rw_lck); + } else { + if (sip) + read_lock(&sip->macc_meta_lck); + else + read_lock(&sdeb_fake_rw_lck); + } +} + +static inline void +sdeb_meta_read_unlock(struct sdeb_store_info *sip) +{ + if (sdebug_no_rwlock) { + if (sip) + __release(&sip->macc_meta_lck); + else + __release(&sdeb_fake_rw_lck); + } else { + if (sip) + read_unlock(&sip->macc_meta_lck); + else + read_unlock(&sdeb_fake_rw_lck); + } +} + +static inline void +sdeb_meta_write_lock(struct sdeb_store_info *sip) +{ + if (sdebug_no_rwlock) { + if (sip) + __acquire(&sip->macc_meta_lck); + else + __acquire(&sdeb_fake_rw_lck); + } else { + if (sip) + write_lock(&sip->macc_meta_lck); + else + write_lock(&sdeb_fake_rw_lck); + } +} + +static inline void +sdeb_meta_write_unlock(struct sdeb_store_info *sip) +{ + if (sdebug_no_rwlock) { + if (sip) + __release(&sip->macc_meta_lck); + else + __release(&sdeb_fake_rw_lck); + } else { + if (sip) + write_unlock(&sip->macc_meta_lck); + else + write_unlock(&sdeb_fake_rw_lck); + } +} + /* Returns number of bytes copied or -1 if error. */ static int do_device_access(struct sdeb_store_info *sip, struct scsi_cmnd *scp, - u32 sg_skip, u64 lba, u32 num, bool do_write, - u8 group_number) + u32 sg_skip, u64 lba, u32 num, u8 group_number, + bool do_write, bool atomic) { int ret; - u64 block, rest = 0; + u64 block; enum dma_data_direction dir; struct scsi_data_buffer *sdb = &scp->sdb; u8 *fsp; + int i; + + /* + * Even though reads are inherently atomic (in this driver), we expect + * the atomic flag only for writes. + */ + if (!do_write && atomic) + return -1; if (do_write) { dir = DMA_TO_DEVICE; @@ -3406,21 +3675,26 @@ static int do_device_access(struct sdeb_store_info *sip, struct scsi_cmnd *scp, fsp = sip->storep; block = do_div(lba, sdebug_store_sectors); - if (block + num > sdebug_store_sectors) - rest = block + num - sdebug_store_sectors; - ret = sg_copy_buffer(sdb->table.sgl, sdb->table.nents, + /* Only allow 1x atomic write or multiple non-atomic writes at any given time */ + sdeb_data_lock(sip, atomic); + for (i = 0; i < num; i++) { + /* We shouldn't need to lock for atomic writes, but do it anyway */ + sdeb_data_sector_lock(sip, do_write); + ret = sg_copy_buffer(sdb->table.sgl, sdb->table.nents, fsp + (block * sdebug_sector_size), - (num - rest) * sdebug_sector_size, sg_skip, do_write); - if (ret != (num - rest) * sdebug_sector_size) - return ret; - - if (rest) { - ret += sg_copy_buffer(sdb->table.sgl, sdb->table.nents, - fsp, rest * sdebug_sector_size, - sg_skip + ((num - rest) * sdebug_sector_size), - do_write); + sdebug_sector_size, sg_skip, do_write); + sdeb_data_sector_unlock(sip, do_write); + if (ret != sdebug_sector_size) { + ret += (i * sdebug_sector_size); + break; + } + sg_skip += sdebug_sector_size; + if (++block >= sdebug_store_sectors) + block = 0; } + ret = num * sdebug_sector_size; + sdeb_data_unlock(sip, atomic); return ret; } @@ -3596,70 +3870,6 @@ static int prot_verify_read(struct scsi_cmnd *scp, sector_t start_sec, return ret; } -static inline void -sdeb_read_lock(struct sdeb_store_info *sip) -{ - if (sdebug_no_rwlock) { - if (sip) - __acquire(&sip->macc_lck); - else - __acquire(&sdeb_fake_rw_lck); - } else { - if (sip) - read_lock(&sip->macc_lck); - else - read_lock(&sdeb_fake_rw_lck); - } -} - -static inline void -sdeb_read_unlock(struct sdeb_store_info *sip) -{ - if (sdebug_no_rwlock) { - if (sip) - __release(&sip->macc_lck); - else - __release(&sdeb_fake_rw_lck); - } else { - if (sip) - read_unlock(&sip->macc_lck); - else - read_unlock(&sdeb_fake_rw_lck); - } -} - -static inline void -sdeb_write_lock(struct sdeb_store_info *sip) -{ - if (sdebug_no_rwlock) { - if (sip) - __acquire(&sip->macc_lck); - else - __acquire(&sdeb_fake_rw_lck); - } else { - if (sip) - write_lock(&sip->macc_lck); - else - write_lock(&sdeb_fake_rw_lck); - } -} - -static inline void -sdeb_write_unlock(struct sdeb_store_info *sip) -{ - if (sdebug_no_rwlock) { - if (sip) - __release(&sip->macc_lck); - else - __release(&sdeb_fake_rw_lck); - } else { - if (sip) - write_unlock(&sip->macc_lck); - else - write_unlock(&sdeb_fake_rw_lck); - } -} - static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) { bool check_prot; @@ -3669,6 +3879,7 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) u64 lba; struct sdeb_store_info *sip = devip2sip(devip, true); u8 *cmd = scp->cmnd; + bool meta_data_locked = false; switch (cmd[0]) { case READ_16: @@ -3727,6 +3938,10 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) atomic_set(&sdeb_inject_pending, 0); } + /* + * When checking device access params, for reads we only check data + * versus what is set at init time, so no need to lock. + */ ret = check_device_access_params(scp, lba, num, false); if (ret) return ret; @@ -3746,29 +3961,33 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) return check_condition_result; } - sdeb_read_lock(sip); + if (sdebug_dev_is_zoned(devip) || + (sdebug_dix && scsi_prot_sg_count(scp))) { + sdeb_meta_read_lock(sip); + meta_data_locked = true; + } /* DIX + T10 DIF */ if (unlikely(sdebug_dix && scsi_prot_sg_count(scp))) { switch (prot_verify_read(scp, lba, num, ei_lba)) { case 1: /* Guard tag error */ if (cmd[1] >> 5 != 3) { /* RDPROTECT != 3 */ - sdeb_read_unlock(sip); + sdeb_meta_read_unlock(sip); mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, 1); return check_condition_result; } else if (scp->prot_flags & SCSI_PROT_GUARD_CHECK) { - sdeb_read_unlock(sip); + sdeb_meta_read_unlock(sip); mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, 1); return illegal_condition_result; } break; case 3: /* Reference tag error */ if (cmd[1] >> 5 != 3) { /* RDPROTECT != 3 */ - sdeb_read_unlock(sip); + sdeb_meta_read_unlock(sip); mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, 3); return check_condition_result; } else if (scp->prot_flags & SCSI_PROT_REF_CHECK) { - sdeb_read_unlock(sip); + sdeb_meta_read_unlock(sip); mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, 3); return illegal_condition_result; } @@ -3776,8 +3995,9 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) } } - ret = do_device_access(sip, scp, 0, lba, num, false, 0); - sdeb_read_unlock(sip); + ret = do_device_access(sip, scp, 0, lba, num, 0, false, false); + if (meta_data_locked) + sdeb_meta_read_unlock(sip); if (unlikely(ret == -1)) return DID_ERROR << 16; @@ -3967,6 +4187,7 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) u64 lba; struct sdeb_store_info *sip = devip2sip(devip, true); u8 *cmd = scp->cmnd; + bool meta_data_locked = false; switch (cmd[0]) { case WRITE_16: @@ -4025,10 +4246,17 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) "to DIF device\n"); } - sdeb_write_lock(sip); + if (sdebug_dev_is_zoned(devip) || + (sdebug_dix && scsi_prot_sg_count(scp)) || + scsi_debug_lbp()) { + sdeb_meta_write_lock(sip); + meta_data_locked = true; + } + ret = check_device_access_params(scp, lba, num, true); if (ret) { - sdeb_write_unlock(sip); + if (meta_data_locked) + sdeb_meta_write_unlock(sip); return ret; } @@ -4037,22 +4265,22 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) switch (prot_verify_write(scp, lba, num, ei_lba)) { case 1: /* Guard tag error */ if (scp->prot_flags & SCSI_PROT_GUARD_CHECK) { - sdeb_write_unlock(sip); + sdeb_meta_write_unlock(sip); mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, 1); return illegal_condition_result; } else if (scp->cmnd[1] >> 5 != 3) { /* WRPROTECT != 3 */ - sdeb_write_unlock(sip); + sdeb_meta_write_unlock(sip); mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, 1); return check_condition_result; } break; case 3: /* Reference tag error */ if (scp->prot_flags & SCSI_PROT_REF_CHECK) { - sdeb_write_unlock(sip); + sdeb_meta_write_unlock(sip); mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, 3); return illegal_condition_result; } else if (scp->cmnd[1] >> 5 != 3) { /* WRPROTECT != 3 */ - sdeb_write_unlock(sip); + sdeb_meta_write_unlock(sip); mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, 3); return check_condition_result; } @@ -4060,13 +4288,16 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) } } - ret = do_device_access(sip, scp, 0, lba, num, true, group); + ret = do_device_access(sip, scp, 0, lba, num, group, true, false); if (unlikely(scsi_debug_lbp())) map_region(sip, lba, num); + /* If ZBC zone then bump its write pointer */ if (sdebug_dev_is_zoned(devip)) zbc_inc_wp(devip, lba, num); - sdeb_write_unlock(sip); + if (meta_data_locked) + sdeb_meta_write_unlock(sip); + if (unlikely(-1 == ret)) return DID_ERROR << 16; else if (unlikely(sdebug_verbose && @@ -4176,7 +4407,8 @@ static int resp_write_scat(struct scsi_cmnd *scp, goto err_out; } - sdeb_write_lock(sip); + /* Just keep it simple and always lock for now */ + sdeb_meta_write_lock(sip); sg_off = lbdof_blen; /* Spec says Buffer xfer Length field in number of LBs in dout */ cum_lb = 0; @@ -4219,7 +4451,11 @@ static int resp_write_scat(struct scsi_cmnd *scp, } } - ret = do_device_access(sip, scp, sg_off, lba, num, true, group); + /* + * Write ranges atomically to keep as close to pre-atomic + * writes behaviour as possible. + */ + ret = do_device_access(sip, scp, sg_off, lba, num, group, true, true); /* If ZBC zone then bump its write pointer */ if (sdebug_dev_is_zoned(devip)) zbc_inc_wp(devip, lba, num); @@ -4258,7 +4494,7 @@ static int resp_write_scat(struct scsi_cmnd *scp, } ret = 0; err_out_unlock: - sdeb_write_unlock(sip); + sdeb_meta_write_unlock(sip); err_out: kfree(lrdp); return ret; @@ -4277,14 +4513,16 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, scp->device->hostdata, true); u8 *fs1p; u8 *fsp; + bool meta_data_locked = false; - sdeb_write_lock(sip); + if (sdebug_dev_is_zoned(devip) || scsi_debug_lbp()) { + sdeb_meta_write_lock(sip); + meta_data_locked = true; + } ret = check_device_access_params(scp, lba, num, true); - if (ret) { - sdeb_write_unlock(sip); - return ret; - } + if (ret) + goto out; if (unmap && scsi_debug_lbp()) { unmap_region(sip, lba, num); @@ -4295,6 +4533,7 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, /* if ndob then zero 1 logical block, else fetch 1 logical block */ fsp = sip->storep; fs1p = fsp + (block * lb_size); + sdeb_data_write_lock(sip); if (ndob) { memset(fs1p, 0, lb_size); ret = 0; @@ -4302,8 +4541,8 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, ret = fetch_to_dev_buffer(scp, fs1p, lb_size); if (-1 == ret) { - sdeb_write_unlock(sip); - return DID_ERROR << 16; + ret = DID_ERROR << 16; + goto out; } else if (sdebug_verbose && !ndob && (ret < lb_size)) sdev_printk(KERN_INFO, scp->device, "%s: %s: lb size=%u, IO sent=%d bytes\n", @@ -4320,10 +4559,12 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, /* If ZBC zone then bump its write pointer */ if (sdebug_dev_is_zoned(devip)) zbc_inc_wp(devip, lba, num); + sdeb_data_write_unlock(sip); + ret = 0; out: - sdeb_write_unlock(sip); - - return 0; + if (meta_data_locked) + sdeb_meta_write_unlock(sip); + return ret; } static int resp_write_same_10(struct scsi_cmnd *scp, @@ -4466,25 +4707,30 @@ static int resp_comp_write(struct scsi_cmnd *scp, return check_condition_result; } - sdeb_write_lock(sip); - ret = do_dout_fetch(scp, dnum, arr); if (ret == -1) { retval = DID_ERROR << 16; - goto cleanup; + goto cleanup_free; } else if (sdebug_verbose && (ret < (dnum * lb_size))) sdev_printk(KERN_INFO, scp->device, "%s: compare_write: cdb " "indicated=%u, IO sent=%d bytes\n", my_name, dnum * lb_size, ret); + + sdeb_data_write_lock(sip); + sdeb_meta_write_lock(sip); if (!comp_write_worker(sip, lba, num, arr, false)) { mk_sense_buffer(scp, MISCOMPARE, MISCOMPARE_VERIFY_ASC, 0); retval = check_condition_result; - goto cleanup; + goto cleanup_unlock; } + + /* Cover sip->map_storep (which map_region()) sets with data lock */ if (scsi_debug_lbp()) map_region(sip, lba, num); -cleanup: - sdeb_write_unlock(sip); +cleanup_unlock: + sdeb_meta_write_unlock(sip); + sdeb_data_write_unlock(sip); +cleanup_free: kfree(arr); return retval; } @@ -4528,7 +4774,7 @@ static int resp_unmap(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) desc = (void *)&buf[8]; - sdeb_write_lock(sip); + sdeb_meta_write_lock(sip); for (i = 0 ; i < descriptors ; i++) { unsigned long long lba = get_unaligned_be64(&desc[i].lba); @@ -4544,7 +4790,7 @@ static int resp_unmap(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) ret = 0; out: - sdeb_write_unlock(sip); + sdeb_meta_write_unlock(sip); kfree(buf); return ret; @@ -4702,12 +4948,13 @@ static int resp_pre_fetch(struct scsi_cmnd *scp, rest = block + nblks - sdebug_store_sectors; /* Try to bring the PRE-FETCH range into CPU's cache */ - sdeb_read_lock(sip); + sdeb_data_read_lock(sip); prefetch_range(fsp + (sdebug_sector_size * block), (nblks - rest) * sdebug_sector_size); if (rest) prefetch_range(fsp, rest * sdebug_sector_size); - sdeb_read_unlock(sip); + + sdeb_data_read_unlock(sip); fini: if (cmd[1] & 0x2) res = SDEG_RES_IMMED_MASK; @@ -4866,7 +5113,7 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) return check_condition_result; } /* Not changing store, so only need read access */ - sdeb_read_lock(sip); + sdeb_data_read_lock(sip); ret = do_dout_fetch(scp, a_num, arr); if (ret == -1) { @@ -4888,7 +5135,7 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) goto cleanup; } cleanup: - sdeb_read_unlock(sip); + sdeb_data_read_unlock(sip); kfree(arr); return ret; } @@ -4934,7 +5181,7 @@ static int resp_report_zones(struct scsi_cmnd *scp, return check_condition_result; } - sdeb_read_lock(sip); + sdeb_meta_read_lock(sip); desc = arr + 64; for (lba = zs_lba; lba < sdebug_capacity; @@ -5032,11 +5279,70 @@ static int resp_report_zones(struct scsi_cmnd *scp, ret = fill_from_dev_buffer(scp, arr, min_t(u32, alloc_len, rep_len)); fini: - sdeb_read_unlock(sip); + sdeb_meta_read_unlock(sip); kfree(arr); return ret; } +static int resp_atomic_write(struct scsi_cmnd *scp, + struct sdebug_dev_info *devip) +{ + struct sdeb_store_info *sip; + u8 *cmd = scp->cmnd; + u16 boundary, len; + u64 lba, lba_tmp; + int ret; + + if (!scsi_debug_atomic_write()) { + mk_sense_invalid_opcode(scp); + return check_condition_result; + } + + sip = devip2sip(devip, true); + + lba = get_unaligned_be64(cmd + 2); + boundary = get_unaligned_be16(cmd + 10); + len = get_unaligned_be16(cmd + 12); + + lba_tmp = lba; + if (sdebug_atomic_wr_align && + do_div(lba_tmp, sdebug_atomic_wr_align)) { + /* Does not meet alignment requirement */ + mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0); + return check_condition_result; + } + + if (sdebug_atomic_wr_gran && len % sdebug_atomic_wr_gran) { + /* Does not meet alignment requirement */ + mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0); + return check_condition_result; + } + + if (boundary > 0) { + if (boundary > sdebug_atomic_wr_max_bndry) { + mk_sense_invalid_fld(scp, SDEB_IN_CDB, 12, -1); + return check_condition_result; + } + + if (len > sdebug_atomic_wr_max_length_bndry) { + mk_sense_invalid_fld(scp, SDEB_IN_CDB, 12, -1); + return check_condition_result; + } + } else { + if (len > sdebug_atomic_wr_max_length) { + mk_sense_invalid_fld(scp, SDEB_IN_CDB, 12, -1); + return check_condition_result; + } + } + + ret = do_device_access(sip, scp, 0, lba, len, 0, true, true); + if (unlikely(ret == -1)) + return DID_ERROR << 16; + if (unlikely(ret != len * sdebug_sector_size)) + return DID_ERROR << 16; + return 0; +} + /* Logic transplanted from tcmu-runner, file_zbc.c */ static void zbc_open_all(struct sdebug_dev_info *devip) { @@ -5063,8 +5369,7 @@ static int resp_open_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) mk_sense_invalid_opcode(scp); return check_condition_result; } - - sdeb_write_lock(sip); + sdeb_meta_write_lock(sip); if (all) { /* Check if all closed zones can be open */ @@ -5113,7 +5418,7 @@ static int resp_open_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) zbc_open_zone(devip, zsp, true); fini: - sdeb_write_unlock(sip); + sdeb_meta_write_unlock(sip); return res; } @@ -5140,7 +5445,7 @@ static int resp_close_zone(struct scsi_cmnd *scp, return check_condition_result; } - sdeb_write_lock(sip); + sdeb_meta_write_lock(sip); if (all) { zbc_close_all(devip); @@ -5169,7 +5474,7 @@ static int resp_close_zone(struct scsi_cmnd *scp, zbc_close_zone(devip, zsp); fini: - sdeb_write_unlock(sip); + sdeb_meta_write_unlock(sip); return res; } @@ -5212,7 +5517,7 @@ static int resp_finish_zone(struct scsi_cmnd *scp, return check_condition_result; } - sdeb_write_lock(sip); + sdeb_meta_write_lock(sip); if (all) { zbc_finish_all(devip); @@ -5241,7 +5546,7 @@ static int resp_finish_zone(struct scsi_cmnd *scp, zbc_finish_zone(devip, zsp, true); fini: - sdeb_write_unlock(sip); + sdeb_meta_write_unlock(sip); return res; } @@ -5292,7 +5597,7 @@ static int resp_rwp_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) return check_condition_result; } - sdeb_write_lock(sip); + sdeb_meta_write_lock(sip); if (all) { zbc_rwp_all(devip); @@ -5320,7 +5625,7 @@ static int resp_rwp_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) zbc_rwp_zone(devip, zsp); fini: - sdeb_write_unlock(sip); + sdeb_meta_write_unlock(sip); return res; } @@ -6284,6 +6589,7 @@ module_param_named(lbprz, sdebug_lbprz, int, S_IRUGO); module_param_named(lbpu, sdebug_lbpu, int, S_IRUGO); module_param_named(lbpws, sdebug_lbpws, int, S_IRUGO); module_param_named(lbpws10, sdebug_lbpws10, int, S_IRUGO); +module_param_named(atomic_wr, sdebug_atomic_wr, int, S_IRUGO); module_param_named(lowest_aligned, sdebug_lowest_aligned, int, S_IRUGO); module_param_named(lun_format, sdebug_lun_am_i, int, S_IRUGO | S_IWUSR); module_param_named(max_luns, sdebug_max_luns, int, S_IRUGO | S_IWUSR); @@ -6318,6 +6624,11 @@ module_param_named(unmap_alignment, sdebug_unmap_alignment, int, S_IRUGO); module_param_named(unmap_granularity, sdebug_unmap_granularity, int, S_IRUGO); module_param_named(unmap_max_blocks, sdebug_unmap_max_blocks, int, S_IRUGO); module_param_named(unmap_max_desc, sdebug_unmap_max_desc, int, S_IRUGO); +module_param_named(atomic_wr_max_length, sdebug_atomic_wr_max_length, int, S_IRUGO); +module_param_named(atomic_wr_align, sdebug_atomic_wr_align, int, S_IRUGO); +module_param_named(atomic_wr_gran, sdebug_atomic_wr_gran, int, S_IRUGO); +module_param_named(atomic_wr_max_length_bndry, sdebug_atomic_wr_max_length_bndry, int, S_IRUGO); +module_param_named(atomic_wr_max_bndry, sdebug_atomic_wr_max_bndry, int, S_IRUGO); module_param_named(uuid_ctl, sdebug_uuid_ctl, int, S_IRUGO); module_param_named(virtual_gb, sdebug_virtual_gb, int, S_IRUGO | S_IWUSR); module_param_named(vpd_use_hostno, sdebug_vpd_use_hostno, int, @@ -6361,6 +6672,7 @@ MODULE_PARM_DESC(lbprz, MODULE_PARM_DESC(lbpu, "enable LBP, support UNMAP command (def=0)"); MODULE_PARM_DESC(lbpws, "enable LBP, support WRITE SAME(16) with UNMAP bit (def=0)"); MODULE_PARM_DESC(lbpws10, "enable LBP, support WRITE SAME(10) with UNMAP bit (def=0)"); +MODULE_PARM_DESC(atomic_write, "enable ATOMIC WRITE support, support WRITE ATOMIC(16) (def=0)"); MODULE_PARM_DESC(lowest_aligned, "lowest aligned lba (def=0)"); MODULE_PARM_DESC(lun_format, "LUN format: 0->peripheral (def); 1 --> flat address method"); MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)"); @@ -6392,6 +6704,11 @@ MODULE_PARM_DESC(unmap_alignment, "lowest aligned thin provisioning lba (def=0)" MODULE_PARM_DESC(unmap_granularity, "thin provisioning granularity in blocks (def=1)"); MODULE_PARM_DESC(unmap_max_blocks, "max # of blocks can be unmapped in one cmd (def=0xffffffff)"); MODULE_PARM_DESC(unmap_max_desc, "max # of ranges that can be unmapped in one cmd (def=256)"); +MODULE_PARM_DESC(atomic_wr_max_length, "max # of blocks can be atomically written in one cmd (def=8192)"); +MODULE_PARM_DESC(atomic_wr_align, "minimum alignment of atomic write in blocks (def=2)"); +MODULE_PARM_DESC(atomic_wr_gran, "minimum granularity of atomic write in blocks (def=2)"); +MODULE_PARM_DESC(atomic_wr_max_length_bndry, "max # of blocks can be atomically written in one cmd with boundary set (def=8192)"); +MODULE_PARM_DESC(atomic_wr_max_bndry, "max # boundaries per atomic write (def=128)"); MODULE_PARM_DESC(uuid_ctl, "1->use uuid for lu name, 0->don't, 2->all use same (def=0)"); MODULE_PARM_DESC(virtual_gb, "virtual gigabyte (GiB) size (def=0 -> use dev_size_mb)"); @@ -7563,6 +7880,7 @@ static int __init scsi_debug_init(void) return -EINVAL; } } + xa_init_flags(per_store_ap, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); if (want_store) { idx = sdebug_add_store(); @@ -7770,7 +8088,9 @@ static int sdebug_add_store(void) map_region(sip, 0, 2); } - rwlock_init(&sip->macc_lck); + rwlock_init(&sip->macc_data_lck); + rwlock_init(&sip->macc_meta_lck); + rwlock_init(&sip->macc_sector_lck); return (int)n_idx; err: sdebug_erase_store((int)n_idx, sip); -- cgit v1.2.3 From bf86e7d97b448eadf7959f025ce9e39c42eef92d Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 27 May 2024 07:15:25 +0200 Subject: lpfc_nvmet: implement 'host_traddr' Implement the 'host_traddr' callback to display the host transport address for nvmet debugfs. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Justin Tee Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/scsi/lpfc/lpfc_nvmet.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/scsi') diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 5297cacc8beb..0cef5d089f34 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1363,6 +1363,16 @@ lpfc_nvmet_ls_abort(struct nvmet_fc_target_port *targetport, atomic_inc(&lpfc_nvmet->xmt_ls_abort); } +static int +lpfc_nvmet_host_traddr(void *hosthandle, u64 *wwnn, u64 *wwpn) +{ + struct lpfc_nodelist *ndlp = hosthandle; + + *wwnn = wwn_to_u64(ndlp->nlp_nodename.u.wwn); + *wwpn = wwn_to_u64(ndlp->nlp_portname.u.wwn); + return 0; +} + static void lpfc_nvmet_host_release(void *hosthandle) { @@ -1413,6 +1423,7 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = { .ls_req = lpfc_nvmet_ls_req, .ls_abort = lpfc_nvmet_ls_abort, .host_release = lpfc_nvmet_host_release, + .host_traddr = lpfc_nvmet_host_traddr, .max_hw_queues = 1, .max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS, -- cgit v1.2.3 From e94b45d08b5d1c230c0f59c3eed758d28658851e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2024 16:26:29 +0200 Subject: block: move dma_pad_mask into queue_limits dma_pad_mask is a queue_limits by all ways of looking at it, so move it there and set it through the atomic queue limits APIs. Add a little helper that takes the alignment and pad into account to simplify the code that is touched a bit. Note that there never was any need for the > check in blk_queue_update_dma_pad, this probably was just copy and paste from dma_update_dma_alignment. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240626142637.300624-9-hch@lst.de Signed-off-by: Jens Axboe --- block/bio-integrity.c | 2 +- block/blk-map.c | 2 +- block/blk-settings.c | 17 ----------------- drivers/ata/libata-scsi.c | 3 +-- drivers/ata/pata_macio.c | 4 ++-- drivers/scsi/scsi_lib.c | 4 ++-- drivers/ufs/core/ufshcd.c | 10 ++++++---- include/linux/blkdev.h | 12 ++++++++---- 8 files changed, 21 insertions(+), 33 deletions(-) (limited to 'drivers/scsi') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 173ffd4d6237..356ca0d3d62f 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -312,7 +312,7 @@ int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes, u32 seed) { struct request_queue *q = bdev_get_queue(bio->bi_bdev); - unsigned int align = q->dma_pad_mask | queue_dma_alignment(q); + unsigned int align = blk_lim_dma_alignment_and_pad(&q->limits); struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages; struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec; unsigned int direction, nr_bvecs; diff --git a/block/blk-map.c b/block/blk-map.c index 71210cdb3442..bce144091128 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -634,7 +634,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, const struct iov_iter *iter, gfp_t gfp_mask) { bool copy = false, map_bvec = false; - unsigned long align = q->dma_pad_mask | queue_dma_alignment(q); + unsigned long align = blk_lim_dma_alignment_and_pad(&q->limits); struct bio *bio = NULL; struct iov_iter i; int ret = -EINVAL; diff --git a/block/blk-settings.c b/block/blk-settings.c index c692e80bb4f8..2e559cf97cc8 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -768,23 +768,6 @@ incompatible: } EXPORT_SYMBOL_GPL(queue_limits_stack_integrity); -/** - * blk_queue_update_dma_pad - update pad mask - * @q: the request queue for the device - * @mask: pad mask - * - * Update dma pad mask. - * - * Appending pad buffer to a request modifies the last entry of a - * scatter list such that it includes the pad buffer. - **/ -void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask) -{ - if (mask > q->dma_pad_mask) - q->dma_pad_mask = mask; -} -EXPORT_SYMBOL(blk_queue_update_dma_pad); - /** * blk_set_queue_depth - tell the block layer about the device queue depth * @q: the request queue for the device diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index cdf29b178ddc..682971c4cbe4 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1024,7 +1024,6 @@ EXPORT_SYMBOL_GPL(ata_scsi_dma_need_drain); int ata_scsi_dev_config(struct scsi_device *sdev, struct queue_limits *lim, struct ata_device *dev) { - struct request_queue *q = sdev->request_queue; int depth = 1; if (!ata_id_has_unload(dev->id)) @@ -1038,7 +1037,7 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct queue_limits *lim, sdev->sector_size = ATA_SECT_SIZE; /* set DMA padding */ - blk_queue_update_dma_pad(q, ATA_DMA_PAD_SZ - 1); + lim->dma_pad_mask = ATA_DMA_PAD_SZ - 1; /* make room for appending the drain */ lim->max_segments--; diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index 3cb455a32d92..1b85e8bf4ef9 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -816,7 +816,7 @@ static int pata_macio_device_configure(struct scsi_device *sdev, /* OHare has issues with non cache aligned DMA on some chipsets */ if (priv->kind == controller_ohare) { lim->dma_alignment = 31; - blk_queue_update_dma_pad(sdev->request_queue, 31); + lim->dma_pad_mask = 31; /* Tell the world about it */ ata_dev_info(dev, "OHare alignment limits applied\n"); @@ -831,7 +831,7 @@ static int pata_macio_device_configure(struct scsi_device *sdev, if (priv->kind == controller_sh_ata6 || priv->kind == controller_k2_ata6) { /* Allright these are bad, apply restrictions */ lim->dma_alignment = 15; - blk_queue_update_dma_pad(sdev->request_queue, 15); + lim->dma_pad_mask = 15; /* We enable MWI and hack cache line size directly here, this * is specific to this chipset and not normal values, we happen diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e2f7bfb2b9e4..3958a6d14bf4 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1139,9 +1139,9 @@ blk_status_t scsi_alloc_sgtables(struct scsi_cmnd *cmd) */ count = __blk_rq_map_sg(rq->q, rq, cmd->sdb.table.sgl, &last_sg); - if (blk_rq_bytes(rq) & rq->q->dma_pad_mask) { + if (blk_rq_bytes(rq) & rq->q->limits.dma_pad_mask) { unsigned int pad_len = - (rq->q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1; + (rq->q->limits.dma_pad_mask & ~blk_rq_bytes(rq)) + 1; last_sg->length += pad_len; cmd->extra_len += pad_len; diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 0cf07194bbe8..b7957a431589 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5193,17 +5193,19 @@ static int ufshcd_change_queue_depth(struct scsi_device *sdev, int depth) } /** - * ufshcd_slave_configure - adjust SCSI device configurations + * ufshcd_device_configure - adjust SCSI device configurations * @sdev: pointer to SCSI device + * @lim: queue limits * * Return: 0 (success). */ -static int ufshcd_slave_configure(struct scsi_device *sdev) +static int ufshcd_device_configure(struct scsi_device *sdev, + struct queue_limits *lim) { struct ufs_hba *hba = shost_priv(sdev->host); struct request_queue *q = sdev->request_queue; - blk_queue_update_dma_pad(q, PRDT_DATA_BYTE_COUNT_PAD - 1); + lim->dma_pad_mask = PRDT_DATA_BYTE_COUNT_PAD - 1; /* * Block runtime-pm until all consumers are added. @@ -8907,7 +8909,7 @@ static const struct scsi_host_template ufshcd_driver_template = { .queuecommand = ufshcd_queuecommand, .mq_poll = ufshcd_poll, .slave_alloc = ufshcd_slave_alloc, - .slave_configure = ufshcd_slave_configure, + .device_configure = ufshcd_device_configure, .slave_destroy = ufshcd_slave_destroy, .change_queue_depth = ufshcd_change_queue_depth, .eh_abort_handler = ufshcd_abort, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 94fcbc912312..a53e3434e1a2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -401,6 +401,7 @@ struct queue_limits { * due to possible offsets. */ unsigned int dma_alignment; + unsigned int dma_pad_mask; struct blk_integrity integrity; }; @@ -509,8 +510,6 @@ struct request_queue { */ int id; - unsigned int dma_pad_mask; - /* * queue settings */ @@ -981,7 +980,6 @@ extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, sector_t offset, const char *pfx); -extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); struct blk_independent_access_ranges * @@ -1433,10 +1431,16 @@ static inline bool bdev_iter_is_aligned(struct block_device *bdev, bdev_logical_block_size(bdev) - 1); } +static inline int blk_lim_dma_alignment_and_pad(struct queue_limits *lim) +{ + return lim->dma_alignment | lim->dma_pad_mask; +} + static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, unsigned int len) { - unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; + unsigned int alignment = blk_lim_dma_alignment_and_pad(&q->limits); + return !(addr & alignment) && !(len & alignment); } -- cgit v1.2.3 From aa57abe6a7f91fafe53fb98d0f1e74db951bce24 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jun 2024 14:49:12 +0200 Subject: megaraid_sas: don't set QUEUE_FLAG_NOMERGES Setting QUEUE_FLAG_NOMERGES was added in commit 15dd03811d99dcf ("scsi: megaraid_sas: NVME Interface detection and prop settings") without any explanation. Drivers should second guess the block layer merge decisions, so remove the flag. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20240627124926.512662-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/scsi/megaraid/megaraid_sas_base.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 88acefbf9aea..6c79c350a4d5 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -1981,8 +1981,6 @@ megasas_set_nvme_device_properties(struct scsi_device *sdev, lim->max_hw_sectors = max_io_size / 512; lim->virt_boundary_mask = mr_nvme_pg_size - 1; - - blk_queue_flag_set(QUEUE_FLAG_NOMERGES, sdev->request_queue); } /* -- cgit v1.2.3 From 8b77f23fadcbb030a898f168bebe74f465e5d5a2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jun 2024 14:49:13 +0200 Subject: mpt3sas_scsih: don't set QUEUE_FLAG_NOMERGES Setting QUEUE_FLAG_NOMERGES was added in commit d1b01d14b7ba ("scsi: mpt3sas: Set NVMe device queue depth as 128") without any explanation. Drivers should second guess the block layer merge decisions, so remove the flag. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20240627124926.512662-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 12d08d8ba538..b050aedc9d43 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2680,12 +2680,6 @@ scsih_device_configure(struct scsi_device *sdev, struct queue_limits *lim) pcie_device_put(pcie_device); spin_unlock_irqrestore(&ioc->pcie_device_lock, flags); mpt3sas_scsih_change_queue_depth(sdev, qdepth); - /* Enable QUEUE_FLAG_NOMERGES flag, so that IOs won't be - ** merged and can eliminate holes created during merging - ** operation. - **/ - blk_queue_flag_set(QUEUE_FLAG_NOMERGES, - sdev->request_queue); lim->virt_boundary_mask = ioc->page_size - 1; return 0; } -- cgit v1.2.3 From f2a7bea23710fceb99dac6da4ef82c3cc8932f7f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 4 Jul 2024 14:28:15 +0900 Subject: block: Remove REQ_OP_ZONE_RESET_ALL emulation Now that device mapper can handle resetting all zones of a mapped zoned device using REQ_OP_ZONE_RESET_ALL, all zoned block device drivers support this operation. With this, the request queue feature BLK_FEAT_ZONE_RESETALL is not necessary and the emulation code in blk-zone.c can be removed. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240704052816.623865-5-dlemoal@kernel.org Signed-off-by: Jens Axboe --- block/blk-core.c | 5 +-- block/blk-zoned.c | 76 ++---------------------------------------- drivers/block/null_blk/zoned.c | 2 +- drivers/block/ublk_drv.c | 2 +- drivers/block/virtio_blk.c | 2 +- drivers/nvme/host/zns.c | 2 +- drivers/scsi/sd_zbc.c | 2 +- include/linux/blkdev.h | 5 --- 8 files changed, 9 insertions(+), 87 deletions(-) (limited to 'drivers/scsi') diff --git a/block/blk-core.c b/block/blk-core.c index 71b7622c523a..02bceeb36f2c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -830,11 +830,8 @@ void submit_bio_noacct(struct bio *bio) case REQ_OP_ZONE_OPEN: case REQ_OP_ZONE_CLOSE: case REQ_OP_ZONE_FINISH: - if (!bdev_is_zoned(bio->bi_bdev)) - goto not_supported; - break; case REQ_OP_ZONE_RESET_ALL: - if (!bdev_is_zoned(bio->bi_bdev) || !blk_queue_zone_resetall(q)) + if (!bdev_is_zoned(bio->bi_bdev)) goto not_supported; break; case REQ_OP_DRV_IN: diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 601c21a224c9..0007ef9cd5ca 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -157,70 +157,6 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node, GFP_NOIO, node); } -static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx, - void *data) -{ - /* - * For an all-zones reset, ignore conventional, empty, read-only - * and offline zones. - */ - switch (zone->cond) { - case BLK_ZONE_COND_NOT_WP: - case BLK_ZONE_COND_EMPTY: - case BLK_ZONE_COND_READONLY: - case BLK_ZONE_COND_OFFLINE: - return 0; - default: - set_bit(idx, (unsigned long *)data); - return 0; - } -} - -static int blkdev_zone_reset_all_emulated(struct block_device *bdev) -{ - struct gendisk *disk = bdev->bd_disk; - sector_t capacity = bdev_nr_sectors(bdev); - sector_t zone_sectors = bdev_zone_sectors(bdev); - unsigned long *need_reset; - struct bio *bio = NULL; - sector_t sector = 0; - int ret; - - need_reset = blk_alloc_zone_bitmap(disk->queue->node, disk->nr_zones); - if (!need_reset) - return -ENOMEM; - - ret = disk->fops->report_zones(disk, 0, disk->nr_zones, - blk_zone_need_reset_cb, need_reset); - if (ret < 0) - goto out_free_need_reset; - - ret = 0; - while (sector < capacity) { - if (!test_bit(disk_zone_no(disk, sector), need_reset)) { - sector += zone_sectors; - continue; - } - - bio = blk_next_bio(bio, bdev, 0, REQ_OP_ZONE_RESET | REQ_SYNC, - GFP_KERNEL); - bio->bi_iter.bi_sector = sector; - sector += zone_sectors; - - /* This may take a while, so be nice to others */ - cond_resched(); - } - - if (bio) { - ret = submit_bio_wait(bio); - bio_put(bio); - } - -out_free_need_reset: - kfree(need_reset); - return ret; -} - static int blkdev_zone_reset_all(struct block_device *bdev) { struct bio bio; @@ -247,7 +183,6 @@ static int blkdev_zone_reset_all(struct block_device *bdev) int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, sector_t sector, sector_t nr_sectors) { - struct request_queue *q = bdev_get_queue(bdev); sector_t zone_sectors = bdev_zone_sectors(bdev); sector_t capacity = bdev_nr_sectors(bdev); sector_t end_sector = sector + nr_sectors; @@ -275,16 +210,11 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, return -EINVAL; /* - * In the case of a zone reset operation over all zones, - * REQ_OP_ZONE_RESET_ALL can be used with devices supporting this - * command. For other devices, we emulate this command behavior by - * identifying the zones needing a reset. + * In the case of a zone reset operation over all zones, use + * REQ_OP_ZONE_RESET_ALL. */ - if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) { - if (!blk_queue_zone_resetall(q)) - return blkdev_zone_reset_all_emulated(bdev); + if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) return blkdev_zone_reset_all(bdev); - } while (sector < end_sector) { bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, GFP_KERNEL); diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 7996e2e7dce2..9bc768b2ca56 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -164,7 +164,7 @@ int null_init_zoned_dev(struct nullb_device *dev, sector += dev->zone_size_sects; } - lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL; + lim->features |= BLK_FEAT_ZONED; lim->chunk_sectors = dev->zone_size_sects; lim->max_zone_append_sectors = dev->zone_append_max_sectors; lim->max_open_zones = dev->zone_max_open; diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 4fdff13fc23b..d10a2ea07292 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2194,7 +2194,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) return -EOPNOTSUPP; - lim.features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL; + lim.features |= BLK_FEAT_ZONED; lim.max_active_zones = p->max_active_zones; lim.max_open_zones = p->max_open_zones; lim.max_zone_append_sectors = p->max_zone_append_sectors; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6c64a67ab9c9..84c3efd0c611 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -728,7 +728,7 @@ static int virtblk_read_zoned_limits(struct virtio_blk *vblk, dev_dbg(&vdev->dev, "probing host-managed zoned device\n"); - lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL; + lim->features |= BLK_FEAT_ZONED; virtio_cread(vdev, struct virtio_blk_config, zoned.max_open_zones, &v); diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 99bb89c2495a..9a06f9d98cd6 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -108,7 +108,7 @@ free_data: void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim, struct nvme_zone_info *zi) { - lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL; + lim->features |= BLK_FEAT_ZONED; lim->max_open_zones = zi->max_open_zones; lim->max_active_zones = zi->max_active_zones; lim->max_zone_append_sectors = ns->ctrl->max_zone_append; diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index f7067afac79c..c8b9654d30f0 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -599,7 +599,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, if (sdkp->device->type != TYPE_ZBC) return 0; - lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL; + lim->features |= BLK_FEAT_ZONED; /* * Per ZBC and ZAC specifications, writes in sequential write required diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4d0d4b83bc74..dc250d8070d2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -318,9 +318,6 @@ typedef unsigned int __bitwise blk_features_t; /* is a zoned device */ #define BLK_FEAT_ZONED ((__force blk_features_t)(1u << 10)) -/* supports Zone Reset All */ -#define BLK_FEAT_ZONE_RESETALL ((__force blk_features_t)(1u << 11)) - /* supports PCI(e) p2p requests */ #define BLK_FEAT_PCI_P2PDMA ((__force blk_features_t)(1u << 12)) @@ -618,8 +615,6 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL)) #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) -#define blk_queue_zone_resetall(q) \ - ((q)->limits.features & BLK_FEAT_ZONE_RESETALL) #define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) #define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA) #ifdef CONFIG_BLK_RQ_ALLOC_TIME -- cgit v1.2.3