diff options
author | Damien Le Moal <damien.lemoal@wdc.com> | 2017-06-07 15:55:39 +0900 |
---|---|---|
committer | Mike Snitzer <snitzer@redhat.com> | 2017-06-19 11:05:20 -0400 |
commit | 3b1a94c88b798d4f3bd1a5b61f5c8fb9d987c242 (patch) | |
tree | 173fcaced4dffd3e7d334a2992e40a9466747b91 /drivers/md/dm-zoned.h | |
parent | b73c67c2cbb0004e6da9720a167fe42e31f7a6e8 (diff) | |
download | lwn-3b1a94c88b798d4f3bd1a5b61f5c8fb9d987c242.tar.gz lwn-3b1a94c88b798d4f3bd1a5b61f5c8fb9d987c242.zip |
dm zoned: drive-managed zoned block device target
The dm-zoned device mapper target provides transparent write access
to zoned block devices (ZBC and ZAC compliant block devices).
dm-zoned hides to the device user (a file system or an application
doing raw block device accesses) any constraint imposed on write
requests by the device, equivalent to a drive-managed zoned block
device model.
Write requests are processed using a combination of on-disk buffering
using the device conventional zones and direct in-place processing for
requests aligned to a zone sequential write pointer position.
A background reclaim process implemented using dm_kcopyd_copy ensures
that conventional zones are always available for executing unaligned
write requests. The reclaim process overhead is minimized by managing
buffer zones in a least-recently-written order and first targeting the
oldest buffer zones. Doing so, blocks under regular write access (such
as metadata blocks of a file system) remain stored in conventional
zones, resulting in no apparent overhead.
dm-zoned implementation focus on simplicity and on minimizing overhead
(CPU, memory and storage overhead). For a 14TB host-managed disk with
256 MB zones, dm-zoned memory usage per disk instance is at most about
3 MB and as little as 5 zones will be used internally for storing metadata
and performing buffer zone reclaim operations. This is achieved using
zone level indirection rather than a full block indirection system for
managing block movement between zones.
dm-zoned primary target is host-managed zoned block devices but it can
also be used with host-aware device models to mitigate potential
device-side performance degradation due to excessive random writing.
Zoned block devices can be formatted and checked for use with the dm-zoned
target using the dmzadm utility available at:
https://github.com/hgst/dm-zoned-tools
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
[Mike Snitzer partly refactored Damien's original work to cleanup the code]
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Diffstat (limited to 'drivers/md/dm-zoned.h')
-rw-r--r-- | drivers/md/dm-zoned.h | 228 |
1 files changed, 228 insertions, 0 deletions
diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h new file mode 100644 index 000000000000..12419f0bfe78 --- /dev/null +++ b/drivers/md/dm-zoned.h @@ -0,0 +1,228 @@ +/* + * Copyright (C) 2017 Western Digital Corporation or its affiliates. + * + * This file is released under the GPL. + */ + +#ifndef DM_ZONED_H +#define DM_ZONED_H + +#include <linux/types.h> +#include <linux/blkdev.h> +#include <linux/device-mapper.h> +#include <linux/dm-kcopyd.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/workqueue.h> +#include <linux/rwsem.h> +#include <linux/rbtree.h> +#include <linux/radix-tree.h> +#include <linux/shrinker.h> + +/* + * dm-zoned creates block devices with 4KB blocks, always. + */ +#define DMZ_BLOCK_SHIFT 12 +#define DMZ_BLOCK_SIZE (1 << DMZ_BLOCK_SHIFT) +#define DMZ_BLOCK_MASK (DMZ_BLOCK_SIZE - 1) + +#define DMZ_BLOCK_SHIFT_BITS (DMZ_BLOCK_SHIFT + 3) +#define DMZ_BLOCK_SIZE_BITS (1 << DMZ_BLOCK_SHIFT_BITS) +#define DMZ_BLOCK_MASK_BITS (DMZ_BLOCK_SIZE_BITS - 1) + +#define DMZ_BLOCK_SECTORS_SHIFT (DMZ_BLOCK_SHIFT - SECTOR_SHIFT) +#define DMZ_BLOCK_SECTORS (DMZ_BLOCK_SIZE >> SECTOR_SHIFT) +#define DMZ_BLOCK_SECTORS_MASK (DMZ_BLOCK_SECTORS - 1) + +/* + * 4KB block <-> 512B sector conversion. + */ +#define dmz_blk2sect(b) ((sector_t)(b) << DMZ_BLOCK_SECTORS_SHIFT) +#define dmz_sect2blk(s) ((sector_t)(s) >> DMZ_BLOCK_SECTORS_SHIFT) + +#define dmz_bio_block(bio) dmz_sect2blk((bio)->bi_iter.bi_sector) +#define dmz_bio_blocks(bio) dmz_sect2blk(bio_sectors(bio)) + +/* + * Zoned block device information. + */ +struct dmz_dev { + struct block_device *bdev; + + char name[BDEVNAME_SIZE]; + + sector_t capacity; + + unsigned int nr_zones; + + sector_t zone_nr_sectors; + unsigned int zone_nr_sectors_shift; + + sector_t zone_nr_blocks; + sector_t zone_nr_blocks_shift; +}; + +#define dmz_bio_chunk(dev, bio) ((bio)->bi_iter.bi_sector >> \ + (dev)->zone_nr_sectors_shift) +#define dmz_chunk_block(dev, b) ((b) & ((dev)->zone_nr_blocks - 1)) + +/* + * Zone descriptor. + */ +struct dm_zone { + /* For listing the zone depending on its state */ + struct list_head link; + + /* Zone type and state */ + unsigned long flags; + + /* Zone activation reference count */ + atomic_t refcount; + + /* Zone write pointer block (relative to the zone start block) */ + unsigned int wp_block; + + /* Zone weight (number of valid blocks in the zone) */ + unsigned int weight; + + /* The chunk that the zone maps */ + unsigned int chunk; + + /* + * For a sequential data zone, pointer to the random zone + * used as a buffer for processing unaligned writes. + * For a buffer zone, this points back to the data zone. + */ + struct dm_zone *bzone; +}; + +/* + * Zone flags. + */ +enum { + /* Zone write type */ + DMZ_RND, + DMZ_SEQ, + + /* Zone critical condition */ + DMZ_OFFLINE, + DMZ_READ_ONLY, + + /* How the zone is being used */ + DMZ_META, + DMZ_DATA, + DMZ_BUF, + + /* Zone internal state */ + DMZ_ACTIVE, + DMZ_RECLAIM, + DMZ_SEQ_WRITE_ERR, +}; + +/* + * Zone data accessors. + */ +#define dmz_is_rnd(z) test_bit(DMZ_RND, &(z)->flags) +#define dmz_is_seq(z) test_bit(DMZ_SEQ, &(z)->flags) +#define dmz_is_empty(z) ((z)->wp_block == 0) +#define dmz_is_offline(z) test_bit(DMZ_OFFLINE, &(z)->flags) +#define dmz_is_readonly(z) test_bit(DMZ_READ_ONLY, &(z)->flags) +#define dmz_is_active(z) test_bit(DMZ_ACTIVE, &(z)->flags) +#define dmz_in_reclaim(z) test_bit(DMZ_RECLAIM, &(z)->flags) +#define dmz_seq_write_err(z) test_bit(DMZ_SEQ_WRITE_ERR, &(z)->flags) + +#define dmz_is_meta(z) test_bit(DMZ_META, &(z)->flags) +#define dmz_is_buf(z) test_bit(DMZ_BUF, &(z)->flags) +#define dmz_is_data(z) test_bit(DMZ_DATA, &(z)->flags) + +#define dmz_weight(z) ((z)->weight) + +/* + * Message functions. + */ +#define dmz_dev_info(dev, format, args...) \ + DMINFO("(%s): " format, (dev)->name, ## args) + +#define dmz_dev_err(dev, format, args...) \ + DMERR("(%s): " format, (dev)->name, ## args) + +#define dmz_dev_warn(dev, format, args...) \ + DMWARN("(%s): " format, (dev)->name, ## args) + +#define dmz_dev_debug(dev, format, args...) \ + DMDEBUG("(%s): " format, (dev)->name, ## args) + +struct dmz_metadata; +struct dmz_reclaim; + +/* + * Functions defined in dm-zoned-metadata.c + */ +int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **zmd); +void dmz_dtr_metadata(struct dmz_metadata *zmd); +int dmz_resume_metadata(struct dmz_metadata *zmd); + +void dmz_lock_map(struct dmz_metadata *zmd); +void dmz_unlock_map(struct dmz_metadata *zmd); +void dmz_lock_metadata(struct dmz_metadata *zmd); +void dmz_unlock_metadata(struct dmz_metadata *zmd); +void dmz_lock_flush(struct dmz_metadata *zmd); +void dmz_unlock_flush(struct dmz_metadata *zmd); +int dmz_flush_metadata(struct dmz_metadata *zmd); + +unsigned int dmz_id(struct dmz_metadata *zmd, struct dm_zone *zone); +sector_t dmz_start_sect(struct dmz_metadata *zmd, struct dm_zone *zone); +sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone); +unsigned int dmz_nr_chunks(struct dmz_metadata *zmd); + +#define DMZ_ALLOC_RND 0x01 +#define DMZ_ALLOC_RECLAIM 0x02 + +struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags); +void dmz_free_zone(struct dmz_metadata *zmd, struct dm_zone *zone); + +void dmz_map_zone(struct dmz_metadata *zmd, struct dm_zone *zone, + unsigned int chunk); +void dmz_unmap_zone(struct dmz_metadata *zmd, struct dm_zone *zone); +unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd); +unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd); + +void dmz_activate_zone(struct dm_zone *zone); +void dmz_deactivate_zone(struct dm_zone *zone); + +int dmz_lock_zone_reclaim(struct dm_zone *zone); +void dmz_unlock_zone_reclaim(struct dm_zone *zone); +struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd); + +struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, + unsigned int chunk, int op); +void dmz_put_chunk_mapping(struct dmz_metadata *zmd, struct dm_zone *zone); +struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd, + struct dm_zone *dzone); + +int dmz_validate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, + sector_t chunk_block, unsigned int nr_blocks); +int dmz_invalidate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, + sector_t chunk_block, unsigned int nr_blocks); +int dmz_block_valid(struct dmz_metadata *zmd, struct dm_zone *zone, + sector_t chunk_block); +int dmz_first_valid_block(struct dmz_metadata *zmd, struct dm_zone *zone, + sector_t *chunk_block); +int dmz_copy_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone, + struct dm_zone *to_zone); +int dmz_merge_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone, + struct dm_zone *to_zone, sector_t chunk_block); + +/* + * Functions defined in dm-zoned-reclaim.c + */ +int dmz_ctr_reclaim(struct dmz_dev *dev, struct dmz_metadata *zmd, + struct dmz_reclaim **zrc); +void dmz_dtr_reclaim(struct dmz_reclaim *zrc); +void dmz_suspend_reclaim(struct dmz_reclaim *zrc); +void dmz_resume_reclaim(struct dmz_reclaim *zrc); +void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc); +void dmz_schedule_reclaim(struct dmz_reclaim *zrc); + +#endif /* DM_ZONED_H */ |