diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-06 11:50:19 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-06 11:50:19 -0700 |
commit | 83c7c18b169bdac3dabab763d16549c1e4688a8b (patch) | |
tree | 438488db09b9776ff80738e5bb0e807319099556 | |
parent | 9022ca6b1129da44e3d5c4fa779b8bb9ceabe2ce (diff) | |
parent | 5bd5e8d891c1fd2d966a7e2c26f0452d22410683 (diff) | |
download | lwn-83c7c18b169bdac3dabab763d16549c1e4688a8b.tar.gz lwn-83c7c18b169bdac3dabab763d16549c1e4688a8b.zip |
Merge tag 'for-4.17/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- DM core passthrough ioctl fix to retain reference to DM table, and
that table's block devices, while issuing the ioctl to one of those
block devices.
- DM core passthrough ioctl fix to _not_ override the fmode_t used to
issue the ioctl. Overriding by using the fmode_t that the block
device was originally open with during DM table load is a liability.
- Add DM core support for secure erase forwarding and update the DM
linear and DM striped targets to support them.
- A DM core 4.16 stable fix to allow abnormal IO (e.g. discard, write
same, write zeroes) for targets that make use of the non-splitting IO
variant (as is done for multipath or thinp when layered directly on
NVMe).
- Allow DM targets to return a payload in response to a DM message that
they are sent. This is useful for DM targets that would like to
provide statistics data in response to DM messages.
- Update DM bufio to support non-power-of-2 block sizes. Numerous other
related changes prepare the DM bufio code for this support.
- Fix DM crypt to use a bounded amount of memory across the entire
system. This is to avoid OOM that can otherwise occur in response to
certain pathological IO workloads (e.g. discarding a large DM crypt
device).
- Add a 'check_at_most_once' feature to the DM verity target to allow
verity to be used on mobile devices that have very limited resources.
- Fix the DM integrity target to fail early if a keyed algorithm (e.g.
HMAC) is to be used but the key isn't set.
- Add non-power-of-2 support to the DM unstripe target.
- Eliminate the use of a Variable Length Array in the DM stripe target.
- Update the DM log-writes target to record metadata (REQ_META flag).
- DM raid fixes for its nosync status and some variable range issues.
* tag 'for-4.17/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (28 commits)
dm: remove fmode_t argument from .prepare_ioctl hook
dm: hold DM table for duration of ioctl rather than use blkdev_get
dm raid: fix parse_raid_params() variable range issue
dm verity: make verity_for_io_block static
dm verity: add 'check_at_most_once' option to only validate hashes once
dm bufio: don't embed a bio in the dm_buffer structure
dm bufio: support non-power-of-two block sizes
dm bufio: use slab cache for dm_buffer structure allocations
dm bufio: reorder fields in dm_buffer structure
dm bufio: relax alignment constraint on slab cache
dm bufio: remove code that merges slab caches
dm bufio: get rid of slab cache name allocations
dm bufio: move dm-bufio.h to include/linux/
dm bufio: delete outdated comment
dm: add support for secure erase forwarding
dm: backfill abnormal IO support to non-splitting IO submission
dm raid: fix nosync status
dm mpath: use DM_MAPIO_SUBMITTED instead of magic number 0 in process_queued_bios()
dm stripe: get rid of a Variable Length Array (VLA)
dm log writes: record metadata flag for better flags record
...
27 files changed, 465 insertions, 310 deletions
diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt index 89fd8f9a259f..b3d2e4a42255 100644 --- a/Documentation/device-mapper/verity.txt +++ b/Documentation/device-mapper/verity.txt @@ -109,6 +109,17 @@ fec_start <offset> This is the offset, in <data_block_size> blocks, from the start of the FEC device to the beginning of the encoding data. +check_at_most_once + Verify data blocks only the first time they are read from the data device, + rather than every time. This reduces the overhead of dm-verity so that it + can be used on systems that are memory and/or CPU constrained. However, it + provides a reduced level of security because only offline tampering of the + data device's content will be detected, not online tampering. + + Hash blocks are still verified each time they are read from the hash device, + since verification of hash blocks is less performance critical than data + blocks, and a hash block will not be verified any more after all the data + blocks it covers have been verified anyway. Theory of operation =================== diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index aa2032fa80d4..12aa9ca21d8c 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -6,7 +6,7 @@ * This file is released under the GPL. */ -#include "dm-bufio.h" +#include <linux/dm-bufio.h> #include <linux/device-mapper.h> #include <linux/dm-io.h> @@ -51,19 +51,6 @@ #define DM_BUFIO_DEFAULT_RETAIN_BYTES (256 * 1024) /* - * The number of bvec entries that are embedded directly in the buffer. - * If the chunk size is larger, dm-io is used to do the io. - */ -#define DM_BUFIO_INLINE_VECS 16 - -/* - * Don't try to use kmem_cache_alloc for blocks larger than this. - * For explanation, see alloc_buffer_data below. - */ -#define DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT (PAGE_SIZE >> 1) -#define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT (PAGE_SIZE << (MAX_ORDER - 1)) - -/* * Align buffer writes to this boundary. * Tests show that SSDs have the highest IOPS when using 4k writes. */ @@ -99,13 +86,12 @@ struct dm_bufio_client { struct block_device *bdev; unsigned block_size; - unsigned char sectors_per_block_bits; - unsigned char pages_per_block_bits; - unsigned char blocks_per_page_bits; - unsigned aux_size; + s8 sectors_per_block_bits; void (*alloc_callback)(struct dm_buffer *); void (*write_callback)(struct dm_buffer *); + struct kmem_cache *slab_buffer; + struct kmem_cache *slab_cache; struct dm_io_client *dm_io; struct list_head reserved_buffers; @@ -148,11 +134,11 @@ struct dm_buffer { struct list_head lru_list; sector_t block; void *data; - enum data_mode data_mode; + unsigned char data_mode; /* DATA_MODE_* */ unsigned char list_mode; /* LIST_* */ - unsigned hold_count; blk_status_t read_error; blk_status_t write_error; + unsigned hold_count; unsigned long state; unsigned long last_accessed; unsigned dirty_start; @@ -161,8 +147,7 @@ struct dm_buffer { unsigned write_end; struct dm_bufio_client *c; struct list_head write_list; - struct bio bio; - struct bio_vec bio_vec[DM_BUFIO_INLINE_VECS]; + void (*end_io)(struct dm_buffer *, blk_status_t); #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING #define MAX_STACK 10 struct stack_trace stack_trace; @@ -172,21 +157,6 @@ struct dm_buffer { /*----------------------------------------------------------------*/ -static struct kmem_cache *dm_bufio_caches[PAGE_SHIFT - SECTOR_SHIFT]; -static char *dm_bufio_cache_names[PAGE_SHIFT - SECTOR_SHIFT]; - -static inline int dm_bufio_cache_index(struct dm_bufio_client *c) -{ - unsigned ret = c->blocks_per_page_bits - 1; - - BUG_ON(ret >= ARRAY_SIZE(dm_bufio_caches)); - - return ret; -} - -#define DM_BUFIO_CACHE(c) (dm_bufio_caches[dm_bufio_cache_index(c)]) -#define DM_BUFIO_CACHE_NAME(c) (dm_bufio_cache_names[dm_bufio_cache_index(c)]) - #define dm_bufio_in_request() (!!current->bio_list) static void dm_bufio_lock(struct dm_bufio_client *c) @@ -319,7 +289,7 @@ static void __remove(struct dm_bufio_client *c, struct dm_buffer *b) /*----------------------------------------------------------------*/ -static void adjust_total_allocated(enum data_mode data_mode, long diff) +static void adjust_total_allocated(unsigned char data_mode, long diff) { static unsigned long * const class_ptr[DATA_MODE_LIMIT] = { &dm_bufio_allocated_kmem_cache, @@ -384,18 +354,18 @@ static void __cache_size_refresh(void) * space. */ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, - enum data_mode *data_mode) + unsigned char *data_mode) { - if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) { + if (unlikely(c->slab_cache != NULL)) { *data_mode = DATA_MODE_SLAB; - return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask); + return kmem_cache_alloc(c->slab_cache, gfp_mask); } - if (c->block_size <= DM_BUFIO_BLOCK_SIZE_GFP_LIMIT && + if (c->block_size <= KMALLOC_MAX_SIZE && gfp_mask & __GFP_NORETRY) { *data_mode = DATA_MODE_GET_FREE_PAGES; return (void *)__get_free_pages(gfp_mask, - c->pages_per_block_bits); + c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT)); } *data_mode = DATA_MODE_VMALLOC; @@ -424,15 +394,16 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, * Free buffer's data. */ static void free_buffer_data(struct dm_bufio_client *c, - void *data, enum data_mode data_mode) + void *data, unsigned char data_mode) { switch (data_mode) { case DATA_MODE_SLAB: - kmem_cache_free(DM_BUFIO_CACHE(c), data); + kmem_cache_free(c->slab_cache, data); break; case DATA_MODE_GET_FREE_PAGES: - free_pages((unsigned long)data, c->pages_per_block_bits); + free_pages((unsigned long)data, + c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT)); break; case DATA_MODE_VMALLOC: @@ -451,8 +422,7 @@ static void free_buffer_data(struct dm_bufio_client *c, */ static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask) { - struct dm_buffer *b = kmalloc(sizeof(struct dm_buffer) + c->aux_size, - gfp_mask); + struct dm_buffer *b = kmem_cache_alloc(c->slab_buffer, gfp_mask); if (!b) return NULL; @@ -461,7 +431,7 @@ static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask) b->data = alloc_buffer_data(c, gfp_mask, &b->data_mode); if (!b->data) { - kfree(b); + kmem_cache_free(c->slab_buffer, b); return NULL; } @@ -483,7 +453,7 @@ static void free_buffer(struct dm_buffer *b) adjust_total_allocated(b->data_mode, -(long)c->block_size); free_buffer_data(c, b->data, b->data_mode); - kfree(b); + kmem_cache_free(c->slab_buffer, b); } /* @@ -540,10 +510,6 @@ static void __relink_lru(struct dm_buffer *b, int dirty) * * the memory must be direct-mapped, not vmalloced; * - * the I/O driver can reject requests spuriously if it thinks that - * the requests are too big for the device or if they cross a - * controller-defined memory boundary. - * * If the buffer is small enough (up to DM_BUFIO_INLINE_VECS pages) and * it is not vmalloced, try using the bio interface. * @@ -561,12 +527,11 @@ static void dmio_complete(unsigned long error, void *context) { struct dm_buffer *b = context; - b->bio.bi_status = error ? BLK_STS_IOERR : 0; - b->bio.bi_end_io(&b->bio); + b->end_io(b, unlikely(error != 0) ? BLK_STS_IOERR : 0); } static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, - unsigned n_sectors, unsigned offset, bio_end_io_t *end_io) + unsigned n_sectors, unsigned offset) { int r; struct dm_io_request io_req = { @@ -590,76 +555,77 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, io_req.mem.ptr.vma = (char *)b->data + offset; } - b->bio.bi_end_io = end_io; - r = dm_io(&io_req, 1, ®ion, NULL); - if (r) { - b->bio.bi_status = errno_to_blk_status(r); - end_io(&b->bio); - } + if (unlikely(r)) + b->end_io(b, errno_to_blk_status(r)); } -static void inline_endio(struct bio *bio) +static void bio_complete(struct bio *bio) { - bio_end_io_t *end_fn = bio->bi_private; + struct dm_buffer *b = bio->bi_private; blk_status_t status = bio->bi_status; - - /* - * Reset the bio to free any attached resources - * (e.g. bio integrity profiles). - */ - bio_reset(bio); - - bio->bi_status = status; - end_fn(bio); + bio_put(bio); + b->end_io(b, status); } -static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector, - unsigned n_sectors, unsigned offset, bio_end_io_t *end_io) +static void use_bio(struct dm_buffer *b, int rw, sector_t sector, + unsigned n_sectors, unsigned offset) { + struct bio *bio; char *ptr; - unsigned len; + unsigned vec_size, len; - bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS); - b->bio.bi_iter.bi_sector = sector; - bio_set_dev(&b->bio, b->c->bdev); - b->bio.bi_end_io = inline_endio; - /* - * Use of .bi_private isn't a problem here because - * the dm_buffer's inline bio is local to bufio. - */ - b->bio.bi_private = end_io; - bio_set_op_attrs(&b->bio, rw, 0); + vec_size = b->c->block_size >> PAGE_SHIFT; + if (unlikely(b->c->sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT)) + vec_size += 2; + + bio = bio_kmalloc(GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN, vec_size); + if (!bio) { +dmio: + use_dmio(b, rw, sector, n_sectors, offset); + return; + } + + bio->bi_iter.bi_sector = sector; + bio_set_dev(bio, b->c->bdev); + bio_set_op_attrs(bio, rw, 0); + bio->bi_end_io = bio_complete; + bio->bi_private = b; ptr = (char *)b->data + offset; len = n_sectors << SECTOR_SHIFT; do { unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len); - if (!bio_add_page(&b->bio, virt_to_page(ptr), this_step, + if (!bio_add_page(bio, virt_to_page(ptr), this_step, offset_in_page(ptr))) { - BUG_ON(b->c->block_size <= PAGE_SIZE); - use_dmio(b, rw, sector, n_sectors, offset, end_io); - return; + bio_put(bio); + goto dmio; } len -= this_step; ptr += this_step; } while (len > 0); - submit_bio(&b->bio); + submit_bio(bio); } -static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io) +static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buffer *, blk_status_t)) { unsigned n_sectors; sector_t sector; unsigned offset, end; - sector = (b->block << b->c->sectors_per_block_bits) + b->c->start; + b->end_io = end_io; + + if (likely(b->c->sectors_per_block_bits >= 0)) + sector = b->block << b->c->sectors_per_block_bits; + else + sector = b->block * (b->c->block_size >> SECTOR_SHIFT); + sector += b->c->start; if (rw != REQ_OP_WRITE) { - n_sectors = 1 << b->c->sectors_per_block_bits; + n_sectors = b->c->block_size >> SECTOR_SHIFT; offset = 0; } else { if (b->c->write_callback) @@ -676,11 +642,10 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io) n_sectors = (end - offset) >> SECTOR_SHIFT; } - if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) && - b->data_mode != DATA_MODE_VMALLOC) - use_inline_bio(b, rw, sector, n_sectors, offset, end_io); + if (b->data_mode != DATA_MODE_VMALLOC) + use_bio(b, rw, sector, n_sectors, offset); else - use_dmio(b, rw, sector, n_sectors, offset, end_io); + use_dmio(b, rw, sector, n_sectors, offset); } /*---------------------------------------------------------------- @@ -693,16 +658,14 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io) * Set the error, clear B_WRITING bit and wake anyone who was waiting on * it. */ -static void write_endio(struct bio *bio) +static void write_endio(struct dm_buffer *b, blk_status_t status) { - struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); - - b->write_error = bio->bi_status; - if (unlikely(bio->bi_status)) { + b->write_error = status; + if (unlikely(status)) { struct dm_bufio_client *c = b->c; (void)cmpxchg(&c->async_write_error, 0, - blk_status_to_errno(bio->bi_status)); + blk_status_to_errno(status)); } BUG_ON(!test_bit(B_WRITING, &b->state)); @@ -963,8 +926,11 @@ static void __get_memory_limit(struct dm_bufio_client *c, } } - buffers = dm_bufio_cache_size_per_client >> - (c->sectors_per_block_bits + SECTOR_SHIFT); + buffers = dm_bufio_cache_size_per_client; + if (likely(c->sectors_per_block_bits >= 0)) + buffers >>= c->sectors_per_block_bits + SECTOR_SHIFT; + else + buffers /= c->block_size; if (buffers < c->minimum_buffers) buffers = c->minimum_buffers; @@ -1076,11 +1042,9 @@ found_buffer: * The endio routine for reading: set the error, clear the bit and wake up * anyone waiting on the buffer. */ -static void read_endio(struct bio *bio) +static void read_endio(struct dm_buffer *b, blk_status_t status) { - struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); - - b->read_error = bio->bi_status; + b->read_error = status; BUG_ON(!test_bit(B_READING, &b->state)); @@ -1482,13 +1446,13 @@ void dm_bufio_forget(struct dm_bufio_client *c, sector_t block) dm_bufio_unlock(c); } -EXPORT_SYMBOL(dm_bufio_forget); +EXPORT_SYMBOL_GPL(dm_bufio_forget); void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n) { c->minimum_buffers = n; } -EXPORT_SYMBOL(dm_bufio_set_minimum_buffers); +EXPORT_SYMBOL_GPL(dm_bufio_set_minimum_buffers); unsigned dm_bufio_get_block_size(struct dm_bufio_client *c) { @@ -1498,8 +1462,12 @@ EXPORT_SYMBOL_GPL(dm_bufio_get_block_size); sector_t dm_bufio_get_device_size(struct dm_bufio_client *c) { - return i_size_read(c->bdev->bd_inode) >> - (SECTOR_SHIFT + c->sectors_per_block_bits); + sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT; + if (likely(c->sectors_per_block_bits >= 0)) + s >>= c->sectors_per_block_bits; + else + sector_div(s, c->block_size >> SECTOR_SHIFT); + return s; } EXPORT_SYMBOL_GPL(dm_bufio_get_device_size); @@ -1597,8 +1565,12 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp) static unsigned long get_retain_buffers(struct dm_bufio_client *c) { - unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes); - return retain_bytes >> (c->sectors_per_block_bits + SECTOR_SHIFT); + unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes); + if (likely(c->sectors_per_block_bits >= 0)) + retain_bytes >>= c->sectors_per_block_bits + SECTOR_SHIFT; + else + retain_bytes /= c->block_size; + return retain_bytes; } static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, @@ -1662,9 +1634,13 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign int r; struct dm_bufio_client *c; unsigned i; + char slab_name[27]; - BUG_ON(block_size < 1 << SECTOR_SHIFT || - (block_size & (block_size - 1))); + if (!block_size || block_size & ((1 << SECTOR_SHIFT) - 1)) { + DMERR("%s: block size not specified or is not multiple of 512b", __func__); + r = -EINVAL; + goto bad_client; + } c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) { @@ -1675,13 +1651,11 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign c->bdev = bdev; c->block_size = block_size; - c->sectors_per_block_bits = __ffs(block_size) - SECTOR_SHIFT; - c->pages_per_block_bits = (__ffs(block_size) >= PAGE_SHIFT) ? - __ffs(block_size) - PAGE_SHIFT : 0; - c->blocks_per_page_bits = (__ffs(block_size) < PAGE_SHIFT ? - PAGE_SHIFT - __ffs(block_size) : 0); + if (is_power_of_2(block_size)) + c->sectors_per_block_bits = __ffs(block_size) - SECTOR_SHIFT; + else + c->sectors_per_block_bits = -1; - c->aux_size = aux_size; c->alloc_callback = alloc_callback; c->write_callback = write_callback; @@ -1694,7 +1668,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign INIT_LIST_HEAD(&c->reserved_buffers); c->need_reserved_buffers = reserved_buffers; - c->minimum_buffers = DM_BUFIO_MIN_BUFFERS; + dm_bufio_set_minimum_buffers(c, DM_BUFIO_MIN_BUFFERS); init_waitqueue_head(&c->free_buffer_wait); c->async_write_error = 0; @@ -1705,29 +1679,26 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign goto bad_dm_io; } - mutex_lock(&dm_bufio_clients_lock); - if (c->blocks_per_page_bits) { - if (!DM_BUFIO_CACHE_NAME(c)) { - DM_BUFIO_CACHE_NAME(c) = kasprintf(GFP_KERNEL, "dm_bufio_cache-%u", c->block_size); - if (!DM_BUFIO_CACHE_NAME(c)) { - r = -ENOMEM; - mutex_unlock(&dm_bufio_clients_lock); - goto bad; - } - } - - if (!DM_BUFIO_CACHE(c)) { - DM_BUFIO_CACHE(c) = kmem_cache_create(DM_BUFIO_CACHE_NAME(c), - c->block_size, - c->block_size, 0, NULL); - if (!DM_BUFIO_CACHE(c)) { - r = -ENOMEM; - mutex_unlock(&dm_bufio_clients_lock); - goto bad; - } + if (block_size <= KMALLOC_MAX_SIZE && + (block_size < PAGE_SIZE || !is_power_of_2(block_size))) { + snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", c->block_size); + c->slab_cache = kmem_cache_create(slab_name, c->block_size, ARCH_KMALLOC_MINALIGN, + SLAB_RECLAIM_ACCOUNT, NULL); + if (!c->slab_cache) { + r = -ENOMEM; + goto bad; } } - mutex_unlock(&dm_bufio_clients_lock); + if (aux_size) + snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer-%u", aux_size); + else + snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer"); + c->slab_buffer = kmem_cache_create(slab_name, sizeof(struct dm_buffer) + aux_size, + 0, SLAB_RECLAIM_ACCOUNT, NULL); + if (!c->slab_buffer) { + r = -ENOMEM; + goto bad; + } while (c->need_reserved_buffers) { struct dm_buffer *b = alloc_buffer(c, GFP_KERNEL); @@ -1762,6 +1733,8 @@ bad: list_del(&b->lru_list); free_buffer(b); } + kmem_cache_destroy(c->slab_cache); + kmem_cache_destroy(c->slab_buffer); dm_io_client_destroy(c->dm_io); bad_dm_io: mutex_destroy(&c->lock); @@ -1808,6 +1781,8 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c) for (i = 0; i < LIST_SIZE; i++) BUG_ON(c->n_buffers[i]); + kmem_cache_destroy(c->slab_cache); + kmem_cache_destroy(c->slab_buffer); dm_io_client_destroy(c->dm_io); mutex_destroy(&c->lock); kfree(c); @@ -1911,9 +1886,6 @@ static int __init dm_bufio_init(void) dm_bufio_allocated_vmalloc = 0; dm_bufio_current_allocated = 0; - memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches); - memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names); - mem = (__u64)mult_frac(totalram_pages - totalhigh_pages, DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT; @@ -1948,17 +1920,10 @@ static int __init dm_bufio_init(void) static void __exit dm_bufio_exit(void) { int bug = 0; - int i; cancel_delayed_work_sync(&dm_bufio_work); destroy_workqueue(dm_bufio_wq); - for (i = 0; i < ARRAY_SIZE(dm_bufio_caches); i++) - kmem_cache_destroy(dm_bufio_caches[i]); - - for (i = 0; i < ARRAY_SIZE(dm_bufio_cache_names); i++) - kfree(dm_bufio_cache_names[i]); - if (dm_bufio_client_count) { DMCRIT("%s: dm_bufio_client_count leaked: %d", __func__, dm_bufio_client_count); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 47407e43b96a..da208638fba4 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -3387,7 +3387,8 @@ static int process_invalidate_cblocks_message(struct cache *cache, unsigned coun * * The key migration_threshold is supported by the cache target core. */ -static int cache_message(struct dm_target *ti, unsigned argc, char **argv) +static int cache_message(struct dm_target *ti, unsigned argc, char **argv, + char *result, unsigned maxlen) { struct cache *cache = ti->private; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 8168f737590e..44ff473dab3e 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -148,6 +148,8 @@ struct crypt_config { mempool_t *tag_pool; unsigned tag_pool_max_sectors; + struct percpu_counter n_allocated_pages; + struct bio_set *bs; struct mutex bio_alloc_lock; @@ -219,6 +221,12 @@ struct crypt_config { #define MAX_TAG_SIZE 480 #define POOL_ENTRY_SIZE 512 +static DEFINE_SPINLOCK(dm_crypt_clients_lock); +static unsigned dm_crypt_clients_n = 0; +static volatile unsigned long dm_crypt_pages_per_client; +#define DM_CRYPT_MEMORY_PERCENT 2 +#define DM_CRYPT_MIN_PAGES_PER_CLIENT (BIO_MAX_PAGES * 16) + static void clone_init(struct dm_crypt_io *, struct bio *); static void kcryptd_queue_crypt(struct dm_crypt_io *io); static struct scatterlist *crypt_get_sg_data(struct crypt_config *cc, @@ -2155,6 +2163,43 @@ static int crypt_wipe_key(struct crypt_config *cc) return r; } +static void crypt_calculate_pages_per_client(void) +{ + unsigned long pages = (totalram_pages - totalhigh_pages) * DM_CRYPT_MEMORY_PERCENT / 100; + + if (!dm_crypt_clients_n) + return; + + pages /= dm_crypt_clients_n; + if (pages < DM_CRYPT_MIN_PAGES_PER_CLIENT) + pages = DM_CRYPT_MIN_PAGES_PER_CLIENT; + dm_crypt_pages_per_client = pages; +} + +static void *crypt_page_alloc(gfp_t gfp_mask, void *pool_data) +{ + struct crypt_config *cc = pool_data; + struct page *page; + + if (unlikely(percpu_counter_compare(&cc->n_allocated_pages, dm_crypt_pages_per_client) >= 0) && + likely(gfp_mask & __GFP_NORETRY)) + return NULL; + + page = alloc_page(gfp_mask); + if (likely(page != NULL)) + percpu_counter_add(&cc->n_allocated_pages, 1); + + return page; +} + +static void crypt_page_free(void *page, void *pool_data) +{ + struct crypt_config *cc = pool_data; + + __free_page(page); + percpu_counter_sub(&cc->n_allocated_pages, 1); +} + static void crypt_dtr(struct dm_target *ti) { struct crypt_config *cc = ti->private; @@ -2181,6 +2226,10 @@ static void crypt_dtr(struct dm_target *ti) mempool_destroy(cc->req_pool); mempool_destroy(cc->tag_pool); + if (cc->page_pool) + WARN_ON(percpu_counter_sum(&cc->n_allocated_pages) != 0); + percpu_counter_destroy(&cc->n_allocated_pages); + if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) cc->iv_gen_ops->dtr(cc); @@ -2197,6 +2246,12 @@ static void crypt_dtr(struct dm_target *ti) /* Must zero key material before freeing */ kzfree(cc); + + spin_lock(&dm_crypt_clients_lock); + WARN_ON(!dm_crypt_clients_n); + dm_crypt_clients_n--; + crypt_calculate_pages_per_client(); + spin_unlock(&dm_crypt_clients_lock); } static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode) @@ -2644,6 +2699,15 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->private = cc; + spin_lock(&dm_crypt_clients_lock); + dm_crypt_clients_n++; + crypt_calculate_pages_per_client(); + spin_unlock(&dm_crypt_clients_lock); + + ret = percpu_counter_init(&cc->n_allocated_pages, 0, GFP_KERNEL); + if (ret < 0) + goto bad; + /* Optional parameters need to be read before cipher constructor */ if (argc > 5) { ret = crypt_ctr_optional(ti, argc - 5, &argv[5]); @@ -2698,7 +2762,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + additional_req_size, ARCH_KMALLOC_MINALIGN); - cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0); + cc->page_pool = mempool_create(BIO_MAX_PAGES, crypt_page_alloc, crypt_page_free, cc); if (!cc->page_pool) { ti->error = "Cannot allocate page mempool"; goto bad; @@ -2942,7 +3006,8 @@ static void crypt_resume(struct dm_target *ti) * key set <key> * key wipe */ -static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) +static int crypt_message(struct dm_target *ti, unsigned argc, char **argv, + char *result, unsigned maxlen) { struct crypt_config *cc = ti->private; int key_size, ret = -EINVAL; diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index 73a5c198113a..8e48920a3ffa 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1635,7 +1635,8 @@ err: DMEMIT("Error"); } -static int era_message(struct dm_target *ti, unsigned argc, char **argv) +static int era_message(struct dm_target *ti, unsigned argc, char **argv, + char *result, unsigned maxlen) { struct era *era = ti->private; diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 1b907b15f5c3..21d126a5078c 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -442,8 +442,7 @@ static void flakey_status(struct dm_target *ti, status_type_t type, } } -static int flakey_prepare_ioctl(struct dm_target *ti, - struct block_device **bdev, fmode_t *mode) +static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev) { struct flakey_c *fc = ti->private; diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 46d7c8749222..77d9fe58dae2 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -18,7 +18,7 @@ #include <crypto/hash.h> #include <crypto/skcipher.h> #include <linux/async_tx.h> -#include "dm-bufio.h" +#include <linux/dm-bufio.h> #define DM_MSG_PREFIX "integrity" @@ -2548,6 +2548,9 @@ static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error, *error = error_key; return r; } + } else if (crypto_shash_get_flags(*hash) & CRYPTO_TFM_NEED_KEY) { + *error = error_key; + return -ENOKEY; } } diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index a89fd8f44453..5acf77de5945 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1595,7 +1595,7 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para DMWARN("Target message sector outside device."); r = -EINVAL; } else if (ti->type->message) - r = ti->type->message(ti, argc, argv); + r = ti->type->message(ti, argc, argv, result, maxlen); else { DMWARN("Target type does not support messages"); r = -EINVAL; diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index d5f8eff7c11d..99297212eeec 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -59,6 +59,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_bios = 1; ti->num_discard_bios = 1; + ti->num_secure_erase_bios = 1; ti->num_write_same_bios = 1; ti->num_write_zeroes_bios = 1; ti->private = lc; @@ -129,8 +130,7 @@ static void linear_status(struct dm_target *ti, status_type_t type, } } -static int linear_prepare_ioctl(struct dm_target *ti, - struct block_device **bdev, fmode_t *mode) +static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev) { struct linear_c *lc = (struct linear_c *) ti->private; struct dm_dev *dev = lc->dev; diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 3362d866793b..9de072b7782a 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -52,10 +52,11 @@ * in fact we want to do the data and the discard in the order that they * completed. */ -#define LOG_FLUSH_FLAG (1 << 0) -#define LOG_FUA_FLAG (1 << 1) -#define LOG_DISCARD_FLAG (1 << 2) -#define LOG_MARK_FLAG (1 << 3) +#define LOG_FLUSH_FLAG (1 << 0) +#define LOG_FUA_FLAG (1 << 1) +#define LOG_DISCARD_FLAG (1 << 2) +#define LOG_MARK_FLAG (1 << 3) +#define LOG_METADATA_FLAG (1 << 4) #define WRITE_LOG_VERSION 1ULL #define WRITE_LOG_MAGIC 0x6a736677736872ULL @@ -699,6 +700,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio) bool flush_bio = (bio->bi_opf & REQ_PREFLUSH); bool fua_bio = (bio->bi_opf & REQ_FUA); bool discard_bio = (bio_op(bio) == REQ_OP_DISCARD); + bool meta_bio = (bio->bi_opf & REQ_META); pb->block = NULL; @@ -743,6 +745,8 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio) block->flags |= LOG_FUA_FLAG; if (discard_bio) block->flags |= LOG_DISCARD_FLAG; + if (meta_bio) + block->flags |= LOG_METADATA_FLAG; block->sector = bio_to_dev_sectors(lc, bio->bi_iter.bi_sector); block->nr_sectors = bio_to_dev_sectors(lc, bio_sectors(bio)); @@ -860,7 +864,7 @@ static void log_writes_status(struct dm_target *ti, status_type_t type, } static int log_writes_prepare_ioctl(struct dm_target *ti, - struct block_device **bdev, fmode_t *mode) + struct block_device **bdev) { struct log_writes_c *lc = ti->private; struct dm_dev *dev = lc->dev; @@ -887,7 +891,8 @@ static int log_writes_iterate_devices(struct dm_target *ti, * Messages supported: * mark <mark data> - specify the marked data. */ -static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv) +static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv, + char *result, unsigned maxlen) { int r = -EINVAL; struct log_writes_c *lc = ti->private; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index a6b7baf31cdd..203a0419d2b0 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -714,7 +714,7 @@ static void process_queued_bios(struct work_struct *work) case DM_MAPIO_REMAPPED: generic_make_request(bio); break; - case 0: + case DM_MAPIO_SUBMITTED: break; default: WARN_ONCE(true, "__multipath_map_bio() returned %d\n", r); @@ -1811,7 +1811,8 @@ static void multipath_status(struct dm_target *ti, status_type_t type, spin_unlock_irqrestore(&m->lock, flags); } -static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) +static int multipath_message(struct dm_target *ti, unsigned argc, char **argv, + char *result, unsigned maxlen) { int r = -EINVAL; struct dm_dev *dev; @@ -1875,7 +1876,7 @@ out: } static int multipath_prepare_ioctl(struct dm_target *ti, - struct block_device **bdev, fmode_t *mode) + struct block_device **bdev) { struct multipath *m = ti->private; struct pgpath *current_pgpath; @@ -1888,7 +1889,6 @@ static int multipath_prepare_ioctl(struct dm_target *ti, if (current_pgpath) { if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) { *bdev = current_pgpath->path.dev->bdev; - *mode = current_pgpath->path.dev->mode; r = 0; } else { /* pg_init has not started or completed */ diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index c1d1034ff7b7..6f823f44b4aa 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1370,19 +1370,18 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as, * In device-mapper, we specify things in sectors, but * MD records this value in kB */ - value /= 2; - if (value > COUNTER_MAX) { + if (value < 0 || value / 2 > COUNTER_MAX) { rs->ti->error = "Max write-behind limit out of range"; return -EINVAL; } - rs->md.bitmap_info.max_write_behind = value; + rs->md.bitmap_info.max_write_behind = value / 2; } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP))) { if (test_and_set_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags)) { rs->ti->error = "Only one daemon_sleep argument pair allowed"; return -EINVAL; } - if (!value || (value > MAX_SCHEDULE_TIMEOUT)) { + if (value < 0) { rs->ti->error = "daemon sleep period out of range"; return -EINVAL; } @@ -1424,27 +1423,33 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as, return -EINVAL; } + if (value < 0) { + rs->ti->error = "Bogus stripe cache entries value"; + return -EINVAL; + } rs->stripe_cache_entries = value; } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE))) { if (test_and_set_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags)) { rs->ti->error = "Only one min_recovery_rate argument pair allowed"; return -EINVAL; } - if (value > INT_MAX) { + + if (value < 0) { rs->ti->error = "min_recovery_rate out of range"; return -EINVAL; } - rs->md.sync_speed_min = (int)value; + rs->md.sync_speed_min = value; } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE))) { if (test_and_set_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags)) { rs->ti->error = "Only one max_recovery_rate argument pair allowed"; return -EINVAL; } - if (value > INT_MAX) { + + if (value < 0) { rs->ti->error = "max_recovery_rate out of range"; return -EINVAL; } - rs->md.sync_speed_max = (int)value; + rs->md.sync_speed_max = value; } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_REGION_SIZE))) { if (test_and_set_bit(__CTR_FLAG_REGION_SIZE, &rs->ctr_flags)) { rs->ti->error = "Only one region_size argument pair allowed"; @@ -1490,6 +1495,12 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as, return -EINVAL; } + if (rs->md.sync_speed_max && + rs->md.sync_speed_min > rs->md.sync_speed_max) { + rs->ti->error = "Bogus recovery rates"; + return -EINVAL; + } + if (validate_region_size(rs, region_size)) return -EINVAL; @@ -3408,7 +3419,8 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); } else { - if (!test_bit(MD_RECOVERY_INTR, &recovery) && + if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) && + !test_bit(MD_RECOVERY_INTR, &recovery) && (test_bit(MD_RECOVERY_NEEDED, &recovery) || test_bit(MD_RECOVERY_RESHAPE, &recovery) || test_bit(MD_RECOVERY_RUNNING, &recovery))) @@ -3663,7 +3675,8 @@ static void raid_status(struct dm_target *ti, status_type_t type, } } -static int raid_message(struct dm_target *ti, unsigned int argc, char **argv) +static int raid_message(struct dm_target *ti, unsigned int argc, char **argv, + char *result, unsigned maxlen) { struct raid_set *rs = ti->private; struct mddev *mddev = &rs->md; diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index c5534d294773..3c50c4e4da8f 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -14,7 +14,7 @@ #include <linux/export.h> #include <linux/slab.h> #include <linux/dm-io.h> -#include "dm-bufio.h" +#include <linux/dm-bufio.h> #define DM_MSG_PREFIX "persistent snapshot" #define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */ diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index b5e892149c54..bb907cb3e60d 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -169,6 +169,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_bios = stripes; ti->num_discard_bios = stripes; + ti->num_secure_erase_bios = stripes; ti->num_write_same_bios = stripes; ti->num_write_zeroes_bios = stripes; @@ -295,6 +296,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } if (unlikely(bio_op(bio) == REQ_OP_DISCARD) || + unlikely(bio_op(bio) == REQ_OP_SECURE_ERASE) || unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES) || unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) { target_bio_nr = dm_bio_get_target_bio_nr(bio); @@ -368,7 +370,6 @@ static void stripe_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { struct stripe_c *sc = (struct stripe_c *) ti->private; - char buffer[sc->stripes + 1]; unsigned int sz = 0; unsigned int i; @@ -377,11 +378,12 @@ static void stripe_status(struct dm_target *ti, status_type_t type, DMEMIT("%d ", sc->stripes); for (i = 0; i < sc->stripes; i++) { DMEMIT("%s ", sc->stripe[i].dev->name); - buffer[i] = atomic_read(&(sc->stripe[i].error_count)) ? - 'D' : 'A'; } - buffer[i] = '\0'; - DMEMIT("1 %s", buffer); + DMEMIT("1 "); + for (i = 0; i < sc->stripes; i++) { + DMEMIT("%c", atomic_read(&(sc->stripe[i].error_count)) ? + 'D' : 'A'); + } break; case STATUSTYPE_TABLE: diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c index 8d0ba879777e..7924a6a33ddc 100644 --- a/drivers/md/dm-switch.c +++ b/drivers/md/dm-switch.c @@ -466,7 +466,8 @@ static int process_set_region_mappings(struct switch_ctx *sctx, * * Only set_region_mappings is supported. */ -static int switch_message(struct dm_target *ti, unsigned argc, char **argv) +static int switch_message(struct dm_target *ti, unsigned argc, char **argv, + char *result, unsigned maxlen) { static DEFINE_MUTEX(message_mutex); @@ -511,8 +512,7 @@ static void switch_status(struct dm_target *ti, status_type_t type, * * Passthrough all ioctls to the path for sector 0 */ -static int switch_prepare_ioctl(struct dm_target *ti, - struct block_device **bdev, fmode_t *mode) +static int switch_prepare_ioctl(struct dm_target *ti, struct block_device **bdev) { struct switch_ctx *sctx = ti->private; unsigned path_nr; @@ -520,7 +520,6 @@ static int switch_prepare_ioctl(struct dm_target *ti, path_nr = switch_get_path_nr(sctx, 0); *bdev = sctx->path_list[path_nr].dmdev->bdev; - *mode = sctx->path_list[path_nr].dmdev->mode; /* * Only pass ioctls through if the device sizes match exactly. diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 954f4e3b68ac..0589a4da12bb 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1846,6 +1846,34 @@ static bool dm_table_supports_discards(struct dm_table *t) return true; } +static int device_not_secure_erase_capable(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && !blk_queue_secure_erase(q); +} + +static bool dm_table_supports_secure_erase(struct dm_table *t) +{ + struct dm_target *ti; + unsigned int i; + + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + + if (!ti->num_secure_erase_bios) + return false; + + if (!ti->type->iterate_devices || + ti->type->iterate_devices(ti, device_not_secure_erase_capable, NULL)) + return false; + } + + return true; +} + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1867,6 +1895,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } else blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); + if (dm_table_supports_secure_erase(t)) + blk_queue_flag_set(QUEUE_FLAG_SECERASE, q); + if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) { wc = true; if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_FUA))) diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index c0d7e60820c4..314d17ca6466 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -16,8 +16,6 @@ static LIST_HEAD(_targets); static DECLARE_RWSEM(_lock); -#define DM_MOD_NAME_SIZE 32 - static inline struct target_type *__find_target_type(const char *name) { struct target_type *tt; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 629c555890c1..b11107497d2e 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3705,7 +3705,8 @@ static int process_release_metadata_snap_mesg(unsigned argc, char **argv, struct * reserve_metadata_snap * release_metadata_snap */ -static int pool_message(struct dm_target *ti, unsigned argc, char **argv) +static int pool_message(struct dm_target *ti, unsigned argc, char **argv, + char *result, unsigned maxlen) { int r = -EINVAL; struct pool_c *pt = ti->private; diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c index 65f838fa2e99..954b7ab4e684 100644 --- a/drivers/md/dm-unstripe.c +++ b/drivers/md/dm-unstripe.c @@ -7,12 +7,6 @@ #include "dm.h" #include <linux/module.h> -#include <linux/init.h> -#include <linux/blkdev.h> -#include <linux/bio.h> -#include <linux/slab.h> -#include <linux/bitops.h> -#include <linux/device-mapper.h> struct unstripe_c { struct dm_dev *dev; @@ -69,12 +63,6 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto err; } - // FIXME: must support non power of 2 chunk_size, dm-stripe.c does - if (!is_power_of_2(uc->chunk_size)) { - ti->error = "Non power of 2 chunk_size is not supported yet"; - goto err; - } - if (kstrtouint(argv[2], 10, &uc->unstripe)) { ti->error = "Invalid stripe number"; goto err; @@ -98,7 +86,7 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) uc->unstripe_offset = uc->unstripe * uc->chunk_size; uc->unstripe_width = (uc->stripes - 1) * uc->chunk_size; - uc->chunk_shift = fls(uc->chunk_size) - 1; + uc->chunk_shift = is_power_of_2(uc->chunk_size) ? fls(uc->chunk_size) - 1 : 0; tmp_len = ti->len; if (sector_div(tmp_len, uc->chunk_size)) { @@ -129,14 +117,18 @@ static sector_t map_to_core(struct dm_target *ti, struct bio *bio) { struct unstripe_c *uc = ti->private; sector_t sector = bio->bi_iter.bi_sector; + sector_t tmp_sector = sector; /* Shift us up to the right "row" on the stripe */ - sector += uc->unstripe_width * (sector >> uc->chunk_shift); + if (uc->chunk_shift) + tmp_sector >>= uc->chunk_shift; + else + sector_div(tmp_sector, uc->chunk_size); - /* Account for what stripe we're operating on */ - sector += uc->unstripe_offset; + sector += uc->unstripe_width * tmp_sector; - return sector; + /* Account for what stripe we're operating on */ + return sector + uc->unstripe_offset; } static int unstripe_map(struct dm_target *ti, struct bio *bio) @@ -185,7 +177,7 @@ static void unstripe_io_hints(struct dm_target *ti, static struct target_type unstripe_target = { .name = "unstriped", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = unstripe_ctr, .dtr = unstripe_dtr, @@ -197,13 +189,7 @@ static struct target_type unstripe_target = { static int __init dm_unstripe_init(void) { - int r; - - r = dm_register_target(&unstripe_target); - if (r < 0) - DMERR("target registration failed"); - - return r; + return dm_register_target(&unstripe_target); } static void __exit dm_unstripe_exit(void) @@ -215,5 +201,6 @@ module_init(dm_unstripe_init); module_exit(dm_unstripe_exit); MODULE_DESCRIPTION(DM_NAME " unstriped target"); +MODULE_ALIAS("dm-unstriped"); MODULE_AUTHOR("Scott Bauer <scott.bauer@intel.com>"); MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index aedb8222836b..fc893f636a98 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -32,6 +32,7 @@ #define DM_VERITY_OPT_LOGGING "ignore_corruption" #define DM_VERITY_OPT_RESTART "restart_on_corruption" #define DM_VERITY_OPT_IGN_ZEROES "ignore_zero_blocks" +#define DM_VERITY_OPT_AT_MOST_ONCE "check_at_most_once" #define DM_VERITY_OPTS_MAX (2 + DM_VERITY_OPTS_FEC) @@ -347,8 +348,8 @@ out: /* * Calculates the digest for the given bio */ -int verity_for_io_block(struct dm_verity *v, struct dm_verity_io *io, - struct bvec_iter *iter, struct crypto_wait *wait) +static int verity_for_io_block(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter *iter, struct crypto_wait *wait) { unsigned int todo = 1 << v->data_dev_block_bits; struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); @@ -433,6 +434,18 @@ static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io, } /* + * Moves the bio iter one data block forward. + */ +static inline void verity_bv_skip_block(struct dm_verity *v, + struct dm_verity_io *io, + struct bvec_iter *iter) +{ + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); + + bio_advance_iter(bio, iter, 1 << v->data_dev_block_bits); +} + +/* * Verify one "dm_verity_io" structure. */ static int verity_verify_io(struct dm_verity_io *io) @@ -445,9 +458,16 @@ static int verity_verify_io(struct dm_verity_io *io) for (b = 0; b < io->n_blocks; b++) { int r; + sector_t cur_block = io->block + b; struct ahash_request *req = verity_io_hash_req(v, io); - r = verity_hash_for_block(v, io, io->block + b, + if (v->validated_blocks && + likely(test_bit(cur_block, v->validated_blocks))) { + verity_bv_skip_block(v, io, &io->iter); + continue; + } + + r = verity_hash_for_block(v, io, cur_block, verity_io_want_digest(v, io), &is_zero); if (unlikely(r < 0)) @@ -481,13 +501,16 @@ static int verity_verify_io(struct dm_verity_io *io) return r; if (likely(memcmp(verity_io_real_digest(v, io), - verity_io_want_digest(v, io), v->digest_size) == 0)) + verity_io_want_digest(v, io), v->digest_size) == 0)) { + if (v->validated_blocks) + set_bit(cur_block, v->validated_blocks); continue; + } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, - io->block + b, NULL, &start) == 0) + cur_block, NULL, &start) == 0) continue; else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, - io->block + b)) + cur_block)) return -EIO; } @@ -673,6 +696,8 @@ static void verity_status(struct dm_target *ti, status_type_t type, args += DM_VERITY_OPTS_FEC; if (v->zero_digest) args++; + if (v->validated_blocks) + args++; if (!args) return; DMEMIT(" %u", args); @@ -691,13 +716,14 @@ static void verity_status(struct dm_target *ti, status_type_t type, } if (v->zero_digest) DMEMIT(" " DM_VERITY_OPT_IGN_ZEROES); + if (v->validated_blocks) + DMEMIT(" " DM_VERITY_OPT_AT_MOST_ONCE); sz = verity_fec_status_table(v, sz, result, maxlen); break; } } -static int verity_prepare_ioctl(struct dm_target *ti, - struct block_device **bdev, fmode_t *mode) +static int verity_prepare_ioctl(struct dm_target *ti, struct block_device **bdev) { struct dm_verity *v = ti->private; @@ -740,6 +766,7 @@ static void verity_dtr(struct dm_target *ti) if (v->bufio) dm_bufio_client_destroy(v->bufio); + kvfree(v->validated_blocks); kfree(v->salt); kfree(v->root_digest); kfree(v->zero_digest); @@ -760,6 +787,26 @@ static void verity_dtr(struct dm_target *ti) kfree(v); } +static int verity_alloc_most_once(struct dm_verity *v) +{ + struct dm_target *ti = v->ti; + + /* the bitset can only handle INT_MAX blocks */ + if (v->data_blocks > INT_MAX) { + ti->error = "device too large to use check_at_most_once"; + return -E2BIG; + } + + v->validated_blocks = kvzalloc(BITS_TO_LONGS(v->data_blocks) * + sizeof(unsigned long), GFP_KERNEL); + if (!v->validated_blocks) { + ti->error = "failed to allocate bitset for check_at_most_once"; + return -ENOMEM; + } + + return 0; +} + static int verity_alloc_zero_digest(struct dm_verity *v) { int r = -ENOMEM; @@ -829,6 +876,12 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) } continue; + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_AT_MOST_ONCE)) { + r = verity_alloc_most_once(v); + if (r) + return r; + continue; + } else if (verity_is_fec_opt_arg(arg_name)) { r = verity_fec_parse_opt_args(as, v, &argc, arg_name); if (r) @@ -1096,7 +1149,7 @@ bad: static struct target_type verity_target = { .name = "verity", - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index b675bc015512..3441c10b840c 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -12,7 +12,7 @@ #ifndef DM_VERITY_H #define DM_VERITY_H -#include "dm-bufio.h" +#include <linux/dm-bufio.h> #include <linux/device-mapper.h> #include <crypto/hash.h> @@ -63,6 +63,7 @@ struct dm_verity { sector_t hash_level_block[DM_VERITY_MAX_LEVELS]; struct dm_verity_fec *fec; /* forward error correction */ + unsigned long *validated_blocks; /* bitset blocks validated */ }; struct dm_verity_io { diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index caff02caf083..e73b0776683c 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -898,8 +898,7 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits) /* * Pass on ioctl to the backend device. */ -static int dmz_prepare_ioctl(struct dm_target *ti, - struct block_device **bdev, fmode_t *mode) +static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev) { struct dmz_target *dmz = ti->private; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ded74e1eb0d1..5a81c47be4e4 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -458,67 +458,56 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) return dm_get_geometry(md, geo); } -static char *_dm_claim_ptr = "I belong to device-mapper"; - -static int dm_get_bdev_for_ioctl(struct mapped_device *md, - struct block_device **bdev, - fmode_t *mode) +static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx, + struct block_device **bdev) + __acquires(md->io_barrier) { struct dm_target *tgt; struct dm_table *map; - int srcu_idx, r, r2; + int r; retry: r = -ENOTTY; - map = dm_get_live_table(md, &srcu_idx); + map = dm_get_live_table(md, srcu_idx); if (!map || !dm_table_get_size(map)) - goto out; + return r; /* We only support devices that have a single target */ if (dm_table_get_num_targets(map) != 1) - goto out; + return r; tgt = dm_table_get_target(map, 0); if (!tgt->type->prepare_ioctl) - goto out; - - if (dm_suspended_md(md)) { - r = -EAGAIN; - goto out; - } - - r = tgt->type->prepare_ioctl(tgt, bdev, mode); - if (r < 0) - goto out; - - bdgrab(*bdev); - r2 = blkdev_get(*bdev, *mode, _dm_claim_ptr); - if (r2 < 0) { - r = r2; - goto out; - } + return r; - dm_put_live_table(md, srcu_idx); - return r; + if (dm_suspended_md(md)) + return -EAGAIN; -out: - dm_put_live_table(md, srcu_idx); + r = tgt->type->prepare_ioctl(tgt, bdev); if (r == -ENOTCONN && !fatal_signal_pending(current)) { + dm_put_live_table(md, *srcu_idx); msleep(10); goto retry; } + return r; } +static void dm_unprepare_ioctl(struct mapped_device *md, int srcu_idx) + __releases(md->io_barrier) +{ + dm_put_live_table(md, srcu_idx); +} + static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct mapped_device *md = bdev->bd_disk->private_data; - int r; + int r, srcu_idx; - r = dm_get_bdev_for_ioctl(md, &bdev, &mode); + r = dm_prepare_ioctl(md, &srcu_idx, &bdev); if (r < 0) - return r; + goto out; if (r > 0) { /* @@ -536,7 +525,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, r = __blkdev_driver_ioctl(bdev, mode, cmd, arg); out: - blkdev_put(bdev, mode); + dm_unprepare_ioctl(md, srcu_idx); return r; } @@ -710,6 +699,8 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU) rcu_read_unlock(); } +static char *_dm_claim_ptr = "I belong to device-mapper"; + /* * Open a table device so we can use it as a map destination. */ @@ -1414,6 +1405,11 @@ static unsigned get_num_discard_bios(struct dm_target *ti) return ti->num_discard_bios; } +static unsigned get_num_secure_erase_bios(struct dm_target *ti) +{ + return ti->num_secure_erase_bios; +} + static unsigned get_num_write_same_bios(struct dm_target *ti) { return ti->num_write_same_bios; @@ -1467,6 +1463,11 @@ static int __send_discard(struct clone_info *ci, struct dm_target *ti) is_split_required_for_discard); } +static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti) +{ + return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios, NULL); +} + static int __send_write_same(struct clone_info *ci, struct dm_target *ti) { return __send_changing_extent_only(ci, ti, get_num_write_same_bios, NULL); @@ -1477,6 +1478,25 @@ static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti) return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios, NULL); } +static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti, + int *result) +{ + struct bio *bio = ci->bio; + + if (bio_op(bio) == REQ_OP_DISCARD) + *result = __send_discard(ci, ti); + else if (bio_op(bio) == REQ_OP_SECURE_ERASE) + *result = __send_secure_erase(ci, ti); + else if (bio_op(bio) == REQ_OP_WRITE_SAME) + *result = __send_write_same(ci, ti); + else if (bio_op(bio) == REQ_OP_WRITE_ZEROES) + *result = __send_write_zeroes(ci, ti); + else + return false; + + return true; +} + /* * Select the correct strategy for processing a non-flush bio. */ @@ -1491,12 +1511,8 @@ static int __split_and_process_non_flush(struct clone_info *ci) if (!dm_target_is_valid(ti)) return -EIO; - if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) - return __send_discard(ci, ti); - else if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) - return __send_write_same(ci, ti); - else if (unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES)) - return __send_write_zeroes(ci, ti); + if (unlikely(__process_abnormal_io(ci, ti, &r))) + return r; if (bio_op(bio) == REQ_OP_ZONE_REPORT) len = ci->sector_count; @@ -1617,9 +1633,12 @@ static blk_qc_t __process_bio(struct mapped_device *md, goto out; } - tio = alloc_tio(&ci, ti, 0, GFP_NOIO); ci.bio = bio; ci.sector_count = bio_sectors(bio); + if (unlikely(__process_abnormal_io(&ci, ti, &error))) + goto out; + + tio = alloc_tio(&ci, ti, 0, GFP_NOIO); ret = __clone_and_map_simple_bio(&ci, tio, NULL); } out: @@ -3015,20 +3034,19 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type, { struct mapped_device *md = bdev->bd_disk->private_data; const struct pr_ops *ops; - fmode_t mode; - int r; + int r, srcu_idx; - r = dm_get_bdev_for_ioctl(md, &bdev, &mode); + r = dm_prepare_ioctl(md, &srcu_idx, &bdev); if (r < 0) - return r; + goto out; ops = bdev->bd_disk->fops->pr_ops; if (ops && ops->pr_reserve) r = ops->pr_reserve(bdev, key, type, flags); else r = -EOPNOTSUPP; - - blkdev_put(bdev, mode); +out: + dm_unprepare_ioctl(md, srcu_idx); return r; } @@ -3036,20 +3054,19 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type) { struct mapped_device *md = bdev->bd_disk->private_data; const struct pr_ops *ops; - fmode_t mode; - int r; + int r, srcu_idx; - r = dm_get_bdev_for_ioctl(md, &bdev, &mode); + r = dm_prepare_ioctl(md, &srcu_idx, &bdev); if (r < 0) - return r; + goto out; ops = bdev->bd_disk->fops->pr_ops; if (ops && ops->pr_release) r = ops->pr_release(bdev, key, type); else r = -EOPNOTSUPP; - - blkdev_put(bdev, mode); +out: + dm_unprepare_ioctl(md, srcu_idx); return r; } @@ -3058,20 +3075,19 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key, { struct mapped_device *md = bdev->bd_disk->private_data; const struct pr_ops *ops; - fmode_t mode; - int r; + int r, srcu_idx; - r = dm_get_bdev_for_ioctl(md, &bdev, &mode); + r = dm_prepare_ioctl(md, &srcu_idx, &bdev); if (r < 0) - return r; + goto out; ops = bdev->bd_disk->fops->pr_ops; if (ops && ops->pr_preempt) r = ops->pr_preempt(bdev, old_key, new_key, type, abort); else r = -EOPNOTSUPP; - - blkdev_put(bdev, mode); +out: + dm_unprepare_ioctl(md, srcu_idx); return r; } @@ -3079,20 +3095,19 @@ static int dm_pr_clear(struct block_device *bdev, u64 key) { struct mapped_device *md = bdev->bd_disk->private_data; const struct pr_ops *ops; - fmode_t mode; - int r; + int r, srcu_idx; - r = dm_get_bdev_for_ioctl(md, &bdev, &mode); + r = dm_prepare_ioctl(md, &srcu_idx, &bdev); if (r < 0) - return r; + goto out; ops = bdev->bd_disk->fops->pr_ops; if (ops && ops->pr_clear) r = ops->pr_clear(bdev, key); else r = -EOPNOTSUPP; - - blkdev_put(bdev, mode); +out: + dm_unprepare_ioctl(md, srcu_idx); return r; } diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index ea15d220ced7..492a3f8ac119 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -5,8 +5,8 @@ */ #include "dm-block-manager.h" #include "dm-persistent-data-internal.h" -#include "../dm-bufio.h" +#include <linux/dm-bufio.h> #include <linux/crc32c.h> #include <linux/module.h> #include <linux/slab.h> diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 4384433b50e7..31fef7c34185 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -87,10 +87,10 @@ typedef void (*dm_resume_fn) (struct dm_target *ti); typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, unsigned status_flags, char *result, unsigned maxlen); -typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv); +typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv, + char *result, unsigned maxlen); -typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, - struct block_device **bdev, fmode_t *mode); +typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev); /* * These iteration functions are typically used to check (and combine) @@ -267,6 +267,12 @@ struct dm_target { unsigned num_discard_bios; /* + * The number of secure erase bios that will be submitted to the target. + * The bio number can be accessed with dm_bio_get_target_bio_nr. + */ + unsigned num_secure_erase_bios; + + /* * The number of WRITE SAME bios that will be submitted to the target. * The bio number can be accessed with dm_bio_get_target_bio_nr. */ diff --git a/drivers/md/dm-bufio.h b/include/linux/dm-bufio.h index be732d3f8611..3c8b7d274bd9 100644 --- a/drivers/md/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -6,8 +6,8 @@ * This file is released under the GPL. */ -#ifndef DM_BUFIO_H -#define DM_BUFIO_H +#ifndef _LINUX_DM_BUFIO_H +#define _LINUX_DM_BUFIO_H #include <linux/blkdev.h> #include <linux/types.h> diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 14c44ec8b622..d1e49514977b 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -270,9 +270,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 37 +#define DM_VERSION_MINOR 39 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2017-09-20)" +#define DM_VERSION_EXTRA "-ioctl (2018-04-03)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ |