diff options
Diffstat (limited to 'drivers/md')
53 files changed, 1193 insertions, 806 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 35b1080752cd..1e9db8e4acdf 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -540,6 +540,16 @@ config DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING If unsure, say N. +config DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING + bool "Verity data device root hash signature verification with platform keyring" + default DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING + depends on DM_VERITY_VERIFY_ROOTHASH_SIG + depends on INTEGRITY_PLATFORM_KEYRING + help + Rely also on the platform keyring to verify dm-verity signatures. + + If unsure, say N. + config DM_VERITY_FEC bool "Verity forward error correction support" depends on DM_VERITY diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 48ce750bf70a..da50f6661bae 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -164,40 +164,68 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b) * prio is worth 1/8th of what INITIAL_PRIO is worth. */ -#define bucket_prio(b) \ -({ \ - unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \ - \ - (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \ -}) +static inline unsigned int new_bucket_prio(struct cache *ca, struct bucket *b) +{ + unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; + + return (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); +} + +static inline bool new_bucket_max_cmp(const void *l, const void *r, void *args) +{ + struct bucket **lhs = (struct bucket **)l; + struct bucket **rhs = (struct bucket **)r; + struct cache *ca = args; + + return new_bucket_prio(ca, *lhs) > new_bucket_prio(ca, *rhs); +} -#define bucket_max_cmp(l, r) (bucket_prio(l) < bucket_prio(r)) -#define bucket_min_cmp(l, r) (bucket_prio(l) > bucket_prio(r)) +static inline bool new_bucket_min_cmp(const void *l, const void *r, void *args) +{ + struct bucket **lhs = (struct bucket **)l; + struct bucket **rhs = (struct bucket **)r; + struct cache *ca = args; + + return new_bucket_prio(ca, *lhs) < new_bucket_prio(ca, *rhs); +} + +static inline void new_bucket_swap(void *l, void *r, void __always_unused *args) +{ + struct bucket **lhs = l, **rhs = r; + + swap(*lhs, *rhs); +} static void invalidate_buckets_lru(struct cache *ca) { struct bucket *b; - ssize_t i; + const struct min_heap_callbacks bucket_max_cmp_callback = { + .less = new_bucket_max_cmp, + .swp = new_bucket_swap, + }; + const struct min_heap_callbacks bucket_min_cmp_callback = { + .less = new_bucket_min_cmp, + .swp = new_bucket_swap, + }; - ca->heap.used = 0; + ca->heap.nr = 0; for_each_bucket(b, ca) { if (!bch_can_invalidate_bucket(ca, b)) continue; - if (!heap_full(&ca->heap)) - heap_add(&ca->heap, b, bucket_max_cmp); - else if (bucket_max_cmp(b, heap_peek(&ca->heap))) { + if (!min_heap_full(&ca->heap)) + min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca); + else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) { ca->heap.data[0] = b; - heap_sift(&ca->heap, 0, bucket_max_cmp); + min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca); } } - for (i = ca->heap.used / 2 - 1; i >= 0; --i) - heap_sift(&ca->heap, i, bucket_min_cmp); + min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca); while (!fifo_full(&ca->free_inc)) { - if (!heap_pop(&ca->heap, b, bucket_min_cmp)) { + if (!ca->heap.nr) { /* * We don't want to be calling invalidate_buckets() * multiple times when it can't do anything @@ -206,6 +234,8 @@ static void invalidate_buckets_lru(struct cache *ca) wake_up_gc(ca->set); return; } + b = min_heap_peek(&ca->heap)[0]; + min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca); bch_invalidate_one_bucket(ca, b); } diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 1d33e40d26ea..785b0d9008fa 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -458,7 +458,7 @@ struct cache { /* Allocation stuff: */ struct bucket *buckets; - DECLARE_HEAP(struct bucket *, heap); + DEFINE_MIN_HEAP(struct bucket *, cache_heap) heap; /* * If nonzero, we know we aren't going to find any buckets to invalidate diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 463eb13bd0b2..bd97d8626887 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -54,9 +54,11 @@ void bch_dump_bucket(struct btree_keys *b) int __bch_count_data(struct btree_keys *b) { unsigned int ret = 0; - struct btree_iter_stack iter; + struct btree_iter iter; struct bkey *k; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + if (b->ops->is_extents) for_each_key(b, k, &iter) ret += KEY_SIZE(k); @@ -67,9 +69,11 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) { va_list args; struct bkey *k, *p = NULL; - struct btree_iter_stack iter; + struct btree_iter iter; const char *err; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + for_each_key(b, k, &iter) { if (b->ops->is_extents) { err = "Keys out of order"; @@ -110,9 +114,9 @@ bug: static void bch_btree_iter_next_check(struct btree_iter *iter) { - struct bkey *k = iter->data->k, *next = bkey_next(k); + struct bkey *k = iter->heap.data->k, *next = bkey_next(k); - if (next < iter->data->end && + if (next < iter->heap.data->end && bkey_cmp(k, iter->b->ops->is_extents ? &START_KEY(next) : next) > 0) { bch_dump_bucket(iter->b); @@ -879,12 +883,14 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, unsigned int status = BTREE_INSERT_STATUS_NO_INSERT; struct bset *i = bset_tree_last(b)->data; struct bkey *m, *prev = NULL; - struct btree_iter_stack iter; + struct btree_iter iter; struct bkey preceding_key_on_stack = ZERO_KEY; struct bkey *preceding_key_p = &preceding_key_on_stack; BUG_ON(b->ops->is_extents && !KEY_SIZE(k)); + min_heap_init(&iter.heap, NULL, MAX_BSETS); + /* * If k has preceding key, preceding_key_p will be set to address * of k's preceding key; otherwise preceding_key_p will be set @@ -895,9 +901,9 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, else preceding_key(k, &preceding_key_p); - m = bch_btree_iter_stack_init(b, &iter, preceding_key_p); + m = bch_btree_iter_init(b, &iter, preceding_key_p); - if (b->ops->insert_fixup(b, k, &iter.iter, replace_key)) + if (b->ops->insert_fixup(b, k, &iter, replace_key)) return status; status = BTREE_INSERT_STATUS_INSERT; @@ -1077,79 +1083,102 @@ struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t, /* Btree iterator */ -typedef bool (btree_iter_cmp_fn)(struct btree_iter_set, - struct btree_iter_set); +typedef bool (new_btree_iter_cmp_fn)(const void *, const void *, void *); + +static inline bool new_btree_iter_cmp(const void *l, const void *r, void __always_unused *args) +{ + const struct btree_iter_set *_l = l; + const struct btree_iter_set *_r = r; + + return bkey_cmp(_l->k, _r->k) <= 0; +} -static inline bool btree_iter_cmp(struct btree_iter_set l, - struct btree_iter_set r) +static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args) { - return bkey_cmp(l.k, r.k) > 0; + struct btree_iter_set *_iter1 = iter1; + struct btree_iter_set *_iter2 = iter2; + + swap(*_iter1, *_iter2); } static inline bool btree_iter_end(struct btree_iter *iter) { - return !iter->used; + return !iter->heap.nr; } void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, struct bkey *end) { + const struct min_heap_callbacks callbacks = { + .less = new_btree_iter_cmp, + .swp = new_btree_iter_swap, + }; + if (k != end) - BUG_ON(!heap_add(iter, - ((struct btree_iter_set) { k, end }), - btree_iter_cmp)); + BUG_ON(!min_heap_push(&iter->heap, + &((struct btree_iter_set) { k, end }), + &callbacks, + NULL)); } -static struct bkey *__bch_btree_iter_stack_init(struct btree_keys *b, - struct btree_iter_stack *iter, - struct bkey *search, - struct bset_tree *start) +static struct bkey *__bch_btree_iter_init(struct btree_keys *b, + struct btree_iter *iter, + struct bkey *search, + struct bset_tree *start) { struct bkey *ret = NULL; - iter->iter.size = ARRAY_SIZE(iter->stack_data); - iter->iter.used = 0; + iter->heap.size = ARRAY_SIZE(iter->heap.preallocated); + iter->heap.nr = 0; #ifdef CONFIG_BCACHE_DEBUG - iter->iter.b = b; + iter->b = b; #endif for (; start <= bset_tree_last(b); start++) { ret = bch_bset_search(b, start, search); - bch_btree_iter_push(&iter->iter, ret, bset_bkey_last(start->data)); + bch_btree_iter_push(iter, ret, bset_bkey_last(start->data)); } return ret; } -struct bkey *bch_btree_iter_stack_init(struct btree_keys *b, - struct btree_iter_stack *iter, +struct bkey *bch_btree_iter_init(struct btree_keys *b, + struct btree_iter *iter, struct bkey *search) { - return __bch_btree_iter_stack_init(b, iter, search, b->set); + return __bch_btree_iter_init(b, iter, search, b->set); } static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, - btree_iter_cmp_fn *cmp) + new_btree_iter_cmp_fn *cmp) { struct btree_iter_set b __maybe_unused; struct bkey *ret = NULL; + const struct min_heap_callbacks callbacks = { + .less = cmp, + .swp = new_btree_iter_swap, + }; if (!btree_iter_end(iter)) { bch_btree_iter_next_check(iter); - ret = iter->data->k; - iter->data->k = bkey_next(iter->data->k); + ret = iter->heap.data->k; + iter->heap.data->k = bkey_next(iter->heap.data->k); - if (iter->data->k > iter->data->end) { + if (iter->heap.data->k > iter->heap.data->end) { WARN_ONCE(1, "bset was corrupt!\n"); - iter->data->k = iter->data->end; + iter->heap.data->k = iter->heap.data->end; } - if (iter->data->k == iter->data->end) - heap_pop(iter, b, cmp); + if (iter->heap.data->k == iter->heap.data->end) { + if (iter->heap.nr) { + b = min_heap_peek(&iter->heap)[0]; + min_heap_pop(&iter->heap, &callbacks, NULL); + } + } else - heap_sift(iter, 0, cmp); + min_heap_sift_down(&iter->heap, 0, &callbacks, NULL); } return ret; @@ -1157,7 +1186,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, struct bkey *bch_btree_iter_next(struct btree_iter *iter) { - return __bch_btree_iter_next(iter, btree_iter_cmp); + return __bch_btree_iter_next(iter, new_btree_iter_cmp); } @@ -1195,16 +1224,18 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out, struct btree_iter *iter, bool fixup, bool remove_stale) { - int i; struct bkey *k, *last = NULL; BKEY_PADDED(k) tmp; bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale ? bch_ptr_bad : bch_ptr_invalid; + const struct min_heap_callbacks callbacks = { + .less = b->ops->sort_cmp, + .swp = new_btree_iter_swap, + }; /* Heapify the iterator, using our comparison function */ - for (i = iter->used / 2 - 1; i >= 0; --i) - heap_sift(iter, i, b->ops->sort_cmp); + min_heapify_all(&iter->heap, &callbacks, NULL); while (!btree_iter_end(iter)) { if (b->ops->sort_fixup && fixup) @@ -1293,10 +1324,11 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start, struct bset_sort_state *state) { size_t order = b->page_order, keys = 0; - struct btree_iter_stack iter; + struct btree_iter iter; int oldsize = bch_count_data(b); - __bch_btree_iter_stack_init(b, &iter, NULL, &b->set[start]); + min_heap_init(&iter.heap, NULL, MAX_BSETS); + __bch_btree_iter_init(b, &iter, NULL, &b->set[start]); if (start) { unsigned int i; @@ -1307,7 +1339,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start, order = get_order(__set_bytes(b->set->data, keys)); } - __btree_sort(b, &iter.iter, start, order, false, state); + __btree_sort(b, &iter, start, order, false, state); EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize); } @@ -1323,11 +1355,13 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new, struct bset_sort_state *state) { uint64_t start_time = local_clock(); - struct btree_iter_stack iter; + struct btree_iter iter; + + min_heap_init(&iter.heap, NULL, MAX_BSETS); - bch_btree_iter_stack_init(b, &iter, NULL); + bch_btree_iter_init(b, &iter, NULL); - btree_mergesort(b, new->set->data, &iter.iter, false, true); + btree_mergesort(b, new->set->data, &iter, false, true); bch_time_stats_update(&state->time, start_time); diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 011f6062c4c0..f79441acd4c1 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -187,8 +187,9 @@ struct bset_tree { }; struct btree_keys_ops { - bool (*sort_cmp)(struct btree_iter_set l, - struct btree_iter_set r); + bool (*sort_cmp)(const void *l, + const void *r, + void *args); struct bkey *(*sort_fixup)(struct btree_iter *iter, struct bkey *tmp); bool (*insert_fixup)(struct btree_keys *b, @@ -312,23 +313,17 @@ enum { BTREE_INSERT_STATUS_FRONT_MERGE, }; +struct btree_iter_set { + struct bkey *k, *end; +}; + /* Btree key iteration */ struct btree_iter { - size_t size, used; #ifdef CONFIG_BCACHE_DEBUG struct btree_keys *b; #endif - struct btree_iter_set { - struct bkey *k, *end; - } data[]; -}; - -/* Fixed-size btree_iter that can be allocated on the stack */ - -struct btree_iter_stack { - struct btree_iter iter; - struct btree_iter_set stack_data[MAX_BSETS]; + MIN_HEAP_PREALLOCATED(struct btree_iter_set, btree_iter_heap, MAX_BSETS) heap; }; typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k); @@ -340,9 +335,9 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, struct bkey *end); -struct bkey *bch_btree_iter_stack_init(struct btree_keys *b, - struct btree_iter_stack *iter, - struct bkey *search); +struct bkey *bch_btree_iter_init(struct btree_keys *b, + struct btree_iter *iter, + struct bkey *search); struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t, const struct bkey *search); @@ -357,14 +352,13 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b, return search ? __bch_bset_search(b, t, search) : t->data->start; } -#define for_each_key_filter(b, k, stack_iter, filter) \ - for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \ - ((k) = bch_btree_iter_next_filter(&((stack_iter)->iter), (b), \ - filter));) +#define for_each_key_filter(b, k, iter, filter) \ + for (bch_btree_iter_init((b), (iter), NULL); \ + ((k) = bch_btree_iter_next_filter((iter), (b), filter));) -#define for_each_key(b, k, stack_iter) \ - for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \ - ((k) = bch_btree_iter_next(&((stack_iter)->iter)));) +#define for_each_key(b, k, iter) \ + for (bch_btree_iter_init((b), (iter), NULL); \ + ((k) = bch_btree_iter_next(iter));) /* Sorting */ diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 4e6ccf2c8a0b..ed40d8600656 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -149,19 +149,19 @@ void bch_btree_node_read_done(struct btree *b) { const char *err = "bad btree header"; struct bset *i = btree_bset_first(b); - struct btree_iter *iter; + struct btree_iter iter; /* * c->fill_iter can allocate an iterator with more memory space * than static MAX_BSETS. * See the comment arount cache_set->fill_iter. */ - iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO); - iter->size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size; - iter->used = 0; + iter.heap.data = mempool_alloc(&b->c->fill_iter, GFP_NOIO); + iter.heap.size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size; + iter.heap.nr = 0; #ifdef CONFIG_BCACHE_DEBUG - iter->b = &b->keys; + iter.b = &b->keys; #endif if (!i->seq) @@ -199,7 +199,7 @@ void bch_btree_node_read_done(struct btree *b) if (i != b->keys.set[0].data && !i->keys) goto err; - bch_btree_iter_push(iter, i->start, bset_bkey_last(i)); + bch_btree_iter_push(&iter, i->start, bset_bkey_last(i)); b->written += set_blocks(i, block_bytes(b->c->cache)); } @@ -211,7 +211,7 @@ void bch_btree_node_read_done(struct btree *b) if (i->seq == b->keys.set[0].data->seq) goto err; - bch_btree_sort_and_fix_extents(&b->keys, iter, &b->c->sort); + bch_btree_sort_and_fix_extents(&b->keys, &iter, &b->c->sort); i = b->keys.set[0].data; err = "short btree key"; @@ -223,7 +223,7 @@ void bch_btree_node_read_done(struct btree *b) bch_bset_init_next(&b->keys, write_block(b), bset_magic(&b->c->cache->sb)); out: - mempool_free(iter, &b->c->fill_iter); + mempool_free(iter.heap.data, &b->c->fill_iter); return; err: set_btree_node_io_error(b); @@ -1309,9 +1309,11 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc) uint8_t stale = 0; unsigned int keys = 0, good_keys = 0; struct bkey *k; - struct btree_iter_stack iter; + struct btree_iter iter; struct bset_tree *t; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + gc->nodes++; for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) { @@ -1570,9 +1572,11 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op, static unsigned int btree_gc_count_keys(struct btree *b) { struct bkey *k; - struct btree_iter_stack iter; + struct btree_iter iter; unsigned int ret = 0; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad) ret += bkey_u64s(k); @@ -1611,18 +1615,18 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, int ret = 0; bool should_rewrite; struct bkey *k; - struct btree_iter_stack iter; + struct btree_iter iter; struct gc_merge_info r[GC_MERGE_NODES]; struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1; - bch_btree_iter_stack_init(&b->keys, &iter, &b->c->gc_done); + min_heap_init(&iter.heap, NULL, MAX_BSETS); + bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done); for (i = r; i < r + ARRAY_SIZE(r); i++) i->b = ERR_PTR(-EINTR); while (1) { - k = bch_btree_iter_next_filter(&iter.iter, &b->keys, - bch_ptr_bad); + k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); if (k) { r->b = bch_btree_node_get(b->c, op, k, b->level - 1, true, b); @@ -1917,7 +1921,9 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) { int ret = 0; struct bkey *k, *p = NULL; - struct btree_iter_stack iter; + struct btree_iter iter; + + min_heap_init(&iter.heap, NULL, MAX_BSETS); for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) bch_initial_mark_key(b->c, b->level, k); @@ -1925,10 +1931,10 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) bch_initial_mark_key(b->c, b->level + 1, &b->key); if (b->level) { - bch_btree_iter_stack_init(&b->keys, &iter, NULL); + bch_btree_iter_init(&b->keys, &iter, NULL); do { - k = bch_btree_iter_next_filter(&iter.iter, &b->keys, + k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); if (k) { btree_node_prefetch(b, k); @@ -1956,7 +1962,7 @@ static int bch_btree_check_thread(void *arg) struct btree_check_info *info = arg; struct btree_check_state *check_state = info->state; struct cache_set *c = check_state->c; - struct btree_iter_stack iter; + struct btree_iter iter; struct bkey *k, *p; int cur_idx, prev_idx, skip_nr; @@ -1964,9 +1970,11 @@ static int bch_btree_check_thread(void *arg) cur_idx = prev_idx = 0; ret = 0; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + /* root node keys are checked before thread created */ - bch_btree_iter_stack_init(&c->root->keys, &iter, NULL); - k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); + bch_btree_iter_init(&c->root->keys, &iter, NULL); + k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); BUG_ON(!k); p = k; @@ -1984,7 +1992,7 @@ static int bch_btree_check_thread(void *arg) skip_nr = cur_idx - prev_idx; while (skip_nr) { - k = bch_btree_iter_next_filter(&iter.iter, + k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); if (k) @@ -2057,9 +2065,11 @@ int bch_btree_check(struct cache_set *c) int ret = 0; int i; struct bkey *k = NULL; - struct btree_iter_stack iter; + struct btree_iter iter; struct btree_check_state check_state; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + /* check and mark root node keys */ for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid) bch_initial_mark_key(c, c->root->level, k); @@ -2553,11 +2563,12 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op, if (b->level) { struct bkey *k; - struct btree_iter_stack iter; + struct btree_iter iter; - bch_btree_iter_stack_init(&b->keys, &iter, from); + min_heap_init(&iter.heap, NULL, MAX_BSETS); + bch_btree_iter_init(&b->keys, &iter, from); - while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys, + while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { ret = bcache_btree(map_nodes_recurse, k, b, op, from, fn, flags); @@ -2586,12 +2597,12 @@ int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, { int ret = MAP_CONTINUE; struct bkey *k; - struct btree_iter_stack iter; + struct btree_iter iter; - bch_btree_iter_stack_init(&b->keys, &iter, from); + min_heap_init(&iter.heap, NULL, MAX_BSETS); + bch_btree_iter_init(&b->keys, &iter, from); - while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys, - bch_ptr_bad))) { + while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { ret = !b->level ? fn(op, b, k) : bcache_btree(map_keys_recurse, k, diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index d626ffcbecb9..a7221e5dbe81 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -33,15 +33,16 @@ static void sort_key_next(struct btree_iter *iter, i->k = bkey_next(i->k); if (i->k == i->end) - *i = iter->data[--iter->used]; + *i = iter->heap.data[--iter->heap.nr]; } -static bool bch_key_sort_cmp(struct btree_iter_set l, - struct btree_iter_set r) +static bool new_bch_key_sort_cmp(const void *l, const void *r, void *args) { - int64_t c = bkey_cmp(l.k, r.k); + struct btree_iter_set *_l = (struct btree_iter_set *)l; + struct btree_iter_set *_r = (struct btree_iter_set *)r; + int64_t c = bkey_cmp(_l->k, _r->k); - return c ? c > 0 : l.k < r.k; + return !(c ? c > 0 : _l->k < _r->k); } static bool __ptr_invalid(struct cache_set *c, const struct bkey *k) @@ -238,7 +239,7 @@ static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk, } const struct btree_keys_ops bch_btree_keys_ops = { - .sort_cmp = bch_key_sort_cmp, + .sort_cmp = new_bch_key_sort_cmp, .insert_fixup = bch_btree_ptr_insert_fixup, .key_invalid = bch_btree_ptr_invalid, .key_bad = bch_btree_ptr_bad, @@ -255,22 +256,36 @@ const struct btree_keys_ops bch_btree_keys_ops = { * Necessary for btree_sort_fixup() - if there are multiple keys that compare * equal in different sets, we have to process them newest to oldest. */ -static bool bch_extent_sort_cmp(struct btree_iter_set l, - struct btree_iter_set r) + +static bool new_bch_extent_sort_cmp(const void *l, const void *r, void __always_unused *args) +{ + struct btree_iter_set *_l = (struct btree_iter_set *)l; + struct btree_iter_set *_r = (struct btree_iter_set *)r; + int64_t c = bkey_cmp(&START_KEY(_l->k), &START_KEY(_r->k)); + + return !(c ? c > 0 : _l->k < _r->k); +} + +static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args) { - int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k)); + struct btree_iter_set *_iter1 = iter1; + struct btree_iter_set *_iter2 = iter2; - return c ? c > 0 : l.k < r.k; + swap(*_iter1, *_iter2); } static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, struct bkey *tmp) { - while (iter->used > 1) { - struct btree_iter_set *top = iter->data, *i = top + 1; - - if (iter->used > 2 && - bch_extent_sort_cmp(i[0], i[1])) + const struct min_heap_callbacks callbacks = { + .less = new_bch_extent_sort_cmp, + .swp = new_btree_iter_swap, + }; + while (iter->heap.nr > 1) { + struct btree_iter_set *top = iter->heap.data, *i = top + 1; + + if (iter->heap.nr > 2 && + !new_bch_extent_sort_cmp(&i[0], &i[1], NULL)) i++; if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0) @@ -278,7 +293,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, if (!KEY_SIZE(i->k)) { sort_key_next(iter, i); - heap_sift(iter, i - top, bch_extent_sort_cmp); + min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL); continue; } @@ -288,7 +303,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, else bch_cut_front(top->k, i->k); - heap_sift(iter, i - top, bch_extent_sort_cmp); + min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL); } else { /* can't happen because of comparison func */ BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k))); @@ -298,7 +313,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, bch_cut_back(&START_KEY(i->k), tmp); bch_cut_front(i->k, top->k); - heap_sift(iter, 0, bch_extent_sort_cmp); + min_heap_sift_down(&iter->heap, 0, &callbacks, NULL); return tmp; } else { @@ -618,7 +633,7 @@ static bool bch_extent_merge(struct btree_keys *bk, } const struct btree_keys_ops bch_extent_keys_ops = { - .sort_cmp = bch_extent_sort_cmp, + .sort_cmp = new_bch_extent_sort_cmp, .sort_fixup = bch_extent_sort_fixup, .insert_fixup = bch_extent_insert_fixup, .key_invalid = bch_extent_invalid, diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index ebd500bdf0b2..7f482729c56d 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -182,16 +182,27 @@ err: if (!IS_ERR_OR_NULL(w->private)) closure_sync(&cl); } -static bool bucket_cmp(struct bucket *l, struct bucket *r) +static bool new_bucket_cmp(const void *l, const void *r, void __always_unused *args) { - return GC_SECTORS_USED(l) < GC_SECTORS_USED(r); + struct bucket **_l = (struct bucket **)l; + struct bucket **_r = (struct bucket **)r; + + return GC_SECTORS_USED(*_l) >= GC_SECTORS_USED(*_r); +} + +static void new_bucket_swap(void *l, void *r, void __always_unused *args) +{ + struct bucket **_l = l; + struct bucket **_r = r; + + swap(*_l, *_r); } static unsigned int bucket_heap_top(struct cache *ca) { struct bucket *b; - return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0; + return (b = min_heap_peek(&ca->heap)[0]) ? GC_SECTORS_USED(b) : 0; } void bch_moving_gc(struct cache_set *c) @@ -199,6 +210,10 @@ void bch_moving_gc(struct cache_set *c) struct cache *ca = c->cache; struct bucket *b; unsigned long sectors_to_move, reserve_sectors; + const struct min_heap_callbacks callbacks = { + .less = new_bucket_cmp, + .swp = new_bucket_swap, + }; if (!c->copy_gc_enabled) return; @@ -209,7 +224,7 @@ void bch_moving_gc(struct cache_set *c) reserve_sectors = ca->sb.bucket_size * fifo_used(&ca->free[RESERVE_MOVINGGC]); - ca->heap.used = 0; + ca->heap.nr = 0; for_each_bucket(b, ca) { if (GC_MARK(b) == GC_MARK_METADATA || @@ -218,25 +233,31 @@ void bch_moving_gc(struct cache_set *c) atomic_read(&b->pin)) continue; - if (!heap_full(&ca->heap)) { + if (!min_heap_full(&ca->heap)) { sectors_to_move += GC_SECTORS_USED(b); - heap_add(&ca->heap, b, bucket_cmp); - } else if (bucket_cmp(b, heap_peek(&ca->heap))) { + min_heap_push(&ca->heap, &b, &callbacks, NULL); + } else if (!new_bucket_cmp(&b, min_heap_peek(&ca->heap), ca)) { sectors_to_move -= bucket_heap_top(ca); sectors_to_move += GC_SECTORS_USED(b); ca->heap.data[0] = b; - heap_sift(&ca->heap, 0, bucket_cmp); + min_heap_sift_down(&ca->heap, 0, &callbacks, NULL); } } while (sectors_to_move > reserve_sectors) { - heap_pop(&ca->heap, b, bucket_cmp); + if (ca->heap.nr) { + b = min_heap_peek(&ca->heap)[0]; + min_heap_pop(&ca->heap, &callbacks, NULL); + } sectors_to_move -= GC_SECTORS_USED(b); } - while (heap_pop(&ca->heap, b, bucket_cmp)) + while (ca->heap.nr) { + b = min_heap_peek(&ca->heap)[0]; + min_heap_pop(&ca->heap, &callbacks, NULL); SET_GC_MOVE(b, 1); + } mutex_unlock(&c->bucket_lock); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index b5d6ef430b86..e7abfdd77c3b 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1907,8 +1907,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) INIT_LIST_HEAD(&c->btree_cache_freed); INIT_LIST_HEAD(&c->data_buckets); - iter_size = sizeof(struct btree_iter) + - ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) * + iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) * sizeof(struct btree_iter_set); c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 826b14cae4e5..e8f696cb58c0 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -660,7 +660,9 @@ static unsigned int bch_root_usage(struct cache_set *c) unsigned int bytes = 0; struct bkey *k; struct btree *b; - struct btree_iter_stack iter; + struct btree_iter iter; + + min_heap_init(&iter.heap, NULL, MAX_BSETS); goto lock_root; diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index ae380bc3992e..410d8cb49e50 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * random utiility code, for bcache but in theory not specific to bcache + * random utility code, for bcache but in theory not specific to bcache * * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> * Copyright 2012 Google, Inc. diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index f61ab1bada6c..539454d8e2d0 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/sched/clock.h> #include <linux/llist.h> +#include <linux/min_heap.h> #include <linux/ratelimit.h> #include <linux/vmalloc.h> #include <linux/workqueue.h> @@ -30,16 +31,10 @@ struct closure; #endif -#define DECLARE_HEAP(type, name) \ - struct { \ - size_t size, used; \ - type *data; \ - } name - #define init_heap(heap, _size, gfp) \ ({ \ size_t _bytes; \ - (heap)->used = 0; \ + (heap)->nr = 0; \ (heap)->size = (_size); \ _bytes = (heap)->size * sizeof(*(heap)->data); \ (heap)->data = kvmalloc(_bytes, (gfp) & GFP_KERNEL); \ @@ -52,64 +47,6 @@ do { \ (heap)->data = NULL; \ } while (0) -#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j]) - -#define heap_sift(h, i, cmp) \ -do { \ - size_t _r, _j = i; \ - \ - for (; _j * 2 + 1 < (h)->used; _j = _r) { \ - _r = _j * 2 + 1; \ - if (_r + 1 < (h)->used && \ - cmp((h)->data[_r], (h)->data[_r + 1])) \ - _r++; \ - \ - if (cmp((h)->data[_r], (h)->data[_j])) \ - break; \ - heap_swap(h, _r, _j); \ - } \ -} while (0) - -#define heap_sift_down(h, i, cmp) \ -do { \ - while (i) { \ - size_t p = (i - 1) / 2; \ - if (cmp((h)->data[i], (h)->data[p])) \ - break; \ - heap_swap(h, i, p); \ - i = p; \ - } \ -} while (0) - -#define heap_add(h, d, cmp) \ -({ \ - bool _r = !heap_full(h); \ - if (_r) { \ - size_t _i = (h)->used++; \ - (h)->data[_i] = d; \ - \ - heap_sift_down(h, _i, cmp); \ - heap_sift(h, _i, cmp); \ - } \ - _r; \ -}) - -#define heap_pop(h, d, cmp) \ -({ \ - bool _r = (h)->used; \ - if (_r) { \ - (d) = (h)->data[0]; \ - (h)->used--; \ - heap_swap(h, 0, (h)->used); \ - heap_sift(h, 0, cmp); \ - } \ - _r; \ -}) - -#define heap_peek(h) ((h)->used ? (h)->data[0] : NULL) - -#define heap_full(h) ((h)->used == (h)->size) - #define DECLARE_FIFO(type, name) \ struct { \ size_t front, back, size, mask; \ diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 792e070ccf38..c1d28e365910 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -908,15 +908,16 @@ static int bch_dirty_init_thread(void *arg) struct dirty_init_thrd_info *info = arg; struct bch_dirty_init_state *state = info->state; struct cache_set *c = state->c; - struct btree_iter_stack iter; + struct btree_iter iter; struct bkey *k, *p; int cur_idx, prev_idx, skip_nr; k = p = NULL; prev_idx = 0; - bch_btree_iter_stack_init(&c->root->keys, &iter, NULL); - k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); + min_heap_init(&iter.heap, NULL, MAX_BSETS); + bch_btree_iter_init(&c->root->keys, &iter, NULL); + k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); BUG_ON(!k); p = k; @@ -930,7 +931,7 @@ static int bch_dirty_init_thread(void *arg) skip_nr = cur_idx - prev_idx; while (skip_nr) { - k = bch_btree_iter_next_filter(&iter.iter, + k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); if (k) @@ -979,11 +980,13 @@ void bch_sectors_dirty_init(struct bcache_device *d) int i; struct btree *b = NULL; struct bkey *k = NULL; - struct btree_iter_stack iter; + struct btree_iter iter; struct sectors_dirty_init op; struct cache_set *c = d->c; struct bch_dirty_init_state state; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + retry_lock: b = c->root; rw_lock(0, b, b->level); diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 96751cd3d181..24cd87fddf75 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -170,7 +170,7 @@ struct dm_cache_metadata { */ #define SUPERBLOCK_CSUM_XOR 9031977 -static void sb_prepare_for_write(struct dm_block_validator *v, +static void sb_prepare_for_write(const struct dm_block_validator *v, struct dm_block *b, size_t sb_block_size) { @@ -195,7 +195,7 @@ static int check_metadata_version(struct cache_disk_superblock *disk_super) return 0; } -static int sb_check(struct dm_block_validator *v, +static int sb_check(const struct dm_block_validator *v, struct dm_block *b, size_t sb_block_size) { @@ -228,7 +228,7 @@ static int sb_check(struct dm_block_validator *v, return check_metadata_version(disk_super); } -static struct dm_block_validator sb_validator = { +static const struct dm_block_validator sb_validator = { .name = "superblock", .prepare_for_write = sb_prepare_for_write, .check = sb_check @@ -1282,15 +1282,6 @@ int dm_cache_insert_mapping(struct dm_cache_metadata *cmd, return r; } -struct thunk { - load_mapping_fn fn; - void *context; - - struct dm_cache_metadata *cmd; - bool respect_dirty_flags; - bool hints_valid; -}; - static bool policy_unchanged(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) { diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 2d8dd9283ff4..17f0fab1e254 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -3416,8 +3416,8 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) */ if (io_opt_sectors < cache->sectors_per_block || do_div(io_opt_sectors, cache->sectors_per_block)) { - blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT); - blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT); + limits->io_min = cache->sectors_per_block << SECTOR_SHIFT; + limits->io_opt = cache->sectors_per_block << SECTOR_SHIFT; } disable_passdown_if_not_supported(cache); diff --git a/drivers/md/dm-clone-metadata.c b/drivers/md/dm-clone-metadata.c index 47c1fa7aad8b..2db84cd2202b 100644 --- a/drivers/md/dm-clone-metadata.c +++ b/drivers/md/dm-clone-metadata.c @@ -163,7 +163,7 @@ struct dm_clone_metadata { /* * Superblock validation. */ -static void sb_prepare_for_write(struct dm_block_validator *v, +static void sb_prepare_for_write(const struct dm_block_validator *v, struct dm_block *b, size_t sb_block_size) { struct superblock_disk *sb; @@ -177,7 +177,7 @@ static void sb_prepare_for_write(struct dm_block_validator *v, sb->csum = cpu_to_le32(csum); } -static int sb_check(struct dm_block_validator *v, struct dm_block *b, +static int sb_check(const struct dm_block_validator *v, struct dm_block *b, size_t sb_block_size) { struct superblock_disk *sb; @@ -220,7 +220,7 @@ static int sb_check(struct dm_block_validator *v, struct dm_block *b, return 0; } -static struct dm_block_validator sb_validator = { +static const struct dm_block_validator sb_validator = { .name = "superblock", .prepare_for_write = sb_prepare_for_write, .check = sb_check diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c index b4384a8b13e3..12bbe487a4c8 100644 --- a/drivers/md/dm-clone-target.c +++ b/drivers/md/dm-clone-target.c @@ -2073,8 +2073,8 @@ static void clone_io_hints(struct dm_target *ti, struct queue_limits *limits) */ if (io_opt_sectors < clone->region_size || do_div(io_opt_sectors, clone->region_size)) { - blk_limits_io_min(limits, clone->region_size << SECTOR_SHIFT); - blk_limits_io_opt(limits, clone->region_size << SECTOR_SHIFT); + limits->io_min = clone->region_size << SECTOR_SHIFT; + limits->io_opt = clone->region_size << SECTOR_SHIFT; } disable_passdown_if_not_supported(clone); diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 14a44c0f8286..3637761f3585 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -206,6 +206,8 @@ struct dm_table { bool integrity_supported:1; bool singleton:1; + /* set if all the targets in the table have "flush_bypasses_map" set */ + bool flush_bypasses_map:1; /* * Indicates the rw permissions for the new logical device. This diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 6c013ceb0e5f..348b4b26c272 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -214,7 +214,8 @@ struct crypt_config { unsigned int integrity_tag_size; unsigned int integrity_iv_size; - unsigned int on_disk_tag_size; + unsigned int used_tag_size; + unsigned int tuple_size; /* * pool for per bio private data, crypto requests, @@ -241,6 +242,31 @@ static unsigned int dm_crypt_clients_n; static volatile unsigned long dm_crypt_pages_per_client; #define DM_CRYPT_MEMORY_PERCENT 2 #define DM_CRYPT_MIN_PAGES_PER_CLIENT (BIO_MAX_VECS * 16) +#define DM_CRYPT_DEFAULT_MAX_READ_SIZE 131072 +#define DM_CRYPT_DEFAULT_MAX_WRITE_SIZE 131072 + +static unsigned int max_read_size = 0; +module_param(max_read_size, uint, 0644); +MODULE_PARM_DESC(max_read_size, "Maximum size of a read request"); +static unsigned int max_write_size = 0; +module_param(max_write_size, uint, 0644); +MODULE_PARM_DESC(max_write_size, "Maximum size of a write request"); +static unsigned get_max_request_size(struct crypt_config *cc, bool wrt) +{ + unsigned val, sector_align; + val = !wrt ? READ_ONCE(max_read_size) : READ_ONCE(max_write_size); + if (likely(!val)) + val = !wrt ? DM_CRYPT_DEFAULT_MAX_READ_SIZE : DM_CRYPT_DEFAULT_MAX_WRITE_SIZE; + if (wrt || cc->used_tag_size) { + if (unlikely(val > BIO_MAX_VECS << PAGE_SHIFT)) + val = BIO_MAX_VECS << PAGE_SHIFT; + } + sector_align = max(bdev_logical_block_size(cc->dev->bdev), (unsigned)cc->sector_size); + val = round_down(val, sector_align); + if (unlikely(!val)) + val = sector_align; + return val >> SECTOR_SHIFT; +} static void crypt_endio(struct bio *clone); static void kcryptd_queue_crypt(struct dm_crypt_io *io); @@ -1151,14 +1177,14 @@ static int dm_crypt_integrity_io_alloc(struct dm_crypt_io *io, struct bio *bio) unsigned int tag_len; int ret; - if (!bio_sectors(bio) || !io->cc->on_disk_tag_size) + if (!bio_sectors(bio) || !io->cc->tuple_size) return 0; bip = bio_integrity_alloc(bio, GFP_NOIO, 1); if (IS_ERR(bip)) return PTR_ERR(bip); - tag_len = io->cc->on_disk_tag_size * (bio_sectors(bio) >> io->cc->sector_shift); + tag_len = io->cc->tuple_size * (bio_sectors(bio) >> io->cc->sector_shift); bip->bip_iter.bi_sector = io->cc->start + io->sector; @@ -1182,18 +1208,18 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti) return -EINVAL; } - if (bi->tag_size != cc->on_disk_tag_size || - bi->tuple_size != cc->on_disk_tag_size) { + if (bi->tuple_size < cc->used_tag_size) { ti->error = "Integrity profile tag size mismatch."; return -EINVAL; } + cc->tuple_size = bi->tuple_size; if (1 << bi->interval_exp != cc->sector_size) { ti->error = "Integrity profile sector size mismatch."; return -EINVAL; } if (crypt_integrity_aead(cc)) { - cc->integrity_tag_size = cc->on_disk_tag_size - cc->integrity_iv_size; + cc->integrity_tag_size = cc->used_tag_size - cc->integrity_iv_size; DMDEBUG("%s: Integrity AEAD, tag size %u, IV size %u.", dm_device_name(md), cc->integrity_tag_size, cc->integrity_iv_size); @@ -1205,7 +1231,7 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti) DMDEBUG("%s: Additional per-sector space %u bytes for IV.", dm_device_name(md), cc->integrity_iv_size); - if ((cc->integrity_tag_size + cc->integrity_iv_size) != bi->tag_size) { + if ((cc->integrity_tag_size + cc->integrity_iv_size) > cc->tuple_size) { ti->error = "Not enough space for integrity tag in the profile."; return -EINVAL; } @@ -1284,7 +1310,7 @@ static void *tag_from_dmreq(struct crypt_config *cc, struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx); return &io->integrity_metadata[*org_tag_of_dmreq(cc, dmreq) * - cc->on_disk_tag_size]; + cc->tuple_size]; } static void *iv_tag_from_dmreq(struct crypt_config *cc, @@ -1365,9 +1391,9 @@ static int crypt_convert_block_aead(struct crypt_config *cc, aead_request_set_crypt(req, dmreq->sg_in, dmreq->sg_out, cc->sector_size, iv); r = crypto_aead_encrypt(req); - if (cc->integrity_tag_size + cc->integrity_iv_size != cc->on_disk_tag_size) + if (cc->integrity_tag_size + cc->integrity_iv_size != cc->tuple_size) memset(tag + cc->integrity_tag_size + cc->integrity_iv_size, 0, - cc->on_disk_tag_size - (cc->integrity_tag_size + cc->integrity_iv_size)); + cc->tuple_size - (cc->integrity_tag_size + cc->integrity_iv_size)); } else { aead_request_set_crypt(req, dmreq->sg_in, dmreq->sg_out, cc->sector_size + cc->integrity_tag_size, iv); @@ -1797,7 +1823,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io) return; if (likely(!io->ctx.aead_recheck) && unlikely(io->ctx.aead_failed) && - cc->on_disk_tag_size && bio_data_dir(base_bio) == READ) { + cc->used_tag_size && bio_data_dir(base_bio) == READ) { io->ctx.aead_recheck = true; io->ctx.aead_failed = false; io->error = 0; @@ -3181,7 +3207,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar ti->error = "Invalid integrity arguments"; return -EINVAL; } - cc->on_disk_tag_size = val; + cc->used_tag_size = val; sval = strchr(opt_string + strlen("integrity:"), ':') + 1; if (!strcasecmp(sval, "aead")) { set_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags); @@ -3393,12 +3419,12 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (ret) goto bad; - cc->tag_pool_max_sectors = POOL_ENTRY_SIZE / cc->on_disk_tag_size; + cc->tag_pool_max_sectors = POOL_ENTRY_SIZE / cc->tuple_size; if (!cc->tag_pool_max_sectors) cc->tag_pool_max_sectors = 1; ret = mempool_init_kmalloc_pool(&cc->tag_pool, MIN_IOS, - cc->tag_pool_max_sectors * cc->on_disk_tag_size); + cc->tag_pool_max_sectors * cc->tuple_size); if (ret) { ti->error = "Cannot allocate integrity tags mempool"; goto bad; @@ -3474,6 +3500,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) { struct dm_crypt_io *io; struct crypt_config *cc = ti->private; + unsigned max_sectors; /* * If bio is REQ_PREFLUSH or REQ_OP_DISCARD, just bypass crypt queues. @@ -3492,9 +3519,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) /* * Check if bio is too large, split as needed. */ - if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_VECS << PAGE_SHIFT)) && - (bio_data_dir(bio) == WRITE || cc->on_disk_tag_size)) - dm_accept_partial_bio(bio, ((BIO_MAX_VECS << PAGE_SHIFT) >> SECTOR_SHIFT)); + max_sectors = get_max_request_size(cc, bio_data_dir(bio) == WRITE); + if (unlikely(bio_sectors(bio) > max_sectors)) + dm_accept_partial_bio(bio, max_sectors); /* * Ensure that bio is a multiple of internal sector encryption size @@ -3509,8 +3536,8 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) io = dm_per_bio_data(bio, cc->per_bio_data_size); crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector)); - if (cc->on_disk_tag_size) { - unsigned int tag_len = cc->on_disk_tag_size * (bio_sectors(bio) >> cc->sector_shift); + if (cc->tuple_size) { + unsigned int tag_len = cc->tuple_size * (bio_sectors(bio) >> cc->sector_shift); if (unlikely(tag_len > KMALLOC_MAX_SIZE)) io->integrity_metadata = NULL; @@ -3582,7 +3609,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type, num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags); num_feature_args += cc->sector_size != (1 << SECTOR_SHIFT); num_feature_args += test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags); - if (cc->on_disk_tag_size) + if (cc->used_tag_size) num_feature_args++; if (num_feature_args) { DMEMIT(" %d", num_feature_args); @@ -3598,8 +3625,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type, DMEMIT(" no_read_workqueue"); if (test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)) DMEMIT(" no_write_workqueue"); - if (cc->on_disk_tag_size) - DMEMIT(" integrity:%u:%s", cc->on_disk_tag_size, cc->cipher_auth); + if (cc->used_tag_size) + DMEMIT(" integrity:%u:%s", cc->used_tag_size, cc->cipher_auth); if (cc->sector_size != (1 << SECTOR_SHIFT)) DMEMIT(" sector_size:%d", cc->sector_size); if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags)) @@ -3621,9 +3648,9 @@ static void crypt_status(struct dm_target *ti, status_type_t type, DMEMIT(",iv_large_sectors=%c", test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags) ? 'y' : 'n'); - if (cc->on_disk_tag_size) + if (cc->used_tag_size) DMEMIT(",integrity_tag_size=%u,cipher_auth=%s", - cc->on_disk_tag_size, cc->cipher_auth); + cc->used_tag_size, cc->cipher_auth); if (cc->sector_size != (1 << SECTOR_SHIFT)) DMEMIT(",sector_size=%d", cc->sector_size); if (cc->cipher_string) @@ -3731,7 +3758,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", - .version = {1, 26, 0}, + .version = {1, 27, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c index b70d4016c2ac..ec5db1478b2f 100644 --- a/drivers/md/dm-ebs-target.c +++ b/drivers/md/dm-ebs-target.c @@ -428,7 +428,7 @@ static void ebs_io_hints(struct dm_target *ti, struct queue_limits *limits) limits->logical_block_size = to_bytes(ec->e_bs); limits->physical_block_size = to_bytes(ec->u_bs); limits->alignment_offset = limits->physical_block_size; - blk_limits_io_min(limits, limits->logical_block_size); + limits->io_min = limits->logical_block_size; } static int ebs_iterate_devices(struct dm_target *ti, diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index 8f81e597858d..9c84e9d13eca 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -196,7 +196,7 @@ struct superblock_disk { * Superblock validation *-------------------------------------------------------------- */ -static void sb_prepare_for_write(struct dm_block_validator *v, +static void sb_prepare_for_write(const struct dm_block_validator *v, struct dm_block *b, size_t sb_block_size) { @@ -221,7 +221,7 @@ static int check_metadata_version(struct superblock_disk *disk) return 0; } -static int sb_check(struct dm_block_validator *v, +static int sb_check(const struct dm_block_validator *v, struct dm_block *b, size_t sb_block_size) { @@ -254,7 +254,7 @@ static int sb_check(struct dm_block_validator *v, return check_metadata_version(disk); } -static struct dm_block_validator sb_validator = { +static const struct dm_block_validator sb_validator = { .name = "superblock", .prepare_for_write = sb_prepare_for_write, .check = sb_check @@ -1733,8 +1733,8 @@ static void era_io_hints(struct dm_target *ti, struct queue_limits *limits) */ if (io_opt_sectors < era->sectors_per_block || do_div(io_opt_sectors, era->sectors_per_block)) { - blk_limits_io_min(limits, 0); - blk_limits_io_opt(limits, era->sectors_per_block << SECTOR_SHIFT); + limits->io_min = 0; + limits->io_opt = era->sectors_per_block << SECTOR_SHIFT; } } diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c index 2a71bcdba92d..b37bbe762500 100644 --- a/drivers/md/dm-init.c +++ b/drivers/md/dm-init.c @@ -212,8 +212,10 @@ static char __init *dm_parse_device_entry(struct dm_device *dev, char *str) strscpy(dev->dmi.uuid, field[1], sizeof(dev->dmi.uuid)); /* minor */ if (strlen(field[2])) { - if (kstrtoull(field[2], 0, &dev->dmi.dev)) + if (kstrtoull(field[2], 0, &dev->dmi.dev) || + dev->dmi.dev >= (1 << MINORBITS)) return ERR_PTR(-EINVAL); + dev->dmi.dev = huge_encode_dev((dev_t)dev->dmi.dev); dev->dmi.flags |= DM_PERSISTENT_DEV_FLAG; } /* flags */ diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 2a89f8eb4713..48ac628f471b 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -44,6 +44,7 @@ #define BITMAP_FLUSH_INTERVAL (10 * HZ) #define DISCARD_FILLER 0xf6 #define SALT_SIZE 16 +#define RECHECK_POOL_SIZE 256 /* * Warning - DEBUG_PRINT prints security-sensitive data to the log, @@ -62,6 +63,7 @@ #define SB_VERSION_3 3 #define SB_VERSION_4 4 #define SB_VERSION_5 5 +#define SB_VERSION_6 6 #define SB_SECTORS 8 #define MAX_SECTORS_PER_BLOCK 8 @@ -86,6 +88,7 @@ struct superblock { #define SB_FLAG_DIRTY_BITMAP 0x4 #define SB_FLAG_FIXED_PADDING 0x8 #define SB_FLAG_FIXED_HMAC 0x10 +#define SB_FLAG_INLINE 0x20 #define JOURNAL_ENTRY_ROUNDUP 8 @@ -166,6 +169,7 @@ struct dm_integrity_c { struct dm_dev *meta_dev; unsigned int tag_size; __s8 log2_tag_size; + unsigned int tuple_size; sector_t start; mempool_t journal_io_mempool; struct dm_io_client *io; @@ -279,6 +283,7 @@ struct dm_integrity_c { atomic64_t number_of_mismatches; mempool_t recheck_pool; + struct bio_set recheck_bios; struct notifier_block reboot_notifier; }; @@ -314,6 +319,9 @@ struct dm_integrity_io { struct completion *completion; struct dm_bio_details bio_details; + + char *integrity_payload; + bool integrity_payload_from_mempool; }; struct journal_completion { @@ -351,6 +359,7 @@ static struct kmem_cache *journal_io_cache; #endif static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map); +static int dm_integrity_map_inline(struct dm_integrity_io *dio); static void integrity_bio_wait(struct work_struct *w); static void dm_integrity_dtr(struct dm_target *ti); @@ -460,7 +469,9 @@ static void wraparound_section(struct dm_integrity_c *ic, unsigned int *sec_ptr) static void sb_set_version(struct dm_integrity_c *ic) { - if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) + if (ic->sb->flags & cpu_to_le32(SB_FLAG_INLINE)) + ic->sb->version = SB_VERSION_6; + else if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) ic->sb->version = SB_VERSION_5; else if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) ic->sb->version = SB_VERSION_4; @@ -1894,6 +1905,35 @@ error: dec_in_flight(dio); } +static inline bool dm_integrity_check_limits(struct dm_integrity_c *ic, sector_t logical_sector, struct bio *bio) +{ + if (unlikely(logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) { + DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx", + logical_sector, bio_sectors(bio), + ic->provided_data_sectors); + return false; + } + if (unlikely((logical_sector | bio_sectors(bio)) & (unsigned int)(ic->sectors_per_block - 1))) { + DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x", + ic->sectors_per_block, + logical_sector, bio_sectors(bio)); + return false; + } + if (ic->sectors_per_block > 1 && likely(bio_op(bio) != REQ_OP_DISCARD)) { + struct bvec_iter iter; + struct bio_vec bv; + + bio_for_each_segment(bv, bio, iter) { + if (unlikely(bv.bv_len & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) { + DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary", + bv.bv_offset, bv.bv_len, ic->sectors_per_block); + return false; + } + } + } + return true; +} + static int dm_integrity_map(struct dm_target *ti, struct bio *bio) { struct dm_integrity_c *ic = ti->private; @@ -1906,6 +1946,9 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio) dio->bi_status = 0; dio->op = bio_op(bio); + if (ic->mode == 'I') + return dm_integrity_map_inline(dio); + if (unlikely(dio->op == REQ_OP_DISCARD)) { if (ti->max_io_len) { sector_t sec = dm_target_offset(ti, bio->bi_iter.bi_sector); @@ -1935,31 +1978,8 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio) */ bio->bi_opf &= ~REQ_FUA; } - if (unlikely(dio->range.logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) { - DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx", - dio->range.logical_sector, bio_sectors(bio), - ic->provided_data_sectors); - return DM_MAPIO_KILL; - } - if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned int)(ic->sectors_per_block - 1))) { - DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x", - ic->sectors_per_block, - dio->range.logical_sector, bio_sectors(bio)); + if (unlikely(!dm_integrity_check_limits(ic, dio->range.logical_sector, bio))) return DM_MAPIO_KILL; - } - - if (ic->sectors_per_block > 1 && likely(dio->op != REQ_OP_DISCARD)) { - struct bvec_iter iter; - struct bio_vec bv; - - bio_for_each_segment(bv, bio, iter) { - if (unlikely(bv.bv_len & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) { - DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary", - bv.bv_offset, bv.bv_len, ic->sectors_per_block); - return DM_MAPIO_KILL; - } - } - } bip = bio_integrity(bio); if (!ic->internal_hash) { @@ -2375,6 +2395,213 @@ journal_read_write: do_endio_flush(ic, dio); } +static int dm_integrity_map_inline(struct dm_integrity_io *dio) +{ + struct dm_integrity_c *ic = dio->ic; + struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); + struct bio_integrity_payload *bip; + unsigned payload_len, digest_size, extra_size, ret; + + dio->integrity_payload = NULL; + dio->integrity_payload_from_mempool = false; + + if (unlikely(bio_integrity(bio))) { + bio->bi_status = BLK_STS_NOTSUPP; + bio_endio(bio); + return DM_MAPIO_SUBMITTED; + } + + bio_set_dev(bio, ic->dev->bdev); + if (unlikely((bio->bi_opf & REQ_PREFLUSH) != 0)) + return DM_MAPIO_REMAPPED; + +retry: + payload_len = ic->tuple_size * (bio_sectors(bio) >> ic->sb->log2_sectors_per_block); + digest_size = crypto_shash_digestsize(ic->internal_hash); + extra_size = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0; + payload_len += extra_size; + dio->integrity_payload = kmalloc(payload_len, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + if (unlikely(!dio->integrity_payload)) { + const unsigned x_size = PAGE_SIZE << 1; + if (payload_len > x_size) { + unsigned sectors = ((x_size - extra_size) / ic->tuple_size) << ic->sb->log2_sectors_per_block; + if (WARN_ON(!sectors || sectors >= bio_sectors(bio))) { + bio->bi_status = BLK_STS_NOTSUPP; + bio_endio(bio); + return DM_MAPIO_SUBMITTED; + } + dm_accept_partial_bio(bio, sectors); + goto retry; + } + dio->integrity_payload = page_to_virt((struct page *)mempool_alloc(&ic->recheck_pool, GFP_NOIO)); + dio->integrity_payload_from_mempool = true; + } + + bio->bi_iter.bi_sector = dm_target_offset(ic->ti, bio->bi_iter.bi_sector); + dio->bio_details.bi_iter = bio->bi_iter; + + if (unlikely(!dm_integrity_check_limits(ic, bio->bi_iter.bi_sector, bio))) { + return DM_MAPIO_KILL; + } + + bio->bi_iter.bi_sector += ic->start + SB_SECTORS; + + bip = bio_integrity_alloc(bio, GFP_NOIO, 1); + if (unlikely(IS_ERR(bip))) { + bio->bi_status = errno_to_blk_status(PTR_ERR(bip)); + bio_endio(bio); + return DM_MAPIO_SUBMITTED; + } + + if (dio->op == REQ_OP_WRITE) { + unsigned pos = 0; + while (dio->bio_details.bi_iter.bi_size) { + struct bio_vec bv = bio_iter_iovec(bio, dio->bio_details.bi_iter); + const char *mem = bvec_kmap_local(&bv); + if (ic->tag_size < ic->tuple_size) + memset(dio->integrity_payload + pos + ic->tag_size, 0, ic->tuple_size - ic->tuple_size); + integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, mem, dio->integrity_payload + pos); + kunmap_local(mem); + pos += ic->tuple_size; + bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT); + } + } + + ret = bio_integrity_add_page(bio, virt_to_page(dio->integrity_payload), + payload_len, offset_in_page(dio->integrity_payload)); + if (unlikely(ret != payload_len)) { + bio->bi_status = BLK_STS_RESOURCE; + bio_endio(bio); + return DM_MAPIO_SUBMITTED; + } + + return DM_MAPIO_REMAPPED; +} + +static inline void dm_integrity_free_payload(struct dm_integrity_io *dio) +{ + struct dm_integrity_c *ic = dio->ic; + if (unlikely(dio->integrity_payload_from_mempool)) + mempool_free(virt_to_page(dio->integrity_payload), &ic->recheck_pool); + else + kfree(dio->integrity_payload); + dio->integrity_payload = NULL; + dio->integrity_payload_from_mempool = false; +} + +static void dm_integrity_inline_recheck(struct work_struct *w) +{ + struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); + struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); + struct dm_integrity_c *ic = dio->ic; + struct bio *outgoing_bio; + void *outgoing_data; + + dio->integrity_payload = page_to_virt((struct page *)mempool_alloc(&ic->recheck_pool, GFP_NOIO)); + dio->integrity_payload_from_mempool = true; + + outgoing_data = dio->integrity_payload + PAGE_SIZE; + + while (dio->bio_details.bi_iter.bi_size) { + char digest[HASH_MAX_DIGESTSIZE]; + int r; + struct bio_integrity_payload *bip; + struct bio_vec bv; + char *mem; + + outgoing_bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_READ, GFP_NOIO, &ic->recheck_bios); + + r = bio_add_page(outgoing_bio, virt_to_page(outgoing_data), ic->sectors_per_block << SECTOR_SHIFT, 0); + if (unlikely(r != (ic->sectors_per_block << SECTOR_SHIFT))) { + bio_put(outgoing_bio); + bio->bi_status = BLK_STS_RESOURCE; + bio_endio(bio); + return; + } + + bip = bio_integrity_alloc(outgoing_bio, GFP_NOIO, 1); + if (unlikely(IS_ERR(bip))) { + bio_put(outgoing_bio); + bio->bi_status = errno_to_blk_status(PTR_ERR(bip)); + bio_endio(bio); + return; + } + + r = bio_integrity_add_page(outgoing_bio, virt_to_page(dio->integrity_payload), ic->tuple_size, 0); + if (unlikely(r != ic->tuple_size)) { + bio_put(outgoing_bio); + bio->bi_status = BLK_STS_RESOURCE; + bio_endio(bio); + return; + } + + outgoing_bio->bi_iter.bi_sector = dio->bio_details.bi_iter.bi_sector + ic->start + SB_SECTORS; + + r = submit_bio_wait(outgoing_bio); + if (unlikely(r != 0)) { + bio_put(outgoing_bio); + bio->bi_status = errno_to_blk_status(r); + bio_endio(bio); + return; + } + bio_put(outgoing_bio); + + integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, outgoing_data, digest); + if (unlikely(memcmp(digest, dio->integrity_payload, min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) { + DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", + ic->dev->bdev, dio->bio_details.bi_iter.bi_sector); + atomic64_inc(&ic->number_of_mismatches); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", + bio, dio->bio_details.bi_iter.bi_sector, 0); + + bio->bi_status = BLK_STS_PROTECTION; + bio_endio(bio); + return; + } + + bv = bio_iter_iovec(bio, dio->bio_details.bi_iter); + mem = bvec_kmap_local(&bv); + memcpy(mem, outgoing_data, ic->sectors_per_block << SECTOR_SHIFT); + kunmap_local(mem); + + bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT); + } + + bio_endio(bio); +} + +static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status) +{ + struct dm_integrity_c *ic = ti->private; + if (ic->mode == 'I') { + struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); + if (dio->op == REQ_OP_READ && likely(*status == BLK_STS_OK)) { + unsigned pos = 0; + while (dio->bio_details.bi_iter.bi_size) { + char digest[HASH_MAX_DIGESTSIZE]; + struct bio_vec bv = bio_iter_iovec(bio, dio->bio_details.bi_iter); + char *mem = bvec_kmap_local(&bv); + //memset(mem, 0xff, ic->sectors_per_block << SECTOR_SHIFT); + integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, mem, digest); + if (unlikely(memcmp(digest, dio->integrity_payload + pos, + min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) { + kunmap_local(mem); + dm_integrity_free_payload(dio); + INIT_WORK(&dio->work, dm_integrity_inline_recheck); + queue_work(ic->offload_wq, &dio->work); + return DM_ENDIO_INCOMPLETE; + } + kunmap_local(mem); + pos += ic->tuple_size; + bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT); + } + } + if (likely(dio->op == REQ_OP_READ) || likely(dio->op == REQ_OP_WRITE)) { + dm_integrity_free_payload(dio); + } + } + return DM_ENDIO_DONE; +} static void integrity_bio_wait(struct work_struct *w) { @@ -2413,6 +2640,9 @@ static void integrity_commit(struct work_struct *w) del_timer(&ic->autocommit_timer); + if (ic->mode == 'I') + return; + spin_lock_irq(&ic->endio_wait.lock); flushes = bio_list_get(&ic->flush_bio_list); if (unlikely(ic->mode != 'J')) { @@ -3471,7 +3701,7 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim if (ic->sectors_per_block > 1) { limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT; limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT; - blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT); + limits->io_min = ic->sectors_per_block << SECTOR_SHIFT; limits->dma_alignment = limits->logical_block_size - 1; limits->discard_granularity = ic->sectors_per_block << SECTOR_SHIFT; } @@ -3515,7 +3745,10 @@ static int calculate_device_limits(struct dm_integrity_c *ic) return -EINVAL; ic->initial_sectors = initial_sectors; - if (!ic->meta_dev) { + if (ic->mode == 'I') { + if (ic->initial_sectors + ic->provided_data_sectors > ic->meta_device_sectors) + return -EINVAL; + } else if (!ic->meta_dev) { sector_t last_sector, last_area, last_offset; /* we have to maintain excessive padding for compatibility with existing volumes */ @@ -3578,6 +3811,8 @@ static int initialize_superblock(struct dm_integrity_c *ic, memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT); memcpy(ic->sb->magic, SB_MAGIC, 8); + if (ic->mode == 'I') + ic->sb->flags |= cpu_to_le32(SB_FLAG_INLINE); ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size); ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block); if (ic->journal_mac_alg.alg_string) @@ -3587,6 +3822,8 @@ static int initialize_superblock(struct dm_integrity_c *ic, journal_sections = journal_sectors / ic->journal_section_sectors; if (!journal_sections) journal_sections = 1; + if (ic->mode == 'I') + journal_sections = 0; if (ic->fix_hmac && (ic->internal_hash_alg.alg_string || ic->journal_mac_alg.alg_string)) { ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_HMAC); @@ -4135,10 +4372,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv } if (!strcmp(argv[3], "J") || !strcmp(argv[3], "B") || - !strcmp(argv[3], "D") || !strcmp(argv[3], "R")) { + !strcmp(argv[3], "D") || !strcmp(argv[3], "R") || + !strcmp(argv[3], "I")) { ic->mode = argv[3][0]; } else { - ti->error = "Invalid mode (expecting J, B, D, R)"; + ti->error = "Invalid mode (expecting J, B, D, R, I)"; r = -EINVAL; goto bad; } @@ -4284,6 +4522,53 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv else ic->log2_tag_size = -1; + if (ic->mode == 'I') { + struct blk_integrity *bi; + if (ic->meta_dev) { + r = -EINVAL; + ti->error = "Metadata device not supported in inline mode"; + goto bad; + } + if (!ic->internal_hash_alg.alg_string) { + r = -EINVAL; + ti->error = "Internal hash not set in inline mode"; + goto bad; + } + if (ic->journal_crypt_alg.alg_string || ic->journal_mac_alg.alg_string) { + r = -EINVAL; + ti->error = "Journal crypt not supported in inline mode"; + goto bad; + } + if (ic->discard) { + r = -EINVAL; + ti->error = "Discards not supported in inline mode"; + goto bad; + } + bi = blk_get_integrity(ic->dev->bdev->bd_disk); + if (!bi || bi->csum_type != BLK_INTEGRITY_CSUM_NONE) { + r = -EINVAL; + ti->error = "Integrity profile not supported"; + goto bad; + } + /*printk("tag_size: %u, tuple_size: %u\n", bi->tag_size, bi->tuple_size);*/ + if (bi->tuple_size < ic->tag_size) { + r = -EINVAL; + ti->error = "The integrity profile is smaller than tag size"; + goto bad; + } + if ((unsigned long)bi->tuple_size > PAGE_SIZE / 2) { + r = -EINVAL; + ti->error = "Too big tuple size"; + goto bad; + } + ic->tuple_size = bi->tuple_size; + if (1 << bi->interval_exp != ic->sectors_per_block << SECTOR_SHIFT) { + r = -EINVAL; + ti->error = "Integrity profile sector size mismatch"; + goto bad; + } + } + if (ic->mode == 'B' && !ic->internal_hash) { r = -EINVAL; ti->error = "Bitmap mode can be only used with internal hash"; @@ -4314,12 +4599,26 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv goto bad; } - r = mempool_init_page_pool(&ic->recheck_pool, 1, 0); + r = mempool_init_page_pool(&ic->recheck_pool, 1, ic->mode == 'I' ? 1 : 0); if (r) { ti->error = "Cannot allocate mempool"; goto bad; } + if (ic->mode == 'I') { + r = bioset_init(&ic->recheck_bios, RECHECK_POOL_SIZE, 0, BIOSET_NEED_BVECS); + if (r) { + ti->error = "Cannot allocate bio set"; + goto bad; + } + r = bioset_integrity_create(&ic->recheck_bios, RECHECK_POOL_SIZE); + if (r) { + ti->error = "Cannot allocate bio integrity set"; + r = -ENOMEM; + goto bad; + } + } + ic->metadata_wq = alloc_workqueue("dm-integrity-metadata", WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE); if (!ic->metadata_wq) { @@ -4396,11 +4695,16 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv should_write_sb = true; } - if (!ic->sb->version || ic->sb->version > SB_VERSION_5) { + if (!ic->sb->version || ic->sb->version > SB_VERSION_6) { r = -EINVAL; ti->error = "Unknown version"; goto bad; } + if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_INLINE)) != (ic->mode == 'I')) { + r = -EINVAL; + ti->error = "Inline flag mismatch"; + goto bad; + } if (le16_to_cpu(ic->sb->integrity_tag_size) != ic->tag_size) { r = -EINVAL; ti->error = "Tag size doesn't match the information in superblock"; @@ -4411,9 +4715,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv ti->error = "Block size doesn't match the information in superblock"; goto bad; } - if (!le32_to_cpu(ic->sb->journal_sections)) { + if (!le32_to_cpu(ic->sb->journal_sections) != (ic->mode == 'I')) { r = -EINVAL; - ti->error = "Corrupted superblock, journal_sections is 0"; + if (ic->mode != 'I') + ti->error = "Corrupted superblock, journal_sections is 0"; + else + ti->error = "Corrupted superblock, journal_sections is not 0"; goto bad; } /* make sure that ti->max_io_len doesn't overflow */ @@ -4465,8 +4772,9 @@ try_smaller_buffer: bits_in_journal = ((__u64)ic->journal_section_sectors * ic->journal_sections) << (SECTOR_SHIFT + 3); if (bits_in_journal > UINT_MAX) bits_in_journal = UINT_MAX; - while (bits_in_journal < (ic->provided_data_sectors + ((sector_t)1 << log2_sectors_per_bitmap_bit) - 1) >> log2_sectors_per_bitmap_bit) - log2_sectors_per_bitmap_bit++; + if (bits_in_journal) + while (bits_in_journal < (ic->provided_data_sectors + ((sector_t)1 << log2_sectors_per_bitmap_bit) - 1) >> log2_sectors_per_bitmap_bit) + log2_sectors_per_bitmap_bit++; log2_blocks_per_bitmap_bit = log2_sectors_per_bitmap_bit - ic->sb->log2_sectors_per_block; ic->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit; @@ -4486,7 +4794,6 @@ try_smaller_buffer: goto bad; } - threshold = (__u64)ic->journal_entries * (100 - journal_watermark); threshold += 50; do_div(threshold, 100); @@ -4538,17 +4845,19 @@ try_smaller_buffer: goto bad; } - ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev, - 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL, 0); - if (IS_ERR(ic->bufio)) { - r = PTR_ERR(ic->bufio); - ti->error = "Cannot initialize dm-bufio"; - ic->bufio = NULL; - goto bad; + if (ic->mode != 'I') { + ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev, + 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL, 0); + if (IS_ERR(ic->bufio)) { + r = PTR_ERR(ic->bufio); + ti->error = "Cannot initialize dm-bufio"; + ic->bufio = NULL; + goto bad; + } + dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors); } - dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors); - if (ic->mode != 'R') { + if (ic->mode != 'R' && ic->mode != 'I') { r = create_journal(ic, &ti->error); if (r) goto bad; @@ -4608,7 +4917,7 @@ try_smaller_buffer: ic->just_formatted = true; } - if (!ic->meta_dev) { + if (!ic->meta_dev && ic->mode != 'I') { r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors); if (r) goto bad; @@ -4632,6 +4941,9 @@ try_smaller_buffer: if (ic->discard) ti->num_discard_bios = 1; + if (ic->mode == 'I') + ti->mempool_needs_integrity = true; + dm_audit_log_ctr(DM_MSG_PREFIX, ti, 1); return 0; @@ -4665,6 +4977,7 @@ static void dm_integrity_dtr(struct dm_target *ti) kvfree(ic->bbs); if (ic->bufio) dm_bufio_client_destroy(ic->bufio); + bioset_exit(&ic->recheck_bios); mempool_exit(&ic->recheck_pool); mempool_exit(&ic->journal_io_mempool); if (ic->io) @@ -4718,12 +5031,13 @@ static void dm_integrity_dtr(struct dm_target *ti) static struct target_type integrity_target = { .name = "integrity", - .version = {1, 11, 0}, + .version = {1, 12, 0}, .module = THIS_MODULE, .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, .ctr = dm_integrity_ctr, .dtr = dm_integrity_dtr, .map = dm_integrity_map, + .end_io = dm_integrity_end_io, .postsuspend = dm_integrity_postsuspend, .resume = dm_integrity_resume, .status = dm_integrity_status, diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 7409490259d1..d7a8e2f40db3 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -347,7 +347,7 @@ static void do_region(const blk_opf_t opf, unsigned int region, break; default: num_bvecs = bio_max_segs(dm_sector_div_up(remaining, - (PAGE_SIZE >> SECTOR_SHIFT))); + (PAGE_SIZE >> SECTOR_SHIFT)) + 1); } bio = bio_alloc_bioset(where->bdev, num_bvecs, opf, GFP_NOIO, @@ -384,16 +384,13 @@ static void do_region(const blk_opf_t opf, unsigned int region, static void dispatch_io(blk_opf_t opf, unsigned int num_regions, struct dm_io_region *where, struct dpages *dp, - struct io *io, int sync, unsigned short ioprio) + struct io *io, unsigned short ioprio) { int i; struct dpages old_pages = *dp; BUG_ON(num_regions > DM_IO_MAX_REGIONS); - if (sync) - opf |= REQ_SYNC; - /* * For multiple regions we need to be careful to rewind * the dp object for each call to do_region. @@ -411,6 +408,26 @@ static void dispatch_io(blk_opf_t opf, unsigned int num_regions, dec_count(io, 0, 0); } +static void async_io(struct dm_io_client *client, unsigned int num_regions, + struct dm_io_region *where, blk_opf_t opf, + struct dpages *dp, io_notify_fn fn, void *context, + unsigned short ioprio) +{ + struct io *io; + + io = mempool_alloc(&client->pool, GFP_NOIO); + io->error_bits = 0; + atomic_set(&io->count, 1); /* see dispatch_io() */ + io->client = client; + io->callback = fn; + io->context = context; + + io->vma_invalidate_address = dp->vma_invalidate_address; + io->vma_invalidate_size = dp->vma_invalidate_size; + + dispatch_io(opf, num_regions, where, dp, io, ioprio); +} + struct sync_io { unsigned long error_bits; struct completion wait; @@ -428,27 +445,12 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, struct dm_io_region *where, blk_opf_t opf, struct dpages *dp, unsigned long *error_bits, unsigned short ioprio) { - struct io *io; struct sync_io sio; - if (num_regions > 1 && !op_is_write(opf)) { - WARN_ON(1); - return -EIO; - } - init_completion(&sio.wait); - io = mempool_alloc(&client->pool, GFP_NOIO); - io->error_bits = 0; - atomic_set(&io->count, 1); /* see dispatch_io() */ - io->client = client; - io->callback = sync_io_complete; - io->context = &sio; - - io->vma_invalidate_address = dp->vma_invalidate_address; - io->vma_invalidate_size = dp->vma_invalidate_size; - - dispatch_io(opf, num_regions, where, dp, io, 1, ioprio); + async_io(client, num_regions, where, opf | REQ_SYNC, dp, + sync_io_complete, &sio, ioprio); wait_for_completion_io(&sio.wait); @@ -458,33 +460,6 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, return sio.error_bits ? -EIO : 0; } -static int async_io(struct dm_io_client *client, unsigned int num_regions, - struct dm_io_region *where, blk_opf_t opf, - struct dpages *dp, io_notify_fn fn, void *context, - unsigned short ioprio) -{ - struct io *io; - - if (num_regions > 1 && !op_is_write(opf)) { - WARN_ON(1); - fn(1, context); - return -EIO; - } - - io = mempool_alloc(&client->pool, GFP_NOIO); - io->error_bits = 0; - atomic_set(&io->count, 1); /* see dispatch_io() */ - io->client = client; - io->callback = fn; - io->context = context; - - io->vma_invalidate_address = dp->vma_invalidate_address; - io->vma_invalidate_size = dp->vma_invalidate_size; - - dispatch_io(opf, num_regions, where, dp, io, 0, ioprio); - return 0; -} - static int dp_init(struct dm_io_request *io_req, struct dpages *dp, unsigned long size) { @@ -529,6 +504,11 @@ int dm_io(struct dm_io_request *io_req, unsigned int num_regions, int r; struct dpages dp; + if (num_regions > 1 && !op_is_write(io_req->bi_opf)) { + WARN_ON(1); + return -EIO; + } + r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT); if (r) return r; @@ -537,9 +517,9 @@ int dm_io(struct dm_io_request *io_req, unsigned int num_regions, return sync_io(io_req->client, num_regions, where, io_req->bi_opf, &dp, sync_error_bits, ioprio); - return async_io(io_req->client, num_regions, where, - io_req->bi_opf, &dp, io_req->notify.fn, - io_req->notify.context, ioprio); + async_io(io_req->client, num_regions, where, io_req->bi_opf, &dp, + io_req->notify.fn, io_req->notify.context, ioprio); + return 0; } EXPORT_SYMBOL(dm_io); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 2d3e186ca87e..49fb0f684193 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -62,6 +62,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_discard_bios = 1; ti->num_secure_erase_bios = 1; ti->num_write_zeroes_bios = 1; + ti->flush_bypasses_map = true; ti->private = lc; return 0; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 15b681b90153..637977acc3dc 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1419,8 +1419,7 @@ out: /* * Fail or reinstate all paths that match the provided struct dm_dev. */ -static int action_dev(struct multipath *m, struct dm_dev *dev, - action_fn action) +static int action_dev(struct multipath *m, dev_t dev, action_fn action) { int r = -EINVAL; struct pgpath *pgpath; @@ -1428,7 +1427,7 @@ static int action_dev(struct multipath *m, struct dm_dev *dev, list_for_each_entry(pg, &m->priority_groups, list) { list_for_each_entry(pgpath, &pg->pgpaths, list) { - if (pgpath->path.dev == dev) + if (pgpath->path.dev->bdev->bd_dev == dev) r = action(pgpath); } } @@ -1959,7 +1958,7 @@ static int multipath_message(struct dm_target *ti, unsigned int argc, char **arg char *result, unsigned int maxlen) { int r = -EINVAL; - struct dm_dev *dev; + dev_t dev; struct multipath *m = ti->private; action_fn action; unsigned long flags; @@ -2008,7 +2007,7 @@ static int multipath_message(struct dm_target *ti, unsigned int argc, char **arg goto out; } - r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev); + r = dm_devt_from_path(argv[1], &dev); if (r) { DMWARN("message: error getting device %s", argv[1]); @@ -2017,8 +2016,6 @@ static int multipath_message(struct dm_target *ti, unsigned int argc, char **arg r = action_dev(m, dev, action); - dm_put_device(ti, dev); - out: mutex_unlock(&m->work_mutex); return r; diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 052c00c1eb15..0c3323e0adb2 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1626,6 +1626,23 @@ static int _check_data_dev_sectors(struct raid_set *rs) return 0; } +/* Get reshape sectors from data_offsets or raid set */ +static sector_t _get_reshape_sectors(struct raid_set *rs) +{ + struct md_rdev *rdev; + sector_t reshape_sectors = 0; + + rdev_for_each(rdev, &rs->md) + if (!test_bit(Journal, &rdev->flags)) { + reshape_sectors = (rdev->data_offset > rdev->new_data_offset) ? + rdev->data_offset - rdev->new_data_offset : + rdev->new_data_offset - rdev->data_offset; + break; + } + + return max(reshape_sectors, (sector_t) rs->data_offset); +} + /* Calculate the sectors per device and per array used for @rs */ static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, bool use_mddev) { @@ -1656,7 +1673,7 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, b if (sector_div(dev_sectors, data_stripes)) goto bad; - array_sectors = (data_stripes + delta_disks) * dev_sectors; + array_sectors = (data_stripes + delta_disks) * (dev_sectors - _get_reshape_sectors(rs)); if (sector_div(array_sectors, rs->raid10_copies)) goto bad; @@ -1665,7 +1682,7 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, b else /* Striped layouts */ - array_sectors = (data_stripes + delta_disks) * dev_sectors; + array_sectors = (data_stripes + delta_disks) * (dev_sectors - _get_reshape_sectors(rs)); mddev->array_sectors = array_sectors; mddev->dev_sectors = dev_sectors; @@ -1704,11 +1721,20 @@ static void do_table_event(struct work_struct *ws) struct raid_set *rs = container_of(ws, struct raid_set, md.event_work); smp_rmb(); /* Make sure we access most actual mddev properties */ - if (!rs_is_reshaping(rs)) { + + /* Only grow size resulting from added stripe(s) after reshape ended. */ + if (!rs_is_reshaping(rs) && + rs->array_sectors > rs->md.array_sectors && + !rs->md.delta_disks && + rs->md.raid_disks == rs->raid_disks) { + /* The raid10 personality doesn't provide proper device sizes -> correct. */ if (rs_is_raid10(rs)) rs_set_rdev_sectors(rs); + + rs->md.array_sectors = rs->array_sectors; rs_set_capacity(rs); } + dm_table_event(rs->ti->table); } @@ -2811,23 +2837,6 @@ static int rs_prepare_reshape(struct raid_set *rs) return 0; } -/* Get reshape sectors from data_offsets or raid set */ -static sector_t _get_reshape_sectors(struct raid_set *rs) -{ - struct md_rdev *rdev; - sector_t reshape_sectors = 0; - - rdev_for_each(rdev, &rs->md) - if (!test_bit(Journal, &rdev->flags)) { - reshape_sectors = (rdev->data_offset > rdev->new_data_offset) ? - rdev->data_offset - rdev->new_data_offset : - rdev->new_data_offset - rdev->data_offset; - break; - } - - return max(reshape_sectors, (sector_t) rs->data_offset); -} - /* * Reshape: * - change raid layout @@ -3802,8 +3811,8 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) struct raid_set *rs = ti->private; unsigned int chunk_size_bytes = to_bytes(rs->md.chunk_sectors); - blk_limits_io_min(limits, chunk_size_bytes); - blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs)); + limits->io_min = chunk_size_bytes; + limits->io_opt = chunk_size_bytes * mddev_data_stripes(rs); } static void raid_presuspend(struct dm_target *ti) @@ -4023,6 +4032,11 @@ static int raid_preresume(struct dm_target *ti) if (test_and_set_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags)) return 0; + /* If different and no explicit grow request, expose MD array size as of superblock. */ + if (!test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) && + rs->array_sectors != mddev->array_sectors) + rs_set_capacity(rs); + /* * The superblocks need to be updated on disk if the * array is new or new devices got added (thus zeroed @@ -4101,10 +4115,11 @@ static void raid_resume(struct dm_target *ti) if (mddev->delta_disks < 0) rs_set_capacity(rs); + mddev_lock_nointr(mddev); WARN_ON_ONCE(!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)); - WARN_ON_ONCE(test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); + WARN_ON_ONCE(rcu_dereference_protected(mddev->sync_thread, + lockdep_is_held(&mddev->reconfig_mutex))); clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags); - mddev_lock_nointr(mddev); mddev->ro = 0; mddev->in_sync = 0; md_unfrozen_sync_thread(mddev); diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 16b93ae51d96..4112071de0be 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -157,6 +157,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_discard_bios = stripes; ti->num_secure_erase_bios = stripes; ti->num_write_zeroes_bios = stripes; + ti->flush_bypasses_map = true; sc->chunk_size = chunk_size; if (chunk_size & (chunk_size - 1)) @@ -458,8 +459,8 @@ static void stripe_io_hints(struct dm_target *ti, struct stripe_c *sc = ti->private; unsigned int chunk_size = sc->chunk_size << SECTOR_SHIFT; - blk_limits_io_min(limits, chunk_size); - blk_limits_io_opt(limits, chunk_size * sc->stripes); + limits->io_min = chunk_size; + limits->io_opt = chunk_size * sc->stripes; } static struct target_type stripe_target = { diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 92d18dcdb3e9..dbd39b9722b9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -160,6 +160,7 @@ int dm_table_create(struct dm_table **result, blk_mode_t mode, t->type = DM_TYPE_NONE; t->mode = mode; t->md = md; + t->flush_bypasses_map = true; *result = t; return 0; } @@ -330,23 +331,15 @@ static int upgrade_mode(struct dm_dev_internal *dd, blk_mode_t new_mode, } /* - * Add a device to the list, or just increment the usage count if - * it's already present. - * * Note: the __ref annotation is because this function can call the __init * marked early_lookup_bdev when called during early boot code from dm-init.c. */ -int __ref dm_get_device(struct dm_target *ti, const char *path, blk_mode_t mode, - struct dm_dev **result) +int __ref dm_devt_from_path(const char *path, dev_t *dev_p) { int r; dev_t dev; unsigned int major, minor; char dummy; - struct dm_dev_internal *dd; - struct dm_table *t = ti->table; - - BUG_ON(!t); if (sscanf(path, "%u:%u%c", &major, &minor, &dummy) == 2) { /* Extract the major/minor numbers */ @@ -362,6 +355,29 @@ int __ref dm_get_device(struct dm_target *ti, const char *path, blk_mode_t mode, if (r) return r; } + *dev_p = dev; + return 0; +} +EXPORT_SYMBOL(dm_devt_from_path); + +/* + * Add a device to the list, or just increment the usage count if + * it's already present. + */ +int dm_get_device(struct dm_target *ti, const char *path, blk_mode_t mode, + struct dm_dev **result) +{ + int r; + dev_t dev; + struct dm_dev_internal *dd; + struct dm_table *t = ti->table; + + BUG_ON(!t); + + r = dm_devt_from_path(path, &dev); + if (r) + return r; + if (dev == disk_devt(t->md->disk)) return -EINVAL; @@ -748,6 +764,9 @@ int dm_table_add_target(struct dm_table *t, const char *type, if (ti->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key)) static_branch_enable(&swap_bios_enabled); + if (!ti->flush_bypasses_map) + t->flush_bypasses_map = false; + return 0; bad: @@ -1031,6 +1050,7 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * unsigned int min_pool_size = 0, pool_size; struct dm_md_mempools *pools; unsigned int bioset_flags = 0; + bool mempool_needs_integrity = t->integrity_supported; if (unlikely(type == DM_TYPE_NONE)) { DMERR("no table type is set, can't allocate mempools"); @@ -1055,6 +1075,8 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * per_io_data_size = max(per_io_data_size, ti->per_io_data_size); min_pool_size = max(min_pool_size, ti->num_flush_bios); + + mempool_needs_integrity |= ti->mempool_needs_integrity; } pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size); front_pad = roundup(per_io_data_size, @@ -1064,13 +1086,13 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET; if (bioset_init(&pools->io_bs, pool_size, io_front_pad, bioset_flags)) goto out_free_pools; - if (t->integrity_supported && + if (mempool_needs_integrity && bioset_integrity_create(&pools->io_bs, pool_size)) goto out_free_pools; init_bs: if (bioset_init(&pools->bs, pool_size, front_pad, 0)) goto out_free_pools; - if (t->integrity_supported && + if (mempool_needs_integrity && bioset_integrity_create(&pools->bs, pool_size)) goto out_free_pools; diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 6022189c1388..f90679cfec5b 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -249,7 +249,7 @@ struct dm_thin_device { */ #define SUPERBLOCK_CSUM_XOR 160774 -static void sb_prepare_for_write(struct dm_block_validator *v, +static void sb_prepare_for_write(const struct dm_block_validator *v, struct dm_block *b, size_t block_size) { @@ -261,7 +261,7 @@ static void sb_prepare_for_write(struct dm_block_validator *v, SUPERBLOCK_CSUM_XOR)); } -static int sb_check(struct dm_block_validator *v, +static int sb_check(const struct dm_block_validator *v, struct dm_block *b, size_t block_size) { @@ -294,7 +294,7 @@ static int sb_check(struct dm_block_validator *v, return 0; } -static struct dm_block_validator sb_validator = { +static const struct dm_block_validator sb_validator = { .name = "superblock", .prepare_for_write = sb_prepare_for_write, .check = sb_check diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 991f77a5885b..a0c1620e90c8 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -4079,10 +4079,10 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) if (io_opt_sectors < pool->sectors_per_block || !is_factor(io_opt_sectors, pool->sectors_per_block)) { if (is_factor(pool->sectors_per_block, limits->max_sectors)) - blk_limits_io_min(limits, limits->max_sectors << SECTOR_SHIFT); + limits->io_min = limits->max_sectors << SECTOR_SHIFT; else - blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT); - blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); + limits->io_min = pool->sectors_per_block << SECTOR_SHIFT; + limits->io_opt = pool->sectors_per_block << SECTOR_SHIFT; } /* diff --git a/drivers/md/dm-vdo/dedupe.c b/drivers/md/dm-vdo/dedupe.c index 117266e1b3ae..39ac68614419 100644 --- a/drivers/md/dm-vdo/dedupe.c +++ b/drivers/md/dm-vdo/dedupe.c @@ -148,11 +148,6 @@ #include "vdo.h" #include "wait-queue.h" -struct uds_attribute { - struct attribute attr; - const char *(*show_string)(struct hash_zones *hash_zones); -}; - #define DEDUPE_QUERY_TIMER_IDLE 0 #define DEDUPE_QUERY_TIMER_RUNNING 1 #define DEDUPE_QUERY_TIMER_FIRED 2 diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c index b423bec6458b..dd05691e4097 100644 --- a/drivers/md/dm-vdo/dm-vdo-target.c +++ b/drivers/md/dm-vdo/dm-vdo-target.c @@ -928,9 +928,9 @@ static void vdo_io_hints(struct dm_target *ti, struct queue_limits *limits) limits->physical_block_size = VDO_BLOCK_SIZE; /* The minimum io size for random io */ - blk_limits_io_min(limits, VDO_BLOCK_SIZE); + limits->io_min = VDO_BLOCK_SIZE; /* The optimal io size for streamed/sequential io */ - blk_limits_io_opt(limits, VDO_BLOCK_SIZE); + limits->io_opt = VDO_BLOCK_SIZE; /* * Sets the maximum discard size that will be passed into VDO. This value comes from a @@ -945,7 +945,7 @@ static void vdo_io_hints(struct dm_target *ti, struct queue_limits *limits) * The value is used by dm-thin to determine whether to pass down discards. The block layer * splits large discards on this boundary when this is set. */ - limits->max_discard_sectors = + limits->max_hw_discard_sectors = (vdo->device_config->max_discard_blocks * VDO_SECTORS_PER_BLOCK); /* diff --git a/drivers/md/dm-vdo/indexer/index.c b/drivers/md/dm-vdo/indexer/index.c index 1ba767144426..df4934846244 100644 --- a/drivers/md/dm-vdo/indexer/index.c +++ b/drivers/md/dm-vdo/indexer/index.c @@ -197,15 +197,12 @@ static int finish_previous_chapter(struct uds_index *index, u64 current_chapter_ static int swap_open_chapter(struct index_zone *zone) { int result; - struct open_chapter_zone *temporary_chapter; result = finish_previous_chapter(zone->index, zone->newest_virtual_chapter); if (result != UDS_SUCCESS) return result; - temporary_chapter = zone->open_chapter; - zone->open_chapter = zone->writing_chapter; - zone->writing_chapter = temporary_chapter; + swap(zone->open_chapter, zone->writing_chapter); return UDS_SUCCESS; } diff --git a/drivers/md/dm-vdo/int-map.c b/drivers/md/dm-vdo/int-map.c index 3aa438f84ea1..f6fe58e437b3 100644 --- a/drivers/md/dm-vdo/int-map.c +++ b/drivers/md/dm-vdo/int-map.c @@ -96,7 +96,7 @@ struct int_map { size_t size; /** @capacity: The number of neighborhoods in the map. */ size_t capacity; - /* @bucket_count: The number of buckets in the bucket array. */ + /** @bucket_count: The number of buckets in the bucket array. */ size_t bucket_count; /** @buckets: The array of hash buckets. */ struct bucket *buckets; diff --git a/drivers/md/dm-vdo/repair.c b/drivers/md/dm-vdo/repair.c index defc9359f10e..7e0009d2f67d 100644 --- a/drivers/md/dm-vdo/repair.c +++ b/drivers/md/dm-vdo/repair.c @@ -51,6 +51,8 @@ struct recovery_point { bool increment_applied; }; +DEFINE_MIN_HEAP(struct numbered_block_mapping, replay_heap); + struct repair_completion { /* The completion header */ struct vdo_completion completion; @@ -97,7 +99,7 @@ struct repair_completion { * order, then original journal order. This permits efficient iteration over the journal * entries in order. */ - struct min_heap replay_heap; + struct replay_heap replay_heap; /* Fields tracking progress through the journal entries. */ struct numbered_block_mapping *current_entry; struct numbered_block_mapping *current_unfetched_entry; @@ -135,7 +137,7 @@ struct repair_completion { * to sort by slot while still ensuring we replay all entries with the same slot in the exact order * as they appeared in the journal. */ -static bool mapping_is_less_than(const void *item1, const void *item2) +static bool mapping_is_less_than(const void *item1, const void *item2, void __always_unused *args) { const struct numbered_block_mapping *mapping1 = (const struct numbered_block_mapping *) item1; @@ -154,7 +156,7 @@ static bool mapping_is_less_than(const void *item1, const void *item2) return 0; } -static void swap_mappings(void *item1, void *item2) +static void swap_mappings(void *item1, void *item2, void __always_unused *args) { struct numbered_block_mapping *mapping1 = item1; struct numbered_block_mapping *mapping2 = item2; @@ -163,14 +165,13 @@ static void swap_mappings(void *item1, void *item2) } static const struct min_heap_callbacks repair_min_heap = { - .elem_size = sizeof(struct numbered_block_mapping), .less = mapping_is_less_than, .swp = swap_mappings, }; static struct numbered_block_mapping *sort_next_heap_element(struct repair_completion *repair) { - struct min_heap *heap = &repair->replay_heap; + struct replay_heap *heap = &repair->replay_heap; struct numbered_block_mapping *last; if (heap->nr == 0) @@ -181,8 +182,8 @@ static struct numbered_block_mapping *sort_next_heap_element(struct repair_compl * restore the heap invariant, and return a pointer to the popped element. */ last = &repair->entries[--heap->nr]; - swap_mappings(heap->data, last); - min_heapify(heap, 0, &repair_min_heap); + swap_mappings(heap->data, last, NULL); + min_heap_sift_down(heap, 0, &repair_min_heap, NULL); return last; } @@ -318,6 +319,7 @@ static bool __must_check abort_on_error(int result, struct repair_completion *re /** * drain_slab_depot() - Flush out all dirty refcounts blocks now that they have been rebuilt or * recovered. + * @completion: The repair completion. */ static void drain_slab_depot(struct vdo_completion *completion) { @@ -653,9 +655,6 @@ static void rebuild_reference_counts(struct vdo_completion *completion) vdo_traverse_forest(vdo->block_map, process_entry, completion); } -/** - * increment_recovery_point() - Move the given recovery point forward by one entry. - */ static void increment_recovery_point(struct recovery_point *point) { if (++point->entry_count < RECOVERY_JOURNAL_ENTRIES_PER_SECTOR) @@ -952,6 +951,7 @@ static void abort_block_map_recovery(struct repair_completion *repair, int resul /** * find_entry_starting_next_page() - Find the first journal entry after a given entry which is not * on the same block map page. + * @repair: The repair completion. * @current_entry: The entry to search from. * @needs_sort: Whether sorting is needed to proceed. * @@ -1117,12 +1117,12 @@ static void recover_block_map(struct vdo_completion *completion) * Organize the journal entries into a binary heap so we can iterate over them in sorted * order incrementally, avoiding an expensive sort call. */ - repair->replay_heap = (struct min_heap) { + repair->replay_heap = (struct replay_heap) { .data = repair->entries, .nr = repair->block_map_entry_count, .size = repair->block_map_entry_count, }; - min_heapify_all(&repair->replay_heap, &repair_min_heap); + min_heapify_all(&repair->replay_heap, &repair_min_heap, NULL); vdo_log_info("Replaying %zu recovery entries into block map", repair->block_map_entry_count); @@ -1218,6 +1218,7 @@ static bool __must_check is_exact_recovery_journal_block(const struct recovery_j /** * find_recovery_journal_head_and_tail() - Find the tail and head of the journal. + * @repair: The repair completion. * * Return: True if there were valid journal blocks. */ @@ -1446,6 +1447,7 @@ static int validate_heads(struct repair_completion *repair) /** * extract_new_mappings() - Find all valid new mappings to be applied to the block map. + * @repair: The repair completion. * * The mappings are extracted from the journal and stored in a sortable array so that all of the * mappings to be applied to a given block map page can be done in a single page fetch. @@ -1500,6 +1502,7 @@ static int extract_new_mappings(struct repair_completion *repair) /** * compute_usages() - Compute the lbns in use and block map data blocks counts from the tail of * the journal. + * @repair: The repair completion. */ static noinline int compute_usages(struct repair_completion *repair) { diff --git a/drivers/md/dm-vdo/slab-depot.c b/drivers/md/dm-vdo/slab-depot.c index 46e4721e5b4f..274f9ccd072f 100644 --- a/drivers/md/dm-vdo/slab-depot.c +++ b/drivers/md/dm-vdo/slab-depot.c @@ -3288,7 +3288,8 @@ int vdo_release_block_reference(struct block_allocator *allocator, * Thus, the ordering is reversed from the usual sense since min_heap returns smaller elements * before larger ones. */ -static bool slab_status_is_less_than(const void *item1, const void *item2) +static bool slab_status_is_less_than(const void *item1, const void *item2, + void __always_unused *args) { const struct slab_status *info1 = item1; const struct slab_status *info2 = item2; @@ -3300,7 +3301,7 @@ static bool slab_status_is_less_than(const void *item1, const void *item2) return info1->slab_number < info2->slab_number; } -static void swap_slab_statuses(void *item1, void *item2) +static void swap_slab_statuses(void *item1, void *item2, void __always_unused *args) { struct slab_status *info1 = item1; struct slab_status *info2 = item2; @@ -3309,7 +3310,6 @@ static void swap_slab_statuses(void *item1, void *item2) } static const struct min_heap_callbacks slab_status_min_heap = { - .elem_size = sizeof(struct slab_status), .less = slab_status_is_less_than, .swp = swap_slab_statuses, }; @@ -3509,7 +3509,7 @@ static int get_slab_statuses(struct block_allocator *allocator, static int __must_check vdo_prepare_slabs_for_allocation(struct block_allocator *allocator) { struct slab_status current_slab_status; - struct min_heap heap; + DEFINE_MIN_HEAP(struct slab_status, heap) heap; int result; struct slab_status *slab_statuses; struct slab_depot *depot = allocator->depot; @@ -3521,12 +3521,12 @@ static int __must_check vdo_prepare_slabs_for_allocation(struct block_allocator return result; /* Sort the slabs by cleanliness, then by emptiness hint. */ - heap = (struct min_heap) { + heap = (struct heap) { .data = slab_statuses, .nr = allocator->slab_count, .size = allocator->slab_count, }; - min_heapify_all(&heap, &slab_status_min_heap); + min_heapify_all(&heap, &slab_status_min_heap, NULL); while (heap.nr > 0) { bool high_priority; @@ -3534,7 +3534,7 @@ static int __must_check vdo_prepare_slabs_for_allocation(struct block_allocator struct slab_journal *journal; current_slab_status = slab_statuses[0]; - min_heap_pop(&heap, &slab_status_min_heap); + min_heap_pop(&heap, &slab_status_min_heap, NULL); slab = depot->slabs[current_slab_status.slab_number]; if ((depot->load_type == VDO_SLAB_DEPOT_REBUILD_LOAD) || diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index e46aee6f932e..62b1a44b8dd2 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -186,8 +186,7 @@ error: static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io, u8 *want_digest, u8 *data) { - if (unlikely(verity_hash(v, verity_io_hash_req(v, io), - data, 1 << v->data_dev_block_bits, + if (unlikely(verity_hash(v, io, data, 1 << v->data_dev_block_bits, verity_io_real_digest(v, io), true))) return 0; @@ -388,8 +387,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io, } /* Always re-validate the corrected block against the expected hash */ - r = verity_hash(v, verity_io_hash_req(v, io), fio->output, - 1 << v->data_dev_block_bits, + r = verity_hash(v, io, fio->output, 1 << v->data_dev_block_bits, verity_io_real_digest(v, io), true); if (unlikely(r < 0)) return r; @@ -404,24 +402,9 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io, return 0; } -static int fec_bv_copy(struct dm_verity *v, struct dm_verity_io *io, u8 *data, - size_t len) -{ - struct dm_verity_fec_io *fio = fec_io(io); - - memcpy(data, &fio->output[fio->output_pos], len); - fio->output_pos += len; - - return 0; -} - -/* - * Correct errors in a block. Copies corrected block to dest if non-NULL, - * otherwise to a bio_vec starting from iter. - */ +/* Correct errors in a block. Copies corrected block to dest. */ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, - enum verity_block_type type, sector_t block, u8 *dest, - struct bvec_iter *iter) + enum verity_block_type type, sector_t block, u8 *dest) { int r; struct dm_verity_fec_io *fio = fec_io(io); @@ -471,12 +454,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, goto done; } - if (dest) - memcpy(dest, fio->output, 1 << v->data_dev_block_bits); - else if (iter) { - fio->output_pos = 0; - r = verity_for_bv_block(v, io, iter, fec_bv_copy); - } + memcpy(dest, fio->output, 1 << v->data_dev_block_bits); done: fio->level--; diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h index 8454070d2824..09123a612953 100644 --- a/drivers/md/dm-verity-fec.h +++ b/drivers/md/dm-verity-fec.h @@ -57,7 +57,6 @@ struct dm_verity_fec_io { u8 *bufs[DM_VERITY_FEC_BUF_MAX]; /* bufs for deinterleaving */ unsigned int nbufs; /* number of buffers allocated */ u8 *output; /* buffer for corrected output */ - size_t output_pos; unsigned int level; /* recursion level */ }; @@ -70,7 +69,7 @@ extern bool verity_fec_is_enabled(struct dm_verity *v); extern int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, enum verity_block_type type, sector_t block, - u8 *dest, struct bvec_iter *iter); + u8 *dest); extern unsigned int verity_fec_status_table(struct dm_verity *v, unsigned int sz, char *result, unsigned int maxlen); @@ -100,8 +99,7 @@ static inline bool verity_fec_is_enabled(struct dm_verity *v) static inline int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, enum verity_block_type type, - sector_t block, u8 *dest, - struct bvec_iter *iter) + sector_t block, u8 *dest) { return -EOPNOTSUPP; } diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index bb5da66da4c1..cf659c8feb29 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -48,6 +48,9 @@ module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, 0644); static DEFINE_STATIC_KEY_FALSE(use_bh_wq_enabled); +/* Is at least one dm-verity instance using ahash_tfm instead of shash_tfm? */ +static DEFINE_STATIC_KEY_FALSE(ahash_enabled); + struct dm_verity_prefetch_work { struct work_struct work; struct dm_verity *v; @@ -102,7 +105,7 @@ static sector_t verity_position_at_level(struct dm_verity *v, sector_t block, return block >> (level * v->hash_per_block_bits); } -static int verity_hash_update(struct dm_verity *v, struct ahash_request *req, +static int verity_ahash_update(struct dm_verity *v, struct ahash_request *req, const u8 *data, size_t len, struct crypto_wait *wait) { @@ -135,12 +138,12 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req, /* * Wrapper for crypto_ahash_init, which handles verity salting. */ -static int verity_hash_init(struct dm_verity *v, struct ahash_request *req, +static int verity_ahash_init(struct dm_verity *v, struct ahash_request *req, struct crypto_wait *wait, bool may_sleep) { int r; - ahash_request_set_tfm(req, v->tfm); + ahash_request_set_tfm(req, v->ahash_tfm); ahash_request_set_callback(req, may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0, crypto_req_done, (void *)wait); @@ -155,18 +158,18 @@ static int verity_hash_init(struct dm_verity *v, struct ahash_request *req, } if (likely(v->salt_size && (v->version >= 1))) - r = verity_hash_update(v, req, v->salt, v->salt_size, wait); + r = verity_ahash_update(v, req, v->salt, v->salt_size, wait); return r; } -static int verity_hash_final(struct dm_verity *v, struct ahash_request *req, - u8 *digest, struct crypto_wait *wait) +static int verity_ahash_final(struct dm_verity *v, struct ahash_request *req, + u8 *digest, struct crypto_wait *wait) { int r; if (unlikely(v->salt_size && (!v->version))) { - r = verity_hash_update(v, req, v->salt, v->salt_size, wait); + r = verity_ahash_update(v, req, v->salt, v->salt_size, wait); if (r < 0) { DMERR("%s failed updating salt: %d", __func__, r); @@ -180,23 +183,27 @@ out: return r; } -int verity_hash(struct dm_verity *v, struct ahash_request *req, +int verity_hash(struct dm_verity *v, struct dm_verity_io *io, const u8 *data, size_t len, u8 *digest, bool may_sleep) { int r; - struct crypto_wait wait; - - r = verity_hash_init(v, req, &wait, may_sleep); - if (unlikely(r < 0)) - goto out; - r = verity_hash_update(v, req, data, len, &wait); - if (unlikely(r < 0)) - goto out; + if (static_branch_unlikely(&ahash_enabled) && !v->shash_tfm) { + struct ahash_request *req = verity_io_hash_req(v, io); + struct crypto_wait wait; - r = verity_hash_final(v, req, digest, &wait); + r = verity_ahash_init(v, req, &wait, may_sleep) ?: + verity_ahash_update(v, req, data, len, &wait) ?: + verity_ahash_final(v, req, digest, &wait); + } else { + struct shash_desc *desc = verity_io_hash_req(v, io); -out: + desc->tfm = v->shash_tfm; + r = crypto_shash_import(desc, v->initial_hashstate) ?: + crypto_shash_finup(desc, data, len, digest); + } + if (unlikely(r)) + DMERR("Error hashing block: %d", r); return r; } @@ -325,8 +332,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, goto release_ret_r; } - r = verity_hash(v, verity_io_hash_req(v, io), - data, 1 << v->hash_dev_block_bits, + r = verity_hash(v, io, data, 1 << v->hash_dev_block_bits, verity_io_real_digest(v, io), !io->in_bh); if (unlikely(r < 0)) goto release_ret_r; @@ -342,7 +348,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, r = -EAGAIN; goto release_ret_r; } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_METADATA, - hash_block, data, NULL) == 0) + hash_block, data) == 0) aux->hash_verified = 1; else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA, @@ -404,98 +410,8 @@ out: return r; } -/* - * Calculates the digest for the given bio - */ -static int verity_for_io_block(struct dm_verity *v, struct dm_verity_io *io, - struct bvec_iter *iter, struct crypto_wait *wait) -{ - unsigned int todo = 1 << v->data_dev_block_bits; - struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); - struct scatterlist sg; - struct ahash_request *req = verity_io_hash_req(v, io); - - do { - int r; - unsigned int len; - struct bio_vec bv = bio_iter_iovec(bio, *iter); - - sg_init_table(&sg, 1); - - len = bv.bv_len; - - if (likely(len >= todo)) - len = todo; - /* - * Operating on a single page at a time looks suboptimal - * until you consider the typical block size is 4,096B. - * Going through this loops twice should be very rare. - */ - sg_set_page(&sg, bv.bv_page, len, bv.bv_offset); - ahash_request_set_crypt(req, &sg, NULL, len); - r = crypto_wait_req(crypto_ahash_update(req), wait); - - if (unlikely(r < 0)) { - DMERR("%s crypto op failed: %d", __func__, r); - return r; - } - - bio_advance_iter(bio, iter, len); - todo -= len; - } while (todo); - - return 0; -} - -/* - * Calls function process for 1 << v->data_dev_block_bits bytes in the bio_vec - * starting from iter. - */ -int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, - struct bvec_iter *iter, - int (*process)(struct dm_verity *v, - struct dm_verity_io *io, u8 *data, - size_t len)) -{ - unsigned int todo = 1 << v->data_dev_block_bits; - struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); - - do { - int r; - u8 *page; - unsigned int len; - struct bio_vec bv = bio_iter_iovec(bio, *iter); - - page = bvec_kmap_local(&bv); - len = bv.bv_len; - - if (likely(len >= todo)) - len = todo; - - r = process(v, io, page, len); - kunmap_local(page); - - if (r < 0) - return r; - - bio_advance_iter(bio, iter, len); - todo -= len; - } while (todo); - - return 0; -} - -static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io, - u8 *data, size_t len) -{ - memcpy(data, io->recheck_buffer, len); - io->recheck_buffer += len; - - return 0; -} - static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, - struct bvec_iter start, sector_t cur_block) + sector_t cur_block, u8 *dest) { struct page *page; void *buffer; @@ -518,8 +434,7 @@ static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, if (unlikely(r)) goto free_ret; - r = verity_hash(v, verity_io_hash_req(v, io), buffer, - 1 << v->data_dev_block_bits, + r = verity_hash(v, io, buffer, 1 << v->data_dev_block_bits, verity_io_real_digest(v, io), true); if (unlikely(r)) goto free_ret; @@ -530,11 +445,7 @@ static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, goto free_ret; } - io->recheck_buffer = buffer; - r = verity_for_bv_block(v, io, &start, verity_recheck_copy); - if (unlikely(r)) - goto free_ret; - + memcpy(dest, buffer, 1 << v->data_dev_block_bits); r = 0; free_ret: mempool_free(page, &v->recheck_pool); @@ -542,23 +453,36 @@ free_ret: return r; } -static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io, - u8 *data, size_t len) -{ - memset(data, 0, len); - return 0; -} - -/* - * Moves the bio iter one data block forward. - */ -static inline void verity_bv_skip_block(struct dm_verity *v, - struct dm_verity_io *io, - struct bvec_iter *iter) +static int verity_handle_data_hash_mismatch(struct dm_verity *v, + struct dm_verity_io *io, + struct bio *bio, sector_t blkno, + u8 *data) { - struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); + if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) { + /* + * Error handling code (FEC included) cannot be run in the + * BH workqueue, so fallback to a standard workqueue. + */ + return -EAGAIN; + } + if (verity_recheck(v, io, blkno, data) == 0) { + if (v->validated_blocks) + set_bit(blkno, v->validated_blocks); + return 0; + } +#if defined(CONFIG_DM_VERITY_FEC) + if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, blkno, + data) == 0) + return 0; +#endif + if (bio->bi_status) + return -EIO; /* Error correction failed; Just return error */ - bio_advance_iter(bio, iter, 1 << v->data_dev_block_bits); + if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, blkno)) { + dm_audit_log_bio(DM_MSG_PREFIX, "verify-data", bio, blkno, 0); + return -EIO; + } + return 0; } /* @@ -566,12 +490,10 @@ static inline void verity_bv_skip_block(struct dm_verity *v, */ static int verity_verify_io(struct dm_verity_io *io) { - bool is_zero; struct dm_verity *v = io->v; - struct bvec_iter start; + const unsigned int block_size = 1 << v->data_dev_block_bits; struct bvec_iter iter_copy; struct bvec_iter *iter; - struct crypto_wait wait; struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); unsigned int b; @@ -585,16 +507,17 @@ static int verity_verify_io(struct dm_verity_io *io) } else iter = &io->iter; - for (b = 0; b < io->n_blocks; b++) { + for (b = 0; b < io->n_blocks; + b++, bio_advance_iter(bio, iter, block_size)) { int r; sector_t cur_block = io->block + b; - struct ahash_request *req = verity_io_hash_req(v, io); + bool is_zero; + struct bio_vec bv; + void *data; if (v->validated_blocks && bio->bi_status == BLK_STS_OK && - likely(test_bit(cur_block, v->validated_blocks))) { - verity_bv_skip_block(v, io, iter); + likely(test_bit(cur_block, v->validated_blocks))) continue; - } r = verity_hash_for_block(v, io, cur_block, verity_io_want_digest(v, io), @@ -602,67 +525,49 @@ static int verity_verify_io(struct dm_verity_io *io) if (unlikely(r < 0)) return r; + bv = bio_iter_iovec(bio, *iter); + if (unlikely(bv.bv_len < block_size)) { + /* + * Data block spans pages. This should not happen, + * since dm-verity sets dma_alignment to the data block + * size minus 1, and dm-verity also doesn't allow the + * data block size to be greater than PAGE_SIZE. + */ + DMERR_LIMIT("unaligned io (data block spans pages)"); + return -EIO; + } + + data = bvec_kmap_local(&bv); + if (is_zero) { /* * If we expect a zero block, don't validate, just * return zeros. */ - r = verity_for_bv_block(v, io, iter, - verity_bv_zero); - if (unlikely(r < 0)) - return r; - + memset(data, 0, block_size); + kunmap_local(data); continue; } - r = verity_hash_init(v, req, &wait, !io->in_bh); - if (unlikely(r < 0)) - return r; - - start = *iter; - r = verity_for_io_block(v, io, iter, &wait); - if (unlikely(r < 0)) - return r; - - r = verity_hash_final(v, req, verity_io_real_digest(v, io), - &wait); - if (unlikely(r < 0)) + r = verity_hash(v, io, data, block_size, + verity_io_real_digest(v, io), !io->in_bh); + if (unlikely(r < 0)) { + kunmap_local(data); return r; + } if (likely(memcmp(verity_io_real_digest(v, io), verity_io_want_digest(v, io), v->digest_size) == 0)) { if (v->validated_blocks) set_bit(cur_block, v->validated_blocks); + kunmap_local(data); continue; - } else if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) { - /* - * Error handling code (FEC included) cannot be run in a - * tasklet since it may sleep, so fallback to work-queue. - */ - return -EAGAIN; - } else if (verity_recheck(v, io, start, cur_block) == 0) { - if (v->validated_blocks) - set_bit(cur_block, v->validated_blocks); - continue; -#if defined(CONFIG_DM_VERITY_FEC) - } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, - cur_block, NULL, &start) == 0) { - continue; -#endif - } else { - if (bio->bi_status) { - /* - * Error correction failed; Just return error - */ - return -EIO; - } - if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, - cur_block)) { - dm_audit_log_bio(DM_MSG_PREFIX, "verify-data", - bio, cur_block, 0); - return -EIO; - } } + r = verity_handle_data_hash_mismatch(v, io, bio, cur_block, + data); + kunmap_local(data); + if (unlikely(r)) + return r; } return 0; @@ -1014,7 +919,15 @@ static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits) if (limits->physical_block_size < 1 << v->data_dev_block_bits) limits->physical_block_size = 1 << v->data_dev_block_bits; - blk_limits_io_min(limits, limits->logical_block_size); + limits->io_min = limits->logical_block_size; + + /* + * Similar to what dm-crypt does, opt dm-verity out of support for + * direct I/O that is aligned to less than the traditional direct I/O + * alignment requirement of logical_block_size. This prevents dm-verity + * data blocks from crossing pages, eliminating various edge cases. + */ + limits->dma_alignment = limits->logical_block_size - 1; } static void verity_dtr(struct dm_target *ti) @@ -1033,11 +946,16 @@ static void verity_dtr(struct dm_target *ti) kvfree(v->validated_blocks); kfree(v->salt); + kfree(v->initial_hashstate); kfree(v->root_digest); kfree(v->zero_digest); - if (v->tfm) - crypto_free_ahash(v->tfm); + if (v->ahash_tfm) { + static_branch_dec(&ahash_enabled); + crypto_free_ahash(v->ahash_tfm); + } else { + crypto_free_shash(v->shash_tfm); + } kfree(v->alg_name); @@ -1083,7 +1001,7 @@ static int verity_alloc_most_once(struct dm_verity *v) static int verity_alloc_zero_digest(struct dm_verity *v) { int r = -ENOMEM; - struct ahash_request *req; + struct dm_verity_io *io; u8 *zero_data; v->zero_digest = kmalloc(v->digest_size, GFP_KERNEL); @@ -1091,9 +1009,9 @@ static int verity_alloc_zero_digest(struct dm_verity *v) if (!v->zero_digest) return r; - req = kmalloc(v->ahash_reqsize, GFP_KERNEL); + io = kmalloc(sizeof(*io) + v->hash_reqsize, GFP_KERNEL); - if (!req) + if (!io) return r; /* verity_dtr will free zero_digest */ zero_data = kzalloc(1 << v->data_dev_block_bits, GFP_KERNEL); @@ -1101,11 +1019,11 @@ static int verity_alloc_zero_digest(struct dm_verity *v) if (!zero_data) goto out; - r = verity_hash(v, req, zero_data, 1 << v->data_dev_block_bits, + r = verity_hash(v, io, zero_data, 1 << v->data_dev_block_bits, v->zero_digest, true); out: - kfree(req); + kfree(io); kfree(zero_data); return r; @@ -1226,6 +1144,113 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, return r; } +static int verity_setup_hash_alg(struct dm_verity *v, const char *alg_name) +{ + struct dm_target *ti = v->ti; + struct crypto_ahash *ahash; + struct crypto_shash *shash = NULL; + const char *driver_name; + + v->alg_name = kstrdup(alg_name, GFP_KERNEL); + if (!v->alg_name) { + ti->error = "Cannot allocate algorithm name"; + return -ENOMEM; + } + + /* + * Allocate the hash transformation object that this dm-verity instance + * will use. The vast majority of dm-verity users use CPU-based + * hashing, so when possible use the shash API to minimize the crypto + * API overhead. If the ahash API resolves to a different driver + * (likely an off-CPU hardware offload), use ahash instead. Also use + * ahash if the obsolete dm-verity format with the appended salt is + * being used, so that quirk only needs to be handled in one place. + */ + ahash = crypto_alloc_ahash(alg_name, 0, + v->use_bh_wq ? CRYPTO_ALG_ASYNC : 0); + if (IS_ERR(ahash)) { + ti->error = "Cannot initialize hash function"; + return PTR_ERR(ahash); + } + driver_name = crypto_ahash_driver_name(ahash); + if (v->version >= 1 /* salt prepended, not appended? */) { + shash = crypto_alloc_shash(alg_name, 0, 0); + if (!IS_ERR(shash) && + strcmp(crypto_shash_driver_name(shash), driver_name) != 0) { + /* + * ahash gave a different driver than shash, so probably + * this is a case of real hardware offload. Use ahash. + */ + crypto_free_shash(shash); + shash = NULL; + } + } + if (!IS_ERR_OR_NULL(shash)) { + crypto_free_ahash(ahash); + ahash = NULL; + v->shash_tfm = shash; + v->digest_size = crypto_shash_digestsize(shash); + v->hash_reqsize = sizeof(struct shash_desc) + + crypto_shash_descsize(shash); + DMINFO("%s using shash \"%s\"", alg_name, driver_name); + } else { + v->ahash_tfm = ahash; + static_branch_inc(&ahash_enabled); + v->digest_size = crypto_ahash_digestsize(ahash); + v->hash_reqsize = sizeof(struct ahash_request) + + crypto_ahash_reqsize(ahash); + DMINFO("%s using ahash \"%s\"", alg_name, driver_name); + } + if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) { + ti->error = "Digest size too big"; + return -EINVAL; + } + return 0; +} + +static int verity_setup_salt_and_hashstate(struct dm_verity *v, const char *arg) +{ + struct dm_target *ti = v->ti; + + if (strcmp(arg, "-") != 0) { + v->salt_size = strlen(arg) / 2; + v->salt = kmalloc(v->salt_size, GFP_KERNEL); + if (!v->salt) { + ti->error = "Cannot allocate salt"; + return -ENOMEM; + } + if (strlen(arg) != v->salt_size * 2 || + hex2bin(v->salt, arg, v->salt_size)) { + ti->error = "Invalid salt"; + return -EINVAL; + } + } + if (v->shash_tfm) { + SHASH_DESC_ON_STACK(desc, v->shash_tfm); + int r; + + /* + * Compute the pre-salted hash state that can be passed to + * crypto_shash_import() for each block later. + */ + v->initial_hashstate = kmalloc( + crypto_shash_statesize(v->shash_tfm), GFP_KERNEL); + if (!v->initial_hashstate) { + ti->error = "Cannot allocate initial hash state"; + return -ENOMEM; + } + desc->tfm = v->shash_tfm; + r = crypto_shash_init(desc) ?: + crypto_shash_update(desc, v->salt, v->salt_size) ?: + crypto_shash_export(desc, v->initial_hashstate); + if (r) { + ti->error = "Cannot set up initial hash state"; + return r; + } + } + return 0; +} + /* * Target parameters: * <version> The current format is version 1. @@ -1350,38 +1375,9 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv) } v->hash_start = num_ll; - v->alg_name = kstrdup(argv[7], GFP_KERNEL); - if (!v->alg_name) { - ti->error = "Cannot allocate algorithm name"; - r = -ENOMEM; - goto bad; - } - - v->tfm = crypto_alloc_ahash(v->alg_name, 0, - v->use_bh_wq ? CRYPTO_ALG_ASYNC : 0); - if (IS_ERR(v->tfm)) { - ti->error = "Cannot initialize hash function"; - r = PTR_ERR(v->tfm); - v->tfm = NULL; - goto bad; - } - - /* - * dm-verity performance can vary greatly depending on which hash - * algorithm implementation is used. Help people debug performance - * problems by logging the ->cra_driver_name. - */ - DMINFO("%s using implementation \"%s\"", v->alg_name, - crypto_hash_alg_common(v->tfm)->base.cra_driver_name); - - v->digest_size = crypto_ahash_digestsize(v->tfm); - if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) { - ti->error = "Digest size too big"; - r = -EINVAL; + r = verity_setup_hash_alg(v, argv[7]); + if (r) goto bad; - } - v->ahash_reqsize = sizeof(struct ahash_request) + - crypto_ahash_reqsize(v->tfm); v->root_digest = kmalloc(v->digest_size, GFP_KERNEL); if (!v->root_digest) { @@ -1397,21 +1393,9 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv) } root_hash_digest_to_validate = argv[8]; - if (strcmp(argv[9], "-")) { - v->salt_size = strlen(argv[9]) / 2; - v->salt = kmalloc(v->salt_size, GFP_KERNEL); - if (!v->salt) { - ti->error = "Cannot allocate salt"; - r = -ENOMEM; - goto bad; - } - if (strlen(argv[9]) != v->salt_size * 2 || - hex2bin(v->salt, argv[9], v->salt_size)) { - ti->error = "Invalid salt"; - r = -EINVAL; - goto bad; - } - } + r = verity_setup_salt_and_hashstate(v, argv[9]); + if (r) + goto bad; argv += 10; argc -= 10; @@ -1513,8 +1497,7 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ti->per_io_data_size = sizeof(struct dm_verity_io) + - v->ahash_reqsize + v->digest_size * 2; + ti->per_io_data_size = sizeof(struct dm_verity_io) + v->hash_reqsize; r = verity_fec_ctr(v); if (r) @@ -1539,14 +1522,6 @@ bad: } /* - * Check whether a DM target is a verity target. - */ -bool dm_is_verity_target(struct dm_target *ti) -{ - return ti->type->module == THIS_MODULE; -} - -/* * Get the verity mode (error behavior) of a verity target. * * Returns the verity mode of the target, or -EINVAL if 'ti' is not a verity @@ -1599,6 +1574,14 @@ static struct target_type verity_target = { }; module_dm(verity); +/* + * Check whether a DM target is a verity target. + */ +bool dm_is_verity_target(struct dm_target *ti) +{ + return ti->type == &verity_target; +} + MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>"); MODULE_AUTHOR("Mandeep Baines <msb@chromium.org>"); MODULE_AUTHOR("Will Drewry <wad@chromium.org>"); diff --git a/drivers/md/dm-verity-verify-sig.c b/drivers/md/dm-verity-verify-sig.c index 4836508ea50c..d351d7d39c60 100644 --- a/drivers/md/dm-verity-verify-sig.c +++ b/drivers/md/dm-verity-verify-sig.c @@ -126,6 +126,13 @@ int verity_verify_root_hash(const void *root_hash, size_t root_hash_len, NULL, #endif VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL); +#ifdef CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING + if (ret == -ENOKEY) + ret = verify_pkcs7_signature(root_hash, root_hash_len, sig_data, + sig_len, + VERIFY_USE_PLATFORM_KEYRING, + VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL); +#endif return ret; } diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 20b1bcf03474..aac3a1b1d94a 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -39,9 +39,11 @@ struct dm_verity { struct dm_target *ti; struct dm_bufio_client *bufio; char *alg_name; - struct crypto_ahash *tfm; + struct crypto_ahash *ahash_tfm; /* either this or shash_tfm is set */ + struct crypto_shash *shash_tfm; /* either this or ahash_tfm is set */ u8 *root_digest; /* digest of the root block */ u8 *salt; /* salt: its size is salt_size */ + u8 *initial_hashstate; /* salted initial state, if shash_tfm is set */ u8 *zero_digest; /* digest for a zero block */ unsigned int salt_size; sector_t data_start; /* data offset in 512-byte sectors */ @@ -56,7 +58,7 @@ struct dm_verity { bool hash_failed:1; /* set if hash of any block failed */ bool use_bh_wq:1; /* try to verify in BH wq before normal work-queue */ unsigned int digest_size; /* digest size for the current hash algorithm */ - unsigned int ahash_reqsize;/* the size of temporary space for crypto */ + unsigned int hash_reqsize; /* the size of temporary space for crypto */ enum verity_mode mode; /* mode for handling verification errors */ unsigned int corrupted_errs;/* Number of errors for corrupted blocks */ @@ -89,45 +91,36 @@ struct dm_verity_io { struct work_struct work; struct work_struct bh_work; - char *recheck_buffer; + u8 real_digest[HASH_MAX_DIGESTSIZE]; + u8 want_digest[HASH_MAX_DIGESTSIZE]; /* - * Three variably-size fields follow this struct: - * - * u8 hash_req[v->ahash_reqsize]; - * u8 real_digest[v->digest_size]; - * u8 want_digest[v->digest_size]; - * - * To access them use: verity_io_hash_req(), verity_io_real_digest() - * and verity_io_want_digest(). + * This struct is followed by a variable-sized hash request of size + * v->hash_reqsize, either a struct ahash_request or a struct shash_desc + * (depending on whether ahash_tfm or shash_tfm is being used). To + * access it, use verity_io_hash_req(). */ }; -static inline struct ahash_request *verity_io_hash_req(struct dm_verity *v, - struct dm_verity_io *io) +static inline void *verity_io_hash_req(struct dm_verity *v, + struct dm_verity_io *io) { - return (struct ahash_request *)(io + 1); + return io + 1; } static inline u8 *verity_io_real_digest(struct dm_verity *v, struct dm_verity_io *io) { - return (u8 *)(io + 1) + v->ahash_reqsize; + return io->real_digest; } static inline u8 *verity_io_want_digest(struct dm_verity *v, struct dm_verity_io *io) { - return (u8 *)(io + 1) + v->ahash_reqsize + v->digest_size; + return io->want_digest; } -extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, - struct bvec_iter *iter, - int (*process)(struct dm_verity *v, - struct dm_verity_io *io, - u8 *data, size_t len)); - -extern int verity_hash(struct dm_verity *v, struct ahash_request *req, +extern int verity_hash(struct dm_verity *v, struct dm_verity_io *io, const u8 *data, size_t len, u8 *digest, bool may_sleep); extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index cd0ee144973f..6141fc25d842 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -996,8 +996,8 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits) limits->logical_block_size = DMZ_BLOCK_SIZE; limits->physical_block_size = DMZ_BLOCK_SIZE; - blk_limits_io_min(limits, DMZ_BLOCK_SIZE); - blk_limits_io_opt(limits, DMZ_BLOCK_SIZE); + limits->io_min = DMZ_BLOCK_SIZE; + limits->io_opt = DMZ_BLOCK_SIZE; limits->discard_alignment = 0; limits->discard_granularity = DMZ_BLOCK_SIZE; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4b1b69e576a5..97fab2087df8 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -11,6 +11,7 @@ #include "dm-uevent.h" #include "dm-ima.h" +#include <linux/bio-integrity.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mutex.h> @@ -645,7 +646,7 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, /* Set default bdev, but target must bio_set_dev() before issuing IO */ clone->bi_bdev = md->disk->part0; - if (unlikely(ti->needs_bio_set_dev)) + if (likely(ti != NULL) && unlikely(ti->needs_bio_set_dev)) bio_set_dev(clone, md->disk->part0); if (len) { @@ -1107,7 +1108,7 @@ static void clone_endio(struct bio *bio) blk_status_t error = bio->bi_status; struct dm_target_io *tio = clone_to_tio(bio); struct dm_target *ti = tio->ti; - dm_endio_fn endio = ti->type->end_io; + dm_endio_fn endio = likely(ti != NULL) ? ti->type->end_io : NULL; struct dm_io *io = tio->io; struct mapped_device *md = io->md; @@ -1154,7 +1155,7 @@ static void clone_endio(struct bio *bio) } if (static_branch_unlikely(&swap_bios_enabled) && - unlikely(swap_bios_limit(ti, bio))) + likely(ti != NULL) && unlikely(swap_bios_limit(ti, bio))) up(&md->swap_bios_semaphore); free_tio(bio); @@ -1553,17 +1554,43 @@ static void __send_empty_flush(struct clone_info *ci) ci->sector_count = 0; ci->io->tio.clone.bi_iter.bi_size = 0; - for (unsigned int i = 0; i < t->num_targets; i++) { - unsigned int bios; - struct dm_target *ti = dm_table_get_target(t, i); + if (!t->flush_bypasses_map) { + for (unsigned int i = 0; i < t->num_targets; i++) { + unsigned int bios; + struct dm_target *ti = dm_table_get_target(t, i); - if (unlikely(ti->num_flush_bios == 0)) - continue; + if (unlikely(ti->num_flush_bios == 0)) + continue; - atomic_add(ti->num_flush_bios, &ci->io->io_count); - bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, - NULL, GFP_NOWAIT); - atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count); + atomic_add(ti->num_flush_bios, &ci->io->io_count); + bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, + NULL, GFP_NOWAIT); + atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count); + } + } else { + /* + * Note that there's no need to grab t->devices_lock here + * because the targets that support flush optimization don't + * modify the list of devices. + */ + struct list_head *devices = dm_table_get_devices(t); + unsigned int len = 0; + struct dm_dev_internal *dd; + list_for_each_entry(dd, devices, list) { + struct bio *clone; + /* + * Note that the structure dm_target_io is not + * associated with any target (because the device may be + * used by multiple targets), so we set tio->ti = NULL. + * We must check for NULL in the I/O processing path, to + * avoid NULL pointer dereference. + */ + clone = alloc_tio(ci, NULL, 0, &len, GFP_NOIO); + atomic_add(1, &ci->io->io_count); + bio_set_dev(clone, dd->dm_dev->bdev); + clone->bi_end_io = clone_endio; + dm_submit_bio_remap(clone, NULL); + } } /* @@ -1631,14 +1658,10 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci, case REQ_OP_SECURE_ERASE: num_bios = ti->num_secure_erase_bios; max_sectors = limits->max_secure_erase_sectors; - if (ti->max_secure_erase_granularity) - max_granularity = max_sectors; break; case REQ_OP_WRITE_ZEROES: num_bios = ti->num_write_zeroes_bios; max_sectors = limits->max_write_zeroes_sectors; - if (ti->max_write_zeroes_granularity) - max_granularity = max_sectors; break; default: break; diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 6ce75c82969c..1d0db62f0351 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -897,7 +897,7 @@ static int join(struct mddev *mddev, int nodes) memset(str, 0, 64); sprintf(str, "%pU", mddev->uuid); ret = dlm_new_lockspace(str, mddev->bitmap_info.cluster_name, - 0, LVB_SIZE, &md_ls_ops, mddev, + DLM_LSFL_SOFTIRQ, LVB_SIZE, &md_ls_ops, mddev, &ops_rv, &cinfo->lockspace); if (ret) goto err; diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c index 798c9c53a343..157c9bd2fed7 100644 --- a/drivers/md/persistent-data/dm-array.c +++ b/drivers/md/persistent-data/dm-array.c @@ -38,7 +38,7 @@ struct array_block { */ #define CSUM_XOR 595846735 -static void array_block_prepare_for_write(struct dm_block_validator *v, +static void array_block_prepare_for_write(const struct dm_block_validator *v, struct dm_block *b, size_t size_of_block) { @@ -50,7 +50,7 @@ static void array_block_prepare_for_write(struct dm_block_validator *v, CSUM_XOR)); } -static int array_block_check(struct dm_block_validator *v, +static int array_block_check(const struct dm_block_validator *v, struct dm_block *b, size_t size_of_block) { @@ -77,7 +77,7 @@ static int array_block_check(struct dm_block_validator *v, return 0; } -static struct dm_block_validator array_validator = { +static const struct dm_block_validator array_validator = { .name = "array", .prepare_for_write = array_block_prepare_for_write, .check = array_block_check diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index b17b54df673b..1ef71e5fcde7 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -345,7 +345,7 @@ void *dm_block_data(struct dm_block *b) EXPORT_SYMBOL_GPL(dm_block_data); struct buffer_aux { - struct dm_block_validator *validator; + const struct dm_block_validator *validator; int write_locked; #ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING @@ -441,7 +441,7 @@ dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm) static int dm_bm_validate_buffer(struct dm_block_manager *bm, struct dm_buffer *buf, struct buffer_aux *aux, - struct dm_block_validator *v) + const struct dm_block_validator *v) { if (unlikely(!aux->validator)) { int r; @@ -467,7 +467,7 @@ static int dm_bm_validate_buffer(struct dm_block_manager *bm, return 0; } int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, - struct dm_block_validator *v, + const struct dm_block_validator *v, struct dm_block **result) { struct buffer_aux *aux; @@ -500,7 +500,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, EXPORT_SYMBOL_GPL(dm_bm_read_lock); int dm_bm_write_lock(struct dm_block_manager *bm, - dm_block_t b, struct dm_block_validator *v, + dm_block_t b, const struct dm_block_validator *v, struct dm_block **result) { struct buffer_aux *aux; @@ -536,7 +536,7 @@ int dm_bm_write_lock(struct dm_block_manager *bm, EXPORT_SYMBOL_GPL(dm_bm_write_lock); int dm_bm_read_try_lock(struct dm_block_manager *bm, - dm_block_t b, struct dm_block_validator *v, + dm_block_t b, const struct dm_block_validator *v, struct dm_block **result) { struct buffer_aux *aux; @@ -569,7 +569,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm, } int dm_bm_write_lock_zero(struct dm_block_manager *bm, - dm_block_t b, struct dm_block_validator *v, + dm_block_t b, const struct dm_block_validator *v, struct dm_block **result) { int r; diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h index f706d3de8d5a..b1998968594c 100644 --- a/drivers/md/persistent-data/dm-block-manager.h +++ b/drivers/md/persistent-data/dm-block-manager.h @@ -51,12 +51,14 @@ dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm); */ struct dm_block_validator { const char *name; - void (*prepare_for_write)(struct dm_block_validator *v, struct dm_block *b, size_t block_size); + void (*prepare_for_write)(const struct dm_block_validator *v, + struct dm_block *b, size_t block_size); /* * Return 0 if the checksum is valid or < 0 on error. */ - int (*check)(struct dm_block_validator *v, struct dm_block *b, size_t block_size); + int (*check)(const struct dm_block_validator *v, + struct dm_block *b, size_t block_size); }; /*----------------------------------------------------------------*/ @@ -73,11 +75,11 @@ struct dm_block_validator { * written back to the disk sometime after dm_bm_unlock is called. */ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, - struct dm_block_validator *v, + const struct dm_block_validator *v, struct dm_block **result); int dm_bm_write_lock(struct dm_block_manager *bm, dm_block_t b, - struct dm_block_validator *v, + const struct dm_block_validator *v, struct dm_block **result); /* @@ -85,7 +87,7 @@ int dm_bm_write_lock(struct dm_block_manager *bm, dm_block_t b, * available immediately. */ int dm_bm_read_try_lock(struct dm_block_manager *bm, dm_block_t b, - struct dm_block_validator *v, + const struct dm_block_validator *v, struct dm_block **result); /* @@ -93,7 +95,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm, dm_block_t b, * overwrite the block completely. It saves a disk read. */ int dm_bm_write_lock_zero(struct dm_block_manager *bm, dm_block_t b, - struct dm_block_validator *v, + const struct dm_block_validator *v, struct dm_block **result); void dm_bm_unlock(struct dm_block *b); diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index 7ed2ce656fcc..acebd32858a7 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -138,7 +138,7 @@ static inline uint64_t value64(struct btree_node *n, uint32_t index) */ int lower_bound(struct btree_node *n, uint64_t key); -extern struct dm_block_validator btree_node_validator; +extern const struct dm_block_validator btree_node_validator; /* * Value type for upper levels of multi-level btrees. diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index 7540383b7cf3..c46fc50c274e 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -16,7 +16,7 @@ #define BTREE_CSUM_XOR 121107 -static void node_prepare_for_write(struct dm_block_validator *v, +static void node_prepare_for_write(const struct dm_block_validator *v, struct dm_block *b, size_t block_size) { @@ -29,7 +29,7 @@ static void node_prepare_for_write(struct dm_block_validator *v, BTREE_CSUM_XOR)); } -static int node_check(struct dm_block_validator *v, +static int node_check(const struct dm_block_validator *v, struct dm_block *b, size_t block_size) { @@ -81,7 +81,7 @@ static int node_check(struct dm_block_validator *v, return 0; } -struct dm_block_validator btree_node_validator = { +const struct dm_block_validator btree_node_validator = { .name = "btree_node", .prepare_for_write = node_prepare_for_write, .check = node_check diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 591d1a43d035..3a19124ee279 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -22,7 +22,7 @@ */ #define INDEX_CSUM_XOR 160478 -static void index_prepare_for_write(struct dm_block_validator *v, +static void index_prepare_for_write(const struct dm_block_validator *v, struct dm_block *b, size_t block_size) { @@ -34,7 +34,7 @@ static void index_prepare_for_write(struct dm_block_validator *v, INDEX_CSUM_XOR)); } -static int index_check(struct dm_block_validator *v, +static int index_check(const struct dm_block_validator *v, struct dm_block *b, size_t block_size) { @@ -59,7 +59,7 @@ static int index_check(struct dm_block_validator *v, return 0; } -static struct dm_block_validator index_validator = { +static const struct dm_block_validator index_validator = { .name = "index", .prepare_for_write = index_prepare_for_write, .check = index_check @@ -72,7 +72,7 @@ static struct dm_block_validator index_validator = { */ #define BITMAP_CSUM_XOR 240779 -static void dm_bitmap_prepare_for_write(struct dm_block_validator *v, +static void dm_bitmap_prepare_for_write(const struct dm_block_validator *v, struct dm_block *b, size_t block_size) { @@ -84,7 +84,7 @@ static void dm_bitmap_prepare_for_write(struct dm_block_validator *v, BITMAP_CSUM_XOR)); } -static int dm_bitmap_check(struct dm_block_validator *v, +static int dm_bitmap_check(const struct dm_block_validator *v, struct dm_block *b, size_t block_size) { @@ -109,7 +109,7 @@ static int dm_bitmap_check(struct dm_block_validator *v, return 0; } -static struct dm_block_validator dm_sm_bitmap_validator = { +static const struct dm_block_validator dm_sm_bitmap_validator = { .name = "sm_bitmap", .prepare_for_write = dm_bitmap_prepare_for_write, .check = dm_bitmap_check, diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index c88fa6266203..c7ba4e6cbbc7 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -237,7 +237,7 @@ int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root) EXPORT_SYMBOL_GPL(dm_tm_commit); int dm_tm_new_block(struct dm_transaction_manager *tm, - struct dm_block_validator *v, + const struct dm_block_validator *v, struct dm_block **result) { int r; @@ -266,7 +266,7 @@ int dm_tm_new_block(struct dm_transaction_manager *tm, } static int __shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, - struct dm_block_validator *v, + const struct dm_block_validator *v, struct dm_block **result) { int r; @@ -306,7 +306,7 @@ static int __shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, } int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, - struct dm_block_validator *v, struct dm_block **result, + const struct dm_block_validator *v, struct dm_block **result, int *inc_children) { int r; @@ -331,7 +331,7 @@ int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, EXPORT_SYMBOL_GPL(dm_tm_shadow_block); int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b, - struct dm_block_validator *v, + const struct dm_block_validator *v, struct dm_block **blk) { if (tm->is_clone) { diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h index 01f7e650118d..61a8d10825ca 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.h +++ b/drivers/md/persistent-data/dm-transaction-manager.h @@ -64,7 +64,7 @@ int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *superblock) * Zeroes the new block and returns with write lock held. */ int dm_tm_new_block(struct dm_transaction_manager *tm, - struct dm_block_validator *v, + const struct dm_block_validator *v, struct dm_block **result); /* @@ -84,7 +84,7 @@ int dm_tm_new_block(struct dm_transaction_manager *tm, * it locked when you call this. */ int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, - struct dm_block_validator *v, + const struct dm_block_validator *v, struct dm_block **result, int *inc_children); /* @@ -92,7 +92,7 @@ int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, * on it outstanding then it'll block. */ int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b, - struct dm_block_validator *v, + const struct dm_block_validator *v, struct dm_block **result); void dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b); |