summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/Kconfig10
-rw-r--r--drivers/md/bcache/alloc.c64
-rw-r--r--drivers/md/bcache/bcache.h2
-rw-r--r--drivers/md/bcache/bset.c124
-rw-r--r--drivers/md/bcache/bset.h40
-rw-r--r--drivers/md/bcache/btree.c69
-rw-r--r--drivers/md/bcache/extents.c53
-rw-r--r--drivers/md/bcache/movinggc.c41
-rw-r--r--drivers/md/bcache/super.c3
-rw-r--r--drivers/md/bcache/sysfs.c4
-rw-r--r--drivers/md/bcache/util.c2
-rw-r--r--drivers/md/bcache/util.h67
-rw-r--r--drivers/md/bcache/writeback.c13
-rw-r--r--drivers/md/dm-cache-metadata.c15
-rw-r--r--drivers/md/dm-cache-target.c4
-rw-r--r--drivers/md/dm-clone-metadata.c6
-rw-r--r--drivers/md/dm-clone-target.c4
-rw-r--r--drivers/md/dm-core.h2
-rw-r--r--drivers/md/dm-crypt.c77
-rw-r--r--drivers/md/dm-ebs-target.c2
-rw-r--r--drivers/md/dm-era-target.c10
-rw-r--r--drivers/md/dm-init.c4
-rw-r--r--drivers/md/dm-integrity.c408
-rw-r--r--drivers/md/dm-io.c84
-rw-r--r--drivers/md/dm-linear.c1
-rw-r--r--drivers/md/dm-mpath.c11
-rw-r--r--drivers/md/dm-raid.c63
-rw-r--r--drivers/md/dm-stripe.c5
-rw-r--r--drivers/md/dm-table.c44
-rw-r--r--drivers/md/dm-thin-metadata.c6
-rw-r--r--drivers/md/dm-thin.c6
-rw-r--r--drivers/md/dm-vdo/dedupe.c5
-rw-r--r--drivers/md/dm-vdo/dm-vdo-target.c6
-rw-r--r--drivers/md/dm-vdo/indexer/index.c5
-rw-r--r--drivers/md/dm-vdo/int-map.c2
-rw-r--r--drivers/md/dm-vdo/repair.c27
-rw-r--r--drivers/md/dm-vdo/slab-depot.c14
-rw-r--r--drivers/md/dm-verity-fec.c32
-rw-r--r--drivers/md/dm-verity-fec.h6
-rw-r--r--drivers/md/dm-verity-target.c485
-rw-r--r--drivers/md/dm-verity-verify-sig.c7
-rw-r--r--drivers/md/dm-verity.h39
-rw-r--r--drivers/md/dm-zoned-target.c4
-rw-r--r--drivers/md/dm.c55
-rw-r--r--drivers/md/md-cluster.c2
-rw-r--r--drivers/md/persistent-data/dm-array.c6
-rw-r--r--drivers/md/persistent-data/dm-block-manager.c12
-rw-r--r--drivers/md/persistent-data/dm-block-manager.h14
-rw-r--r--drivers/md/persistent-data/dm-btree-internal.h2
-rw-r--r--drivers/md/persistent-data/dm-btree-spine.c6
-rw-r--r--drivers/md/persistent-data/dm-space-map-common.c12
-rw-r--r--drivers/md/persistent-data/dm-transaction-manager.c8
-rw-r--r--drivers/md/persistent-data/dm-transaction-manager.h6
53 files changed, 1193 insertions, 806 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 35b1080752cd..1e9db8e4acdf 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -540,6 +540,16 @@ config DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING
If unsure, say N.
+config DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING
+ bool "Verity data device root hash signature verification with platform keyring"
+ default DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING
+ depends on DM_VERITY_VERIFY_ROOTHASH_SIG
+ depends on INTEGRITY_PLATFORM_KEYRING
+ help
+ Rely also on the platform keyring to verify dm-verity signatures.
+
+ If unsure, say N.
+
config DM_VERITY_FEC
bool "Verity forward error correction support"
depends on DM_VERITY
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 48ce750bf70a..da50f6661bae 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -164,40 +164,68 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
* prio is worth 1/8th of what INITIAL_PRIO is worth.
*/
-#define bucket_prio(b) \
-({ \
- unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \
- \
- (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \
-})
+static inline unsigned int new_bucket_prio(struct cache *ca, struct bucket *b)
+{
+ unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8;
+
+ return (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b);
+}
+
+static inline bool new_bucket_max_cmp(const void *l, const void *r, void *args)
+{
+ struct bucket **lhs = (struct bucket **)l;
+ struct bucket **rhs = (struct bucket **)r;
+ struct cache *ca = args;
+
+ return new_bucket_prio(ca, *lhs) > new_bucket_prio(ca, *rhs);
+}
-#define bucket_max_cmp(l, r) (bucket_prio(l) < bucket_prio(r))
-#define bucket_min_cmp(l, r) (bucket_prio(l) > bucket_prio(r))
+static inline bool new_bucket_min_cmp(const void *l, const void *r, void *args)
+{
+ struct bucket **lhs = (struct bucket **)l;
+ struct bucket **rhs = (struct bucket **)r;
+ struct cache *ca = args;
+
+ return new_bucket_prio(ca, *lhs) < new_bucket_prio(ca, *rhs);
+}
+
+static inline void new_bucket_swap(void *l, void *r, void __always_unused *args)
+{
+ struct bucket **lhs = l, **rhs = r;
+
+ swap(*lhs, *rhs);
+}
static void invalidate_buckets_lru(struct cache *ca)
{
struct bucket *b;
- ssize_t i;
+ const struct min_heap_callbacks bucket_max_cmp_callback = {
+ .less = new_bucket_max_cmp,
+ .swp = new_bucket_swap,
+ };
+ const struct min_heap_callbacks bucket_min_cmp_callback = {
+ .less = new_bucket_min_cmp,
+ .swp = new_bucket_swap,
+ };
- ca->heap.used = 0;
+ ca->heap.nr = 0;
for_each_bucket(b, ca) {
if (!bch_can_invalidate_bucket(ca, b))
continue;
- if (!heap_full(&ca->heap))
- heap_add(&ca->heap, b, bucket_max_cmp);
- else if (bucket_max_cmp(b, heap_peek(&ca->heap))) {
+ if (!min_heap_full(&ca->heap))
+ min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca);
+ else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) {
ca->heap.data[0] = b;
- heap_sift(&ca->heap, 0, bucket_max_cmp);
+ min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca);
}
}
- for (i = ca->heap.used / 2 - 1; i >= 0; --i)
- heap_sift(&ca->heap, i, bucket_min_cmp);
+ min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca);
while (!fifo_full(&ca->free_inc)) {
- if (!heap_pop(&ca->heap, b, bucket_min_cmp)) {
+ if (!ca->heap.nr) {
/*
* We don't want to be calling invalidate_buckets()
* multiple times when it can't do anything
@@ -206,6 +234,8 @@ static void invalidate_buckets_lru(struct cache *ca)
wake_up_gc(ca->set);
return;
}
+ b = min_heap_peek(&ca->heap)[0];
+ min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca);
bch_invalidate_one_bucket(ca, b);
}
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 1d33e40d26ea..785b0d9008fa 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -458,7 +458,7 @@ struct cache {
/* Allocation stuff: */
struct bucket *buckets;
- DECLARE_HEAP(struct bucket *, heap);
+ DEFINE_MIN_HEAP(struct bucket *, cache_heap) heap;
/*
* If nonzero, we know we aren't going to find any buckets to invalidate
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 463eb13bd0b2..bd97d8626887 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -54,9 +54,11 @@ void bch_dump_bucket(struct btree_keys *b)
int __bch_count_data(struct btree_keys *b)
{
unsigned int ret = 0;
- struct btree_iter_stack iter;
+ struct btree_iter iter;
struct bkey *k;
+ min_heap_init(&iter.heap, NULL, MAX_BSETS);
+
if (b->ops->is_extents)
for_each_key(b, k, &iter)
ret += KEY_SIZE(k);
@@ -67,9 +69,11 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
{
va_list args;
struct bkey *k, *p = NULL;
- struct btree_iter_stack iter;
+ struct btree_iter iter;
const char *err;
+ min_heap_init(&iter.heap, NULL, MAX_BSETS);
+
for_each_key(b, k, &iter) {
if (b->ops->is_extents) {
err = "Keys out of order";
@@ -110,9 +114,9 @@ bug:
static void bch_btree_iter_next_check(struct btree_iter *iter)
{
- struct bkey *k = iter->data->k, *next = bkey_next(k);
+ struct bkey *k = iter->heap.data->k, *next = bkey_next(k);
- if (next < iter->data->end &&
+ if (next < iter->heap.data->end &&
bkey_cmp(k, iter->b->ops->is_extents ?
&START_KEY(next) : next) > 0) {
bch_dump_bucket(iter->b);
@@ -879,12 +883,14 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
unsigned int status = BTREE_INSERT_STATUS_NO_INSERT;
struct bset *i = bset_tree_last(b)->data;
struct bkey *m, *prev = NULL;
- struct btree_iter_stack iter;
+ struct btree_iter iter;
struct bkey preceding_key_on_stack = ZERO_KEY;
struct bkey *preceding_key_p = &preceding_key_on_stack;
BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
+ min_heap_init(&iter.heap, NULL, MAX_BSETS);
+
/*
* If k has preceding key, preceding_key_p will be set to address
* of k's preceding key; otherwise preceding_key_p will be set
@@ -895,9 +901,9 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
else
preceding_key(k, &preceding_key_p);
- m = bch_btree_iter_stack_init(b, &iter, preceding_key_p);
+ m = bch_btree_iter_init(b, &iter, preceding_key_p);
- if (b->ops->insert_fixup(b, k, &iter.iter, replace_key))
+ if (b->ops->insert_fixup(b, k, &iter, replace_key))
return status;
status = BTREE_INSERT_STATUS_INSERT;
@@ -1077,79 +1083,102 @@ struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
/* Btree iterator */
-typedef bool (btree_iter_cmp_fn)(struct btree_iter_set,
- struct btree_iter_set);
+typedef bool (new_btree_iter_cmp_fn)(const void *, const void *, void *);
+
+static inline bool new_btree_iter_cmp(const void *l, const void *r, void __always_unused *args)
+{
+ const struct btree_iter_set *_l = l;
+ const struct btree_iter_set *_r = r;
+
+ return bkey_cmp(_l->k, _r->k) <= 0;
+}
-static inline bool btree_iter_cmp(struct btree_iter_set l,
- struct btree_iter_set r)
+static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args)
{
- return bkey_cmp(l.k, r.k) > 0;
+ struct btree_iter_set *_iter1 = iter1;
+ struct btree_iter_set *_iter2 = iter2;
+
+ swap(*_iter1, *_iter2);
}
static inline bool btree_iter_end(struct btree_iter *iter)
{
- return !iter->used;
+ return !iter->heap.nr;
}
void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
struct bkey *end)
{
+ const struct min_heap_callbacks callbacks = {
+ .less = new_btree_iter_cmp,
+ .swp = new_btree_iter_swap,
+ };
+
if (k != end)
- BUG_ON(!heap_add(iter,
- ((struct btree_iter_set) { k, end }),
- btree_iter_cmp));
+ BUG_ON(!min_heap_push(&iter->heap,
+ &((struct btree_iter_set) { k, end }),
+ &callbacks,
+ NULL));
}
-static struct bkey *__bch_btree_iter_stack_init(struct btree_keys *b,
- struct btree_iter_stack *iter,
- struct bkey *search,
- struct bset_tree *start)
+static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
+ struct btree_iter *iter,
+ struct bkey *search,
+ struct bset_tree *start)
{
struct bkey *ret = NULL;
- iter->iter.size = ARRAY_SIZE(iter->stack_data);
- iter->iter.used = 0;
+ iter->heap.size = ARRAY_SIZE(iter->heap.preallocated);
+ iter->heap.nr = 0;
#ifdef CONFIG_BCACHE_DEBUG
- iter->iter.b = b;
+ iter->b = b;
#endif
for (; start <= bset_tree_last(b); start++) {
ret = bch_bset_search(b, start, search);
- bch_btree_iter_push(&iter->iter, ret, bset_bkey_last(start->data));
+ bch_btree_iter_push(iter, ret, bset_bkey_last(start->data));
}
return ret;
}
-struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
- struct btree_iter_stack *iter,
+struct bkey *bch_btree_iter_init(struct btree_keys *b,
+ struct btree_iter *iter,
struct bkey *search)
{
- return __bch_btree_iter_stack_init(b, iter, search, b->set);
+ return __bch_btree_iter_init(b, iter, search, b->set);
}
static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
- btree_iter_cmp_fn *cmp)
+ new_btree_iter_cmp_fn *cmp)
{
struct btree_iter_set b __maybe_unused;
struct bkey *ret = NULL;
+ const struct min_heap_callbacks callbacks = {
+ .less = cmp,
+ .swp = new_btree_iter_swap,
+ };
if (!btree_iter_end(iter)) {
bch_btree_iter_next_check(iter);
- ret = iter->data->k;
- iter->data->k = bkey_next(iter->data->k);
+ ret = iter->heap.data->k;
+ iter->heap.data->k = bkey_next(iter->heap.data->k);
- if (iter->data->k > iter->data->end) {
+ if (iter->heap.data->k > iter->heap.data->end) {
WARN_ONCE(1, "bset was corrupt!\n");
- iter->data->k = iter->data->end;
+ iter->heap.data->k = iter->heap.data->end;
}
- if (iter->data->k == iter->data->end)
- heap_pop(iter, b, cmp);
+ if (iter->heap.data->k == iter->heap.data->end) {
+ if (iter->heap.nr) {
+ b = min_heap_peek(&iter->heap)[0];
+ min_heap_pop(&iter->heap, &callbacks, NULL);
+ }
+ }
else
- heap_sift(iter, 0, cmp);
+ min_heap_sift_down(&iter->heap, 0, &callbacks, NULL);
}
return ret;
@@ -1157,7 +1186,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
struct bkey *bch_btree_iter_next(struct btree_iter *iter)
{
- return __bch_btree_iter_next(iter, btree_iter_cmp);
+ return __bch_btree_iter_next(iter, new_btree_iter_cmp);
}
@@ -1195,16 +1224,18 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out,
struct btree_iter *iter,
bool fixup, bool remove_stale)
{
- int i;
struct bkey *k, *last = NULL;
BKEY_PADDED(k) tmp;
bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale
? bch_ptr_bad
: bch_ptr_invalid;
+ const struct min_heap_callbacks callbacks = {
+ .less = b->ops->sort_cmp,
+ .swp = new_btree_iter_swap,
+ };
/* Heapify the iterator, using our comparison function */
- for (i = iter->used / 2 - 1; i >= 0; --i)
- heap_sift(iter, i, b->ops->sort_cmp);
+ min_heapify_all(&iter->heap, &callbacks, NULL);
while (!btree_iter_end(iter)) {
if (b->ops->sort_fixup && fixup)
@@ -1293,10 +1324,11 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
struct bset_sort_state *state)
{
size_t order = b->page_order, keys = 0;
- struct btree_iter_stack iter;
+ struct btree_iter iter;
int oldsize = bch_count_data(b);
- __bch_btree_iter_stack_init(b, &iter, NULL, &b->set[start]);
+ min_heap_init(&iter.heap, NULL, MAX_BSETS);
+ __bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
if (start) {
unsigned int i;
@@ -1307,7 +1339,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
order = get_order(__set_bytes(b->set->data, keys));
}
- __btree_sort(b, &iter.iter, start, order, false, state);
+ __btree_sort(b, &iter, start, order, false, state);
EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
}
@@ -1323,11 +1355,13 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
struct bset_sort_state *state)
{
uint64_t start_time = local_clock();
- struct btree_iter_stack iter;
+ struct btree_iter iter;
+
+ min_heap_init(&iter.heap, NULL, MAX_BSETS);
- bch_btree_iter_stack_init(b, &iter, NULL);
+ bch_btree_iter_init(b, &iter, NULL);
- btree_mergesort(b, new->set->data, &iter.iter, false, true);
+ btree_mergesort(b, new->set->data, &iter, false, true);
bch_time_stats_update(&state->time, start_time);
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 011f6062c4c0..f79441acd4c1 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -187,8 +187,9 @@ struct bset_tree {
};
struct btree_keys_ops {
- bool (*sort_cmp)(struct btree_iter_set l,
- struct btree_iter_set r);
+ bool (*sort_cmp)(const void *l,
+ const void *r,
+ void *args);
struct bkey *(*sort_fixup)(struct btree_iter *iter,
struct bkey *tmp);
bool (*insert_fixup)(struct btree_keys *b,
@@ -312,23 +313,17 @@ enum {
BTREE_INSERT_STATUS_FRONT_MERGE,
};
+struct btree_iter_set {
+ struct bkey *k, *end;
+};
+
/* Btree key iteration */
struct btree_iter {
- size_t size, used;
#ifdef CONFIG_BCACHE_DEBUG
struct btree_keys *b;
#endif
- struct btree_iter_set {
- struct bkey *k, *end;
- } data[];
-};
-
-/* Fixed-size btree_iter that can be allocated on the stack */
-
-struct btree_iter_stack {
- struct btree_iter iter;
- struct btree_iter_set stack_data[MAX_BSETS];
+ MIN_HEAP_PREALLOCATED(struct btree_iter_set, btree_iter_heap, MAX_BSETS) heap;
};
typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k);
@@ -340,9 +335,9 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
struct bkey *end);
-struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
- struct btree_iter_stack *iter,
- struct bkey *search);
+struct bkey *bch_btree_iter_init(struct btree_keys *b,
+ struct btree_iter *iter,
+ struct bkey *search);
struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
const struct bkey *search);
@@ -357,14 +352,13 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b,
return search ? __bch_bset_search(b, t, search) : t->data->start;
}
-#define for_each_key_filter(b, k, stack_iter, filter) \
- for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \
- ((k) = bch_btree_iter_next_filter(&((stack_iter)->iter), (b), \
- filter));)
+#define for_each_key_filter(b, k, iter, filter) \
+ for (bch_btree_iter_init((b), (iter), NULL); \
+ ((k) = bch_btree_iter_next_filter((iter), (b), filter));)
-#define for_each_key(b, k, stack_iter) \
- for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \
- ((k) = bch_btree_iter_next(&((stack_iter)->iter)));)
+#define for_each_key(b, k, iter) \
+ for (bch_btree_iter_init((b), (iter), NULL); \
+ ((k) = bch_btree_iter_next(iter));)
/* Sorting */
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 4e6ccf2c8a0b..ed40d8600656 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -149,19 +149,19 @@ void bch_btree_node_read_done(struct btree *b)
{
const char *err = "bad btree header";
struct bset *i = btree_bset_first(b);
- struct btree_iter *iter;
+ struct btree_iter iter;
/*
* c->fill_iter can allocate an iterator with more memory space
* than static MAX_BSETS.
* See the comment arount cache_set->fill_iter.
*/
- iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO);
- iter->size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size;
- iter->used = 0;
+ iter.heap.data = mempool_alloc(&b->c->fill_iter, GFP_NOIO);
+ iter.heap.size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size;
+ iter.heap.nr = 0;
#ifdef CONFIG_BCACHE_DEBUG
- iter->b = &b->keys;
+ iter.b = &b->keys;
#endif
if (!i->seq)
@@ -199,7 +199,7 @@ void bch_btree_node_read_done(struct btree *b)
if (i != b->keys.set[0].data && !i->keys)
goto err;
- bch_btree_iter_push(iter, i->start, bset_bkey_last(i));
+ bch_btree_iter_push(&iter, i->start, bset_bkey_last(i));
b->written += set_blocks(i, block_bytes(b->c->cache));
}
@@ -211,7 +211,7 @@ void bch_btree_node_read_done(struct btree *b)
if (i->seq == b->keys.set[0].data->seq)
goto err;
- bch_btree_sort_and_fix_extents(&b->keys, iter, &b->c->sort);
+ bch_btree_sort_and_fix_extents(&b->keys, &iter, &b->c->sort);
i = b->keys.set[0].data;
err = "short btree key";
@@ -223,7 +223,7 @@ void bch_btree_node_read_done(struct btree *b)
bch_bset_init_next(&b->keys, write_block(b),
bset_magic(&b->c->cache->sb));
out:
- mempool_free(iter, &b->c->fill_iter);
+ mempool_free(iter.heap.data, &b->c->fill_iter);
return;
err:
set_btree_node_io_error(b);
@@ -1309,9 +1309,11 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
uint8_t stale = 0;
unsigned int keys = 0, good_keys = 0;
struct bkey *k;
- struct btree_iter_stack iter;
+ struct btree_iter iter;
struct bset_tree *t;
+ min_heap_init(&iter.heap, NULL, MAX_BSETS);
+
gc->nodes++;
for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) {
@@ -1570,9 +1572,11 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
static unsigned int btree_gc_count_keys(struct btree *b)
{
struct bkey *k;
- struct btree_iter_stack iter;
+ struct btree_iter iter;
unsigned int ret = 0;
+ min_heap_init(&iter.heap, NULL, MAX_BSETS);
+
for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
ret += bkey_u64s(k);
@@ -1611,18 +1615,18 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
int ret = 0;
bool should_rewrite;
struct bkey *k;
- struct btree_iter_stack iter;
+ struct btree_iter iter;
struct gc_merge_info r[GC_MERGE_NODES];
struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1;
- bch_btree_iter_stack_init(&b->keys, &iter, &b->c->gc_done);
+ min_heap_init(&iter.heap, NULL, MAX_BSETS);
+ bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done);
for (i = r; i < r + ARRAY_SIZE(r); i++)
i->b = ERR_PTR(-EINTR);
while (1) {
- k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
- bch_ptr_bad);
+ k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
if (k) {
r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
true, b);
@@ -1917,7 +1921,9 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
{
int ret = 0;
struct bkey *k, *p = NULL;
- struct btree_iter_stack iter;
+ struct btree_iter iter;
+
+ min_heap_init(&iter.heap, NULL, MAX_BSETS);
for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid)
bch_initial_mark_key(b->c, b->level, k);
@@ -1925,10 +1931,10 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
bch_initial_mark_key(b->c, b->level + 1, &b->key);
if (b->level) {
- bch_btree_iter_stack_init(&b->keys, &iter, NULL);
+ bch_btree_iter_init(&b->keys, &iter, NULL);
do {
- k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
+ k = bch_btree_iter_next_filter(&iter, &b->keys,
bch_ptr_bad);
if (k) {
btree_node_prefetch(b, k);
@@ -1956,7 +1962,7 @@ static int bch_btree_check_thread(void *arg)
struct btree_check_info *info = arg;
struct btree_check_state *check_state = info->state;
struct cache_set *c = check_state->c;
- struct btree_iter_stack iter;
+ struct btree_iter iter;
struct bkey *k, *p;
int cur_idx, prev_idx, skip_nr;
@@ -1964,9 +1970,11 @@ static int bch_btree_check_thread(void *arg)
cur_idx = prev_idx = 0;
ret = 0;
+ min_heap_init(&iter.heap, NULL, MAX_BSETS);
+
/* root node keys are checked before thread created */
- bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
- k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
+ bch_btree_iter_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
BUG_ON(!k);
p = k;
@@ -1984,7 +1992,7 @@ static int bch_btree_check_thread(void *arg)
skip_nr = cur_idx - prev_idx;
while (skip_nr) {
- k = bch_btree_iter_next_filter(&iter.iter,
+ k = bch_btree_iter_next_filter(&iter,
&c->root->keys,
bch_ptr_bad);
if (k)
@@ -2057,9 +2065,11 @@ int bch_btree_check(struct cache_set *c)
int ret = 0;
int i;
struct bkey *k = NULL;
- struct btree_iter_stack iter;
+ struct btree_iter iter;
struct btree_check_state check_state;
+ min_heap_init(&iter.heap, NULL, MAX_BSETS);
+
/* check and mark root node keys */
for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
bch_initial_mark_key(c, c->root->level, k);
@@ -2553,11 +2563,12 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
if (b->level) {
struct bkey *k;
- struct btree_iter_stack iter;
+ struct btree_iter iter;
- bch_btree_iter_stack_init(&b->keys, &iter, from);
+ min_heap_init(&iter.heap, NULL, MAX_BSETS);
+ bch_btree_iter_init(&b->keys, &iter, from);
- while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
+ while ((k = bch_btree_iter_next_filter(&iter, &b->keys,
bch_ptr_bad))) {
ret = bcache_btree(map_nodes_recurse, k, b,
op, from, fn, flags);
@@ -2586,12 +2597,12 @@ int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
{
int ret = MAP_CONTINUE;
struct bkey *k;
- struct btree_iter_stack iter;
+ struct btree_iter iter;
- bch_btree_iter_stack_init(&b->keys, &iter, from);
+ min_heap_init(&iter.heap, NULL, MAX_BSETS);
+ bch_btree_iter_init(&b->keys, &iter, from);
- while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
- bch_ptr_bad))) {
+ while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) {
ret = !b->level
? fn(op, b, k)
: bcache_btree(map_keys_recurse, k,
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index d626ffcbecb9..a7221e5dbe81 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -33,15 +33,16 @@ static void sort_key_next(struct btree_iter *iter,
i->k = bkey_next(i->k);
if (i->k == i->end)
- *i = iter->data[--iter->used];
+ *i = iter->heap.data[--iter->heap.nr];
}
-static bool bch_key_sort_cmp(struct btree_iter_set l,
- struct btree_iter_set r)
+static bool new_bch_key_sort_cmp(const void *l, const void *r, void *args)
{
- int64_t c = bkey_cmp(l.k, r.k);
+ struct btree_iter_set *_l = (struct btree_iter_set *)l;
+ struct btree_iter_set *_r = (struct btree_iter_set *)r;
+ int64_t c = bkey_cmp(_l->k, _r->k);
- return c ? c > 0 : l.k < r.k;
+ return !(c ? c > 0 : _l->k < _r->k);
}
static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
@@ -238,7 +239,7 @@ static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk,
}
const struct btree_keys_ops bch_btree_keys_ops = {
- .sort_cmp = bch_key_sort_cmp,
+ .sort_cmp = new_bch_key_sort_cmp,
.insert_fixup = bch_btree_ptr_insert_fixup,
.key_invalid = bch_btree_ptr_invalid,
.key_bad = bch_btree_ptr_bad,
@@ -255,22 +256,36 @@ const struct btree_keys_ops bch_btree_keys_ops = {
* Necessary for btree_sort_fixup() - if there are multiple keys that compare
* equal in different sets, we have to process them newest to oldest.
*/
-static bool bch_extent_sort_cmp(struct btree_iter_set l,
- struct btree_iter_set r)
+
+static bool new_bch_extent_sort_cmp(const void *l, const void *r, void __always_unused *args)
+{
+ struct btree_iter_set *_l = (struct btree_iter_set *)l;
+ struct btree_iter_set *_r = (struct btree_iter_set *)r;
+ int64_t c = bkey_cmp(&START_KEY(_l->k), &START_KEY(_r->k));
+
+ return !(c ? c > 0 : _l->k < _r->k);
+}
+
+static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args)
{
- int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));
+ struct btree_iter_set *_iter1 = iter1;
+ struct btree_iter_set *_iter2 = iter2;
- return c ? c > 0 : l.k < r.k;
+ swap(*_iter1, *_iter2);
}
static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
struct bkey *tmp)
{
- while (iter->used > 1) {
- struct btree_iter_set *top = iter->data, *i = top + 1;
-
- if (iter->used > 2 &&
- bch_extent_sort_cmp(i[0], i[1]))
+ const struct min_heap_callbacks callbacks = {
+ .less = new_bch_extent_sort_cmp,
+ .swp = new_btree_iter_swap,
+ };
+ while (iter->heap.nr > 1) {
+ struct btree_iter_set *top = iter->heap.data, *i = top + 1;
+
+ if (iter->heap.nr > 2 &&
+ !new_bch_extent_sort_cmp(&i[0], &i[1], NULL))
i++;
if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
@@ -278,7 +293,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
if (!KEY_SIZE(i->k)) {
sort_key_next(iter, i);
- heap_sift(iter, i - top, bch_extent_sort_cmp);
+ min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL);
continue;
}
@@ -288,7 +303,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
else
bch_cut_front(top->k, i->k);
- heap_sift(iter, i - top, bch_extent_sort_cmp);
+ min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL);
} else {
/* can't happen because of comparison func */
BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
@@ -298,7 +313,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
bch_cut_back(&START_KEY(i->k), tmp);
bch_cut_front(i->k, top->k);
- heap_sift(iter, 0, bch_extent_sort_cmp);
+ min_heap_sift_down(&iter->heap, 0, &callbacks, NULL);
return tmp;
} else {
@@ -618,7 +633,7 @@ static bool bch_extent_merge(struct btree_keys *bk,
}
const struct btree_keys_ops bch_extent_keys_ops = {
- .sort_cmp = bch_extent_sort_cmp,
+ .sort_cmp = new_bch_extent_sort_cmp,
.sort_fixup = bch_extent_sort_fixup,
.insert_fixup = bch_extent_insert_fixup,
.key_invalid = bch_extent_invalid,
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index ebd500bdf0b2..7f482729c56d 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -182,16 +182,27 @@ err: if (!IS_ERR_OR_NULL(w->private))
closure_sync(&cl);
}
-static bool bucket_cmp(struct bucket *l, struct bucket *r)
+static bool new_bucket_cmp(const void *l, const void *r, void __always_unused *args)
{
- return GC_SECTORS_USED(l) < GC_SECTORS_USED(r);
+ struct bucket **_l = (struct bucket **)l;
+ struct bucket **_r = (struct bucket **)r;
+
+ return GC_SECTORS_USED(*_l) >= GC_SECTORS_USED(*_r);
+}
+
+static void new_bucket_swap(void *l, void *r, void __always_unused *args)
+{
+ struct bucket **_l = l;
+ struct bucket **_r = r;
+
+ swap(*_l, *_r);
}
static unsigned int bucket_heap_top(struct cache *ca)
{
struct bucket *b;
- return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0;
+ return (b = min_heap_peek(&ca->heap)[0]) ? GC_SECTORS_USED(b) : 0;
}
void bch_moving_gc(struct cache_set *c)
@@ -199,6 +210,10 @@ void bch_moving_gc(struct cache_set *c)
struct cache *ca = c->cache;
struct bucket *b;
unsigned long sectors_to_move, reserve_sectors;
+ const struct min_heap_callbacks callbacks = {
+ .less = new_bucket_cmp,
+ .swp = new_bucket_swap,
+ };
if (!c->copy_gc_enabled)
return;
@@ -209,7 +224,7 @@ void bch_moving_gc(struct cache_set *c)
reserve_sectors = ca->sb.bucket_size *
fifo_used(&ca->free[RESERVE_MOVINGGC]);
- ca->heap.used = 0;
+ ca->heap.nr = 0;
for_each_bucket(b, ca) {
if (GC_MARK(b) == GC_MARK_METADATA ||
@@ -218,25 +233,31 @@ void bch_moving_gc(struct cache_set *c)
atomic_read(&b->pin))
continue;
- if (!heap_full(&ca->heap)) {
+ if (!min_heap_full(&ca->heap)) {
sectors_to_move += GC_SECTORS_USED(b);
- heap_add(&ca->heap, b, bucket_cmp);
- } else if (bucket_cmp(b, heap_peek(&ca->heap))) {
+ min_heap_push(&ca->heap, &b, &callbacks, NULL);
+ } else if (!new_bucket_cmp(&b, min_heap_peek(&ca->heap), ca)) {
sectors_to_move -= bucket_heap_top(ca);
sectors_to_move += GC_SECTORS_USED(b);
ca->heap.data[0] = b;
- heap_sift(&ca->heap, 0, bucket_cmp);
+ min_heap_sift_down(&ca->heap, 0, &callbacks, NULL);
}
}
while (sectors_to_move > reserve_sectors) {
- heap_pop(&ca->heap, b, bucket_cmp);
+ if (ca->heap.nr) {
+ b = min_heap_peek(&ca->heap)[0];
+ min_heap_pop(&ca->heap, &callbacks, NULL);
+ }
sectors_to_move -= GC_SECTORS_USED(b);
}
- while (heap_pop(&ca->heap, b, bucket_cmp))
+ while (ca->heap.nr) {
+ b = min_heap_peek(&ca->heap)[0];
+ min_heap_pop(&ca->heap, &callbacks, NULL);
SET_GC_MOVE(b, 1);
+ }
mutex_unlock(&c->bucket_lock);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index b5d6ef430b86..e7abfdd77c3b 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1907,8 +1907,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
INIT_LIST_HEAD(&c->btree_cache_freed);
INIT_LIST_HEAD(&c->data_buckets);
- iter_size = sizeof(struct btree_iter) +
- ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) *
+ iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) *
sizeof(struct btree_iter_set);
c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL);
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 826b14cae4e5..e8f696cb58c0 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -660,7 +660,9 @@ static unsigned int bch_root_usage(struct cache_set *c)
unsigned int bytes = 0;
struct bkey *k;
struct btree *b;
- struct btree_iter_stack iter;
+ struct btree_iter iter;
+
+ min_heap_init(&iter.heap, NULL, MAX_BSETS);
goto lock_root;
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index ae380bc3992e..410d8cb49e50 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * random utiility code, for bcache but in theory not specific to bcache
+ * random utility code, for bcache but in theory not specific to bcache
*
* Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
* Copyright 2012 Google, Inc.
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index f61ab1bada6c..539454d8e2d0 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/sched/clock.h>
#include <linux/llist.h>
+#include <linux/min_heap.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
@@ -30,16 +31,10 @@ struct closure;
#endif
-#define DECLARE_HEAP(type, name) \
- struct { \
- size_t size, used; \
- type *data; \
- } name
-
#define init_heap(heap, _size, gfp) \
({ \
size_t _bytes; \
- (heap)->used = 0; \
+ (heap)->nr = 0; \
(heap)->size = (_size); \
_bytes = (heap)->size * sizeof(*(heap)->data); \
(heap)->data = kvmalloc(_bytes, (gfp) & GFP_KERNEL); \
@@ -52,64 +47,6 @@ do { \
(heap)->data = NULL; \
} while (0)
-#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j])
-
-#define heap_sift(h, i, cmp) \
-do { \
- size_t _r, _j = i; \
- \
- for (; _j * 2 + 1 < (h)->used; _j = _r) { \
- _r = _j * 2 + 1; \
- if (_r + 1 < (h)->used && \
- cmp((h)->data[_r], (h)->data[_r + 1])) \
- _r++; \
- \
- if (cmp((h)->data[_r], (h)->data[_j])) \
- break; \
- heap_swap(h, _r, _j); \
- } \
-} while (0)
-
-#define heap_sift_down(h, i, cmp) \
-do { \
- while (i) { \
- size_t p = (i - 1) / 2; \
- if (cmp((h)->data[i], (h)->data[p])) \
- break; \
- heap_swap(h, i, p); \
- i = p; \
- } \
-} while (0)
-
-#define heap_add(h, d, cmp) \
-({ \
- bool _r = !heap_full(h); \
- if (_r) { \
- size_t _i = (h)->used++; \
- (h)->data[_i] = d; \
- \
- heap_sift_down(h, _i, cmp); \
- heap_sift(h, _i, cmp); \
- } \
- _r; \
-})
-
-#define heap_pop(h, d, cmp) \
-({ \
- bool _r = (h)->used; \
- if (_r) { \
- (d) = (h)->data[0]; \
- (h)->used--; \
- heap_swap(h, 0, (h)->used); \
- heap_sift(h, 0, cmp); \
- } \
- _r; \
-})
-
-#define heap_peek(h) ((h)->used ? (h)->data[0] : NULL)
-
-#define heap_full(h) ((h)->used == (h)->size)
-
#define DECLARE_FIFO(type, name) \
struct { \
size_t front, back, size, mask; \
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 792e070ccf38..c1d28e365910 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -908,15 +908,16 @@ static int bch_dirty_init_thread(void *arg)
struct dirty_init_thrd_info *info = arg;
struct bch_dirty_init_state *state = info->state;
struct cache_set *c = state->c;
- struct btree_iter_stack iter;
+ struct btree_iter iter;
struct bkey *k, *p;
int cur_idx, prev_idx, skip_nr;
k = p = NULL;
prev_idx = 0;
- bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
- k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
+ min_heap_init(&iter.heap, NULL, MAX_BSETS);
+ bch_btree_iter_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
BUG_ON(!k);
p = k;
@@ -930,7 +931,7 @@ static int bch_dirty_init_thread(void *arg)
skip_nr = cur_idx - prev_idx;
while (skip_nr) {
- k = bch_btree_iter_next_filter(&iter.iter,
+ k = bch_btree_iter_next_filter(&iter,
&c->root->keys,
bch_ptr_bad);
if (k)
@@ -979,11 +980,13 @@ void bch_sectors_dirty_init(struct bcache_device *d)
int i;
struct btree *b = NULL;
struct bkey *k = NULL;
- struct btree_iter_stack iter;
+ struct btree_iter iter;
struct sectors_dirty_init op;
struct cache_set *c = d->c;
struct bch_dirty_init_state state;
+ min_heap_init(&iter.heap, NULL, MAX_BSETS);
+
retry_lock:
b = c->root;
rw_lock(0, b, b->level);
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 96751cd3d181..24cd87fddf75 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -170,7 +170,7 @@ struct dm_cache_metadata {
*/
#define SUPERBLOCK_CSUM_XOR 9031977
-static void sb_prepare_for_write(struct dm_block_validator *v,
+static void sb_prepare_for_write(const struct dm_block_validator *v,
struct dm_block *b,
size_t sb_block_size)
{
@@ -195,7 +195,7 @@ static int check_metadata_version(struct cache_disk_superblock *disk_super)
return 0;
}
-static int sb_check(struct dm_block_validator *v,
+static int sb_check(const struct dm_block_validator *v,
struct dm_block *b,
size_t sb_block_size)
{
@@ -228,7 +228,7 @@ static int sb_check(struct dm_block_validator *v,
return check_metadata_version(disk_super);
}
-static struct dm_block_validator sb_validator = {
+static const struct dm_block_validator sb_validator = {
.name = "superblock",
.prepare_for_write = sb_prepare_for_write,
.check = sb_check
@@ -1282,15 +1282,6 @@ int dm_cache_insert_mapping(struct dm_cache_metadata *cmd,
return r;
}
-struct thunk {
- load_mapping_fn fn;
- void *context;
-
- struct dm_cache_metadata *cmd;
- bool respect_dirty_flags;
- bool hints_valid;
-};
-
static bool policy_unchanged(struct dm_cache_metadata *cmd,
struct dm_cache_policy *policy)
{
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 2d8dd9283ff4..17f0fab1e254 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -3416,8 +3416,8 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
*/
if (io_opt_sectors < cache->sectors_per_block ||
do_div(io_opt_sectors, cache->sectors_per_block)) {
- blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
- blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
+ limits->io_min = cache->sectors_per_block << SECTOR_SHIFT;
+ limits->io_opt = cache->sectors_per_block << SECTOR_SHIFT;
}
disable_passdown_if_not_supported(cache);
diff --git a/drivers/md/dm-clone-metadata.c b/drivers/md/dm-clone-metadata.c
index 47c1fa7aad8b..2db84cd2202b 100644
--- a/drivers/md/dm-clone-metadata.c
+++ b/drivers/md/dm-clone-metadata.c
@@ -163,7 +163,7 @@ struct dm_clone_metadata {
/*
* Superblock validation.
*/
-static void sb_prepare_for_write(struct dm_block_validator *v,
+static void sb_prepare_for_write(const struct dm_block_validator *v,
struct dm_block *b, size_t sb_block_size)
{
struct superblock_disk *sb;
@@ -177,7 +177,7 @@ static void sb_prepare_for_write(struct dm_block_validator *v,
sb->csum = cpu_to_le32(csum);
}
-static int sb_check(struct dm_block_validator *v, struct dm_block *b,
+static int sb_check(const struct dm_block_validator *v, struct dm_block *b,
size_t sb_block_size)
{
struct superblock_disk *sb;
@@ -220,7 +220,7 @@ static int sb_check(struct dm_block_validator *v, struct dm_block *b,
return 0;
}
-static struct dm_block_validator sb_validator = {
+static const struct dm_block_validator sb_validator = {
.name = "superblock",
.prepare_for_write = sb_prepare_for_write,
.check = sb_check
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index b4384a8b13e3..12bbe487a4c8 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -2073,8 +2073,8 @@ static void clone_io_hints(struct dm_target *ti, struct queue_limits *limits)
*/
if (io_opt_sectors < clone->region_size ||
do_div(io_opt_sectors, clone->region_size)) {
- blk_limits_io_min(limits, clone->region_size << SECTOR_SHIFT);
- blk_limits_io_opt(limits, clone->region_size << SECTOR_SHIFT);
+ limits->io_min = clone->region_size << SECTOR_SHIFT;
+ limits->io_opt = clone->region_size << SECTOR_SHIFT;
}
disable_passdown_if_not_supported(clone);
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 14a44c0f8286..3637761f3585 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -206,6 +206,8 @@ struct dm_table {
bool integrity_supported:1;
bool singleton:1;
+ /* set if all the targets in the table have "flush_bypasses_map" set */
+ bool flush_bypasses_map:1;
/*
* Indicates the rw permissions for the new logical device. This
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 6c013ceb0e5f..348b4b26c272 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -214,7 +214,8 @@ struct crypt_config {
unsigned int integrity_tag_size;
unsigned int integrity_iv_size;
- unsigned int on_disk_tag_size;
+ unsigned int used_tag_size;
+ unsigned int tuple_size;
/*
* pool for per bio private data, crypto requests,
@@ -241,6 +242,31 @@ static unsigned int dm_crypt_clients_n;
static volatile unsigned long dm_crypt_pages_per_client;
#define DM_CRYPT_MEMORY_PERCENT 2
#define DM_CRYPT_MIN_PAGES_PER_CLIENT (BIO_MAX_VECS * 16)
+#define DM_CRYPT_DEFAULT_MAX_READ_SIZE 131072
+#define DM_CRYPT_DEFAULT_MAX_WRITE_SIZE 131072
+
+static unsigned int max_read_size = 0;
+module_param(max_read_size, uint, 0644);
+MODULE_PARM_DESC(max_read_size, "Maximum size of a read request");
+static unsigned int max_write_size = 0;
+module_param(max_write_size, uint, 0644);
+MODULE_PARM_DESC(max_write_size, "Maximum size of a write request");
+static unsigned get_max_request_size(struct crypt_config *cc, bool wrt)
+{
+ unsigned val, sector_align;
+ val = !wrt ? READ_ONCE(max_read_size) : READ_ONCE(max_write_size);
+ if (likely(!val))
+ val = !wrt ? DM_CRYPT_DEFAULT_MAX_READ_SIZE : DM_CRYPT_DEFAULT_MAX_WRITE_SIZE;
+ if (wrt || cc->used_tag_size) {
+ if (unlikely(val > BIO_MAX_VECS << PAGE_SHIFT))
+ val = BIO_MAX_VECS << PAGE_SHIFT;
+ }
+ sector_align = max(bdev_logical_block_size(cc->dev->bdev), (unsigned)cc->sector_size);
+ val = round_down(val, sector_align);
+ if (unlikely(!val))
+ val = sector_align;
+ return val >> SECTOR_SHIFT;
+}
static void crypt_endio(struct bio *clone);
static void kcryptd_queue_crypt(struct dm_crypt_io *io);
@@ -1151,14 +1177,14 @@ static int dm_crypt_integrity_io_alloc(struct dm_crypt_io *io, struct bio *bio)
unsigned int tag_len;
int ret;
- if (!bio_sectors(bio) || !io->cc->on_disk_tag_size)
+ if (!bio_sectors(bio) || !io->cc->tuple_size)
return 0;
bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
if (IS_ERR(bip))
return PTR_ERR(bip);
- tag_len = io->cc->on_disk_tag_size * (bio_sectors(bio) >> io->cc->sector_shift);
+ tag_len = io->cc->tuple_size * (bio_sectors(bio) >> io->cc->sector_shift);
bip->bip_iter.bi_sector = io->cc->start + io->sector;
@@ -1182,18 +1208,18 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti)
return -EINVAL;
}
- if (bi->tag_size != cc->on_disk_tag_size ||
- bi->tuple_size != cc->on_disk_tag_size) {
+ if (bi->tuple_size < cc->used_tag_size) {
ti->error = "Integrity profile tag size mismatch.";
return -EINVAL;
}
+ cc->tuple_size = bi->tuple_size;
if (1 << bi->interval_exp != cc->sector_size) {
ti->error = "Integrity profile sector size mismatch.";
return -EINVAL;
}
if (crypt_integrity_aead(cc)) {
- cc->integrity_tag_size = cc->on_disk_tag_size - cc->integrity_iv_size;
+ cc->integrity_tag_size = cc->used_tag_size - cc->integrity_iv_size;
DMDEBUG("%s: Integrity AEAD, tag size %u, IV size %u.", dm_device_name(md),
cc->integrity_tag_size, cc->integrity_iv_size);
@@ -1205,7 +1231,7 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti)
DMDEBUG("%s: Additional per-sector space %u bytes for IV.", dm_device_name(md),
cc->integrity_iv_size);
- if ((cc->integrity_tag_size + cc->integrity_iv_size) != bi->tag_size) {
+ if ((cc->integrity_tag_size + cc->integrity_iv_size) > cc->tuple_size) {
ti->error = "Not enough space for integrity tag in the profile.";
return -EINVAL;
}
@@ -1284,7 +1310,7 @@ static void *tag_from_dmreq(struct crypt_config *cc,
struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx);
return &io->integrity_metadata[*org_tag_of_dmreq(cc, dmreq) *
- cc->on_disk_tag_size];
+ cc->tuple_size];
}
static void *iv_tag_from_dmreq(struct crypt_config *cc,
@@ -1365,9 +1391,9 @@ static int crypt_convert_block_aead(struct crypt_config *cc,
aead_request_set_crypt(req, dmreq->sg_in, dmreq->sg_out,
cc->sector_size, iv);
r = crypto_aead_encrypt(req);
- if (cc->integrity_tag_size + cc->integrity_iv_size != cc->on_disk_tag_size)
+ if (cc->integrity_tag_size + cc->integrity_iv_size != cc->tuple_size)
memset(tag + cc->integrity_tag_size + cc->integrity_iv_size, 0,
- cc->on_disk_tag_size - (cc->integrity_tag_size + cc->integrity_iv_size));
+ cc->tuple_size - (cc->integrity_tag_size + cc->integrity_iv_size));
} else {
aead_request_set_crypt(req, dmreq->sg_in, dmreq->sg_out,
cc->sector_size + cc->integrity_tag_size, iv);
@@ -1797,7 +1823,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
return;
if (likely(!io->ctx.aead_recheck) && unlikely(io->ctx.aead_failed) &&
- cc->on_disk_tag_size && bio_data_dir(base_bio) == READ) {
+ cc->used_tag_size && bio_data_dir(base_bio) == READ) {
io->ctx.aead_recheck = true;
io->ctx.aead_failed = false;
io->error = 0;
@@ -3181,7 +3207,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
ti->error = "Invalid integrity arguments";
return -EINVAL;
}
- cc->on_disk_tag_size = val;
+ cc->used_tag_size = val;
sval = strchr(opt_string + strlen("integrity:"), ':') + 1;
if (!strcasecmp(sval, "aead")) {
set_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags);
@@ -3393,12 +3419,12 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (ret)
goto bad;
- cc->tag_pool_max_sectors = POOL_ENTRY_SIZE / cc->on_disk_tag_size;
+ cc->tag_pool_max_sectors = POOL_ENTRY_SIZE / cc->tuple_size;
if (!cc->tag_pool_max_sectors)
cc->tag_pool_max_sectors = 1;
ret = mempool_init_kmalloc_pool(&cc->tag_pool, MIN_IOS,
- cc->tag_pool_max_sectors * cc->on_disk_tag_size);
+ cc->tag_pool_max_sectors * cc->tuple_size);
if (ret) {
ti->error = "Cannot allocate integrity tags mempool";
goto bad;
@@ -3474,6 +3500,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
{
struct dm_crypt_io *io;
struct crypt_config *cc = ti->private;
+ unsigned max_sectors;
/*
* If bio is REQ_PREFLUSH or REQ_OP_DISCARD, just bypass crypt queues.
@@ -3492,9 +3519,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
/*
* Check if bio is too large, split as needed.
*/
- if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_VECS << PAGE_SHIFT)) &&
- (bio_data_dir(bio) == WRITE || cc->on_disk_tag_size))
- dm_accept_partial_bio(bio, ((BIO_MAX_VECS << PAGE_SHIFT) >> SECTOR_SHIFT));
+ max_sectors = get_max_request_size(cc, bio_data_dir(bio) == WRITE);
+ if (unlikely(bio_sectors(bio) > max_sectors))
+ dm_accept_partial_bio(bio, max_sectors);
/*
* Ensure that bio is a multiple of internal sector encryption size
@@ -3509,8 +3536,8 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
io = dm_per_bio_data(bio, cc->per_bio_data_size);
crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
- if (cc->on_disk_tag_size) {
- unsigned int tag_len = cc->on_disk_tag_size * (bio_sectors(bio) >> cc->sector_shift);
+ if (cc->tuple_size) {
+ unsigned int tag_len = cc->tuple_size * (bio_sectors(bio) >> cc->sector_shift);
if (unlikely(tag_len > KMALLOC_MAX_SIZE))
io->integrity_metadata = NULL;
@@ -3582,7 +3609,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
num_feature_args += cc->sector_size != (1 << SECTOR_SHIFT);
num_feature_args += test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags);
- if (cc->on_disk_tag_size)
+ if (cc->used_tag_size)
num_feature_args++;
if (num_feature_args) {
DMEMIT(" %d", num_feature_args);
@@ -3598,8 +3625,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
DMEMIT(" no_read_workqueue");
if (test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))
DMEMIT(" no_write_workqueue");
- if (cc->on_disk_tag_size)
- DMEMIT(" integrity:%u:%s", cc->on_disk_tag_size, cc->cipher_auth);
+ if (cc->used_tag_size)
+ DMEMIT(" integrity:%u:%s", cc->used_tag_size, cc->cipher_auth);
if (cc->sector_size != (1 << SECTOR_SHIFT))
DMEMIT(" sector_size:%d", cc->sector_size);
if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags))
@@ -3621,9 +3648,9 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
DMEMIT(",iv_large_sectors=%c", test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags) ?
'y' : 'n');
- if (cc->on_disk_tag_size)
+ if (cc->used_tag_size)
DMEMIT(",integrity_tag_size=%u,cipher_auth=%s",
- cc->on_disk_tag_size, cc->cipher_auth);
+ cc->used_tag_size, cc->cipher_auth);
if (cc->sector_size != (1 << SECTOR_SHIFT))
DMEMIT(",sector_size=%d", cc->sector_size);
if (cc->cipher_string)
@@ -3731,7 +3758,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type crypt_target = {
.name = "crypt",
- .version = {1, 26, 0},
+ .version = {1, 27, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c
index b70d4016c2ac..ec5db1478b2f 100644
--- a/drivers/md/dm-ebs-target.c
+++ b/drivers/md/dm-ebs-target.c
@@ -428,7 +428,7 @@ static void ebs_io_hints(struct dm_target *ti, struct queue_limits *limits)
limits->logical_block_size = to_bytes(ec->e_bs);
limits->physical_block_size = to_bytes(ec->u_bs);
limits->alignment_offset = limits->physical_block_size;
- blk_limits_io_min(limits, limits->logical_block_size);
+ limits->io_min = limits->logical_block_size;
}
static int ebs_iterate_devices(struct dm_target *ti,
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index 8f81e597858d..9c84e9d13eca 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -196,7 +196,7 @@ struct superblock_disk {
* Superblock validation
*--------------------------------------------------------------
*/
-static void sb_prepare_for_write(struct dm_block_validator *v,
+static void sb_prepare_for_write(const struct dm_block_validator *v,
struct dm_block *b,
size_t sb_block_size)
{
@@ -221,7 +221,7 @@ static int check_metadata_version(struct superblock_disk *disk)
return 0;
}
-static int sb_check(struct dm_block_validator *v,
+static int sb_check(const struct dm_block_validator *v,
struct dm_block *b,
size_t sb_block_size)
{
@@ -254,7 +254,7 @@ static int sb_check(struct dm_block_validator *v,
return check_metadata_version(disk);
}
-static struct dm_block_validator sb_validator = {
+static const struct dm_block_validator sb_validator = {
.name = "superblock",
.prepare_for_write = sb_prepare_for_write,
.check = sb_check
@@ -1733,8 +1733,8 @@ static void era_io_hints(struct dm_target *ti, struct queue_limits *limits)
*/
if (io_opt_sectors < era->sectors_per_block ||
do_div(io_opt_sectors, era->sectors_per_block)) {
- blk_limits_io_min(limits, 0);
- blk_limits_io_opt(limits, era->sectors_per_block << SECTOR_SHIFT);
+ limits->io_min = 0;
+ limits->io_opt = era->sectors_per_block << SECTOR_SHIFT;
}
}
diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c
index 2a71bcdba92d..b37bbe762500 100644
--- a/drivers/md/dm-init.c
+++ b/drivers/md/dm-init.c
@@ -212,8 +212,10 @@ static char __init *dm_parse_device_entry(struct dm_device *dev, char *str)
strscpy(dev->dmi.uuid, field[1], sizeof(dev->dmi.uuid));
/* minor */
if (strlen(field[2])) {
- if (kstrtoull(field[2], 0, &dev->dmi.dev))
+ if (kstrtoull(field[2], 0, &dev->dmi.dev) ||
+ dev->dmi.dev >= (1 << MINORBITS))
return ERR_PTR(-EINVAL);
+ dev->dmi.dev = huge_encode_dev((dev_t)dev->dmi.dev);
dev->dmi.flags |= DM_PERSISTENT_DEV_FLAG;
}
/* flags */
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 2a89f8eb4713..48ac628f471b 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -44,6 +44,7 @@
#define BITMAP_FLUSH_INTERVAL (10 * HZ)
#define DISCARD_FILLER 0xf6
#define SALT_SIZE 16
+#define RECHECK_POOL_SIZE 256
/*
* Warning - DEBUG_PRINT prints security-sensitive data to the log,
@@ -62,6 +63,7 @@
#define SB_VERSION_3 3
#define SB_VERSION_4 4
#define SB_VERSION_5 5
+#define SB_VERSION_6 6
#define SB_SECTORS 8
#define MAX_SECTORS_PER_BLOCK 8
@@ -86,6 +88,7 @@ struct superblock {
#define SB_FLAG_DIRTY_BITMAP 0x4
#define SB_FLAG_FIXED_PADDING 0x8
#define SB_FLAG_FIXED_HMAC 0x10
+#define SB_FLAG_INLINE 0x20
#define JOURNAL_ENTRY_ROUNDUP 8
@@ -166,6 +169,7 @@ struct dm_integrity_c {
struct dm_dev *meta_dev;
unsigned int tag_size;
__s8 log2_tag_size;
+ unsigned int tuple_size;
sector_t start;
mempool_t journal_io_mempool;
struct dm_io_client *io;
@@ -279,6 +283,7 @@ struct dm_integrity_c {
atomic64_t number_of_mismatches;
mempool_t recheck_pool;
+ struct bio_set recheck_bios;
struct notifier_block reboot_notifier;
};
@@ -314,6 +319,9 @@ struct dm_integrity_io {
struct completion *completion;
struct dm_bio_details bio_details;
+
+ char *integrity_payload;
+ bool integrity_payload_from_mempool;
};
struct journal_completion {
@@ -351,6 +359,7 @@ static struct kmem_cache *journal_io_cache;
#endif
static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
+static int dm_integrity_map_inline(struct dm_integrity_io *dio);
static void integrity_bio_wait(struct work_struct *w);
static void dm_integrity_dtr(struct dm_target *ti);
@@ -460,7 +469,9 @@ static void wraparound_section(struct dm_integrity_c *ic, unsigned int *sec_ptr)
static void sb_set_version(struct dm_integrity_c *ic)
{
- if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC))
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_INLINE))
+ ic->sb->version = SB_VERSION_6;
+ else if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC))
ic->sb->version = SB_VERSION_5;
else if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING))
ic->sb->version = SB_VERSION_4;
@@ -1894,6 +1905,35 @@ error:
dec_in_flight(dio);
}
+static inline bool dm_integrity_check_limits(struct dm_integrity_c *ic, sector_t logical_sector, struct bio *bio)
+{
+ if (unlikely(logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) {
+ DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
+ logical_sector, bio_sectors(bio),
+ ic->provided_data_sectors);
+ return false;
+ }
+ if (unlikely((logical_sector | bio_sectors(bio)) & (unsigned int)(ic->sectors_per_block - 1))) {
+ DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
+ ic->sectors_per_block,
+ logical_sector, bio_sectors(bio));
+ return false;
+ }
+ if (ic->sectors_per_block > 1 && likely(bio_op(bio) != REQ_OP_DISCARD)) {
+ struct bvec_iter iter;
+ struct bio_vec bv;
+
+ bio_for_each_segment(bv, bio, iter) {
+ if (unlikely(bv.bv_len & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
+ DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
+ bv.bv_offset, bv.bv_len, ic->sectors_per_block);
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
{
struct dm_integrity_c *ic = ti->private;
@@ -1906,6 +1946,9 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
dio->bi_status = 0;
dio->op = bio_op(bio);
+ if (ic->mode == 'I')
+ return dm_integrity_map_inline(dio);
+
if (unlikely(dio->op == REQ_OP_DISCARD)) {
if (ti->max_io_len) {
sector_t sec = dm_target_offset(ti, bio->bi_iter.bi_sector);
@@ -1935,31 +1978,8 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
*/
bio->bi_opf &= ~REQ_FUA;
}
- if (unlikely(dio->range.logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) {
- DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
- dio->range.logical_sector, bio_sectors(bio),
- ic->provided_data_sectors);
- return DM_MAPIO_KILL;
- }
- if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned int)(ic->sectors_per_block - 1))) {
- DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
- ic->sectors_per_block,
- dio->range.logical_sector, bio_sectors(bio));
+ if (unlikely(!dm_integrity_check_limits(ic, dio->range.logical_sector, bio)))
return DM_MAPIO_KILL;
- }
-
- if (ic->sectors_per_block > 1 && likely(dio->op != REQ_OP_DISCARD)) {
- struct bvec_iter iter;
- struct bio_vec bv;
-
- bio_for_each_segment(bv, bio, iter) {
- if (unlikely(bv.bv_len & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
- DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
- bv.bv_offset, bv.bv_len, ic->sectors_per_block);
- return DM_MAPIO_KILL;
- }
- }
- }
bip = bio_integrity(bio);
if (!ic->internal_hash) {
@@ -2375,6 +2395,213 @@ journal_read_write:
do_endio_flush(ic, dio);
}
+static int dm_integrity_map_inline(struct dm_integrity_io *dio)
+{
+ struct dm_integrity_c *ic = dio->ic;
+ struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
+ struct bio_integrity_payload *bip;
+ unsigned payload_len, digest_size, extra_size, ret;
+
+ dio->integrity_payload = NULL;
+ dio->integrity_payload_from_mempool = false;
+
+ if (unlikely(bio_integrity(bio))) {
+ bio->bi_status = BLK_STS_NOTSUPP;
+ bio_endio(bio);
+ return DM_MAPIO_SUBMITTED;
+ }
+
+ bio_set_dev(bio, ic->dev->bdev);
+ if (unlikely((bio->bi_opf & REQ_PREFLUSH) != 0))
+ return DM_MAPIO_REMAPPED;
+
+retry:
+ payload_len = ic->tuple_size * (bio_sectors(bio) >> ic->sb->log2_sectors_per_block);
+ digest_size = crypto_shash_digestsize(ic->internal_hash);
+ extra_size = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
+ payload_len += extra_size;
+ dio->integrity_payload = kmalloc(payload_len, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+ if (unlikely(!dio->integrity_payload)) {
+ const unsigned x_size = PAGE_SIZE << 1;
+ if (payload_len > x_size) {
+ unsigned sectors = ((x_size - extra_size) / ic->tuple_size) << ic->sb->log2_sectors_per_block;
+ if (WARN_ON(!sectors || sectors >= bio_sectors(bio))) {
+ bio->bi_status = BLK_STS_NOTSUPP;
+ bio_endio(bio);
+ return DM_MAPIO_SUBMITTED;
+ }
+ dm_accept_partial_bio(bio, sectors);
+ goto retry;
+ }
+ dio->integrity_payload = page_to_virt((struct page *)mempool_alloc(&ic->recheck_pool, GFP_NOIO));
+ dio->integrity_payload_from_mempool = true;
+ }
+
+ bio->bi_iter.bi_sector = dm_target_offset(ic->ti, bio->bi_iter.bi_sector);
+ dio->bio_details.bi_iter = bio->bi_iter;
+
+ if (unlikely(!dm_integrity_check_limits(ic, bio->bi_iter.bi_sector, bio))) {
+ return DM_MAPIO_KILL;
+ }
+
+ bio->bi_iter.bi_sector += ic->start + SB_SECTORS;
+
+ bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
+ if (unlikely(IS_ERR(bip))) {
+ bio->bi_status = errno_to_blk_status(PTR_ERR(bip));
+ bio_endio(bio);
+ return DM_MAPIO_SUBMITTED;
+ }
+
+ if (dio->op == REQ_OP_WRITE) {
+ unsigned pos = 0;
+ while (dio->bio_details.bi_iter.bi_size) {
+ struct bio_vec bv = bio_iter_iovec(bio, dio->bio_details.bi_iter);
+ const char *mem = bvec_kmap_local(&bv);
+ if (ic->tag_size < ic->tuple_size)
+ memset(dio->integrity_payload + pos + ic->tag_size, 0, ic->tuple_size - ic->tuple_size);
+ integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, mem, dio->integrity_payload + pos);
+ kunmap_local(mem);
+ pos += ic->tuple_size;
+ bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT);
+ }
+ }
+
+ ret = bio_integrity_add_page(bio, virt_to_page(dio->integrity_payload),
+ payload_len, offset_in_page(dio->integrity_payload));
+ if (unlikely(ret != payload_len)) {
+ bio->bi_status = BLK_STS_RESOURCE;
+ bio_endio(bio);
+ return DM_MAPIO_SUBMITTED;
+ }
+
+ return DM_MAPIO_REMAPPED;
+}
+
+static inline void dm_integrity_free_payload(struct dm_integrity_io *dio)
+{
+ struct dm_integrity_c *ic = dio->ic;
+ if (unlikely(dio->integrity_payload_from_mempool))
+ mempool_free(virt_to_page(dio->integrity_payload), &ic->recheck_pool);
+ else
+ kfree(dio->integrity_payload);
+ dio->integrity_payload = NULL;
+ dio->integrity_payload_from_mempool = false;
+}
+
+static void dm_integrity_inline_recheck(struct work_struct *w)
+{
+ struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
+ struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
+ struct dm_integrity_c *ic = dio->ic;
+ struct bio *outgoing_bio;
+ void *outgoing_data;
+
+ dio->integrity_payload = page_to_virt((struct page *)mempool_alloc(&ic->recheck_pool, GFP_NOIO));
+ dio->integrity_payload_from_mempool = true;
+
+ outgoing_data = dio->integrity_payload + PAGE_SIZE;
+
+ while (dio->bio_details.bi_iter.bi_size) {
+ char digest[HASH_MAX_DIGESTSIZE];
+ int r;
+ struct bio_integrity_payload *bip;
+ struct bio_vec bv;
+ char *mem;
+
+ outgoing_bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_READ, GFP_NOIO, &ic->recheck_bios);
+
+ r = bio_add_page(outgoing_bio, virt_to_page(outgoing_data), ic->sectors_per_block << SECTOR_SHIFT, 0);
+ if (unlikely(r != (ic->sectors_per_block << SECTOR_SHIFT))) {
+ bio_put(outgoing_bio);
+ bio->bi_status = BLK_STS_RESOURCE;
+ bio_endio(bio);
+ return;
+ }
+
+ bip = bio_integrity_alloc(outgoing_bio, GFP_NOIO, 1);
+ if (unlikely(IS_ERR(bip))) {
+ bio_put(outgoing_bio);
+ bio->bi_status = errno_to_blk_status(PTR_ERR(bip));
+ bio_endio(bio);
+ return;
+ }
+
+ r = bio_integrity_add_page(outgoing_bio, virt_to_page(dio->integrity_payload), ic->tuple_size, 0);
+ if (unlikely(r != ic->tuple_size)) {
+ bio_put(outgoing_bio);
+ bio->bi_status = BLK_STS_RESOURCE;
+ bio_endio(bio);
+ return;
+ }
+
+ outgoing_bio->bi_iter.bi_sector = dio->bio_details.bi_iter.bi_sector + ic->start + SB_SECTORS;
+
+ r = submit_bio_wait(outgoing_bio);
+ if (unlikely(r != 0)) {
+ bio_put(outgoing_bio);
+ bio->bi_status = errno_to_blk_status(r);
+ bio_endio(bio);
+ return;
+ }
+ bio_put(outgoing_bio);
+
+ integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, outgoing_data, digest);
+ if (unlikely(memcmp(digest, dio->integrity_payload, min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) {
+ DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
+ ic->dev->bdev, dio->bio_details.bi_iter.bi_sector);
+ atomic64_inc(&ic->number_of_mismatches);
+ dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum",
+ bio, dio->bio_details.bi_iter.bi_sector, 0);
+
+ bio->bi_status = BLK_STS_PROTECTION;
+ bio_endio(bio);
+ return;
+ }
+
+ bv = bio_iter_iovec(bio, dio->bio_details.bi_iter);
+ mem = bvec_kmap_local(&bv);
+ memcpy(mem, outgoing_data, ic->sectors_per_block << SECTOR_SHIFT);
+ kunmap_local(mem);
+
+ bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT);
+ }
+
+ bio_endio(bio);
+}
+
+static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status)
+{
+ struct dm_integrity_c *ic = ti->private;
+ if (ic->mode == 'I') {
+ struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
+ if (dio->op == REQ_OP_READ && likely(*status == BLK_STS_OK)) {
+ unsigned pos = 0;
+ while (dio->bio_details.bi_iter.bi_size) {
+ char digest[HASH_MAX_DIGESTSIZE];
+ struct bio_vec bv = bio_iter_iovec(bio, dio->bio_details.bi_iter);
+ char *mem = bvec_kmap_local(&bv);
+ //memset(mem, 0xff, ic->sectors_per_block << SECTOR_SHIFT);
+ integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, mem, digest);
+ if (unlikely(memcmp(digest, dio->integrity_payload + pos,
+ min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) {
+ kunmap_local(mem);
+ dm_integrity_free_payload(dio);
+ INIT_WORK(&dio->work, dm_integrity_inline_recheck);
+ queue_work(ic->offload_wq, &dio->work);
+ return DM_ENDIO_INCOMPLETE;
+ }
+ kunmap_local(mem);
+ pos += ic->tuple_size;
+ bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT);
+ }
+ }
+ if (likely(dio->op == REQ_OP_READ) || likely(dio->op == REQ_OP_WRITE)) {
+ dm_integrity_free_payload(dio);
+ }
+ }
+ return DM_ENDIO_DONE;
+}
static void integrity_bio_wait(struct work_struct *w)
{
@@ -2413,6 +2640,9 @@ static void integrity_commit(struct work_struct *w)
del_timer(&ic->autocommit_timer);
+ if (ic->mode == 'I')
+ return;
+
spin_lock_irq(&ic->endio_wait.lock);
flushes = bio_list_get(&ic->flush_bio_list);
if (unlikely(ic->mode != 'J')) {
@@ -3471,7 +3701,7 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
if (ic->sectors_per_block > 1) {
limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
- blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
+ limits->io_min = ic->sectors_per_block << SECTOR_SHIFT;
limits->dma_alignment = limits->logical_block_size - 1;
limits->discard_granularity = ic->sectors_per_block << SECTOR_SHIFT;
}
@@ -3515,7 +3745,10 @@ static int calculate_device_limits(struct dm_integrity_c *ic)
return -EINVAL;
ic->initial_sectors = initial_sectors;
- if (!ic->meta_dev) {
+ if (ic->mode == 'I') {
+ if (ic->initial_sectors + ic->provided_data_sectors > ic->meta_device_sectors)
+ return -EINVAL;
+ } else if (!ic->meta_dev) {
sector_t last_sector, last_area, last_offset;
/* we have to maintain excessive padding for compatibility with existing volumes */
@@ -3578,6 +3811,8 @@ static int initialize_superblock(struct dm_integrity_c *ic,
memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT);
memcpy(ic->sb->magic, SB_MAGIC, 8);
+ if (ic->mode == 'I')
+ ic->sb->flags |= cpu_to_le32(SB_FLAG_INLINE);
ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size);
ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block);
if (ic->journal_mac_alg.alg_string)
@@ -3587,6 +3822,8 @@ static int initialize_superblock(struct dm_integrity_c *ic,
journal_sections = journal_sectors / ic->journal_section_sectors;
if (!journal_sections)
journal_sections = 1;
+ if (ic->mode == 'I')
+ journal_sections = 0;
if (ic->fix_hmac && (ic->internal_hash_alg.alg_string || ic->journal_mac_alg.alg_string)) {
ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_HMAC);
@@ -4135,10 +4372,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
}
if (!strcmp(argv[3], "J") || !strcmp(argv[3], "B") ||
- !strcmp(argv[3], "D") || !strcmp(argv[3], "R")) {
+ !strcmp(argv[3], "D") || !strcmp(argv[3], "R") ||
+ !strcmp(argv[3], "I")) {
ic->mode = argv[3][0];
} else {
- ti->error = "Invalid mode (expecting J, B, D, R)";
+ ti->error = "Invalid mode (expecting J, B, D, R, I)";
r = -EINVAL;
goto bad;
}
@@ -4284,6 +4522,53 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
else
ic->log2_tag_size = -1;
+ if (ic->mode == 'I') {
+ struct blk_integrity *bi;
+ if (ic->meta_dev) {
+ r = -EINVAL;
+ ti->error = "Metadata device not supported in inline mode";
+ goto bad;
+ }
+ if (!ic->internal_hash_alg.alg_string) {
+ r = -EINVAL;
+ ti->error = "Internal hash not set in inline mode";
+ goto bad;
+ }
+ if (ic->journal_crypt_alg.alg_string || ic->journal_mac_alg.alg_string) {
+ r = -EINVAL;
+ ti->error = "Journal crypt not supported in inline mode";
+ goto bad;
+ }
+ if (ic->discard) {
+ r = -EINVAL;
+ ti->error = "Discards not supported in inline mode";
+ goto bad;
+ }
+ bi = blk_get_integrity(ic->dev->bdev->bd_disk);
+ if (!bi || bi->csum_type != BLK_INTEGRITY_CSUM_NONE) {
+ r = -EINVAL;
+ ti->error = "Integrity profile not supported";
+ goto bad;
+ }
+ /*printk("tag_size: %u, tuple_size: %u\n", bi->tag_size, bi->tuple_size);*/
+ if (bi->tuple_size < ic->tag_size) {
+ r = -EINVAL;
+ ti->error = "The integrity profile is smaller than tag size";
+ goto bad;
+ }
+ if ((unsigned long)bi->tuple_size > PAGE_SIZE / 2) {
+ r = -EINVAL;
+ ti->error = "Too big tuple size";
+ goto bad;
+ }
+ ic->tuple_size = bi->tuple_size;
+ if (1 << bi->interval_exp != ic->sectors_per_block << SECTOR_SHIFT) {
+ r = -EINVAL;
+ ti->error = "Integrity profile sector size mismatch";
+ goto bad;
+ }
+ }
+
if (ic->mode == 'B' && !ic->internal_hash) {
r = -EINVAL;
ti->error = "Bitmap mode can be only used with internal hash";
@@ -4314,12 +4599,26 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
goto bad;
}
- r = mempool_init_page_pool(&ic->recheck_pool, 1, 0);
+ r = mempool_init_page_pool(&ic->recheck_pool, 1, ic->mode == 'I' ? 1 : 0);
if (r) {
ti->error = "Cannot allocate mempool";
goto bad;
}
+ if (ic->mode == 'I') {
+ r = bioset_init(&ic->recheck_bios, RECHECK_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+ if (r) {
+ ti->error = "Cannot allocate bio set";
+ goto bad;
+ }
+ r = bioset_integrity_create(&ic->recheck_bios, RECHECK_POOL_SIZE);
+ if (r) {
+ ti->error = "Cannot allocate bio integrity set";
+ r = -ENOMEM;
+ goto bad;
+ }
+ }
+
ic->metadata_wq = alloc_workqueue("dm-integrity-metadata",
WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE);
if (!ic->metadata_wq) {
@@ -4396,11 +4695,16 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
should_write_sb = true;
}
- if (!ic->sb->version || ic->sb->version > SB_VERSION_5) {
+ if (!ic->sb->version || ic->sb->version > SB_VERSION_6) {
r = -EINVAL;
ti->error = "Unknown version";
goto bad;
}
+ if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_INLINE)) != (ic->mode == 'I')) {
+ r = -EINVAL;
+ ti->error = "Inline flag mismatch";
+ goto bad;
+ }
if (le16_to_cpu(ic->sb->integrity_tag_size) != ic->tag_size) {
r = -EINVAL;
ti->error = "Tag size doesn't match the information in superblock";
@@ -4411,9 +4715,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
ti->error = "Block size doesn't match the information in superblock";
goto bad;
}
- if (!le32_to_cpu(ic->sb->journal_sections)) {
+ if (!le32_to_cpu(ic->sb->journal_sections) != (ic->mode == 'I')) {
r = -EINVAL;
- ti->error = "Corrupted superblock, journal_sections is 0";
+ if (ic->mode != 'I')
+ ti->error = "Corrupted superblock, journal_sections is 0";
+ else
+ ti->error = "Corrupted superblock, journal_sections is not 0";
goto bad;
}
/* make sure that ti->max_io_len doesn't overflow */
@@ -4465,8 +4772,9 @@ try_smaller_buffer:
bits_in_journal = ((__u64)ic->journal_section_sectors * ic->journal_sections) << (SECTOR_SHIFT + 3);
if (bits_in_journal > UINT_MAX)
bits_in_journal = UINT_MAX;
- while (bits_in_journal < (ic->provided_data_sectors + ((sector_t)1 << log2_sectors_per_bitmap_bit) - 1) >> log2_sectors_per_bitmap_bit)
- log2_sectors_per_bitmap_bit++;
+ if (bits_in_journal)
+ while (bits_in_journal < (ic->provided_data_sectors + ((sector_t)1 << log2_sectors_per_bitmap_bit) - 1) >> log2_sectors_per_bitmap_bit)
+ log2_sectors_per_bitmap_bit++;
log2_blocks_per_bitmap_bit = log2_sectors_per_bitmap_bit - ic->sb->log2_sectors_per_block;
ic->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit;
@@ -4486,7 +4794,6 @@ try_smaller_buffer:
goto bad;
}
-
threshold = (__u64)ic->journal_entries * (100 - journal_watermark);
threshold += 50;
do_div(threshold, 100);
@@ -4538,17 +4845,19 @@ try_smaller_buffer:
goto bad;
}
- ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev,
- 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL, 0);
- if (IS_ERR(ic->bufio)) {
- r = PTR_ERR(ic->bufio);
- ti->error = "Cannot initialize dm-bufio";
- ic->bufio = NULL;
- goto bad;
+ if (ic->mode != 'I') {
+ ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev,
+ 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL, 0);
+ if (IS_ERR(ic->bufio)) {
+ r = PTR_ERR(ic->bufio);
+ ti->error = "Cannot initialize dm-bufio";
+ ic->bufio = NULL;
+ goto bad;
+ }
+ dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors);
}
- dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors);
- if (ic->mode != 'R') {
+ if (ic->mode != 'R' && ic->mode != 'I') {
r = create_journal(ic, &ti->error);
if (r)
goto bad;
@@ -4608,7 +4917,7 @@ try_smaller_buffer:
ic->just_formatted = true;
}
- if (!ic->meta_dev) {
+ if (!ic->meta_dev && ic->mode != 'I') {
r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors);
if (r)
goto bad;
@@ -4632,6 +4941,9 @@ try_smaller_buffer:
if (ic->discard)
ti->num_discard_bios = 1;
+ if (ic->mode == 'I')
+ ti->mempool_needs_integrity = true;
+
dm_audit_log_ctr(DM_MSG_PREFIX, ti, 1);
return 0;
@@ -4665,6 +4977,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
kvfree(ic->bbs);
if (ic->bufio)
dm_bufio_client_destroy(ic->bufio);
+ bioset_exit(&ic->recheck_bios);
mempool_exit(&ic->recheck_pool);
mempool_exit(&ic->journal_io_mempool);
if (ic->io)
@@ -4718,12 +5031,13 @@ static void dm_integrity_dtr(struct dm_target *ti)
static struct target_type integrity_target = {
.name = "integrity",
- .version = {1, 11, 0},
+ .version = {1, 12, 0},
.module = THIS_MODULE,
.features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
.ctr = dm_integrity_ctr,
.dtr = dm_integrity_dtr,
.map = dm_integrity_map,
+ .end_io = dm_integrity_end_io,
.postsuspend = dm_integrity_postsuspend,
.resume = dm_integrity_resume,
.status = dm_integrity_status,
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 7409490259d1..d7a8e2f40db3 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -347,7 +347,7 @@ static void do_region(const blk_opf_t opf, unsigned int region,
break;
default:
num_bvecs = bio_max_segs(dm_sector_div_up(remaining,
- (PAGE_SIZE >> SECTOR_SHIFT)));
+ (PAGE_SIZE >> SECTOR_SHIFT)) + 1);
}
bio = bio_alloc_bioset(where->bdev, num_bvecs, opf, GFP_NOIO,
@@ -384,16 +384,13 @@ static void do_region(const blk_opf_t opf, unsigned int region,
static void dispatch_io(blk_opf_t opf, unsigned int num_regions,
struct dm_io_region *where, struct dpages *dp,
- struct io *io, int sync, unsigned short ioprio)
+ struct io *io, unsigned short ioprio)
{
int i;
struct dpages old_pages = *dp;
BUG_ON(num_regions > DM_IO_MAX_REGIONS);
- if (sync)
- opf |= REQ_SYNC;
-
/*
* For multiple regions we need to be careful to rewind
* the dp object for each call to do_region.
@@ -411,6 +408,26 @@ static void dispatch_io(blk_opf_t opf, unsigned int num_regions,
dec_count(io, 0, 0);
}
+static void async_io(struct dm_io_client *client, unsigned int num_regions,
+ struct dm_io_region *where, blk_opf_t opf,
+ struct dpages *dp, io_notify_fn fn, void *context,
+ unsigned short ioprio)
+{
+ struct io *io;
+
+ io = mempool_alloc(&client->pool, GFP_NOIO);
+ io->error_bits = 0;
+ atomic_set(&io->count, 1); /* see dispatch_io() */
+ io->client = client;
+ io->callback = fn;
+ io->context = context;
+
+ io->vma_invalidate_address = dp->vma_invalidate_address;
+ io->vma_invalidate_size = dp->vma_invalidate_size;
+
+ dispatch_io(opf, num_regions, where, dp, io, ioprio);
+}
+
struct sync_io {
unsigned long error_bits;
struct completion wait;
@@ -428,27 +445,12 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
struct dm_io_region *where, blk_opf_t opf, struct dpages *dp,
unsigned long *error_bits, unsigned short ioprio)
{
- struct io *io;
struct sync_io sio;
- if (num_regions > 1 && !op_is_write(opf)) {
- WARN_ON(1);
- return -EIO;
- }
-
init_completion(&sio.wait);
- io = mempool_alloc(&client->pool, GFP_NOIO);
- io->error_bits = 0;
- atomic_set(&io->count, 1); /* see dispatch_io() */
- io->client = client;
- io->callback = sync_io_complete;
- io->context = &sio;
-
- io->vma_invalidate_address = dp->vma_invalidate_address;
- io->vma_invalidate_size = dp->vma_invalidate_size;
-
- dispatch_io(opf, num_regions, where, dp, io, 1, ioprio);
+ async_io(client, num_regions, where, opf | REQ_SYNC, dp,
+ sync_io_complete, &sio, ioprio);
wait_for_completion_io(&sio.wait);
@@ -458,33 +460,6 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
return sio.error_bits ? -EIO : 0;
}
-static int async_io(struct dm_io_client *client, unsigned int num_regions,
- struct dm_io_region *where, blk_opf_t opf,
- struct dpages *dp, io_notify_fn fn, void *context,
- unsigned short ioprio)
-{
- struct io *io;
-
- if (num_regions > 1 && !op_is_write(opf)) {
- WARN_ON(1);
- fn(1, context);
- return -EIO;
- }
-
- io = mempool_alloc(&client->pool, GFP_NOIO);
- io->error_bits = 0;
- atomic_set(&io->count, 1); /* see dispatch_io() */
- io->client = client;
- io->callback = fn;
- io->context = context;
-
- io->vma_invalidate_address = dp->vma_invalidate_address;
- io->vma_invalidate_size = dp->vma_invalidate_size;
-
- dispatch_io(opf, num_regions, where, dp, io, 0, ioprio);
- return 0;
-}
-
static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
unsigned long size)
{
@@ -529,6 +504,11 @@ int dm_io(struct dm_io_request *io_req, unsigned int num_regions,
int r;
struct dpages dp;
+ if (num_regions > 1 && !op_is_write(io_req->bi_opf)) {
+ WARN_ON(1);
+ return -EIO;
+ }
+
r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT);
if (r)
return r;
@@ -537,9 +517,9 @@ int dm_io(struct dm_io_request *io_req, unsigned int num_regions,
return sync_io(io_req->client, num_regions, where,
io_req->bi_opf, &dp, sync_error_bits, ioprio);
- return async_io(io_req->client, num_regions, where,
- io_req->bi_opf, &dp, io_req->notify.fn,
- io_req->notify.context, ioprio);
+ async_io(io_req->client, num_regions, where, io_req->bi_opf, &dp,
+ io_req->notify.fn, io_req->notify.context, ioprio);
+ return 0;
}
EXPORT_SYMBOL(dm_io);
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 2d3e186ca87e..49fb0f684193 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -62,6 +62,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_discard_bios = 1;
ti->num_secure_erase_bios = 1;
ti->num_write_zeroes_bios = 1;
+ ti->flush_bypasses_map = true;
ti->private = lc;
return 0;
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 15b681b90153..637977acc3dc 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1419,8 +1419,7 @@ out:
/*
* Fail or reinstate all paths that match the provided struct dm_dev.
*/
-static int action_dev(struct multipath *m, struct dm_dev *dev,
- action_fn action)
+static int action_dev(struct multipath *m, dev_t dev, action_fn action)
{
int r = -EINVAL;
struct pgpath *pgpath;
@@ -1428,7 +1427,7 @@ static int action_dev(struct multipath *m, struct dm_dev *dev,
list_for_each_entry(pg, &m->priority_groups, list) {
list_for_each_entry(pgpath, &pg->pgpaths, list) {
- if (pgpath->path.dev == dev)
+ if (pgpath->path.dev->bdev->bd_dev == dev)
r = action(pgpath);
}
}
@@ -1959,7 +1958,7 @@ static int multipath_message(struct dm_target *ti, unsigned int argc, char **arg
char *result, unsigned int maxlen)
{
int r = -EINVAL;
- struct dm_dev *dev;
+ dev_t dev;
struct multipath *m = ti->private;
action_fn action;
unsigned long flags;
@@ -2008,7 +2007,7 @@ static int multipath_message(struct dm_target *ti, unsigned int argc, char **arg
goto out;
}
- r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
+ r = dm_devt_from_path(argv[1], &dev);
if (r) {
DMWARN("message: error getting device %s",
argv[1]);
@@ -2017,8 +2016,6 @@ static int multipath_message(struct dm_target *ti, unsigned int argc, char **arg
r = action_dev(m, dev, action);
- dm_put_device(ti, dev);
-
out:
mutex_unlock(&m->work_mutex);
return r;
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 052c00c1eb15..0c3323e0adb2 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1626,6 +1626,23 @@ static int _check_data_dev_sectors(struct raid_set *rs)
return 0;
}
+/* Get reshape sectors from data_offsets or raid set */
+static sector_t _get_reshape_sectors(struct raid_set *rs)
+{
+ struct md_rdev *rdev;
+ sector_t reshape_sectors = 0;
+
+ rdev_for_each(rdev, &rs->md)
+ if (!test_bit(Journal, &rdev->flags)) {
+ reshape_sectors = (rdev->data_offset > rdev->new_data_offset) ?
+ rdev->data_offset - rdev->new_data_offset :
+ rdev->new_data_offset - rdev->data_offset;
+ break;
+ }
+
+ return max(reshape_sectors, (sector_t) rs->data_offset);
+}
+
/* Calculate the sectors per device and per array used for @rs */
static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, bool use_mddev)
{
@@ -1656,7 +1673,7 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, b
if (sector_div(dev_sectors, data_stripes))
goto bad;
- array_sectors = (data_stripes + delta_disks) * dev_sectors;
+ array_sectors = (data_stripes + delta_disks) * (dev_sectors - _get_reshape_sectors(rs));
if (sector_div(array_sectors, rs->raid10_copies))
goto bad;
@@ -1665,7 +1682,7 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, b
else
/* Striped layouts */
- array_sectors = (data_stripes + delta_disks) * dev_sectors;
+ array_sectors = (data_stripes + delta_disks) * (dev_sectors - _get_reshape_sectors(rs));
mddev->array_sectors = array_sectors;
mddev->dev_sectors = dev_sectors;
@@ -1704,11 +1721,20 @@ static void do_table_event(struct work_struct *ws)
struct raid_set *rs = container_of(ws, struct raid_set, md.event_work);
smp_rmb(); /* Make sure we access most actual mddev properties */
- if (!rs_is_reshaping(rs)) {
+
+ /* Only grow size resulting from added stripe(s) after reshape ended. */
+ if (!rs_is_reshaping(rs) &&
+ rs->array_sectors > rs->md.array_sectors &&
+ !rs->md.delta_disks &&
+ rs->md.raid_disks == rs->raid_disks) {
+ /* The raid10 personality doesn't provide proper device sizes -> correct. */
if (rs_is_raid10(rs))
rs_set_rdev_sectors(rs);
+
+ rs->md.array_sectors = rs->array_sectors;
rs_set_capacity(rs);
}
+
dm_table_event(rs->ti->table);
}
@@ -2811,23 +2837,6 @@ static int rs_prepare_reshape(struct raid_set *rs)
return 0;
}
-/* Get reshape sectors from data_offsets or raid set */
-static sector_t _get_reshape_sectors(struct raid_set *rs)
-{
- struct md_rdev *rdev;
- sector_t reshape_sectors = 0;
-
- rdev_for_each(rdev, &rs->md)
- if (!test_bit(Journal, &rdev->flags)) {
- reshape_sectors = (rdev->data_offset > rdev->new_data_offset) ?
- rdev->data_offset - rdev->new_data_offset :
- rdev->new_data_offset - rdev->data_offset;
- break;
- }
-
- return max(reshape_sectors, (sector_t) rs->data_offset);
-}
-
/*
* Reshape:
* - change raid layout
@@ -3802,8 +3811,8 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
struct raid_set *rs = ti->private;
unsigned int chunk_size_bytes = to_bytes(rs->md.chunk_sectors);
- blk_limits_io_min(limits, chunk_size_bytes);
- blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
+ limits->io_min = chunk_size_bytes;
+ limits->io_opt = chunk_size_bytes * mddev_data_stripes(rs);
}
static void raid_presuspend(struct dm_target *ti)
@@ -4023,6 +4032,11 @@ static int raid_preresume(struct dm_target *ti)
if (test_and_set_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags))
return 0;
+ /* If different and no explicit grow request, expose MD array size as of superblock. */
+ if (!test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) &&
+ rs->array_sectors != mddev->array_sectors)
+ rs_set_capacity(rs);
+
/*
* The superblocks need to be updated on disk if the
* array is new or new devices got added (thus zeroed
@@ -4101,10 +4115,11 @@ static void raid_resume(struct dm_target *ti)
if (mddev->delta_disks < 0)
rs_set_capacity(rs);
+ mddev_lock_nointr(mddev);
WARN_ON_ONCE(!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery));
- WARN_ON_ONCE(test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+ WARN_ON_ONCE(rcu_dereference_protected(mddev->sync_thread,
+ lockdep_is_held(&mddev->reconfig_mutex)));
clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
- mddev_lock_nointr(mddev);
mddev->ro = 0;
mddev->in_sync = 0;
md_unfrozen_sync_thread(mddev);
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 16b93ae51d96..4112071de0be 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -157,6 +157,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_discard_bios = stripes;
ti->num_secure_erase_bios = stripes;
ti->num_write_zeroes_bios = stripes;
+ ti->flush_bypasses_map = true;
sc->chunk_size = chunk_size;
if (chunk_size & (chunk_size - 1))
@@ -458,8 +459,8 @@ static void stripe_io_hints(struct dm_target *ti,
struct stripe_c *sc = ti->private;
unsigned int chunk_size = sc->chunk_size << SECTOR_SHIFT;
- blk_limits_io_min(limits, chunk_size);
- blk_limits_io_opt(limits, chunk_size * sc->stripes);
+ limits->io_min = chunk_size;
+ limits->io_opt = chunk_size * sc->stripes;
}
static struct target_type stripe_target = {
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 92d18dcdb3e9..dbd39b9722b9 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -160,6 +160,7 @@ int dm_table_create(struct dm_table **result, blk_mode_t mode,
t->type = DM_TYPE_NONE;
t->mode = mode;
t->md = md;
+ t->flush_bypasses_map = true;
*result = t;
return 0;
}
@@ -330,23 +331,15 @@ static int upgrade_mode(struct dm_dev_internal *dd, blk_mode_t new_mode,
}
/*
- * Add a device to the list, or just increment the usage count if
- * it's already present.
- *
* Note: the __ref annotation is because this function can call the __init
* marked early_lookup_bdev when called during early boot code from dm-init.c.
*/
-int __ref dm_get_device(struct dm_target *ti, const char *path, blk_mode_t mode,
- struct dm_dev **result)
+int __ref dm_devt_from_path(const char *path, dev_t *dev_p)
{
int r;
dev_t dev;
unsigned int major, minor;
char dummy;
- struct dm_dev_internal *dd;
- struct dm_table *t = ti->table;
-
- BUG_ON(!t);
if (sscanf(path, "%u:%u%c", &major, &minor, &dummy) == 2) {
/* Extract the major/minor numbers */
@@ -362,6 +355,29 @@ int __ref dm_get_device(struct dm_target *ti, const char *path, blk_mode_t mode,
if (r)
return r;
}
+ *dev_p = dev;
+ return 0;
+}
+EXPORT_SYMBOL(dm_devt_from_path);
+
+/*
+ * Add a device to the list, or just increment the usage count if
+ * it's already present.
+ */
+int dm_get_device(struct dm_target *ti, const char *path, blk_mode_t mode,
+ struct dm_dev **result)
+{
+ int r;
+ dev_t dev;
+ struct dm_dev_internal *dd;
+ struct dm_table *t = ti->table;
+
+ BUG_ON(!t);
+
+ r = dm_devt_from_path(path, &dev);
+ if (r)
+ return r;
+
if (dev == disk_devt(t->md->disk))
return -EINVAL;
@@ -748,6 +764,9 @@ int dm_table_add_target(struct dm_table *t, const char *type,
if (ti->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
static_branch_enable(&swap_bios_enabled);
+ if (!ti->flush_bypasses_map)
+ t->flush_bypasses_map = false;
+
return 0;
bad:
@@ -1031,6 +1050,7 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
unsigned int min_pool_size = 0, pool_size;
struct dm_md_mempools *pools;
unsigned int bioset_flags = 0;
+ bool mempool_needs_integrity = t->integrity_supported;
if (unlikely(type == DM_TYPE_NONE)) {
DMERR("no table type is set, can't allocate mempools");
@@ -1055,6 +1075,8 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
per_io_data_size = max(per_io_data_size, ti->per_io_data_size);
min_pool_size = max(min_pool_size, ti->num_flush_bios);
+
+ mempool_needs_integrity |= ti->mempool_needs_integrity;
}
pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
front_pad = roundup(per_io_data_size,
@@ -1064,13 +1086,13 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
__alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
if (bioset_init(&pools->io_bs, pool_size, io_front_pad, bioset_flags))
goto out_free_pools;
- if (t->integrity_supported &&
+ if (mempool_needs_integrity &&
bioset_integrity_create(&pools->io_bs, pool_size))
goto out_free_pools;
init_bs:
if (bioset_init(&pools->bs, pool_size, front_pad, 0))
goto out_free_pools;
- if (t->integrity_supported &&
+ if (mempool_needs_integrity &&
bioset_integrity_create(&pools->bs, pool_size))
goto out_free_pools;
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 6022189c1388..f90679cfec5b 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -249,7 +249,7 @@ struct dm_thin_device {
*/
#define SUPERBLOCK_CSUM_XOR 160774
-static void sb_prepare_for_write(struct dm_block_validator *v,
+static void sb_prepare_for_write(const struct dm_block_validator *v,
struct dm_block *b,
size_t block_size)
{
@@ -261,7 +261,7 @@ static void sb_prepare_for_write(struct dm_block_validator *v,
SUPERBLOCK_CSUM_XOR));
}
-static int sb_check(struct dm_block_validator *v,
+static int sb_check(const struct dm_block_validator *v,
struct dm_block *b,
size_t block_size)
{
@@ -294,7 +294,7 @@ static int sb_check(struct dm_block_validator *v,
return 0;
}
-static struct dm_block_validator sb_validator = {
+static const struct dm_block_validator sb_validator = {
.name = "superblock",
.prepare_for_write = sb_prepare_for_write,
.check = sb_check
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 991f77a5885b..a0c1620e90c8 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -4079,10 +4079,10 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
if (io_opt_sectors < pool->sectors_per_block ||
!is_factor(io_opt_sectors, pool->sectors_per_block)) {
if (is_factor(pool->sectors_per_block, limits->max_sectors))
- blk_limits_io_min(limits, limits->max_sectors << SECTOR_SHIFT);
+ limits->io_min = limits->max_sectors << SECTOR_SHIFT;
else
- blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT);
- blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
+ limits->io_min = pool->sectors_per_block << SECTOR_SHIFT;
+ limits->io_opt = pool->sectors_per_block << SECTOR_SHIFT;
}
/*
diff --git a/drivers/md/dm-vdo/dedupe.c b/drivers/md/dm-vdo/dedupe.c
index 117266e1b3ae..39ac68614419 100644
--- a/drivers/md/dm-vdo/dedupe.c
+++ b/drivers/md/dm-vdo/dedupe.c
@@ -148,11 +148,6 @@
#include "vdo.h"
#include "wait-queue.h"
-struct uds_attribute {
- struct attribute attr;
- const char *(*show_string)(struct hash_zones *hash_zones);
-};
-
#define DEDUPE_QUERY_TIMER_IDLE 0
#define DEDUPE_QUERY_TIMER_RUNNING 1
#define DEDUPE_QUERY_TIMER_FIRED 2
diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c
index b423bec6458b..dd05691e4097 100644
--- a/drivers/md/dm-vdo/dm-vdo-target.c
+++ b/drivers/md/dm-vdo/dm-vdo-target.c
@@ -928,9 +928,9 @@ static void vdo_io_hints(struct dm_target *ti, struct queue_limits *limits)
limits->physical_block_size = VDO_BLOCK_SIZE;
/* The minimum io size for random io */
- blk_limits_io_min(limits, VDO_BLOCK_SIZE);
+ limits->io_min = VDO_BLOCK_SIZE;
/* The optimal io size for streamed/sequential io */
- blk_limits_io_opt(limits, VDO_BLOCK_SIZE);
+ limits->io_opt = VDO_BLOCK_SIZE;
/*
* Sets the maximum discard size that will be passed into VDO. This value comes from a
@@ -945,7 +945,7 @@ static void vdo_io_hints(struct dm_target *ti, struct queue_limits *limits)
* The value is used by dm-thin to determine whether to pass down discards. The block layer
* splits large discards on this boundary when this is set.
*/
- limits->max_discard_sectors =
+ limits->max_hw_discard_sectors =
(vdo->device_config->max_discard_blocks * VDO_SECTORS_PER_BLOCK);
/*
diff --git a/drivers/md/dm-vdo/indexer/index.c b/drivers/md/dm-vdo/indexer/index.c
index 1ba767144426..df4934846244 100644
--- a/drivers/md/dm-vdo/indexer/index.c
+++ b/drivers/md/dm-vdo/indexer/index.c
@@ -197,15 +197,12 @@ static int finish_previous_chapter(struct uds_index *index, u64 current_chapter_
static int swap_open_chapter(struct index_zone *zone)
{
int result;
- struct open_chapter_zone *temporary_chapter;
result = finish_previous_chapter(zone->index, zone->newest_virtual_chapter);
if (result != UDS_SUCCESS)
return result;
- temporary_chapter = zone->open_chapter;
- zone->open_chapter = zone->writing_chapter;
- zone->writing_chapter = temporary_chapter;
+ swap(zone->open_chapter, zone->writing_chapter);
return UDS_SUCCESS;
}
diff --git a/drivers/md/dm-vdo/int-map.c b/drivers/md/dm-vdo/int-map.c
index 3aa438f84ea1..f6fe58e437b3 100644
--- a/drivers/md/dm-vdo/int-map.c
+++ b/drivers/md/dm-vdo/int-map.c
@@ -96,7 +96,7 @@ struct int_map {
size_t size;
/** @capacity: The number of neighborhoods in the map. */
size_t capacity;
- /* @bucket_count: The number of buckets in the bucket array. */
+ /** @bucket_count: The number of buckets in the bucket array. */
size_t bucket_count;
/** @buckets: The array of hash buckets. */
struct bucket *buckets;
diff --git a/drivers/md/dm-vdo/repair.c b/drivers/md/dm-vdo/repair.c
index defc9359f10e..7e0009d2f67d 100644
--- a/drivers/md/dm-vdo/repair.c
+++ b/drivers/md/dm-vdo/repair.c
@@ -51,6 +51,8 @@ struct recovery_point {
bool increment_applied;
};
+DEFINE_MIN_HEAP(struct numbered_block_mapping, replay_heap);
+
struct repair_completion {
/* The completion header */
struct vdo_completion completion;
@@ -97,7 +99,7 @@ struct repair_completion {
* order, then original journal order. This permits efficient iteration over the journal
* entries in order.
*/
- struct min_heap replay_heap;
+ struct replay_heap replay_heap;
/* Fields tracking progress through the journal entries. */
struct numbered_block_mapping *current_entry;
struct numbered_block_mapping *current_unfetched_entry;
@@ -135,7 +137,7 @@ struct repair_completion {
* to sort by slot while still ensuring we replay all entries with the same slot in the exact order
* as they appeared in the journal.
*/
-static bool mapping_is_less_than(const void *item1, const void *item2)
+static bool mapping_is_less_than(const void *item1, const void *item2, void __always_unused *args)
{
const struct numbered_block_mapping *mapping1 =
(const struct numbered_block_mapping *) item1;
@@ -154,7 +156,7 @@ static bool mapping_is_less_than(const void *item1, const void *item2)
return 0;
}
-static void swap_mappings(void *item1, void *item2)
+static void swap_mappings(void *item1, void *item2, void __always_unused *args)
{
struct numbered_block_mapping *mapping1 = item1;
struct numbered_block_mapping *mapping2 = item2;
@@ -163,14 +165,13 @@ static void swap_mappings(void *item1, void *item2)
}
static const struct min_heap_callbacks repair_min_heap = {
- .elem_size = sizeof(struct numbered_block_mapping),
.less = mapping_is_less_than,
.swp = swap_mappings,
};
static struct numbered_block_mapping *sort_next_heap_element(struct repair_completion *repair)
{
- struct min_heap *heap = &repair->replay_heap;
+ struct replay_heap *heap = &repair->replay_heap;
struct numbered_block_mapping *last;
if (heap->nr == 0)
@@ -181,8 +182,8 @@ static struct numbered_block_mapping *sort_next_heap_element(struct repair_compl
* restore the heap invariant, and return a pointer to the popped element.
*/
last = &repair->entries[--heap->nr];
- swap_mappings(heap->data, last);
- min_heapify(heap, 0, &repair_min_heap);
+ swap_mappings(heap->data, last, NULL);
+ min_heap_sift_down(heap, 0, &repair_min_heap, NULL);
return last;
}
@@ -318,6 +319,7 @@ static bool __must_check abort_on_error(int result, struct repair_completion *re
/**
* drain_slab_depot() - Flush out all dirty refcounts blocks now that they have been rebuilt or
* recovered.
+ * @completion: The repair completion.
*/
static void drain_slab_depot(struct vdo_completion *completion)
{
@@ -653,9 +655,6 @@ static void rebuild_reference_counts(struct vdo_completion *completion)
vdo_traverse_forest(vdo->block_map, process_entry, completion);
}
-/**
- * increment_recovery_point() - Move the given recovery point forward by one entry.
- */
static void increment_recovery_point(struct recovery_point *point)
{
if (++point->entry_count < RECOVERY_JOURNAL_ENTRIES_PER_SECTOR)
@@ -952,6 +951,7 @@ static void abort_block_map_recovery(struct repair_completion *repair, int resul
/**
* find_entry_starting_next_page() - Find the first journal entry after a given entry which is not
* on the same block map page.
+ * @repair: The repair completion.
* @current_entry: The entry to search from.
* @needs_sort: Whether sorting is needed to proceed.
*
@@ -1117,12 +1117,12 @@ static void recover_block_map(struct vdo_completion *completion)
* Organize the journal entries into a binary heap so we can iterate over them in sorted
* order incrementally, avoiding an expensive sort call.
*/
- repair->replay_heap = (struct min_heap) {
+ repair->replay_heap = (struct replay_heap) {
.data = repair->entries,
.nr = repair->block_map_entry_count,
.size = repair->block_map_entry_count,
};
- min_heapify_all(&repair->replay_heap, &repair_min_heap);
+ min_heapify_all(&repair->replay_heap, &repair_min_heap, NULL);
vdo_log_info("Replaying %zu recovery entries into block map",
repair->block_map_entry_count);
@@ -1218,6 +1218,7 @@ static bool __must_check is_exact_recovery_journal_block(const struct recovery_j
/**
* find_recovery_journal_head_and_tail() - Find the tail and head of the journal.
+ * @repair: The repair completion.
*
* Return: True if there were valid journal blocks.
*/
@@ -1446,6 +1447,7 @@ static int validate_heads(struct repair_completion *repair)
/**
* extract_new_mappings() - Find all valid new mappings to be applied to the block map.
+ * @repair: The repair completion.
*
* The mappings are extracted from the journal and stored in a sortable array so that all of the
* mappings to be applied to a given block map page can be done in a single page fetch.
@@ -1500,6 +1502,7 @@ static int extract_new_mappings(struct repair_completion *repair)
/**
* compute_usages() - Compute the lbns in use and block map data blocks counts from the tail of
* the journal.
+ * @repair: The repair completion.
*/
static noinline int compute_usages(struct repair_completion *repair)
{
diff --git a/drivers/md/dm-vdo/slab-depot.c b/drivers/md/dm-vdo/slab-depot.c
index 46e4721e5b4f..274f9ccd072f 100644
--- a/drivers/md/dm-vdo/slab-depot.c
+++ b/drivers/md/dm-vdo/slab-depot.c
@@ -3288,7 +3288,8 @@ int vdo_release_block_reference(struct block_allocator *allocator,
* Thus, the ordering is reversed from the usual sense since min_heap returns smaller elements
* before larger ones.
*/
-static bool slab_status_is_less_than(const void *item1, const void *item2)
+static bool slab_status_is_less_than(const void *item1, const void *item2,
+ void __always_unused *args)
{
const struct slab_status *info1 = item1;
const struct slab_status *info2 = item2;
@@ -3300,7 +3301,7 @@ static bool slab_status_is_less_than(const void *item1, const void *item2)
return info1->slab_number < info2->slab_number;
}
-static void swap_slab_statuses(void *item1, void *item2)
+static void swap_slab_statuses(void *item1, void *item2, void __always_unused *args)
{
struct slab_status *info1 = item1;
struct slab_status *info2 = item2;
@@ -3309,7 +3310,6 @@ static void swap_slab_statuses(void *item1, void *item2)
}
static const struct min_heap_callbacks slab_status_min_heap = {
- .elem_size = sizeof(struct slab_status),
.less = slab_status_is_less_than,
.swp = swap_slab_statuses,
};
@@ -3509,7 +3509,7 @@ static int get_slab_statuses(struct block_allocator *allocator,
static int __must_check vdo_prepare_slabs_for_allocation(struct block_allocator *allocator)
{
struct slab_status current_slab_status;
- struct min_heap heap;
+ DEFINE_MIN_HEAP(struct slab_status, heap) heap;
int result;
struct slab_status *slab_statuses;
struct slab_depot *depot = allocator->depot;
@@ -3521,12 +3521,12 @@ static int __must_check vdo_prepare_slabs_for_allocation(struct block_allocator
return result;
/* Sort the slabs by cleanliness, then by emptiness hint. */
- heap = (struct min_heap) {
+ heap = (struct heap) {
.data = slab_statuses,
.nr = allocator->slab_count,
.size = allocator->slab_count,
};
- min_heapify_all(&heap, &slab_status_min_heap);
+ min_heapify_all(&heap, &slab_status_min_heap, NULL);
while (heap.nr > 0) {
bool high_priority;
@@ -3534,7 +3534,7 @@ static int __must_check vdo_prepare_slabs_for_allocation(struct block_allocator
struct slab_journal *journal;
current_slab_status = slab_statuses[0];
- min_heap_pop(&heap, &slab_status_min_heap);
+ min_heap_pop(&heap, &slab_status_min_heap, NULL);
slab = depot->slabs[current_slab_status.slab_number];
if ((depot->load_type == VDO_SLAB_DEPOT_REBUILD_LOAD) ||
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index e46aee6f932e..62b1a44b8dd2 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -186,8 +186,7 @@ error:
static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
u8 *want_digest, u8 *data)
{
- if (unlikely(verity_hash(v, verity_io_hash_req(v, io),
- data, 1 << v->data_dev_block_bits,
+ if (unlikely(verity_hash(v, io, data, 1 << v->data_dev_block_bits,
verity_io_real_digest(v, io), true)))
return 0;
@@ -388,8 +387,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
}
/* Always re-validate the corrected block against the expected hash */
- r = verity_hash(v, verity_io_hash_req(v, io), fio->output,
- 1 << v->data_dev_block_bits,
+ r = verity_hash(v, io, fio->output, 1 << v->data_dev_block_bits,
verity_io_real_digest(v, io), true);
if (unlikely(r < 0))
return r;
@@ -404,24 +402,9 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
return 0;
}
-static int fec_bv_copy(struct dm_verity *v, struct dm_verity_io *io, u8 *data,
- size_t len)
-{
- struct dm_verity_fec_io *fio = fec_io(io);
-
- memcpy(data, &fio->output[fio->output_pos], len);
- fio->output_pos += len;
-
- return 0;
-}
-
-/*
- * Correct errors in a block. Copies corrected block to dest if non-NULL,
- * otherwise to a bio_vec starting from iter.
- */
+/* Correct errors in a block. Copies corrected block to dest. */
int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
- enum verity_block_type type, sector_t block, u8 *dest,
- struct bvec_iter *iter)
+ enum verity_block_type type, sector_t block, u8 *dest)
{
int r;
struct dm_verity_fec_io *fio = fec_io(io);
@@ -471,12 +454,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
goto done;
}
- if (dest)
- memcpy(dest, fio->output, 1 << v->data_dev_block_bits);
- else if (iter) {
- fio->output_pos = 0;
- r = verity_for_bv_block(v, io, iter, fec_bv_copy);
- }
+ memcpy(dest, fio->output, 1 << v->data_dev_block_bits);
done:
fio->level--;
diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h
index 8454070d2824..09123a612953 100644
--- a/drivers/md/dm-verity-fec.h
+++ b/drivers/md/dm-verity-fec.h
@@ -57,7 +57,6 @@ struct dm_verity_fec_io {
u8 *bufs[DM_VERITY_FEC_BUF_MAX]; /* bufs for deinterleaving */
unsigned int nbufs; /* number of buffers allocated */
u8 *output; /* buffer for corrected output */
- size_t output_pos;
unsigned int level; /* recursion level */
};
@@ -70,7 +69,7 @@ extern bool verity_fec_is_enabled(struct dm_verity *v);
extern int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
enum verity_block_type type, sector_t block,
- u8 *dest, struct bvec_iter *iter);
+ u8 *dest);
extern unsigned int verity_fec_status_table(struct dm_verity *v, unsigned int sz,
char *result, unsigned int maxlen);
@@ -100,8 +99,7 @@ static inline bool verity_fec_is_enabled(struct dm_verity *v)
static inline int verity_fec_decode(struct dm_verity *v,
struct dm_verity_io *io,
enum verity_block_type type,
- sector_t block, u8 *dest,
- struct bvec_iter *iter)
+ sector_t block, u8 *dest)
{
return -EOPNOTSUPP;
}
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index bb5da66da4c1..cf659c8feb29 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -48,6 +48,9 @@ module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, 0644);
static DEFINE_STATIC_KEY_FALSE(use_bh_wq_enabled);
+/* Is at least one dm-verity instance using ahash_tfm instead of shash_tfm? */
+static DEFINE_STATIC_KEY_FALSE(ahash_enabled);
+
struct dm_verity_prefetch_work {
struct work_struct work;
struct dm_verity *v;
@@ -102,7 +105,7 @@ static sector_t verity_position_at_level(struct dm_verity *v, sector_t block,
return block >> (level * v->hash_per_block_bits);
}
-static int verity_hash_update(struct dm_verity *v, struct ahash_request *req,
+static int verity_ahash_update(struct dm_verity *v, struct ahash_request *req,
const u8 *data, size_t len,
struct crypto_wait *wait)
{
@@ -135,12 +138,12 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req,
/*
* Wrapper for crypto_ahash_init, which handles verity salting.
*/
-static int verity_hash_init(struct dm_verity *v, struct ahash_request *req,
+static int verity_ahash_init(struct dm_verity *v, struct ahash_request *req,
struct crypto_wait *wait, bool may_sleep)
{
int r;
- ahash_request_set_tfm(req, v->tfm);
+ ahash_request_set_tfm(req, v->ahash_tfm);
ahash_request_set_callback(req,
may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0,
crypto_req_done, (void *)wait);
@@ -155,18 +158,18 @@ static int verity_hash_init(struct dm_verity *v, struct ahash_request *req,
}
if (likely(v->salt_size && (v->version >= 1)))
- r = verity_hash_update(v, req, v->salt, v->salt_size, wait);
+ r = verity_ahash_update(v, req, v->salt, v->salt_size, wait);
return r;
}
-static int verity_hash_final(struct dm_verity *v, struct ahash_request *req,
- u8 *digest, struct crypto_wait *wait)
+static int verity_ahash_final(struct dm_verity *v, struct ahash_request *req,
+ u8 *digest, struct crypto_wait *wait)
{
int r;
if (unlikely(v->salt_size && (!v->version))) {
- r = verity_hash_update(v, req, v->salt, v->salt_size, wait);
+ r = verity_ahash_update(v, req, v->salt, v->salt_size, wait);
if (r < 0) {
DMERR("%s failed updating salt: %d", __func__, r);
@@ -180,23 +183,27 @@ out:
return r;
}
-int verity_hash(struct dm_verity *v, struct ahash_request *req,
+int verity_hash(struct dm_verity *v, struct dm_verity_io *io,
const u8 *data, size_t len, u8 *digest, bool may_sleep)
{
int r;
- struct crypto_wait wait;
-
- r = verity_hash_init(v, req, &wait, may_sleep);
- if (unlikely(r < 0))
- goto out;
- r = verity_hash_update(v, req, data, len, &wait);
- if (unlikely(r < 0))
- goto out;
+ if (static_branch_unlikely(&ahash_enabled) && !v->shash_tfm) {
+ struct ahash_request *req = verity_io_hash_req(v, io);
+ struct crypto_wait wait;
- r = verity_hash_final(v, req, digest, &wait);
+ r = verity_ahash_init(v, req, &wait, may_sleep) ?:
+ verity_ahash_update(v, req, data, len, &wait) ?:
+ verity_ahash_final(v, req, digest, &wait);
+ } else {
+ struct shash_desc *desc = verity_io_hash_req(v, io);
-out:
+ desc->tfm = v->shash_tfm;
+ r = crypto_shash_import(desc, v->initial_hashstate) ?:
+ crypto_shash_finup(desc, data, len, digest);
+ }
+ if (unlikely(r))
+ DMERR("Error hashing block: %d", r);
return r;
}
@@ -325,8 +332,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
goto release_ret_r;
}
- r = verity_hash(v, verity_io_hash_req(v, io),
- data, 1 << v->hash_dev_block_bits,
+ r = verity_hash(v, io, data, 1 << v->hash_dev_block_bits,
verity_io_real_digest(v, io), !io->in_bh);
if (unlikely(r < 0))
goto release_ret_r;
@@ -342,7 +348,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
r = -EAGAIN;
goto release_ret_r;
} else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_METADATA,
- hash_block, data, NULL) == 0)
+ hash_block, data) == 0)
aux->hash_verified = 1;
else if (verity_handle_err(v,
DM_VERITY_BLOCK_TYPE_METADATA,
@@ -404,98 +410,8 @@ out:
return r;
}
-/*
- * Calculates the digest for the given bio
- */
-static int verity_for_io_block(struct dm_verity *v, struct dm_verity_io *io,
- struct bvec_iter *iter, struct crypto_wait *wait)
-{
- unsigned int todo = 1 << v->data_dev_block_bits;
- struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
- struct scatterlist sg;
- struct ahash_request *req = verity_io_hash_req(v, io);
-
- do {
- int r;
- unsigned int len;
- struct bio_vec bv = bio_iter_iovec(bio, *iter);
-
- sg_init_table(&sg, 1);
-
- len = bv.bv_len;
-
- if (likely(len >= todo))
- len = todo;
- /*
- * Operating on a single page at a time looks suboptimal
- * until you consider the typical block size is 4,096B.
- * Going through this loops twice should be very rare.
- */
- sg_set_page(&sg, bv.bv_page, len, bv.bv_offset);
- ahash_request_set_crypt(req, &sg, NULL, len);
- r = crypto_wait_req(crypto_ahash_update(req), wait);
-
- if (unlikely(r < 0)) {
- DMERR("%s crypto op failed: %d", __func__, r);
- return r;
- }
-
- bio_advance_iter(bio, iter, len);
- todo -= len;
- } while (todo);
-
- return 0;
-}
-
-/*
- * Calls function process for 1 << v->data_dev_block_bits bytes in the bio_vec
- * starting from iter.
- */
-int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
- struct bvec_iter *iter,
- int (*process)(struct dm_verity *v,
- struct dm_verity_io *io, u8 *data,
- size_t len))
-{
- unsigned int todo = 1 << v->data_dev_block_bits;
- struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
-
- do {
- int r;
- u8 *page;
- unsigned int len;
- struct bio_vec bv = bio_iter_iovec(bio, *iter);
-
- page = bvec_kmap_local(&bv);
- len = bv.bv_len;
-
- if (likely(len >= todo))
- len = todo;
-
- r = process(v, io, page, len);
- kunmap_local(page);
-
- if (r < 0)
- return r;
-
- bio_advance_iter(bio, iter, len);
- todo -= len;
- } while (todo);
-
- return 0;
-}
-
-static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io,
- u8 *data, size_t len)
-{
- memcpy(data, io->recheck_buffer, len);
- io->recheck_buffer += len;
-
- return 0;
-}
-
static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io,
- struct bvec_iter start, sector_t cur_block)
+ sector_t cur_block, u8 *dest)
{
struct page *page;
void *buffer;
@@ -518,8 +434,7 @@ static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io,
if (unlikely(r))
goto free_ret;
- r = verity_hash(v, verity_io_hash_req(v, io), buffer,
- 1 << v->data_dev_block_bits,
+ r = verity_hash(v, io, buffer, 1 << v->data_dev_block_bits,
verity_io_real_digest(v, io), true);
if (unlikely(r))
goto free_ret;
@@ -530,11 +445,7 @@ static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io,
goto free_ret;
}
- io->recheck_buffer = buffer;
- r = verity_for_bv_block(v, io, &start, verity_recheck_copy);
- if (unlikely(r))
- goto free_ret;
-
+ memcpy(dest, buffer, 1 << v->data_dev_block_bits);
r = 0;
free_ret:
mempool_free(page, &v->recheck_pool);
@@ -542,23 +453,36 @@ free_ret:
return r;
}
-static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io,
- u8 *data, size_t len)
-{
- memset(data, 0, len);
- return 0;
-}
-
-/*
- * Moves the bio iter one data block forward.
- */
-static inline void verity_bv_skip_block(struct dm_verity *v,
- struct dm_verity_io *io,
- struct bvec_iter *iter)
+static int verity_handle_data_hash_mismatch(struct dm_verity *v,
+ struct dm_verity_io *io,
+ struct bio *bio, sector_t blkno,
+ u8 *data)
{
- struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
+ if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) {
+ /*
+ * Error handling code (FEC included) cannot be run in the
+ * BH workqueue, so fallback to a standard workqueue.
+ */
+ return -EAGAIN;
+ }
+ if (verity_recheck(v, io, blkno, data) == 0) {
+ if (v->validated_blocks)
+ set_bit(blkno, v->validated_blocks);
+ return 0;
+ }
+#if defined(CONFIG_DM_VERITY_FEC)
+ if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, blkno,
+ data) == 0)
+ return 0;
+#endif
+ if (bio->bi_status)
+ return -EIO; /* Error correction failed; Just return error */
- bio_advance_iter(bio, iter, 1 << v->data_dev_block_bits);
+ if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, blkno)) {
+ dm_audit_log_bio(DM_MSG_PREFIX, "verify-data", bio, blkno, 0);
+ return -EIO;
+ }
+ return 0;
}
/*
@@ -566,12 +490,10 @@ static inline void verity_bv_skip_block(struct dm_verity *v,
*/
static int verity_verify_io(struct dm_verity_io *io)
{
- bool is_zero;
struct dm_verity *v = io->v;
- struct bvec_iter start;
+ const unsigned int block_size = 1 << v->data_dev_block_bits;
struct bvec_iter iter_copy;
struct bvec_iter *iter;
- struct crypto_wait wait;
struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
unsigned int b;
@@ -585,16 +507,17 @@ static int verity_verify_io(struct dm_verity_io *io)
} else
iter = &io->iter;
- for (b = 0; b < io->n_blocks; b++) {
+ for (b = 0; b < io->n_blocks;
+ b++, bio_advance_iter(bio, iter, block_size)) {
int r;
sector_t cur_block = io->block + b;
- struct ahash_request *req = verity_io_hash_req(v, io);
+ bool is_zero;
+ struct bio_vec bv;
+ void *data;
if (v->validated_blocks && bio->bi_status == BLK_STS_OK &&
- likely(test_bit(cur_block, v->validated_blocks))) {
- verity_bv_skip_block(v, io, iter);
+ likely(test_bit(cur_block, v->validated_blocks)))
continue;
- }
r = verity_hash_for_block(v, io, cur_block,
verity_io_want_digest(v, io),
@@ -602,67 +525,49 @@ static int verity_verify_io(struct dm_verity_io *io)
if (unlikely(r < 0))
return r;
+ bv = bio_iter_iovec(bio, *iter);
+ if (unlikely(bv.bv_len < block_size)) {
+ /*
+ * Data block spans pages. This should not happen,
+ * since dm-verity sets dma_alignment to the data block
+ * size minus 1, and dm-verity also doesn't allow the
+ * data block size to be greater than PAGE_SIZE.
+ */
+ DMERR_LIMIT("unaligned io (data block spans pages)");
+ return -EIO;
+ }
+
+ data = bvec_kmap_local(&bv);
+
if (is_zero) {
/*
* If we expect a zero block, don't validate, just
* return zeros.
*/
- r = verity_for_bv_block(v, io, iter,
- verity_bv_zero);
- if (unlikely(r < 0))
- return r;
-
+ memset(data, 0, block_size);
+ kunmap_local(data);
continue;
}
- r = verity_hash_init(v, req, &wait, !io->in_bh);
- if (unlikely(r < 0))
- return r;
-
- start = *iter;
- r = verity_for_io_block(v, io, iter, &wait);
- if (unlikely(r < 0))
- return r;
-
- r = verity_hash_final(v, req, verity_io_real_digest(v, io),
- &wait);
- if (unlikely(r < 0))
+ r = verity_hash(v, io, data, block_size,
+ verity_io_real_digest(v, io), !io->in_bh);
+ if (unlikely(r < 0)) {
+ kunmap_local(data);
return r;
+ }
if (likely(memcmp(verity_io_real_digest(v, io),
verity_io_want_digest(v, io), v->digest_size) == 0)) {
if (v->validated_blocks)
set_bit(cur_block, v->validated_blocks);
+ kunmap_local(data);
continue;
- } else if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) {
- /*
- * Error handling code (FEC included) cannot be run in a
- * tasklet since it may sleep, so fallback to work-queue.
- */
- return -EAGAIN;
- } else if (verity_recheck(v, io, start, cur_block) == 0) {
- if (v->validated_blocks)
- set_bit(cur_block, v->validated_blocks);
- continue;
-#if defined(CONFIG_DM_VERITY_FEC)
- } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA,
- cur_block, NULL, &start) == 0) {
- continue;
-#endif
- } else {
- if (bio->bi_status) {
- /*
- * Error correction failed; Just return error
- */
- return -EIO;
- }
- if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
- cur_block)) {
- dm_audit_log_bio(DM_MSG_PREFIX, "verify-data",
- bio, cur_block, 0);
- return -EIO;
- }
}
+ r = verity_handle_data_hash_mismatch(v, io, bio, cur_block,
+ data);
+ kunmap_local(data);
+ if (unlikely(r))
+ return r;
}
return 0;
@@ -1014,7 +919,15 @@ static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits)
if (limits->physical_block_size < 1 << v->data_dev_block_bits)
limits->physical_block_size = 1 << v->data_dev_block_bits;
- blk_limits_io_min(limits, limits->logical_block_size);
+ limits->io_min = limits->logical_block_size;
+
+ /*
+ * Similar to what dm-crypt does, opt dm-verity out of support for
+ * direct I/O that is aligned to less than the traditional direct I/O
+ * alignment requirement of logical_block_size. This prevents dm-verity
+ * data blocks from crossing pages, eliminating various edge cases.
+ */
+ limits->dma_alignment = limits->logical_block_size - 1;
}
static void verity_dtr(struct dm_target *ti)
@@ -1033,11 +946,16 @@ static void verity_dtr(struct dm_target *ti)
kvfree(v->validated_blocks);
kfree(v->salt);
+ kfree(v->initial_hashstate);
kfree(v->root_digest);
kfree(v->zero_digest);
- if (v->tfm)
- crypto_free_ahash(v->tfm);
+ if (v->ahash_tfm) {
+ static_branch_dec(&ahash_enabled);
+ crypto_free_ahash(v->ahash_tfm);
+ } else {
+ crypto_free_shash(v->shash_tfm);
+ }
kfree(v->alg_name);
@@ -1083,7 +1001,7 @@ static int verity_alloc_most_once(struct dm_verity *v)
static int verity_alloc_zero_digest(struct dm_verity *v)
{
int r = -ENOMEM;
- struct ahash_request *req;
+ struct dm_verity_io *io;
u8 *zero_data;
v->zero_digest = kmalloc(v->digest_size, GFP_KERNEL);
@@ -1091,9 +1009,9 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
if (!v->zero_digest)
return r;
- req = kmalloc(v->ahash_reqsize, GFP_KERNEL);
+ io = kmalloc(sizeof(*io) + v->hash_reqsize, GFP_KERNEL);
- if (!req)
+ if (!io)
return r; /* verity_dtr will free zero_digest */
zero_data = kzalloc(1 << v->data_dev_block_bits, GFP_KERNEL);
@@ -1101,11 +1019,11 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
if (!zero_data)
goto out;
- r = verity_hash(v, req, zero_data, 1 << v->data_dev_block_bits,
+ r = verity_hash(v, io, zero_data, 1 << v->data_dev_block_bits,
v->zero_digest, true);
out:
- kfree(req);
+ kfree(io);
kfree(zero_data);
return r;
@@ -1226,6 +1144,113 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
return r;
}
+static int verity_setup_hash_alg(struct dm_verity *v, const char *alg_name)
+{
+ struct dm_target *ti = v->ti;
+ struct crypto_ahash *ahash;
+ struct crypto_shash *shash = NULL;
+ const char *driver_name;
+
+ v->alg_name = kstrdup(alg_name, GFP_KERNEL);
+ if (!v->alg_name) {
+ ti->error = "Cannot allocate algorithm name";
+ return -ENOMEM;
+ }
+
+ /*
+ * Allocate the hash transformation object that this dm-verity instance
+ * will use. The vast majority of dm-verity users use CPU-based
+ * hashing, so when possible use the shash API to minimize the crypto
+ * API overhead. If the ahash API resolves to a different driver
+ * (likely an off-CPU hardware offload), use ahash instead. Also use
+ * ahash if the obsolete dm-verity format with the appended salt is
+ * being used, so that quirk only needs to be handled in one place.
+ */
+ ahash = crypto_alloc_ahash(alg_name, 0,
+ v->use_bh_wq ? CRYPTO_ALG_ASYNC : 0);
+ if (IS_ERR(ahash)) {
+ ti->error = "Cannot initialize hash function";
+ return PTR_ERR(ahash);
+ }
+ driver_name = crypto_ahash_driver_name(ahash);
+ if (v->version >= 1 /* salt prepended, not appended? */) {
+ shash = crypto_alloc_shash(alg_name, 0, 0);
+ if (!IS_ERR(shash) &&
+ strcmp(crypto_shash_driver_name(shash), driver_name) != 0) {
+ /*
+ * ahash gave a different driver than shash, so probably
+ * this is a case of real hardware offload. Use ahash.
+ */
+ crypto_free_shash(shash);
+ shash = NULL;
+ }
+ }
+ if (!IS_ERR_OR_NULL(shash)) {
+ crypto_free_ahash(ahash);
+ ahash = NULL;
+ v->shash_tfm = shash;
+ v->digest_size = crypto_shash_digestsize(shash);
+ v->hash_reqsize = sizeof(struct shash_desc) +
+ crypto_shash_descsize(shash);
+ DMINFO("%s using shash \"%s\"", alg_name, driver_name);
+ } else {
+ v->ahash_tfm = ahash;
+ static_branch_inc(&ahash_enabled);
+ v->digest_size = crypto_ahash_digestsize(ahash);
+ v->hash_reqsize = sizeof(struct ahash_request) +
+ crypto_ahash_reqsize(ahash);
+ DMINFO("%s using ahash \"%s\"", alg_name, driver_name);
+ }
+ if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
+ ti->error = "Digest size too big";
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int verity_setup_salt_and_hashstate(struct dm_verity *v, const char *arg)
+{
+ struct dm_target *ti = v->ti;
+
+ if (strcmp(arg, "-") != 0) {
+ v->salt_size = strlen(arg) / 2;
+ v->salt = kmalloc(v->salt_size, GFP_KERNEL);
+ if (!v->salt) {
+ ti->error = "Cannot allocate salt";
+ return -ENOMEM;
+ }
+ if (strlen(arg) != v->salt_size * 2 ||
+ hex2bin(v->salt, arg, v->salt_size)) {
+ ti->error = "Invalid salt";
+ return -EINVAL;
+ }
+ }
+ if (v->shash_tfm) {
+ SHASH_DESC_ON_STACK(desc, v->shash_tfm);
+ int r;
+
+ /*
+ * Compute the pre-salted hash state that can be passed to
+ * crypto_shash_import() for each block later.
+ */
+ v->initial_hashstate = kmalloc(
+ crypto_shash_statesize(v->shash_tfm), GFP_KERNEL);
+ if (!v->initial_hashstate) {
+ ti->error = "Cannot allocate initial hash state";
+ return -ENOMEM;
+ }
+ desc->tfm = v->shash_tfm;
+ r = crypto_shash_init(desc) ?:
+ crypto_shash_update(desc, v->salt, v->salt_size) ?:
+ crypto_shash_export(desc, v->initial_hashstate);
+ if (r) {
+ ti->error = "Cannot set up initial hash state";
+ return r;
+ }
+ }
+ return 0;
+}
+
/*
* Target parameters:
* <version> The current format is version 1.
@@ -1350,38 +1375,9 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
v->hash_start = num_ll;
- v->alg_name = kstrdup(argv[7], GFP_KERNEL);
- if (!v->alg_name) {
- ti->error = "Cannot allocate algorithm name";
- r = -ENOMEM;
- goto bad;
- }
-
- v->tfm = crypto_alloc_ahash(v->alg_name, 0,
- v->use_bh_wq ? CRYPTO_ALG_ASYNC : 0);
- if (IS_ERR(v->tfm)) {
- ti->error = "Cannot initialize hash function";
- r = PTR_ERR(v->tfm);
- v->tfm = NULL;
- goto bad;
- }
-
- /*
- * dm-verity performance can vary greatly depending on which hash
- * algorithm implementation is used. Help people debug performance
- * problems by logging the ->cra_driver_name.
- */
- DMINFO("%s using implementation \"%s\"", v->alg_name,
- crypto_hash_alg_common(v->tfm)->base.cra_driver_name);
-
- v->digest_size = crypto_ahash_digestsize(v->tfm);
- if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
- ti->error = "Digest size too big";
- r = -EINVAL;
+ r = verity_setup_hash_alg(v, argv[7]);
+ if (r)
goto bad;
- }
- v->ahash_reqsize = sizeof(struct ahash_request) +
- crypto_ahash_reqsize(v->tfm);
v->root_digest = kmalloc(v->digest_size, GFP_KERNEL);
if (!v->root_digest) {
@@ -1397,21 +1393,9 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
root_hash_digest_to_validate = argv[8];
- if (strcmp(argv[9], "-")) {
- v->salt_size = strlen(argv[9]) / 2;
- v->salt = kmalloc(v->salt_size, GFP_KERNEL);
- if (!v->salt) {
- ti->error = "Cannot allocate salt";
- r = -ENOMEM;
- goto bad;
- }
- if (strlen(argv[9]) != v->salt_size * 2 ||
- hex2bin(v->salt, argv[9], v->salt_size)) {
- ti->error = "Invalid salt";
- r = -EINVAL;
- goto bad;
- }
- }
+ r = verity_setup_salt_and_hashstate(v, argv[9]);
+ if (r)
+ goto bad;
argv += 10;
argc -= 10;
@@ -1513,8 +1497,7 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
- ti->per_io_data_size = sizeof(struct dm_verity_io) +
- v->ahash_reqsize + v->digest_size * 2;
+ ti->per_io_data_size = sizeof(struct dm_verity_io) + v->hash_reqsize;
r = verity_fec_ctr(v);
if (r)
@@ -1539,14 +1522,6 @@ bad:
}
/*
- * Check whether a DM target is a verity target.
- */
-bool dm_is_verity_target(struct dm_target *ti)
-{
- return ti->type->module == THIS_MODULE;
-}
-
-/*
* Get the verity mode (error behavior) of a verity target.
*
* Returns the verity mode of the target, or -EINVAL if 'ti' is not a verity
@@ -1599,6 +1574,14 @@ static struct target_type verity_target = {
};
module_dm(verity);
+/*
+ * Check whether a DM target is a verity target.
+ */
+bool dm_is_verity_target(struct dm_target *ti)
+{
+ return ti->type == &verity_target;
+}
+
MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>");
MODULE_AUTHOR("Mandeep Baines <msb@chromium.org>");
MODULE_AUTHOR("Will Drewry <wad@chromium.org>");
diff --git a/drivers/md/dm-verity-verify-sig.c b/drivers/md/dm-verity-verify-sig.c
index 4836508ea50c..d351d7d39c60 100644
--- a/drivers/md/dm-verity-verify-sig.c
+++ b/drivers/md/dm-verity-verify-sig.c
@@ -126,6 +126,13 @@ int verity_verify_root_hash(const void *root_hash, size_t root_hash_len,
NULL,
#endif
VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL);
+#ifdef CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING
+ if (ret == -ENOKEY)
+ ret = verify_pkcs7_signature(root_hash, root_hash_len, sig_data,
+ sig_len,
+ VERIFY_USE_PLATFORM_KEYRING,
+ VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL);
+#endif
return ret;
}
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 20b1bcf03474..aac3a1b1d94a 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -39,9 +39,11 @@ struct dm_verity {
struct dm_target *ti;
struct dm_bufio_client *bufio;
char *alg_name;
- struct crypto_ahash *tfm;
+ struct crypto_ahash *ahash_tfm; /* either this or shash_tfm is set */
+ struct crypto_shash *shash_tfm; /* either this or ahash_tfm is set */
u8 *root_digest; /* digest of the root block */
u8 *salt; /* salt: its size is salt_size */
+ u8 *initial_hashstate; /* salted initial state, if shash_tfm is set */
u8 *zero_digest; /* digest for a zero block */
unsigned int salt_size;
sector_t data_start; /* data offset in 512-byte sectors */
@@ -56,7 +58,7 @@ struct dm_verity {
bool hash_failed:1; /* set if hash of any block failed */
bool use_bh_wq:1; /* try to verify in BH wq before normal work-queue */
unsigned int digest_size; /* digest size for the current hash algorithm */
- unsigned int ahash_reqsize;/* the size of temporary space for crypto */
+ unsigned int hash_reqsize; /* the size of temporary space for crypto */
enum verity_mode mode; /* mode for handling verification errors */
unsigned int corrupted_errs;/* Number of errors for corrupted blocks */
@@ -89,45 +91,36 @@ struct dm_verity_io {
struct work_struct work;
struct work_struct bh_work;
- char *recheck_buffer;
+ u8 real_digest[HASH_MAX_DIGESTSIZE];
+ u8 want_digest[HASH_MAX_DIGESTSIZE];
/*
- * Three variably-size fields follow this struct:
- *
- * u8 hash_req[v->ahash_reqsize];
- * u8 real_digest[v->digest_size];
- * u8 want_digest[v->digest_size];
- *
- * To access them use: verity_io_hash_req(), verity_io_real_digest()
- * and verity_io_want_digest().
+ * This struct is followed by a variable-sized hash request of size
+ * v->hash_reqsize, either a struct ahash_request or a struct shash_desc
+ * (depending on whether ahash_tfm or shash_tfm is being used). To
+ * access it, use verity_io_hash_req().
*/
};
-static inline struct ahash_request *verity_io_hash_req(struct dm_verity *v,
- struct dm_verity_io *io)
+static inline void *verity_io_hash_req(struct dm_verity *v,
+ struct dm_verity_io *io)
{
- return (struct ahash_request *)(io + 1);
+ return io + 1;
}
static inline u8 *verity_io_real_digest(struct dm_verity *v,
struct dm_verity_io *io)
{
- return (u8 *)(io + 1) + v->ahash_reqsize;
+ return io->real_digest;
}
static inline u8 *verity_io_want_digest(struct dm_verity *v,
struct dm_verity_io *io)
{
- return (u8 *)(io + 1) + v->ahash_reqsize + v->digest_size;
+ return io->want_digest;
}
-extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
- struct bvec_iter *iter,
- int (*process)(struct dm_verity *v,
- struct dm_verity_io *io,
- u8 *data, size_t len));
-
-extern int verity_hash(struct dm_verity *v, struct ahash_request *req,
+extern int verity_hash(struct dm_verity *v, struct dm_verity_io *io,
const u8 *data, size_t len, u8 *digest, bool may_sleep);
extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index cd0ee144973f..6141fc25d842 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -996,8 +996,8 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
limits->logical_block_size = DMZ_BLOCK_SIZE;
limits->physical_block_size = DMZ_BLOCK_SIZE;
- blk_limits_io_min(limits, DMZ_BLOCK_SIZE);
- blk_limits_io_opt(limits, DMZ_BLOCK_SIZE);
+ limits->io_min = DMZ_BLOCK_SIZE;
+ limits->io_opt = DMZ_BLOCK_SIZE;
limits->discard_alignment = 0;
limits->discard_granularity = DMZ_BLOCK_SIZE;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4b1b69e576a5..97fab2087df8 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -11,6 +11,7 @@
#include "dm-uevent.h"
#include "dm-ima.h"
+#include <linux/bio-integrity.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
@@ -645,7 +646,7 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
/* Set default bdev, but target must bio_set_dev() before issuing IO */
clone->bi_bdev = md->disk->part0;
- if (unlikely(ti->needs_bio_set_dev))
+ if (likely(ti != NULL) && unlikely(ti->needs_bio_set_dev))
bio_set_dev(clone, md->disk->part0);
if (len) {
@@ -1107,7 +1108,7 @@ static void clone_endio(struct bio *bio)
blk_status_t error = bio->bi_status;
struct dm_target_io *tio = clone_to_tio(bio);
struct dm_target *ti = tio->ti;
- dm_endio_fn endio = ti->type->end_io;
+ dm_endio_fn endio = likely(ti != NULL) ? ti->type->end_io : NULL;
struct dm_io *io = tio->io;
struct mapped_device *md = io->md;
@@ -1154,7 +1155,7 @@ static void clone_endio(struct bio *bio)
}
if (static_branch_unlikely(&swap_bios_enabled) &&
- unlikely(swap_bios_limit(ti, bio)))
+ likely(ti != NULL) && unlikely(swap_bios_limit(ti, bio)))
up(&md->swap_bios_semaphore);
free_tio(bio);
@@ -1553,17 +1554,43 @@ static void __send_empty_flush(struct clone_info *ci)
ci->sector_count = 0;
ci->io->tio.clone.bi_iter.bi_size = 0;
- for (unsigned int i = 0; i < t->num_targets; i++) {
- unsigned int bios;
- struct dm_target *ti = dm_table_get_target(t, i);
+ if (!t->flush_bypasses_map) {
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ unsigned int bios;
+ struct dm_target *ti = dm_table_get_target(t, i);
- if (unlikely(ti->num_flush_bios == 0))
- continue;
+ if (unlikely(ti->num_flush_bios == 0))
+ continue;
- atomic_add(ti->num_flush_bios, &ci->io->io_count);
- bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
- NULL, GFP_NOWAIT);
- atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
+ atomic_add(ti->num_flush_bios, &ci->io->io_count);
+ bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
+ NULL, GFP_NOWAIT);
+ atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
+ }
+ } else {
+ /*
+ * Note that there's no need to grab t->devices_lock here
+ * because the targets that support flush optimization don't
+ * modify the list of devices.
+ */
+ struct list_head *devices = dm_table_get_devices(t);
+ unsigned int len = 0;
+ struct dm_dev_internal *dd;
+ list_for_each_entry(dd, devices, list) {
+ struct bio *clone;
+ /*
+ * Note that the structure dm_target_io is not
+ * associated with any target (because the device may be
+ * used by multiple targets), so we set tio->ti = NULL.
+ * We must check for NULL in the I/O processing path, to
+ * avoid NULL pointer dereference.
+ */
+ clone = alloc_tio(ci, NULL, 0, &len, GFP_NOIO);
+ atomic_add(1, &ci->io->io_count);
+ bio_set_dev(clone, dd->dm_dev->bdev);
+ clone->bi_end_io = clone_endio;
+ dm_submit_bio_remap(clone, NULL);
+ }
}
/*
@@ -1631,14 +1658,10 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci,
case REQ_OP_SECURE_ERASE:
num_bios = ti->num_secure_erase_bios;
max_sectors = limits->max_secure_erase_sectors;
- if (ti->max_secure_erase_granularity)
- max_granularity = max_sectors;
break;
case REQ_OP_WRITE_ZEROES:
num_bios = ti->num_write_zeroes_bios;
max_sectors = limits->max_write_zeroes_sectors;
- if (ti->max_write_zeroes_granularity)
- max_granularity = max_sectors;
break;
default:
break;
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 6ce75c82969c..1d0db62f0351 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -897,7 +897,7 @@ static int join(struct mddev *mddev, int nodes)
memset(str, 0, 64);
sprintf(str, "%pU", mddev->uuid);
ret = dlm_new_lockspace(str, mddev->bitmap_info.cluster_name,
- 0, LVB_SIZE, &md_ls_ops, mddev,
+ DLM_LSFL_SOFTIRQ, LVB_SIZE, &md_ls_ops, mddev,
&ops_rv, &cinfo->lockspace);
if (ret)
goto err;
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
index 798c9c53a343..157c9bd2fed7 100644
--- a/drivers/md/persistent-data/dm-array.c
+++ b/drivers/md/persistent-data/dm-array.c
@@ -38,7 +38,7 @@ struct array_block {
*/
#define CSUM_XOR 595846735
-static void array_block_prepare_for_write(struct dm_block_validator *v,
+static void array_block_prepare_for_write(const struct dm_block_validator *v,
struct dm_block *b,
size_t size_of_block)
{
@@ -50,7 +50,7 @@ static void array_block_prepare_for_write(struct dm_block_validator *v,
CSUM_XOR));
}
-static int array_block_check(struct dm_block_validator *v,
+static int array_block_check(const struct dm_block_validator *v,
struct dm_block *b,
size_t size_of_block)
{
@@ -77,7 +77,7 @@ static int array_block_check(struct dm_block_validator *v,
return 0;
}
-static struct dm_block_validator array_validator = {
+static const struct dm_block_validator array_validator = {
.name = "array",
.prepare_for_write = array_block_prepare_for_write,
.check = array_block_check
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index b17b54df673b..1ef71e5fcde7 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -345,7 +345,7 @@ void *dm_block_data(struct dm_block *b)
EXPORT_SYMBOL_GPL(dm_block_data);
struct buffer_aux {
- struct dm_block_validator *validator;
+ const struct dm_block_validator *validator;
int write_locked;
#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING
@@ -441,7 +441,7 @@ dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
static int dm_bm_validate_buffer(struct dm_block_manager *bm,
struct dm_buffer *buf,
struct buffer_aux *aux,
- struct dm_block_validator *v)
+ const struct dm_block_validator *v)
{
if (unlikely(!aux->validator)) {
int r;
@@ -467,7 +467,7 @@ static int dm_bm_validate_buffer(struct dm_block_manager *bm,
return 0;
}
int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
- struct dm_block_validator *v,
+ const struct dm_block_validator *v,
struct dm_block **result)
{
struct buffer_aux *aux;
@@ -500,7 +500,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
EXPORT_SYMBOL_GPL(dm_bm_read_lock);
int dm_bm_write_lock(struct dm_block_manager *bm,
- dm_block_t b, struct dm_block_validator *v,
+ dm_block_t b, const struct dm_block_validator *v,
struct dm_block **result)
{
struct buffer_aux *aux;
@@ -536,7 +536,7 @@ int dm_bm_write_lock(struct dm_block_manager *bm,
EXPORT_SYMBOL_GPL(dm_bm_write_lock);
int dm_bm_read_try_lock(struct dm_block_manager *bm,
- dm_block_t b, struct dm_block_validator *v,
+ dm_block_t b, const struct dm_block_validator *v,
struct dm_block **result)
{
struct buffer_aux *aux;
@@ -569,7 +569,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm,
}
int dm_bm_write_lock_zero(struct dm_block_manager *bm,
- dm_block_t b, struct dm_block_validator *v,
+ dm_block_t b, const struct dm_block_validator *v,
struct dm_block **result)
{
int r;
diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h
index f706d3de8d5a..b1998968594c 100644
--- a/drivers/md/persistent-data/dm-block-manager.h
+++ b/drivers/md/persistent-data/dm-block-manager.h
@@ -51,12 +51,14 @@ dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm);
*/
struct dm_block_validator {
const char *name;
- void (*prepare_for_write)(struct dm_block_validator *v, struct dm_block *b, size_t block_size);
+ void (*prepare_for_write)(const struct dm_block_validator *v,
+ struct dm_block *b, size_t block_size);
/*
* Return 0 if the checksum is valid or < 0 on error.
*/
- int (*check)(struct dm_block_validator *v, struct dm_block *b, size_t block_size);
+ int (*check)(const struct dm_block_validator *v,
+ struct dm_block *b, size_t block_size);
};
/*----------------------------------------------------------------*/
@@ -73,11 +75,11 @@ struct dm_block_validator {
* written back to the disk sometime after dm_bm_unlock is called.
*/
int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
- struct dm_block_validator *v,
+ const struct dm_block_validator *v,
struct dm_block **result);
int dm_bm_write_lock(struct dm_block_manager *bm, dm_block_t b,
- struct dm_block_validator *v,
+ const struct dm_block_validator *v,
struct dm_block **result);
/*
@@ -85,7 +87,7 @@ int dm_bm_write_lock(struct dm_block_manager *bm, dm_block_t b,
* available immediately.
*/
int dm_bm_read_try_lock(struct dm_block_manager *bm, dm_block_t b,
- struct dm_block_validator *v,
+ const struct dm_block_validator *v,
struct dm_block **result);
/*
@@ -93,7 +95,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm, dm_block_t b,
* overwrite the block completely. It saves a disk read.
*/
int dm_bm_write_lock_zero(struct dm_block_manager *bm, dm_block_t b,
- struct dm_block_validator *v,
+ const struct dm_block_validator *v,
struct dm_block **result);
void dm_bm_unlock(struct dm_block *b);
diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h
index 7ed2ce656fcc..acebd32858a7 100644
--- a/drivers/md/persistent-data/dm-btree-internal.h
+++ b/drivers/md/persistent-data/dm-btree-internal.h
@@ -138,7 +138,7 @@ static inline uint64_t value64(struct btree_node *n, uint32_t index)
*/
int lower_bound(struct btree_node *n, uint64_t key);
-extern struct dm_block_validator btree_node_validator;
+extern const struct dm_block_validator btree_node_validator;
/*
* Value type for upper levels of multi-level btrees.
diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c
index 7540383b7cf3..c46fc50c274e 100644
--- a/drivers/md/persistent-data/dm-btree-spine.c
+++ b/drivers/md/persistent-data/dm-btree-spine.c
@@ -16,7 +16,7 @@
#define BTREE_CSUM_XOR 121107
-static void node_prepare_for_write(struct dm_block_validator *v,
+static void node_prepare_for_write(const struct dm_block_validator *v,
struct dm_block *b,
size_t block_size)
{
@@ -29,7 +29,7 @@ static void node_prepare_for_write(struct dm_block_validator *v,
BTREE_CSUM_XOR));
}
-static int node_check(struct dm_block_validator *v,
+static int node_check(const struct dm_block_validator *v,
struct dm_block *b,
size_t block_size)
{
@@ -81,7 +81,7 @@ static int node_check(struct dm_block_validator *v,
return 0;
}
-struct dm_block_validator btree_node_validator = {
+const struct dm_block_validator btree_node_validator = {
.name = "btree_node",
.prepare_for_write = node_prepare_for_write,
.check = node_check
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
index 591d1a43d035..3a19124ee279 100644
--- a/drivers/md/persistent-data/dm-space-map-common.c
+++ b/drivers/md/persistent-data/dm-space-map-common.c
@@ -22,7 +22,7 @@
*/
#define INDEX_CSUM_XOR 160478
-static void index_prepare_for_write(struct dm_block_validator *v,
+static void index_prepare_for_write(const struct dm_block_validator *v,
struct dm_block *b,
size_t block_size)
{
@@ -34,7 +34,7 @@ static void index_prepare_for_write(struct dm_block_validator *v,
INDEX_CSUM_XOR));
}
-static int index_check(struct dm_block_validator *v,
+static int index_check(const struct dm_block_validator *v,
struct dm_block *b,
size_t block_size)
{
@@ -59,7 +59,7 @@ static int index_check(struct dm_block_validator *v,
return 0;
}
-static struct dm_block_validator index_validator = {
+static const struct dm_block_validator index_validator = {
.name = "index",
.prepare_for_write = index_prepare_for_write,
.check = index_check
@@ -72,7 +72,7 @@ static struct dm_block_validator index_validator = {
*/
#define BITMAP_CSUM_XOR 240779
-static void dm_bitmap_prepare_for_write(struct dm_block_validator *v,
+static void dm_bitmap_prepare_for_write(const struct dm_block_validator *v,
struct dm_block *b,
size_t block_size)
{
@@ -84,7 +84,7 @@ static void dm_bitmap_prepare_for_write(struct dm_block_validator *v,
BITMAP_CSUM_XOR));
}
-static int dm_bitmap_check(struct dm_block_validator *v,
+static int dm_bitmap_check(const struct dm_block_validator *v,
struct dm_block *b,
size_t block_size)
{
@@ -109,7 +109,7 @@ static int dm_bitmap_check(struct dm_block_validator *v,
return 0;
}
-static struct dm_block_validator dm_sm_bitmap_validator = {
+static const struct dm_block_validator dm_sm_bitmap_validator = {
.name = "sm_bitmap",
.prepare_for_write = dm_bitmap_prepare_for_write,
.check = dm_bitmap_check,
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index c88fa6266203..c7ba4e6cbbc7 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -237,7 +237,7 @@ int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root)
EXPORT_SYMBOL_GPL(dm_tm_commit);
int dm_tm_new_block(struct dm_transaction_manager *tm,
- struct dm_block_validator *v,
+ const struct dm_block_validator *v,
struct dm_block **result)
{
int r;
@@ -266,7 +266,7 @@ int dm_tm_new_block(struct dm_transaction_manager *tm,
}
static int __shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
- struct dm_block_validator *v,
+ const struct dm_block_validator *v,
struct dm_block **result)
{
int r;
@@ -306,7 +306,7 @@ static int __shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
}
int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
- struct dm_block_validator *v, struct dm_block **result,
+ const struct dm_block_validator *v, struct dm_block **result,
int *inc_children)
{
int r;
@@ -331,7 +331,7 @@ int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
EXPORT_SYMBOL_GPL(dm_tm_shadow_block);
int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b,
- struct dm_block_validator *v,
+ const struct dm_block_validator *v,
struct dm_block **blk)
{
if (tm->is_clone) {
diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h
index 01f7e650118d..61a8d10825ca 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.h
+++ b/drivers/md/persistent-data/dm-transaction-manager.h
@@ -64,7 +64,7 @@ int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *superblock)
* Zeroes the new block and returns with write lock held.
*/
int dm_tm_new_block(struct dm_transaction_manager *tm,
- struct dm_block_validator *v,
+ const struct dm_block_validator *v,
struct dm_block **result);
/*
@@ -84,7 +84,7 @@ int dm_tm_new_block(struct dm_transaction_manager *tm,
* it locked when you call this.
*/
int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
- struct dm_block_validator *v,
+ const struct dm_block_validator *v,
struct dm_block **result, int *inc_children);
/*
@@ -92,7 +92,7 @@ int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
* on it outstanding then it'll block.
*/
int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b,
- struct dm_block_validator *v,
+ const struct dm_block_validator *v,
struct dm_block **result);
void dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b);