Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/Kconfig                              3
-rw-r--r--  drivers/md/bcache/stats.c                       2
-rw-r--r--  drivers/md/dm-bufio.c                           4
-rw-r--r--  drivers/md/dm-cache-target.c                   96
-rw-r--r--  drivers/md/dm-crypt.c                          41
-rw-r--r--  drivers/md/dm-delay.c                          18
-rw-r--r--  drivers/md/dm-ebs-target.c                      7
-rw-r--r--  drivers/md/dm-integrity.c                      52
-rw-r--r--  drivers/md/dm-mpath.c                           2
-rw-r--r--  drivers/md/dm-raid1.c                           2
-rw-r--r--  drivers/md/dm-stripe.c                          2
-rw-r--r--  drivers/md/dm-table.c                           4
-rw-r--r--  drivers/md/dm-vdo/block-map.c                  13
-rw-r--r--  drivers/md/dm-vdo/constants.h                   3
-rw-r--r--  drivers/md/dm-vdo/dedupe.c                     22
-rw-r--r--  drivers/md/dm-vdo/encodings.c                  20
-rw-r--r--  drivers/md/dm-vdo/indexer/index-layout.c        5
-rw-r--r--  drivers/md/dm-vdo/indexer/index-session.c       6
-rw-r--r--  drivers/md/dm-vdo/indexer/indexer.h            53
-rw-r--r--  drivers/md/dm-vdo/io-submitter.c                6
-rw-r--r--  drivers/md/dm-vdo/io-submitter.h               18
-rw-r--r--  drivers/md/dm-vdo/packer.h                      2
-rw-r--r--  drivers/md/dm-vdo/priority-table.c              2
-rw-r--r--  drivers/md/dm-vdo/recovery-journal.h            6
-rw-r--r--  drivers/md/dm-vdo/slab-depot.c                193
-rw-r--r--  drivers/md/dm-vdo/slab-depot.h                 13
-rw-r--r--  drivers/md/dm-vdo/types.h                       3
-rw-r--r--  drivers/md/dm-vdo/vdo.c                        11
-rw-r--r--  drivers/md/dm-vdo/vio.c                        54
-rw-r--r--  drivers/md/dm-vdo/vio.h                        13
-rw-r--r--  drivers/md/dm-vdo/wait-queue.c                  2
-rw-r--r--  drivers/md/dm-verity-target.c                  62
-rw-r--r--  drivers/md/dm-writecache.c                      6
-rw-r--r--  drivers/md/dm.c                                 8
-rw-r--r--  drivers/md/md.c                                 4
-rw-r--r--  drivers/md/persistent-data/Kconfig              2
36 files changed, 505 insertions(+), 255 deletions(-)
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 0b1870a09e1f..ddb37f6670de 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -139,7 +139,7 @@ config MD_RAID456
tristate "RAID-4/RAID-5/RAID-6 mode"
depends on BLK_DEV_MD
select RAID6_PQ
- select LIBCRC32C
+ select CRC32
select ASYNC_MEMCPY
select ASYNC_XOR
select ASYNC_PQ
@@ -267,6 +267,7 @@ config DM_CRYPT
depends on BLK_DEV_DM
depends on (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n)
depends on (TRUSTED_KEYS || TRUSTED_KEYS=n)
+ select CRC32
select CRYPTO
select CRYPTO_CBC
select CRYPTO_ESSIV
diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c
index 68b02216033d..d39dec34b7a3 100644
--- a/drivers/md/bcache/stats.c
+++ b/drivers/md/bcache/stats.c
@@ -123,7 +123,7 @@ void bch_cache_accounting_destroy(struct cache_accounting *acc)
kobject_put(&acc->day.kobj);
atomic_set(&acc->closing, 1);
- if (del_timer_sync(&acc->timer))
+ if (timer_delete_sync(&acc->timer))
closure_return(&acc->cl);
}
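
[Editor's note] The del_timer_sync() to timer_delete_sync() change here (and in dm-integrity, dm-mpath, dm-raid1 and dm-vdo below) is a mechanical rename to the current timer API; semantics are unchanged, including the return value saying whether the timer was still pending. A minimal sketch of the modern names (the struct and callback are illustrative, not bcache's):

    #include <linux/timer.h>
    #include <linux/printk.h>

    struct my_stats {
        struct timer_list timer;
    };

    static void my_timer_fn(struct timer_list *t)
    {
        struct my_stats *s = from_timer(s, t, timer);

        /* ... periodic work ... */
        mod_timer(&s->timer, jiffies + HZ);    /* re-arm */
    }

    static void my_stats_init(struct my_stats *s)
    {
        timer_setup(&s->timer, my_timer_fn, 0);
        mod_timer(&s->timer, jiffies + HZ);
    }

    static void my_stats_destroy(struct my_stats *s)
    {
        /*
         * Like the old del_timer_sync(): waits for a running callback
         * to finish and returns true if the timer was still queued.
         * timer_delete() is the non-waiting replacement for del_timer().
         */
        if (timer_delete_sync(&s->timer))
            pr_debug("timer was still pending\n");
    }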
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index aab8240429b0..9c8ed65cd87e 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -2234,7 +2234,7 @@ int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t c
}
EXPORT_SYMBOL_GPL(dm_bufio_issue_discard);
-static bool forget_buffer(struct dm_bufio_client *c, sector_t block)
+static void forget_buffer(struct dm_bufio_client *c, sector_t block)
{
struct dm_buffer *b;
@@ -2249,8 +2249,6 @@ static bool forget_buffer(struct dm_bufio_client *c, sector_t block)
cache_put_and_wake(c, b);
}
}
-
- return b ? true : false;
}
/*
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 9cb797a561d6..a10d75a562db 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -406,6 +406,12 @@ struct cache {
mempool_t migration_pool;
struct bio_set bs;
+
+ /*
+ * Cache_size entries. Set bits indicate blocks mapped beyond the
+ * target length, which are marked for invalidation.
+ */
+ unsigned long *invalid_bitset;
};
struct per_bio_data {
@@ -1922,6 +1928,9 @@ static void __destroy(struct cache *cache)
if (cache->discard_bitset)
free_bitset(cache->discard_bitset);
+ if (cache->invalid_bitset)
+ free_bitset(cache->invalid_bitset);
+
if (cache->copier)
dm_kcopyd_client_destroy(cache->copier);
@@ -2510,6 +2519,13 @@ static int cache_create(struct cache_args *ca, struct cache **result)
}
clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
+ cache->invalid_bitset = alloc_bitset(from_cblock(cache->cache_size));
+ if (!cache->invalid_bitset) {
+ *error = "could not allocate bitset for invalid blocks";
+ goto bad;
+ }
+ clear_bitset(cache->invalid_bitset, from_cblock(cache->cache_size));
+
cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
if (IS_ERR(cache->copier)) {
*error = "could not create kcopyd client";
@@ -2808,6 +2824,24 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
return policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid);
}
+static int load_filtered_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
+ bool dirty, uint32_t hint, bool hint_valid)
+{
+ struct cache *cache = context;
+
+ if (from_oblock(oblock) >= from_oblock(cache->origin_blocks)) {
+ if (dirty) {
+ DMERR("%s: unable to shrink origin; cache block %u is dirty",
+ cache_device_name(cache), from_cblock(cblock));
+ return -EFBIG;
+ }
+ set_bit(from_cblock(cblock), cache->invalid_bitset);
+ return 0;
+ }
+
+ return load_mapping(context, oblock, cblock, dirty, hint, hint_valid);
+}
+
/*
* The discard block size in the on disk metadata is not
* necessarily the same as we're currently using. So we have to
@@ -2899,6 +2933,27 @@ static dm_cblock_t get_cache_dev_size(struct cache *cache)
return to_cblock(size);
}
+static bool can_resume(struct cache *cache)
+{
+ /*
+ * Disallow retrying the resume operation for devices that failed the
+ * first resume attempt, as the failure leaves the policy object partially
+ * initialized. Retrying could trigger BUG_ON when loading cache mappings
+ * into the incomplete policy object.
+ */
+ if (cache->sized && !cache->loaded_mappings) {
+ if (get_cache_mode(cache) != CM_WRITE)
+ DMERR("%s: unable to resume a failed-loaded cache, please check metadata.",
+ cache_device_name(cache));
+ else
+ DMERR("%s: unable to resume cache due to missing proper cache table reload",
+ cache_device_name(cache));
+ return false;
+ }
+
+ return true;
+}
+
static bool can_resize(struct cache *cache, dm_cblock_t new_size)
{
if (from_cblock(new_size) > from_cblock(cache->cache_size)) {
@@ -2941,12 +2996,33 @@ static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
return 0;
}
+static int truncate_oblocks(struct cache *cache)
+{
+ uint32_t nr_blocks = from_cblock(cache->cache_size);
+ uint32_t i;
+ int r;
+
+ for_each_set_bit(i, cache->invalid_bitset, nr_blocks) {
+ r = dm_cache_remove_mapping(cache->cmd, to_cblock(i));
+ if (r) {
+ DMERR_LIMIT("%s: invalidation failed; couldn't update on disk metadata",
+ cache_device_name(cache));
+ return r;
+ }
+ }
+
+ return 0;
+}
+
static int cache_preresume(struct dm_target *ti)
{
int r = 0;
struct cache *cache = ti->private;
dm_cblock_t csize = get_cache_dev_size(cache);
+ if (!can_resume(cache))
+ return -EINVAL;
+
/*
* Check to see if the cache has resized.
*/
@@ -2962,11 +3038,25 @@ static int cache_preresume(struct dm_target *ti)
}
if (!cache->loaded_mappings) {
+ /*
+ * The fast device could have been resized since the last
+ * failed preresume attempt. To be safe we start with a blank
+ * bitset for cache blocks.
+ */
+ clear_bitset(cache->invalid_bitset, from_cblock(cache->cache_size));
+
r = dm_cache_load_mappings(cache->cmd, cache->policy,
- load_mapping, cache);
+ load_filtered_mapping, cache);
if (r) {
DMERR("%s: could not load cache mappings", cache_device_name(cache));
- metadata_operation_failed(cache, "dm_cache_load_mappings", r);
+ if (r != -EFBIG)
+ metadata_operation_failed(cache, "dm_cache_load_mappings", r);
+ return r;
+ }
+
+ r = truncate_oblocks(cache);
+ if (r) {
+ metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
return r;
}
@@ -3426,7 +3516,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type cache_target = {
.name = "cache",
- .version = {2, 2, 0},
+ .version = {2, 3, 0},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,
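
[Editor's note] alloc_bitset(), clear_bitset() and free_bitset() above are dm-cache's local helpers, but the shrink-handling flow they support (flag out-of-range cache blocks while mappings load, then walk only the flagged ones) is the standard kernel bitmap pattern. A minimal sketch using the generic bitmap API; the function name and the block number are illustrative:

    #include <linux/bitmap.h>
    #include <linux/bitops.h>
    #include <linux/gfp.h>
    #include <linux/printk.h>

    static int demo_invalidate(unsigned long nr_cache_blocks)
    {
        unsigned long *invalid;
        unsigned long i;

        invalid = bitmap_zalloc(nr_cache_blocks, GFP_KERNEL); /* starts all-clear */
        if (!invalid)
            return -ENOMEM;

        /* Load phase: flag blocks whose origin block lies past the new end. */
        set_bit(42, invalid);

        /* Truncate phase: visit only flagged blocks, as truncate_oblocks() does. */
        for_each_set_bit(i, invalid, nr_cache_blocks)
            pr_debug("would remove mapping for cache block %lu\n", i);

        bitmap_free(invalid);
        return 0;
    }

Note the ordering in cache_preresume(): the bitset is re-cleared before every load attempt, since a previously failed preresume may have left stale bits behind.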
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 02a2919f4e5a..9dfdb63220d7 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -17,6 +17,7 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
+#include <linux/crc32.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/crypto.h>
@@ -125,7 +126,6 @@ struct iv_lmk_private {
#define TCW_WHITENING_SIZE 16
struct iv_tcw_private {
- struct crypto_shash *crc32_tfm;
u8 *iv_seed;
u8 *whitening;
};
@@ -607,10 +607,6 @@ static void crypt_iv_tcw_dtr(struct crypt_config *cc)
tcw->iv_seed = NULL;
kfree_sensitive(tcw->whitening);
tcw->whitening = NULL;
-
- if (tcw->crc32_tfm && !IS_ERR(tcw->crc32_tfm))
- crypto_free_shash(tcw->crc32_tfm);
- tcw->crc32_tfm = NULL;
}
static int crypt_iv_tcw_ctr(struct crypt_config *cc, struct dm_target *ti,
@@ -628,13 +624,6 @@ static int crypt_iv_tcw_ctr(struct crypt_config *cc, struct dm_target *ti,
return -EINVAL;
}
- tcw->crc32_tfm = crypto_alloc_shash("crc32", 0,
- CRYPTO_ALG_ALLOCATES_MEMORY);
- if (IS_ERR(tcw->crc32_tfm)) {
- ti->error = "Error initializing CRC32 in TCW";
- return PTR_ERR(tcw->crc32_tfm);
- }
-
tcw->iv_seed = kzalloc(cc->iv_size, GFP_KERNEL);
tcw->whitening = kzalloc(TCW_WHITENING_SIZE, GFP_KERNEL);
if (!tcw->iv_seed || !tcw->whitening) {
@@ -668,36 +657,28 @@ static int crypt_iv_tcw_wipe(struct crypt_config *cc)
return 0;
}
-static int crypt_iv_tcw_whitening(struct crypt_config *cc,
- struct dm_crypt_request *dmreq,
- u8 *data)
+static void crypt_iv_tcw_whitening(struct crypt_config *cc,
+ struct dm_crypt_request *dmreq, u8 *data)
{
struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw;
__le64 sector = cpu_to_le64(dmreq->iv_sector);
u8 buf[TCW_WHITENING_SIZE];
- SHASH_DESC_ON_STACK(desc, tcw->crc32_tfm);
- int i, r;
+ int i;
/* xor whitening with sector number */
crypto_xor_cpy(buf, tcw->whitening, (u8 *)&sector, 8);
crypto_xor_cpy(&buf[8], tcw->whitening + 8, (u8 *)&sector, 8);
/* calculate crc32 for every 32bit part and xor it */
- desc->tfm = tcw->crc32_tfm;
- for (i = 0; i < 4; i++) {
- r = crypto_shash_digest(desc, &buf[i * 4], 4, &buf[i * 4]);
- if (r)
- goto out;
- }
+ for (i = 0; i < 4; i++)
+ put_unaligned_le32(crc32(0, &buf[i * 4], 4), &buf[i * 4]);
crypto_xor(&buf[0], &buf[12], 4);
crypto_xor(&buf[4], &buf[8], 4);
/* apply whitening (8 bytes) to whole sector */
for (i = 0; i < ((1 << SECTOR_SHIFT) / 8); i++)
crypto_xor(data + i * 8, buf, 8);
-out:
memzero_explicit(buf, sizeof(buf));
- return r;
}
static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv,
@@ -707,13 +688,12 @@ static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv,
struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw;
__le64 sector = cpu_to_le64(dmreq->iv_sector);
u8 *src;
- int r = 0;
/* Remove whitening from ciphertext */
if (bio_data_dir(dmreq->ctx->bio_in) != WRITE) {
sg = crypt_get_sg_data(cc, dmreq->sg_in);
src = kmap_local_page(sg_page(sg));
- r = crypt_iv_tcw_whitening(cc, dmreq, src + sg->offset);
+ crypt_iv_tcw_whitening(cc, dmreq, src + sg->offset);
kunmap_local(src);
}
@@ -723,7 +703,7 @@ static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv,
crypto_xor_cpy(&iv[8], tcw->iv_seed + 8, (u8 *)&sector,
cc->iv_size - 8);
- return r;
+ return 0;
}
static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv,
@@ -731,7 +711,6 @@ static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv,
{
struct scatterlist *sg;
u8 *dst;
- int r;
if (bio_data_dir(dmreq->ctx->bio_in) != WRITE)
return 0;
@@ -739,10 +718,10 @@ static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv,
/* Apply whitening on ciphertext */
sg = crypt_get_sg_data(cc, dmreq->sg_out);
dst = kmap_local_page(sg_page(sg));
- r = crypt_iv_tcw_whitening(cc, dmreq, dst + sg->offset);
+ crypt_iv_tcw_whitening(cc, dmreq, dst + sg->offset);
kunmap_local(dst);
- return r;
+ return 0;
}
static int crypt_iv_random_gen(struct crypt_config *cc, u8 *iv,
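
[Editor's note] The shash-based CRC32 can go because the digest of the kernel's "crc32" shash (with its default zero key) over a buffer is exactly the library crc32() from <linux/crc32.h>, stored little-endian, which is also why Kconfig now selects CRC32. A hedged sketch of the replacement pattern (the helper name is made up):

    #include <linux/crc32.h>
    #include <linux/unaligned.h>    /* <asm/unaligned.h> in older trees */

    /* Illustrative: CRC each 4-byte group in place, as the whitening now does. */
    static void crc32_fold(u8 buf[16])
    {
        int i;

        for (i = 0; i < 4; i++)
            put_unaligned_le32(crc32(0, &buf[i * 4], 4), &buf[i * 4]);
    }

Because crc32() cannot fail, crypt_iv_tcw_whitening() becomes void and its callers drop their error plumbing, which accounts for most of the hunks above.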
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 08f6387620c1..d4cf0ac2a7aa 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -369,6 +369,21 @@ static int delay_map(struct dm_target *ti, struct bio *bio)
return delay_bio(dc, c, bio);
}
+#ifdef CONFIG_BLK_DEV_ZONED
+static int delay_report_zones(struct dm_target *ti,
+ struct dm_report_zones_args *args, unsigned int nr_zones)
+{
+ struct delay_c *dc = ti->private;
+ struct delay_class *c = &dc->read;
+
+ return dm_report_zones(c->dev->bdev, c->start,
+ c->start + dm_target_offset(ti, args->next_sector),
+ args, nr_zones);
+}
+#else
+#define delay_report_zones NULL
+#endif
+
#define DMEMIT_DELAY_CLASS(c) \
DMEMIT("%s %llu %u", (c)->dev->name, (unsigned long long)(c)->start, (c)->delay)
@@ -424,11 +439,12 @@ out:
static struct target_type delay_target = {
.name = "delay",
.version = {1, 4, 0},
- .features = DM_TARGET_PASSES_INTEGRITY,
+ .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM,
.module = THIS_MODULE,
.ctr = delay_ctr,
.dtr = delay_dtr,
.map = delay_map,
+ .report_zones = delay_report_zones,
.presuspend = delay_presuspend,
.resume = delay_resume,
.status = delay_status,
diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c
index 18ae45dcbfb2..b19b0142a690 100644
--- a/drivers/md/dm-ebs-target.c
+++ b/drivers/md/dm-ebs-target.c
@@ -390,6 +390,12 @@ static int ebs_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
+static void ebs_postsuspend(struct dm_target *ti)
+{
+ struct ebs_c *ec = ti->private;
+ dm_bufio_client_reset(ec->bufio);
+}
+
static void ebs_status(struct dm_target *ti, status_type_t type,
unsigned int status_flags, char *result, unsigned int maxlen)
{
@@ -447,6 +453,7 @@ static struct target_type ebs_target = {
.ctr = ebs_ctr,
.dtr = ebs_dtr,
.map = ebs_map,
+ .postsuspend = ebs_postsuspend,
.status = ebs_status,
.io_hints = ebs_io_hints,
.prepare_ioctl = ebs_prepare_ioctl,
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index c8c1a00e7d80..2a283feb3319 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -21,6 +21,7 @@
#include <linux/reboot.h>
#include <crypto/hash.h>
#include <crypto/skcipher.h>
+#include <crypto/utils.h>
#include <linux/async_tx.h>
#include <linux/dm-bufio.h>
@@ -516,7 +517,7 @@ static int sb_mac(struct dm_integrity_c *ic, bool wr)
dm_integrity_io_error(ic, "crypto_shash_digest", r);
return r;
}
- if (memcmp(mac, actual_mac, mac_size)) {
+ if (crypto_memneq(mac, actual_mac, mac_size)) {
dm_integrity_io_error(ic, "superblock mac", -EILSEQ);
dm_audit_log_target(DM_MSG_PREFIX, "mac-superblock", ic->ti, 0);
return -EILSEQ;
@@ -859,7 +860,7 @@ static void rw_section_mac(struct dm_integrity_c *ic, unsigned int section, bool
if (likely(wr))
memcpy(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR);
else {
- if (memcmp(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) {
+ if (crypto_memneq(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) {
dm_integrity_io_error(ic, "journal mac", -EILSEQ);
dm_audit_log_target(DM_MSG_PREFIX, "mac-journal", ic->ti, 0);
}
@@ -1401,10 +1402,9 @@ static bool find_newer_committed_node(struct dm_integrity_c *ic, struct journal_
static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block,
unsigned int *metadata_offset, unsigned int total_size, int op)
{
-#define MAY_BE_FILLER 1
-#define MAY_BE_HASH 2
unsigned int hash_offset = 0;
- unsigned int may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0);
+ unsigned char mismatch_hash = 0;
+ unsigned char mismatch_filler = !ic->discard;
do {
unsigned char *data, *dp;
@@ -1425,7 +1425,7 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
if (op == TAG_READ) {
memcpy(tag, dp, to_copy);
} else if (op == TAG_WRITE) {
- if (memcmp(dp, tag, to_copy)) {
+ if (crypto_memneq(dp, tag, to_copy)) {
memcpy(dp, tag, to_copy);
dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy);
}
@@ -1433,29 +1433,30 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
/* e.g.: op == TAG_CMP */
if (likely(is_power_of_2(ic->tag_size))) {
- if (unlikely(memcmp(dp, tag, to_copy)))
- if (unlikely(!ic->discard) ||
- unlikely(memchr_inv(dp, DISCARD_FILLER, to_copy) != NULL)) {
- goto thorough_test;
- }
+ if (unlikely(crypto_memneq(dp, tag, to_copy)))
+ goto thorough_test;
} else {
unsigned int i, ts;
thorough_test:
ts = total_size;
for (i = 0; i < to_copy; i++, ts--) {
- if (unlikely(dp[i] != tag[i]))
- may_be &= ~MAY_BE_HASH;
- if (likely(dp[i] != DISCARD_FILLER))
- may_be &= ~MAY_BE_FILLER;
+ /*
+ * Warning: the control flow must not be
+ * dependent on match/mismatch of
+ * individual bytes.
+ */
+ mismatch_hash |= dp[i] ^ tag[i];
+ mismatch_filler |= dp[i] ^ DISCARD_FILLER;
hash_offset++;
if (unlikely(hash_offset == ic->tag_size)) {
- if (unlikely(!may_be)) {
+ if (unlikely(mismatch_hash) && unlikely(mismatch_filler)) {
dm_bufio_release(b);
return ts;
}
hash_offset = 0;
- may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0);
+ mismatch_hash = 0;
+ mismatch_filler = !ic->discard;
}
}
}
@@ -1476,8 +1477,6 @@ thorough_test:
} while (unlikely(total_size));
return 0;
-#undef MAY_BE_FILLER
-#undef MAY_BE_HASH
}
struct flush_request {
@@ -2076,7 +2075,7 @@ retry_kmap:
char checksums_onstack[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack);
- if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) {
+ if (unlikely(crypto_memneq(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) {
DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx",
logical_sector);
dm_audit_log_bio(DM_MSG_PREFIX, "journal-checksum",
@@ -2595,7 +2594,7 @@ static void dm_integrity_inline_recheck(struct work_struct *w)
bio_put(outgoing_bio);
integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, outgoing_data, digest);
- if (unlikely(memcmp(digest, dio->integrity_payload, min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) {
+ if (unlikely(crypto_memneq(digest, dio->integrity_payload, min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) {
DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
ic->dev->bdev, dio->bio_details.bi_iter.bi_sector);
atomic64_inc(&ic->number_of_mismatches);
@@ -2634,7 +2633,7 @@ static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status
char *mem = bvec_kmap_local(&bv);
//memset(mem, 0xff, ic->sectors_per_block << SECTOR_SHIFT);
integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, mem, digest);
- if (unlikely(memcmp(digest, dio->integrity_payload + pos,
+ if (unlikely(crypto_memneq(digest, dio->integrity_payload + pos,
min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) {
kunmap_local(mem);
dm_integrity_free_payload(dio);
@@ -2708,7 +2707,7 @@ static void integrity_commit(struct work_struct *w)
unsigned int i, j, n;
struct bio *flushes;
- del_timer(&ic->autocommit_timer);
+ timer_delete(&ic->autocommit_timer);
if (ic->mode == 'I')
return;
@@ -2911,7 +2910,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned int write_start
integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block),
(char *)access_journal_data(ic, i, l), test_tag);
- if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) {
+ if (unlikely(crypto_memneq(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) {
dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ);
dm_audit_log_target(DM_MSG_PREFIX, "integrity-replay-journal", ic->ti, 0);
}
@@ -3607,7 +3606,7 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
WARN_ON(unregister_reboot_notifier(&ic->reboot_notifier));
- del_timer_sync(&ic->autocommit_timer);
+ timer_delete_sync(&ic->autocommit_timer);
if (ic->recalc_wq)
drain_workqueue(ic->recalc_wq);
@@ -5072,16 +5071,19 @@ try_smaller_buffer:
ic->recalc_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
if (!ic->recalc_bitmap) {
+ ti->error = "Could not allocate memory for bitmap";
r = -ENOMEM;
goto bad;
}
ic->may_write_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
if (!ic->may_write_bitmap) {
+ ti->error = "Could not allocate memory for bitmap";
r = -ENOMEM;
goto bad;
}
ic->bbs = kvmalloc_array(ic->n_bitmap_blocks, sizeof(struct bitmap_block_status), GFP_KERNEL);
if (!ic->bbs) {
+ ti->error = "Could not allocate memory for bitmap";
r = -ENOMEM;
goto bad;
}
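
[Editor's note] Every memcmp() to crypto_memneq() switch above has the same rationale: memcmp() may return at the first differing byte, so its timing can leak how much of a MAC or checksum matched, whereas crypto_memneq() (from <crypto/utils.h>) reads every byte and reveals only equal/not-equal. The rewritten TAG_CMP loop applies the same idea by accumulating XOR differences instead of branching per byte. A hedged sketch of both forms; the helpers are illustrative and the 0xf6 mirrors dm-integrity's DISCARD_FILLER constant:

    #include <crypto/utils.h>
    #include <linux/types.h>

    /* Constant-time tag check. */
    static bool tag_matches(const u8 *computed, const u8 *stored, size_t len)
    {
        return !crypto_memneq(computed, stored, len);    /* 0 iff identical */
    }

    /* Two comparisons in one pass with data-independent control flow. */
    static void check_hash_and_filler(const u8 *dp, const u8 *tag, size_t len,
                                      bool *is_hash, bool *is_filler)
    {
        u8 mismatch_hash = 0, mismatch_filler = 0;
        size_t i;

        for (i = 0; i < len; i++) {
            mismatch_hash |= dp[i] ^ tag[i];
            mismatch_filler |= dp[i] ^ 0xf6;
        }
        *is_hash = !mismatch_hash;
        *is_filler = !mismatch_filler;
    }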
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 637977acc3dc..6c98f4ae5ea9 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -815,7 +815,7 @@ static void enable_nopath_timeout(struct multipath *m)
static void disable_nopath_timeout(struct multipath *m)
{
- del_timer_sync(&m->nopath_timer);
+ timer_delete_sync(&m->nopath_timer);
}
/*
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 8c6f1f7e6456..9e615b4f1f5e 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1182,7 +1182,7 @@ static void mirror_dtr(struct dm_target *ti)
{
struct mirror_set *ms = ti->private;
- del_timer_sync(&ms->timer);
+ timer_delete_sync(&ms->timer);
flush_workqueue(ms->kmirrord_wq);
flush_work(&ms->trigger_event);
dm_kcopyd_client_destroy(ms->kcopyd_client);
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 3786ac67cefe..a1b7535c508a 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -467,7 +467,7 @@ static struct target_type stripe_target = {
.name = "striped",
.version = {1, 7, 0},
.features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT |
- DM_TARGET_ATOMIC_WRITES,
+ DM_TARGET_ATOMIC_WRITES | DM_TARGET_PASSES_CRYPTO,
.module = THIS_MODULE,
.ctr = stripe_ctr,
.dtr = stripe_dtr,
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 453803f1edf5..35100a435c88 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -697,6 +697,10 @@ int dm_table_add_target(struct dm_table *t, const char *type,
DMERR("%s: zero-length target", dm_device_name(t->md));
return -EINVAL;
}
+ if (start + len < start || start + len > LLONG_MAX >> SECTOR_SHIFT) {
+ DMERR("%s: too large device", dm_device_name(t->md));
+ return -EINVAL;
+ }
ti->type = dm_get_target_type(type);
if (!ti->type) {
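
[Editor's note] The new dm-table check rejects a target whose sector range wraps, or whose end, once shifted into a byte offset, would not fit in the signed 64-bit loff_t the block layer uses, hence LLONG_MAX >> SECTOR_SHIFT as the bound. The same guard in isolation (illustrative helper name):

    #include <linux/blkdev.h>    /* sector_t, SECTOR_SHIFT */
    #include <linux/limits.h>

    static bool sector_range_ok(sector_t start, sector_t len)
    {
        if (start + len < start)    /* wrapped around */
            return false;
        /* end << SECTOR_SHIFT must stay representable as a loff_t */
        if (start + len > (sector_t)(LLONG_MAX >> SECTOR_SHIFT))
            return false;
        return true;
    }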
diff --git a/drivers/md/dm-vdo/block-map.c b/drivers/md/dm-vdo/block-map.c
index 89cb7942ec5c..baf683cabb1b 100644
--- a/drivers/md/dm-vdo/block-map.c
+++ b/drivers/md/dm-vdo/block-map.c
@@ -451,7 +451,7 @@ static struct page_info * __must_check find_page(struct vdo_page_cache *cache,
* select_lru_page() - Determine which page is least recently used.
*
* Picks the least recently used from among the non-busy entries at the front of each of the lru
- * ring. Since whenever we mark a page busy we also put it to the end of the ring it is unlikely
+ * list. Since whenever we mark a page busy we also put it to the end of the list it is unlikely
* that the entries at the front are busy unless the queue is very short, but not impossible.
*
* Return: A pointer to the info structure for a relevant page, or NULL if no such page can be
@@ -1544,7 +1544,7 @@ static void write_page_if_not_dirtied(struct vdo_waiter *waiter, void *context)
static void return_to_pool(struct block_map_zone *zone, struct pooled_vio *vio)
{
- return_vio_to_pool(zone->vio_pool, vio);
+ return_vio_to_pool(vio);
check_for_drain_complete(zone);
}
@@ -1837,7 +1837,7 @@ static void finish_block_map_page_load(struct vdo_completion *completion)
if (!vdo_copy_valid_page(vio->data, nonce, pbn, page))
vdo_format_block_map_page(page, nonce, pbn, false);
- return_vio_to_pool(zone->vio_pool, pooled);
+ return_vio_to_pool(pooled);
/* Release our claim to the load and wake any waiters */
release_page_lock(data_vio, "load");
@@ -1851,10 +1851,9 @@ static void handle_io_error(struct vdo_completion *completion)
struct vio *vio = as_vio(completion);
struct pooled_vio *pooled = container_of(vio, struct pooled_vio, vio);
struct data_vio *data_vio = completion->parent;
- struct block_map_zone *zone = pooled->context;
vio_record_metadata_io_error(vio);
- return_vio_to_pool(zone->vio_pool, pooled);
+ return_vio_to_pool(pooled);
abort_load(data_vio, result);
}
@@ -2499,7 +2498,7 @@ static void finish_cursor(struct cursor *cursor)
struct cursors *cursors = cursor->parent;
struct vdo_completion *completion = cursors->completion;
- return_vio_to_pool(cursors->pool, vdo_forget(cursor->vio));
+ return_vio_to_pool(vdo_forget(cursor->vio));
if (--cursors->active_roots > 0)
return;
@@ -2746,7 +2745,7 @@ static int __must_check initialize_block_map_zone(struct block_map *map,
if (result != VDO_SUCCESS)
return result;
- result = make_vio_pool(vdo, BLOCK_MAP_VIO_POOL_SIZE,
+ result = make_vio_pool(vdo, BLOCK_MAP_VIO_POOL_SIZE, 1,
zone->thread_id, VIO_TYPE_BLOCK_MAP_INTERIOR,
VIO_PRIORITY_METADATA, zone, &zone->vio_pool);
if (result != VDO_SUCCESS)
diff --git a/drivers/md/dm-vdo/constants.h b/drivers/md/dm-vdo/constants.h
index a8c4d6e24b38..2a8b03779f87 100644
--- a/drivers/md/dm-vdo/constants.h
+++ b/drivers/md/dm-vdo/constants.h
@@ -44,9 +44,6 @@ enum {
/* The default size of each slab journal, in blocks */
DEFAULT_VDO_SLAB_JOURNAL_SIZE = 224,
- /* Unit test minimum */
- MINIMUM_VDO_SLAB_JOURNAL_BLOCKS = 2,
-
/*
* The initial size of lbn_operations and pbn_operations, which is based upon the expected
* maximum number of outstanding VIOs. This value was chosen to make it highly unlikely
diff --git a/drivers/md/dm-vdo/dedupe.c b/drivers/md/dm-vdo/dedupe.c
index 3f3d29af1be4..3c58b941e067 100644
--- a/drivers/md/dm-vdo/dedupe.c
+++ b/drivers/md/dm-vdo/dedupe.c
@@ -226,7 +226,7 @@ struct hash_lock {
* A list containing the data VIOs sharing this lock, all having the same record name and
* data block contents, linked by their hash_lock_node fields.
*/
- struct list_head duplicate_ring;
+ struct list_head duplicate_vios;
/* The number of data_vios sharing this lock instance */
data_vio_count_t reference_count;
@@ -343,7 +343,7 @@ static void return_hash_lock_to_pool(struct hash_zone *zone, struct hash_lock *l
{
memset(lock, 0, sizeof(*lock));
INIT_LIST_HEAD(&lock->pool_node);
- INIT_LIST_HEAD(&lock->duplicate_ring);
+ INIT_LIST_HEAD(&lock->duplicate_vios);
vdo_waitq_init(&lock->waiters);
list_add_tail(&lock->pool_node, &zone->lock_pool);
}
@@ -441,7 +441,7 @@ static void set_hash_lock(struct data_vio *data_vio, struct hash_lock *new_lock)
VDO_ASSERT_LOG_ONLY(data_vio->hash_zone != NULL,
"must have a hash zone when holding a hash lock");
VDO_ASSERT_LOG_ONLY(!list_empty(&data_vio->hash_lock_entry),
- "must be on a hash lock ring when holding a hash lock");
+ "must be on a hash lock list when holding a hash lock");
VDO_ASSERT_LOG_ONLY(old_lock->reference_count > 0,
"hash lock reference must be counted");
@@ -464,10 +464,10 @@ static void set_hash_lock(struct data_vio *data_vio, struct hash_lock *new_lock)
if (new_lock != NULL) {
/*
- * Keep all data_vios sharing the lock on a ring since they can complete in any
+ * Keep all data_vios sharing the lock on a list since they can complete in any
* order and we'll always need a pointer to one to compare data.
*/
- list_move_tail(&data_vio->hash_lock_entry, &new_lock->duplicate_ring);
+ list_move_tail(&data_vio->hash_lock_entry, &new_lock->duplicate_vios);
new_lock->reference_count += 1;
if (new_lock->max_references < new_lock->reference_count)
new_lock->max_references = new_lock->reference_count;
@@ -1789,10 +1789,10 @@ static bool is_hash_collision(struct hash_lock *lock, struct data_vio *candidate
struct hash_zone *zone;
bool collides;
- if (list_empty(&lock->duplicate_ring))
+ if (list_empty(&lock->duplicate_vios))
return false;
- lock_holder = list_first_entry(&lock->duplicate_ring, struct data_vio,
+ lock_holder = list_first_entry(&lock->duplicate_vios, struct data_vio,
hash_lock_entry);
zone = candidate->hash_zone;
collides = !blocks_equal(lock_holder->vio.data, candidate->vio.data);
@@ -1815,7 +1815,7 @@ static inline int assert_hash_lock_preconditions(const struct data_vio *data_vio
return result;
result = VDO_ASSERT(list_empty(&data_vio->hash_lock_entry),
- "must not already be a member of a hash lock ring");
+ "must not already be a member of a hash lock list");
if (result != VDO_SUCCESS)
return result;
@@ -1942,8 +1942,8 @@ void vdo_release_hash_lock(struct data_vio *data_vio)
"returned hash lock must not be in use with state %s",
get_hash_lock_state_name(lock->state));
VDO_ASSERT_LOG_ONLY(list_empty(&lock->pool_node),
- "hash lock returned to zone must not be in a pool ring");
- VDO_ASSERT_LOG_ONLY(list_empty(&lock->duplicate_ring),
+ "hash lock returned to zone must not be in a pool list");
+ VDO_ASSERT_LOG_ONLY(list_empty(&lock->duplicate_vios),
"hash lock returned to zone must not reference DataVIOs");
return_hash_lock_to_pool(zone, lock);
@@ -2261,7 +2261,7 @@ static void check_for_drain_complete(struct hash_zone *zone)
if ((atomic_read(&zone->timer_state) == DEDUPE_QUERY_TIMER_IDLE) ||
change_timer_state(zone, DEDUPE_QUERY_TIMER_RUNNING,
DEDUPE_QUERY_TIMER_IDLE)) {
- del_timer_sync(&zone->timer);
+ timer_delete_sync(&zone->timer);
} else {
/*
* There is an in flight time-out, which must get processed before we can continue.
diff --git a/drivers/md/dm-vdo/encodings.c b/drivers/md/dm-vdo/encodings.c
index 100e92f8f866..b7cc0f41caca 100644
--- a/drivers/md/dm-vdo/encodings.c
+++ b/drivers/md/dm-vdo/encodings.c
@@ -711,24 +711,11 @@ int vdo_configure_slab(block_count_t slab_size, block_count_t slab_journal_block
ref_blocks = vdo_get_saved_reference_count_size(slab_size - slab_journal_blocks);
meta_blocks = (ref_blocks + slab_journal_blocks);
- /* Make sure test code hasn't configured slabs to be too small. */
+ /* Make sure configured slabs are not too small. */
if (meta_blocks >= slab_size)
return VDO_BAD_CONFIGURATION;
- /*
- * If the slab size is very small, assume this must be a unit test and override the number
- * of data blocks to be a power of two (wasting blocks in the slab). Many tests need their
- * data_blocks fields to be the exact capacity of the configured volume, and that used to
- * fall out since they use a power of two for the number of data blocks, the slab size was
- * a power of two, and every block in a slab was a data block.
- *
- * TODO: Try to figure out some way of structuring testParameters and unit tests so this
- * hack isn't needed without having to edit several unit tests every time the metadata size
- * changes by one block.
- */
data_blocks = slab_size - meta_blocks;
- if ((slab_size < 1024) && !is_power_of_2(data_blocks))
- data_blocks = ((block_count_t) 1 << ilog2(data_blocks));
/*
* Configure the slab journal thresholds. The flush threshold is 168 of 224 blocks in
@@ -1221,11 +1208,6 @@ int vdo_validate_config(const struct vdo_config *config,
if (result != VDO_SUCCESS)
return result;
- result = VDO_ASSERT(config->slab_journal_blocks >= MINIMUM_VDO_SLAB_JOURNAL_BLOCKS,
- "slab journal size meets minimum size");
- if (result != VDO_SUCCESS)
- return result;
-
result = VDO_ASSERT(config->slab_journal_blocks <= config->slab_size,
"slab journal size is within expected bound");
if (result != VDO_SUCCESS)
diff --git a/drivers/md/dm-vdo/indexer/index-layout.c b/drivers/md/dm-vdo/indexer/index-layout.c
index af8fab83b0f3..61edf2b72427 100644
--- a/drivers/md/dm-vdo/indexer/index-layout.c
+++ b/drivers/md/dm-vdo/indexer/index-layout.c
@@ -54,7 +54,6 @@
* Each save also has a unique nonce.
*/
-#define MAGIC_SIZE 32
#define NONCE_INFO_SIZE 32
#define MAX_SAVES 2
@@ -98,9 +97,11 @@ enum region_type {
#define SUPER_VERSION_CURRENT 3
#define SUPER_VERSION_MAXIMUM 7
-static const u8 LAYOUT_MAGIC[MAGIC_SIZE] = "*ALBIREO*SINGLE*FILE*LAYOUT*001*";
+static const u8 LAYOUT_MAGIC[] = "*ALBIREO*SINGLE*FILE*LAYOUT*001*";
static const u64 REGION_MAGIC = 0x416c6252676e3031; /* 'AlbRgn01' */
+#define MAGIC_SIZE (sizeof(LAYOUT_MAGIC) - 1)
+
struct region_header {
u64 magic;
u64 region_blocks;
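
[Editor's note] Deriving MAGIC_SIZE from the literal is the usual sizeof-string trick: sizeof counts the terminating NUL of a string literal, so sizeof(LAYOUT_MAGIC) - 1 is the 32 visible characters, and the constant can no longer drift out of sync with the text the way an independent `#define MAGIC_SIZE 32` next to a `u8 [MAGIC_SIZE]` array could. A brief illustration with DEMO_ names:

    #include <linux/string.h>
    #include <linux/types.h>

    static const u8 DEMO_MAGIC[] = "*ALBIREO*SINGLE*FILE*LAYOUT*001*";
    #define DEMO_MAGIC_SIZE (sizeof(DEMO_MAGIC) - 1)    /* 32, minus the NUL */

    static bool looks_like_layout(const u8 *buf)
    {
        return memcmp(buf, DEMO_MAGIC, DEMO_MAGIC_SIZE) == 0;
    }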
diff --git a/drivers/md/dm-vdo/indexer/index-session.c b/drivers/md/dm-vdo/indexer/index-session.c
index aee0914d604a..aa575a24e0b2 100644
--- a/drivers/md/dm-vdo/indexer/index-session.c
+++ b/drivers/md/dm-vdo/indexer/index-session.c
@@ -100,7 +100,6 @@ static int get_index_session(struct uds_index_session *index_session)
int uds_launch_request(struct uds_request *request)
{
- size_t internal_size;
int result;
if (request->callback == NULL) {
@@ -121,10 +120,7 @@ int uds_launch_request(struct uds_request *request)
}
/* Reset all internal fields before processing. */
- internal_size =
- sizeof(struct uds_request) - offsetof(struct uds_request, zone_number);
- // FIXME should be using struct_group for this instead
- memset((char *) request + sizeof(*request) - internal_size, 0, internal_size);
+ memset(&request->internal, 0, sizeof(request->internal));
result = get_index_session(request->session);
if (result != UDS_SUCCESS)
diff --git a/drivers/md/dm-vdo/indexer/indexer.h b/drivers/md/dm-vdo/indexer/indexer.h
index 183a94eb7e92..7c1fc4577f5b 100644
--- a/drivers/md/dm-vdo/indexer/indexer.h
+++ b/drivers/md/dm-vdo/indexer/indexer.h
@@ -8,6 +8,7 @@
#include <linux/mutex.h>
#include <linux/sched.h>
+#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/wait.h>
@@ -73,7 +74,7 @@ enum uds_request_type {
/* Remove any mapping for a name. */
UDS_DELETE,
-};
+} __packed;
enum uds_open_index_type {
/* Create a new index. */
@@ -226,7 +227,7 @@ struct uds_zone_message {
enum uds_zone_message_type type;
/* The virtual chapter number to which the message applies */
u64 virtual_chapter;
-};
+} __packed;
struct uds_index_session;
struct uds_index;
@@ -253,34 +254,32 @@ struct uds_request {
/* The existing data associated with the request name, if any */
struct uds_record_data old_metadata;
- /* Either UDS_SUCCESS or an error code for the request */
- int status;
/* True if the record name had an existing entry in the index */
bool found;
+ /* Either UDS_SUCCESS or an error code for the request */
+ int status;
- /*
- * The remaining fields are used internally and should not be altered by clients. The index
- * relies on zone_number being the first field in this section.
- */
-
- /* The number of the zone which will process this request*/
- unsigned int zone_number;
- /* A link for adding a request to a lock-free queue */
- struct funnel_queue_entry queue_link;
- /* A link for adding a request to a standard linked list */
- struct uds_request *next_request;
- /* A pointer to the index processing this request */
- struct uds_index *index;
- /* Control message for coordinating between zones */
- struct uds_zone_message zone_message;
- /* If true, process request immediately by waking the worker thread */
- bool unbatched;
- /* If true, continue this request before processing newer requests */
- bool requeued;
- /* The virtual chapter containing the record name, if known */
- u64 virtual_chapter;
- /* The region of the index containing the record name */
- enum uds_index_region location;
+ /* The remaining fields are used internally and should not be altered by clients. */
+ struct_group(internal,
+ /* The virtual chapter containing the record name, if known */
+ u64 virtual_chapter;
+ /* The region of the index containing the record name */
+ enum uds_index_region location;
+ /* If true, process request immediately by waking the worker thread */
+ bool unbatched;
+ /* If true, continue this request before processing newer requests */
+ bool requeued;
+ /* Control message for coordinating between zones */
+ struct uds_zone_message zone_message;
+ /* The number of the zone which will process this request*/
+ unsigned int zone_number;
+ /* A link for adding a request to a lock-free queue */
+ struct funnel_queue_entry queue_link;
+ /* A link for adding a request to a standard linked list */
+ struct uds_request *next_request;
+ /* A pointer to the index processing this request */
+ struct uds_index *index;
+ );
};
/* A session is required for most index operations. */
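
[Editor's note] struct_group() from <linux/stddef.h> wraps consecutive members in an anonymous union holding both the bare members and a named struct that mirrors them, so the run can be addressed as one sized object. That is what lets uds_launch_request() replace its offsetof() arithmetic (and the FIXME) with a plain memset of request->internal. A minimal sketch with demo_ names:

    #include <linux/stddef.h>
    #include <linux/string.h>
    #include <linux/types.h>

    struct demo_request {
        int status;                 /* caller-visible, preserved */
        struct_group(internal,      /* reset on every launch */
            u64 virtual_chapter;
            unsigned int zone_number;
            bool requeued;
        );
    };

    static void demo_reset(struct demo_request *r)
    {
        /* One sized object; no offsetof()/sizeof() subtraction needed. */
        memset(&r->internal, 0, sizeof(r->internal));
    }

Members stay individually accessible (r->requeued still works) thanks to the anonymous-union layout; the member reordering and __packed enums in the same hunk just shrink padding.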
diff --git a/drivers/md/dm-vdo/io-submitter.c b/drivers/md/dm-vdo/io-submitter.c
index 421e5436c32c..11d47770b54d 100644
--- a/drivers/md/dm-vdo/io-submitter.c
+++ b/drivers/md/dm-vdo/io-submitter.c
@@ -327,6 +327,7 @@ void vdo_submit_data_vio(struct data_vio *data_vio)
* @error_handler: the handler for submission or I/O errors (may be NULL)
* @operation: the type of I/O to perform
* @data: the buffer to read or write (may be NULL)
+ * @size: the I/O amount in bytes
*
* The vio is enqueued on a vdo bio queue so that bio submission (which may block) does not block
* other vdo threads.
@@ -338,7 +339,7 @@ void vdo_submit_data_vio(struct data_vio *data_vio)
*/
void __submit_metadata_vio(struct vio *vio, physical_block_number_t physical,
bio_end_io_t callback, vdo_action_fn error_handler,
- blk_opf_t operation, char *data)
+ blk_opf_t operation, char *data, int size)
{
int result;
struct vdo_completion *completion = &vio->completion;
@@ -349,7 +350,8 @@ void __submit_metadata_vio(struct vio *vio, physical_block_number_t physical,
vdo_reset_completion(completion);
completion->error_handler = error_handler;
- result = vio_reset_bio(vio, data, callback, operation | REQ_META, physical);
+ result = vio_reset_bio_with_size(vio, data, size, callback, operation | REQ_META,
+ physical);
if (result != VDO_SUCCESS) {
continue_vio(vio, result);
return;
diff --git a/drivers/md/dm-vdo/io-submitter.h b/drivers/md/dm-vdo/io-submitter.h
index 80748699496f..3088f11055fd 100644
--- a/drivers/md/dm-vdo/io-submitter.h
+++ b/drivers/md/dm-vdo/io-submitter.h
@@ -8,6 +8,7 @@
#include <linux/bio.h>
+#include "constants.h"
#include "types.h"
struct io_submitter;
@@ -26,14 +27,25 @@ void vdo_submit_data_vio(struct data_vio *data_vio);
void __submit_metadata_vio(struct vio *vio, physical_block_number_t physical,
bio_end_io_t callback, vdo_action_fn error_handler,
- blk_opf_t operation, char *data);
+ blk_opf_t operation, char *data, int size);
static inline void vdo_submit_metadata_vio(struct vio *vio, physical_block_number_t physical,
bio_end_io_t callback, vdo_action_fn error_handler,
blk_opf_t operation)
{
__submit_metadata_vio(vio, physical, callback, error_handler,
- operation, vio->data);
+ operation, vio->data, vio->block_count * VDO_BLOCK_SIZE);
+}
+
+static inline void vdo_submit_metadata_vio_with_size(struct vio *vio,
+ physical_block_number_t physical,
+ bio_end_io_t callback,
+ vdo_action_fn error_handler,
+ blk_opf_t operation,
+ int size)
+{
+ __submit_metadata_vio(vio, physical, callback, error_handler,
+ operation, vio->data, size);
}
static inline void vdo_submit_flush_vio(struct vio *vio, bio_end_io_t callback,
@@ -41,7 +53,7 @@ static inline void vdo_submit_flush_vio(struct vio *vio, bio_end_io_t callback,
{
/* FIXME: Can we just use REQ_OP_FLUSH? */
__submit_metadata_vio(vio, 0, callback, error_handler,
- REQ_OP_WRITE | REQ_PREFLUSH, NULL);
+ REQ_OP_WRITE | REQ_PREFLUSH, NULL, 0);
}
#endif /* VDO_IO_SUBMITTER_H */
diff --git a/drivers/md/dm-vdo/packer.h b/drivers/md/dm-vdo/packer.h
index 0f3be44710b5..8c8d6892582d 100644
--- a/drivers/md/dm-vdo/packer.h
+++ b/drivers/md/dm-vdo/packer.h
@@ -46,7 +46,7 @@ struct compressed_block {
/*
* Each packer_bin holds an incomplete batch of data_vios that only partially fill a compressed
- * block. The bins are kept in a ring sorted by the amount of unused space so the first bin with
+ * block. The bins are kept in a list sorted by the amount of unused space so the first bin with
* enough space to hold a newly-compressed data_vio can easily be found. When the bin fills up or
* is flushed, the first uncanceled data_vio in the bin is selected to be the agent for that bin.
* Upon entering the packer, each data_vio already has its compressed data in the first slot of the
diff --git a/drivers/md/dm-vdo/priority-table.c b/drivers/md/dm-vdo/priority-table.c
index 42d3d8d0e4b5..9bae8256ba4e 100644
--- a/drivers/md/dm-vdo/priority-table.c
+++ b/drivers/md/dm-vdo/priority-table.c
@@ -199,7 +199,7 @@ void vdo_priority_table_remove(struct priority_table *table, struct list_head *e
/*
* Remove the entry from the bucket list, remembering a pointer to another entry in the
- * ring.
+ * list.
*/
next_entry = entry->next;
list_del_init(entry);
diff --git a/drivers/md/dm-vdo/recovery-journal.h b/drivers/md/dm-vdo/recovery-journal.h
index 899071173015..25e7ec6d19f6 100644
--- a/drivers/md/dm-vdo/recovery-journal.h
+++ b/drivers/md/dm-vdo/recovery-journal.h
@@ -43,9 +43,9 @@
* has a vio which is used to commit that block to disk. The vio's data is the on-disk
* representation of the journal block. In addition each in-memory block has a buffer which is used
* to accumulate entries while a partial commit of the block is in progress. In-memory blocks are
- * kept on two rings. Free blocks live on the 'free_tail_blocks' ring. When a block becomes active
- * (see below) it is moved to the 'active_tail_blocks' ring. When a block is fully committed, it is
- * moved back to the 'free_tail_blocks' ring.
+ * kept on two lists. Free blocks live on the 'free_tail_blocks' list. When a block becomes active
+ * (see below) it is moved to the 'active_tail_blocks' list. When a block is fully committed, it is
+ * moved back to the 'free_tail_blocks' list.
*
* When entries are added to the journal, they are added to the active in-memory block, as
* indicated by the 'active_block' field. If the caller wishes to wait for the entry to be
diff --git a/drivers/md/dm-vdo/slab-depot.c b/drivers/md/dm-vdo/slab-depot.c
index 8f0a35c63af6..f3d80ff7bef5 100644
--- a/drivers/md/dm-vdo/slab-depot.c
+++ b/drivers/md/dm-vdo/slab-depot.c
@@ -139,7 +139,7 @@ static bool is_slab_journal_blank(const struct vdo_slab *slab)
}
/**
- * mark_slab_journal_dirty() - Put a slab journal on the dirty ring of its allocator in the correct
+ * mark_slab_journal_dirty() - Put a slab journal on the dirty list of its allocator in the correct
* order.
* @journal: The journal to be marked dirty.
* @lock: The recovery journal lock held by the slab journal.
@@ -414,8 +414,7 @@ static void complete_reaping(struct vdo_completion *completion)
{
struct slab_journal *journal = completion->parent;
- return_vio_to_pool(journal->slab->allocator->vio_pool,
- vio_as_pooled_vio(as_vio(vdo_forget(completion))));
+ return_vio_to_pool(vio_as_pooled_vio(as_vio(completion)));
finish_reaping(journal);
reap_slab_journal(journal);
}
@@ -698,7 +697,7 @@ static void complete_write(struct vdo_completion *completion)
sequence_number_t committed = get_committing_sequence_number(pooled);
list_del_init(&pooled->list_entry);
- return_vio_to_pool(journal->slab->allocator->vio_pool, vdo_forget(pooled));
+ return_vio_to_pool(pooled);
if (result != VDO_SUCCESS) {
vio_record_metadata_io_error(as_vio(completion));
@@ -822,7 +821,7 @@ static void commit_tail(struct slab_journal *journal)
/*
* Since we are about to commit the tail block, this journal no longer needs to be on the
- * ring of journals which the recovery journal might ask to commit.
+ * list of journals which the recovery journal might ask to commit.
*/
mark_slab_journal_clean(journal);
@@ -1076,7 +1075,7 @@ static void finish_reference_block_write(struct vdo_completion *completion)
/* Release the slab journal lock. */
adjust_slab_journal_block_reference(&slab->journal,
block->slab_journal_lock_to_release, -1);
- return_vio_to_pool(slab->allocator->vio_pool, pooled);
+ return_vio_to_pool(pooled);
/*
* We can't clear the is_writing flag earlier as releasing the slab journal lock may cause
@@ -1170,8 +1169,8 @@ static void handle_io_error(struct vdo_completion *completion)
struct vdo_slab *slab = ((struct reference_block *) completion->parent)->slab;
vio_record_metadata_io_error(vio);
- return_vio_to_pool(slab->allocator->vio_pool, vio_as_pooled_vio(vio));
- slab->active_count--;
+ return_vio_to_pool(vio_as_pooled_vio(vio));
+ slab->active_count -= vio->io_size / VDO_BLOCK_SIZE;
vdo_enter_read_only_mode(slab->allocator->depot->vdo, result);
check_if_slab_drained(slab);
}
@@ -1372,7 +1371,7 @@ static unsigned int calculate_slab_priority(struct vdo_slab *slab)
static void prioritize_slab(struct vdo_slab *slab)
{
VDO_ASSERT_LOG_ONLY(list_empty(&slab->allocq_entry),
- "a slab must not already be on a ring when prioritizing");
+ "a slab must not already be on a list when prioritizing");
slab->priority = calculate_slab_priority(slab);
vdo_priority_table_enqueue(slab->allocator->prioritized_slabs,
slab->priority, &slab->allocq_entry);
@@ -2165,28 +2164,95 @@ static void dirty_all_reference_blocks(struct vdo_slab *slab)
dirty_block(&slab->reference_blocks[i]);
}
+static inline bool journal_points_equal(struct journal_point first,
+ struct journal_point second)
+{
+ return ((first.sequence_number == second.sequence_number) &&
+ (first.entry_count == second.entry_count));
+}
+
/**
- * clear_provisional_references() - Clear the provisional reference counts from a reference block.
- * @block: The block to clear.
+ * match_bytes() - Check an 8-byte word for bytes matching the value specified
+ * @input: A word to examine the bytes of
+ * @match: The byte value sought
+ *
+ * Return: 1 in each byte when the corresponding input byte matched, 0 otherwise
*/
-static void clear_provisional_references(struct reference_block *block)
+static inline u64 match_bytes(u64 input, u8 match)
{
- vdo_refcount_t *counters = get_reference_counters_for_block(block);
- block_count_t j;
+ u64 temp = input ^ (match * 0x0101010101010101ULL);
+ /* top bit of each byte is set iff top bit of temp byte is clear; rest are 0 */
+ u64 test_top_bits = ~temp & 0x8080808080808080ULL;
+ /* top bit of each byte is set iff low 7 bits of temp byte are clear; rest are useless */
+ u64 test_low_bits = 0x8080808080808080ULL - (temp & 0x7f7f7f7f7f7f7f7fULL);
+ /* return 1 when both tests indicate temp byte is 0 */
+ return (test_top_bits & test_low_bits) >> 7;
+}
+
+/**
+ * count_valid_references() - Process a newly loaded refcount array
+ * @counters: the array of counters from a metadata block
+ *
+ * Scan an 8-byte-aligned array of counters, fixing up any "provisional" values that weren't
+ * cleaned up at shutdown, changing them internally to "empty".
+ *
+ * Return: the number of blocks that are referenced (counters not "empty")
+ */
+static unsigned int count_valid_references(vdo_refcount_t *counters)
+{
+ u64 *words = (u64 *)counters;
+ /* It's easier to count occurrences of a specific byte than its absences. */
+ unsigned int empty_count = 0;
+ /* For speed, we process 8 bytes at once. */
+ unsigned int words_left = COUNTS_PER_BLOCK / sizeof(u64);
+
+ /*
+ * Sanity check assumptions used for optimizing this code: Counters are bytes. The counter
+ * array is a multiple of the word size.
+ */
+ BUILD_BUG_ON(sizeof(vdo_refcount_t) != 1);
+ BUILD_BUG_ON((COUNTS_PER_BLOCK % sizeof(u64)) != 0);
+
+ while (words_left > 0) {
+ /*
+ * This is used effectively as 8 byte-size counters. Byte 0 counts how many words
+ * had the target value found in byte 0, etc. We just have to avoid overflow.
+ */
+ u64 split_count = 0;
+ /*
+ * The counter "% 255" trick used below to fold split_count into empty_count
+ * imposes a limit of 254 bytes examined each iteration of the outer loop. We
+ * process a word at a time, so that limit gets rounded down to 31 u64 words.
+ */
+ const unsigned int max_words_per_iteration = 254 / sizeof(u64);
+ unsigned int iter_words_left = min_t(unsigned int, words_left,
+ max_words_per_iteration);
+
+ words_left -= iter_words_left;
+
+ while (iter_words_left--) {
+ u64 word = *words;
+ u64 temp;
+
+ /* First, if we have any provisional refcount values, clear them. */
+ temp = match_bytes(word, PROVISIONAL_REFERENCE_COUNT);
+ if (temp) {
+ /*
+ * 'temp' has 0x01 bytes where 'word' has PROVISIONAL; this xor
+ * will alter just those bytes, changing PROVISIONAL to EMPTY.
+ */
+ word ^= temp * (PROVISIONAL_REFERENCE_COUNT ^ EMPTY_REFERENCE_COUNT);
+ *words = word;
+ }
- for (j = 0; j < COUNTS_PER_BLOCK; j++) {
- if (counters[j] == PROVISIONAL_REFERENCE_COUNT) {
- counters[j] = EMPTY_REFERENCE_COUNT;
- block->allocated_count--;
+ /* Now count the EMPTY_REFERENCE_COUNT bytes, updating the 8 counters. */
+ split_count += match_bytes(word, EMPTY_REFERENCE_COUNT);
+ words++;
}
+ empty_count += split_count % 255;
}
-}
-static inline bool journal_points_equal(struct journal_point first,
- struct journal_point second)
-{
- return ((first.sequence_number == second.sequence_number) &&
- (first.entry_count == second.entry_count));
+ return COUNTS_PER_BLOCK - empty_count;
}
/**
@@ -2197,7 +2263,6 @@ static inline bool journal_points_equal(struct journal_point first,
static void unpack_reference_block(struct packed_reference_block *packed,
struct reference_block *block)
{
- block_count_t index;
sector_count_t i;
struct vdo_slab *slab = block->slab;
vdo_refcount_t *counters = get_reference_counters_for_block(block);
@@ -2223,11 +2288,7 @@ static void unpack_reference_block(struct packed_reference_block *packed,
}
}
- block->allocated_count = 0;
- for (index = 0; index < COUNTS_PER_BLOCK; index++) {
- if (counters[index] != EMPTY_REFERENCE_COUNT)
- block->allocated_count++;
- }
+ block->allocated_count = count_valid_references(counters);
}
/**
@@ -2240,13 +2301,19 @@ static void finish_reference_block_load(struct vdo_completion *completion)
struct pooled_vio *pooled = vio_as_pooled_vio(vio);
struct reference_block *block = completion->parent;
struct vdo_slab *slab = block->slab;
+ unsigned int block_count = vio->io_size / VDO_BLOCK_SIZE;
+ unsigned int i;
+ char *data = vio->data;
- unpack_reference_block((struct packed_reference_block *) vio->data, block);
- return_vio_to_pool(slab->allocator->vio_pool, pooled);
- slab->active_count--;
- clear_provisional_references(block);
+ for (i = 0; i < block_count; i++, block++, data += VDO_BLOCK_SIZE) {
+ struct packed_reference_block *packed = (struct packed_reference_block *) data;
+
+ unpack_reference_block(packed, block);
+ slab->free_blocks -= block->allocated_count;
+ }
+ return_vio_to_pool(pooled);
+ slab->active_count -= block_count;
- slab->free_blocks -= block->allocated_count;
check_if_slab_drained(slab);
}
@@ -2260,23 +2327,25 @@ static void load_reference_block_endio(struct bio *bio)
}
/**
- * load_reference_block() - After a block waiter has gotten a VIO from the VIO pool, load the
- * block.
- * @waiter: The waiter of the block to load.
+ * load_reference_block_group() - After a block waiter has gotten a VIO from the VIO pool, load
+ * a set of blocks.
+ * @waiter: The waiter of the first block to load.
* @context: The VIO returned by the pool.
*/
-static void load_reference_block(struct vdo_waiter *waiter, void *context)
+static void load_reference_block_group(struct vdo_waiter *waiter, void *context)
{
struct pooled_vio *pooled = context;
struct vio *vio = &pooled->vio;
struct reference_block *block =
container_of(waiter, struct reference_block, waiter);
- size_t block_offset = (block - block->slab->reference_blocks);
+ u32 block_offset = block - block->slab->reference_blocks;
+ u32 max_block_count = block->slab->reference_block_count - block_offset;
+ u32 block_count = min_t(int, vio->block_count, max_block_count);
vio->completion.parent = block;
- vdo_submit_metadata_vio(vio, block->slab->ref_counts_origin + block_offset,
- load_reference_block_endio, handle_io_error,
- REQ_OP_READ);
+ vdo_submit_metadata_vio_with_size(vio, block->slab->ref_counts_origin + block_offset,
+ load_reference_block_endio, handle_io_error,
+ REQ_OP_READ, block_count * VDO_BLOCK_SIZE);
}
/**
@@ -2286,14 +2355,21 @@ static void load_reference_block(struct vdo_waiter *waiter, void *context)
static void load_reference_blocks(struct vdo_slab *slab)
{
block_count_t i;
+ u64 blocks_per_vio = slab->allocator->refcount_blocks_per_big_vio;
+ struct vio_pool *pool = slab->allocator->refcount_big_vio_pool;
+
+ if (!pool) {
+ pool = slab->allocator->vio_pool;
+ blocks_per_vio = 1;
+ }
slab->free_blocks = slab->block_count;
slab->active_count = slab->reference_block_count;
- for (i = 0; i < slab->reference_block_count; i++) {
+ for (i = 0; i < slab->reference_block_count; i += blocks_per_vio) {
struct vdo_waiter *waiter = &slab->reference_blocks[i].waiter;
- waiter->callback = load_reference_block;
- acquire_vio_from_pool(slab->allocator->vio_pool, waiter);
+ waiter->callback = load_reference_block_group;
+ acquire_vio_from_pool(pool, waiter);
}
}
@@ -2429,7 +2505,7 @@ static void finish_loading_journal(struct vdo_completion *completion)
initialize_journal_state(journal);
}
- return_vio_to_pool(slab->allocator->vio_pool, vio_as_pooled_vio(vio));
+ return_vio_to_pool(vio_as_pooled_vio(vio));
vdo_finish_loading_with_result(&slab->state, allocate_counters_if_clean(slab));
}
@@ -2449,7 +2525,7 @@ static void handle_load_error(struct vdo_completion *completion)
struct vio *vio = as_vio(completion);
vio_record_metadata_io_error(vio);
- return_vio_to_pool(journal->slab->allocator->vio_pool, vio_as_pooled_vio(vio));
+ return_vio_to_pool(vio_as_pooled_vio(vio));
vdo_finish_loading_with_result(&journal->slab->state, result);
}
@@ -2547,7 +2623,7 @@ static void queue_slab(struct vdo_slab *slab)
int result;
VDO_ASSERT_LOG_ONLY(list_empty(&slab->allocq_entry),
- "a requeued slab must not already be on a ring");
+ "a requeued slab must not already be on a list");
if (vdo_is_read_only(allocator->depot->vdo))
return;
@@ -2700,6 +2776,7 @@ static void finish_scrubbing(struct slab_scrubber *scrubber, int result)
vdo_log_info("VDO commencing normal operation");
else if (prior_state == VDO_RECOVERING)
vdo_log_info("Exiting recovery mode");
+ free_vio_pool(vdo_forget(allocator->refcount_big_vio_pool));
}
/*
@@ -3281,7 +3358,7 @@ int vdo_release_block_reference(struct block_allocator *allocator,
* This is a min_heap callback function orders slab_status structures using the 'is_clean' field as
* the primary key and the 'emptiness' field as the secondary key.
*
- * Slabs need to be pushed onto the rings in the same order they are to be popped off. Popping
+ * Slabs need to be pushed onto the lists in the same order they are to be popped off. Popping
* should always get the most empty first, so pushing should be from most empty to least empty.
* Thus, the ordering is reversed from the usual sense since min_heap returns smaller elements
* before larger ones.
@@ -3983,6 +4060,7 @@ static int __must_check initialize_block_allocator(struct slab_depot *depot,
struct vdo *vdo = depot->vdo;
block_count_t max_free_blocks = depot->slab_config.data_blocks;
unsigned int max_priority = (2 + ilog2(max_free_blocks));
+ u32 reference_block_count, refcount_reads_needed, refcount_blocks_per_vio;
*allocator = (struct block_allocator) {
.depot = depot,
@@ -4000,12 +4078,24 @@ static int __must_check initialize_block_allocator(struct slab_depot *depot,
return result;
vdo_initialize_completion(&allocator->completion, vdo, VDO_BLOCK_ALLOCATOR_COMPLETION);
- result = make_vio_pool(vdo, BLOCK_ALLOCATOR_VIO_POOL_SIZE, allocator->thread_id,
+ result = make_vio_pool(vdo, BLOCK_ALLOCATOR_VIO_POOL_SIZE, 1, allocator->thread_id,
VIO_TYPE_SLAB_JOURNAL, VIO_PRIORITY_METADATA,
allocator, &allocator->vio_pool);
if (result != VDO_SUCCESS)
return result;
+ /* Initialize the refcount-reading vio pool. */
+ reference_block_count = vdo_get_saved_reference_count_size(depot->slab_config.slab_blocks);
+ refcount_reads_needed = DIV_ROUND_UP(reference_block_count, MAX_BLOCKS_PER_VIO);
+ refcount_blocks_per_vio = DIV_ROUND_UP(reference_block_count, refcount_reads_needed);
+ allocator->refcount_blocks_per_big_vio = refcount_blocks_per_vio;
+ result = make_vio_pool(vdo, BLOCK_ALLOCATOR_REFCOUNT_VIO_POOL_SIZE,
+ allocator->refcount_blocks_per_big_vio, allocator->thread_id,
+ VIO_TYPE_SLAB_JOURNAL, VIO_PRIORITY_METADATA,
+ NULL, &allocator->refcount_big_vio_pool);
+ if (result != VDO_SUCCESS)
+ return result;
+
result = initialize_slab_scrubber(allocator);
if (result != VDO_SUCCESS)
return result;
@@ -4223,6 +4313,7 @@ void vdo_free_slab_depot(struct slab_depot *depot)
uninitialize_allocator_summary(allocator);
uninitialize_scrubber_vio(&allocator->scrubber);
free_vio_pool(vdo_forget(allocator->vio_pool));
+ free_vio_pool(vdo_forget(allocator->refcount_big_vio_pool));
vdo_free_priority_table(vdo_forget(allocator->prioritized_slabs));
}
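
[Editor's note] match_bytes() is the classic SWAR "find the byte equal to k" trick: XOR-ing with k replicated across the word turns matching bytes into zero, the two masked tests set bit 7 in exactly the zero bytes, and the final shift leaves 0x01 per match. count_valid_references() then sums these masks, using each of the 8 byte lanes as a small counter, and folds with "% 255": since 256 is congruent to 1 mod 255, a base-256 value is congruent to the sum of its lanes, and that sum is exact while it stays below 255, hence the cap of 254 bytes (31 u64 words) per pass. A hedged standalone restatement, with demo_ names to mark it as illustrative:

    #include <linux/minmax.h>
    #include <linux/types.h>

    /* 0x01 in each byte of 'input' equal to 'match', 0x00 elsewhere. */
    static inline u64 demo_match_bytes(u64 input, u8 match)
    {
        u64 temp = input ^ (match * 0x0101010101010101ULL);
        u64 top = ~temp & 0x8080808080808080ULL;    /* set iff bit 7 of byte clear */
        u64 low = 0x8080808080808080ULL - (temp & 0x7f7f7f7f7f7f7f7fULL);

        return (top & low) >> 7;    /* both set iff the whole byte was zero */
    }

    /* Count bytes equal to 'match' across n words, 31 words per overflow-safe pass. */
    static unsigned int demo_count_matches(const u64 *words, unsigned int n, u8 match)
    {
        unsigned int total = 0;

        while (n) {
            unsigned int chunk = min(n, 31U);
            u64 split = 0;

            n -= chunk;
            while (chunk--)
                split += demo_match_bytes(*words++, match);
            total += split % 255;    /* lane sum; exact while every lane < 255 */
        }
        return total;
    }

The same match mask doubles as an editor: XOR-ing mask * (PROVISIONAL ^ EMPTY) into the word rewrites exactly the matching bytes, which is how the provisional counts are cleared without a per-byte loop.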
diff --git a/drivers/md/dm-vdo/slab-depot.h b/drivers/md/dm-vdo/slab-depot.h
index f234853501ca..fadc0c9d4dc4 100644
--- a/drivers/md/dm-vdo/slab-depot.h
+++ b/drivers/md/dm-vdo/slab-depot.h
@@ -45,6 +45,13 @@
enum {
/* The number of vios in the vio pool is proportional to the throughput of the VDO. */
BLOCK_ALLOCATOR_VIO_POOL_SIZE = 128,
+
+ /*
+ * The number of vios in the vio pool used for loading reference count data. A slab's
+ * refcount data is capped at ~8MB, and we process one slab at a time in a zone, so 9 should be
+ * plenty.
+ */
+ BLOCK_ALLOCATOR_REFCOUNT_VIO_POOL_SIZE = 9,
};
/*
@@ -248,7 +255,7 @@ struct vdo_slab {
/* A list of the dirty blocks waiting to be written out */
struct vdo_wait_queue dirty_blocks;
- /* The number of blocks which are currently writing */
+ /* The number of blocks which are currently reading or writing */
size_t active_count;
/* A waiter object for updating the slab summary */
@@ -425,6 +432,10 @@ struct block_allocator {
/* The vio pool for reading and writing block allocator metadata */
struct vio_pool *vio_pool;
+ /* The vio pool for large initial reads of ref count areas */
+ struct vio_pool *refcount_big_vio_pool;
+ /* How many ref count blocks are read per vio at initial load */
+ u32 refcount_blocks_per_big_vio;
/* The dm_kcopyd client for erasing slab journals */
struct dm_kcopyd_client *eraser;
/* Iterator over the slabs to be erased */
diff --git a/drivers/md/dm-vdo/types.h b/drivers/md/dm-vdo/types.h
index dbe892b10f26..cdf36e7d7702 100644
--- a/drivers/md/dm-vdo/types.h
+++ b/drivers/md/dm-vdo/types.h
@@ -376,6 +376,9 @@ struct vio {
/* The size of this vio in blocks */
unsigned int block_count;
+ /* The amount of data to be read or written, in bytes */
+ unsigned int io_size;
+
/* The data being read or written. */
char *data;
diff --git a/drivers/md/dm-vdo/vdo.c b/drivers/md/dm-vdo/vdo.c
index a7e32baab4af..80b608674022 100644
--- a/drivers/md/dm-vdo/vdo.c
+++ b/drivers/md/dm-vdo/vdo.c
@@ -31,9 +31,7 @@
#include <linux/completion.h>
#include <linux/device-mapper.h>
-#include <linux/kernel.h>
#include <linux/lz4.h>
-#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/types.h>
@@ -142,12 +140,6 @@ static void finish_vdo_request_queue(void *ptr)
vdo_unregister_allocating_thread();
}
-#ifdef MODULE
-#define MODULE_NAME THIS_MODULE->name
-#else
-#define MODULE_NAME "dm-vdo"
-#endif /* MODULE */
-
static const struct vdo_work_queue_type default_queue_type = {
.start = start_vdo_request_queue,
.finish = finish_vdo_request_queue,
@@ -559,8 +551,7 @@ int vdo_make(unsigned int instance, struct device_config *config, char **reason,
*vdo_ptr = vdo;
snprintf(vdo->thread_name_prefix, sizeof(vdo->thread_name_prefix),
- "%s%u", MODULE_NAME, instance);
- BUG_ON(vdo->thread_name_prefix[0] == '\0');
+ "vdo%u", instance);
result = vdo_allocate(vdo->thread_config.thread_count,
struct vdo_thread, __func__, &vdo->threads);
if (result != VDO_SUCCESS) {
diff --git a/drivers/md/dm-vdo/vio.c b/drivers/md/dm-vdo/vio.c
index e710f3c5a972..e7f4153e55e3 100644
--- a/drivers/md/dm-vdo/vio.c
+++ b/drivers/md/dm-vdo/vio.c
@@ -188,14 +188,23 @@ void vdo_set_bio_properties(struct bio *bio, struct vio *vio, bio_end_io_t callb
/*
* Prepares the bio to perform IO with the specified buffer. May only be used on a VDO-allocated
- * bio, as it assumes the bio wraps a 4k buffer that is 4k aligned, but there does not have to be a
- * vio associated with the bio.
+ * bio, as it assumes the bio wraps a 4k-multiple buffer that is 4k aligned, but there does not
+ * have to be a vio associated with the bio.
*/
int vio_reset_bio(struct vio *vio, char *data, bio_end_io_t callback,
blk_opf_t bi_opf, physical_block_number_t pbn)
{
- int bvec_count, offset, len, i;
+ return vio_reset_bio_with_size(vio, data, vio->block_count * VDO_BLOCK_SIZE,
+ callback, bi_opf, pbn);
+}
+
+int vio_reset_bio_with_size(struct vio *vio, char *data, int size, bio_end_io_t callback,
+ blk_opf_t bi_opf, physical_block_number_t pbn)
+{
+ int bvec_count, offset, i;
struct bio *bio = vio->bio;
+ int vio_size = vio->block_count * VDO_BLOCK_SIZE;
+ int remaining;
bio_reset(bio, bio->bi_bdev, bi_opf);
vdo_set_bio_properties(bio, vio, callback, bi_opf, pbn);
@@ -205,22 +214,21 @@ int vio_reset_bio(struct vio *vio, char *data, bio_end_io_t callback,
bio->bi_ioprio = 0;
bio->bi_io_vec = bio->bi_inline_vecs;
bio->bi_max_vecs = vio->block_count + 1;
- len = VDO_BLOCK_SIZE * vio->block_count;
+ if (VDO_ASSERT(size <= vio_size, "specified size %d is not greater than allocated size %d",
+ size, vio_size) != VDO_SUCCESS)
+ size = vio_size;
+ vio->io_size = size;
offset = offset_in_page(data);
- bvec_count = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+ bvec_count = DIV_ROUND_UP(offset + size, PAGE_SIZE);
+ remaining = size;
- /*
- * If we knew that data was always on one page, or contiguous pages, we wouldn't need the
- * loop. But if we're using vmalloc, it's not impossible that the data is in different
- * pages that can't be merged in bio_add_page...
- */
- for (i = 0; (i < bvec_count) && (len > 0); i++) {
+ for (i = 0; (i < bvec_count) && (remaining > 0); i++) {
struct page *page;
int bytes_added;
int bytes = PAGE_SIZE - offset;
- if (bytes > len)
- bytes = len;
+ if (bytes > remaining)
+ bytes = remaining;
page = is_vmalloc_addr(data) ? vmalloc_to_page(data) : virt_to_page(data);
bytes_added = bio_add_page(bio, page, bytes, offset);
@@ -232,7 +240,7 @@ int vio_reset_bio(struct vio *vio, char *data, bio_end_io_t callback,
}
data += bytes;
- len -= bytes;
+ remaining -= bytes;
offset = 0;
}
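
A sketch of how a caller might use the new entry point to issue a short read into an oversized pooled vio; the helper and its name are hypothetical, and submission and error handling are elided:

    /* Read only "size" bytes; size must not exceed block_count * VDO_BLOCK_SIZE. */
    static int start_partial_read(struct vio *vio, char *buffer, int size,
                                  physical_block_number_t pbn, bio_end_io_t done)
    {
            return vio_reset_bio_with_size(vio, buffer, size, done,
                                           REQ_OP_READ, pbn);
    }
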
@@ -301,6 +309,7 @@ void vio_record_metadata_io_error(struct vio *vio)
* make_vio_pool() - Create a new vio pool.
* @vdo: The vdo.
* @pool_size: The number of vios in the pool.
+ * @block_count: The number of 4k blocks per vio.
* @thread_id: The ID of the thread using this pool.
* @vio_type: The type of vios in the pool.
* @priority: The priority with which vios from the pool should be enqueued.
@@ -309,13 +318,14 @@ void vio_record_metadata_io_error(struct vio *vio)
*
* Return: A success or error code.
*/
-int make_vio_pool(struct vdo *vdo, size_t pool_size, thread_id_t thread_id,
+int make_vio_pool(struct vdo *vdo, size_t pool_size, size_t block_count, thread_id_t thread_id,
enum vio_type vio_type, enum vio_priority priority, void *context,
struct vio_pool **pool_ptr)
{
struct vio_pool *pool;
char *ptr;
int result;
+ size_t per_vio_size = VDO_BLOCK_SIZE * block_count;
result = vdo_allocate_extended(struct vio_pool, pool_size, struct pooled_vio,
__func__, &pool);
@@ -326,7 +336,7 @@ int make_vio_pool(struct vdo *vdo, size_t pool_size, thread_id_t thread_id,
INIT_LIST_HEAD(&pool->available);
INIT_LIST_HEAD(&pool->busy);
- result = vdo_allocate(pool_size * VDO_BLOCK_SIZE, char,
+ result = vdo_allocate(pool_size * per_vio_size, char,
"VIO pool buffer", &pool->buffer);
if (result != VDO_SUCCESS) {
free_vio_pool(pool);
@@ -334,10 +344,10 @@ int make_vio_pool(struct vdo *vdo, size_t pool_size, thread_id_t thread_id,
}
ptr = pool->buffer;
- for (pool->size = 0; pool->size < pool_size; pool->size++, ptr += VDO_BLOCK_SIZE) {
+ for (pool->size = 0; pool->size < pool_size; pool->size++, ptr += per_vio_size) {
struct pooled_vio *pooled = &pool->vios[pool->size];
- result = allocate_vio_components(vdo, vio_type, priority, NULL, 1, ptr,
+ result = allocate_vio_components(vdo, vio_type, priority, NULL, block_count, ptr,
&pooled->vio);
if (result != VDO_SUCCESS) {
free_vio_pool(pool);
@@ -345,6 +355,7 @@ int make_vio_pool(struct vdo *vdo, size_t pool_size, thread_id_t thread_id,
}
pooled->context = context;
+ pooled->pool = pool;
list_add_tail(&pooled->pool_entry, &pool->available);
}
@@ -419,12 +430,13 @@ void acquire_vio_from_pool(struct vio_pool *pool, struct vdo_waiter *waiter)
}
/**
- * return_vio_to_pool() - Return a vio to the pool
- * @pool: The vio pool.
+ * return_vio_to_pool() - Return a vio to its pool
* @vio: The pooled vio to return.
*/
-void return_vio_to_pool(struct vio_pool *pool, struct pooled_vio *vio)
+void return_vio_to_pool(struct pooled_vio *vio)
{
+ struct vio_pool *pool = vio->pool;
+
VDO_ASSERT_LOG_ONLY((pool->thread_id == vdo_get_callback_thread_id()),
"vio pool entry returned on same thread as it was acquired");
diff --git a/drivers/md/dm-vdo/vio.h b/drivers/md/dm-vdo/vio.h
index 3490e9f59b04..4bfcb21901f1 100644
--- a/drivers/md/dm-vdo/vio.h
+++ b/drivers/md/dm-vdo/vio.h
@@ -30,6 +30,8 @@ struct pooled_vio {
void *context;
/* The list entry used by the pool */
struct list_head pool_entry;
+ /* The pool this vio is allocated from */
+ struct vio_pool *pool;
};
/**
@@ -123,6 +125,8 @@ void vdo_set_bio_properties(struct bio *bio, struct vio *vio, bio_end_io_t callb
int vio_reset_bio(struct vio *vio, char *data, bio_end_io_t callback,
blk_opf_t bi_opf, physical_block_number_t pbn);
+int vio_reset_bio_with_size(struct vio *vio, char *data, int size, bio_end_io_t callback,
+ blk_opf_t bi_opf, physical_block_number_t pbn);
void update_vio_error_stats(struct vio *vio, const char *format, ...)
__printf(2, 3);
@@ -188,12 +192,13 @@ static inline struct pooled_vio *vio_as_pooled_vio(struct vio *vio)
struct vio_pool;
-int __must_check make_vio_pool(struct vdo *vdo, size_t pool_size, thread_id_t thread_id,
- enum vio_type vio_type, enum vio_priority priority,
- void *context, struct vio_pool **pool_ptr);
+int __must_check make_vio_pool(struct vdo *vdo, size_t pool_size, size_t block_count,
+ thread_id_t thread_id, enum vio_type vio_type,
+ enum vio_priority priority, void *context,
+ struct vio_pool **pool_ptr);
void free_vio_pool(struct vio_pool *pool);
bool __must_check is_vio_pool_busy(struct vio_pool *pool);
void acquire_vio_from_pool(struct vio_pool *pool, struct vdo_waiter *waiter);
-void return_vio_to_pool(struct vio_pool *pool, struct pooled_vio *vio);
+void return_vio_to_pool(struct pooled_vio *vio);
#endif /* VIO_H */
diff --git a/drivers/md/dm-vdo/wait-queue.c b/drivers/md/dm-vdo/wait-queue.c
index 6e1e739277ef..f81ed0cee2bf 100644
--- a/drivers/md/dm-vdo/wait-queue.c
+++ b/drivers/md/dm-vdo/wait-queue.c
@@ -34,7 +34,7 @@ void vdo_waitq_enqueue_waiter(struct vdo_wait_queue *waitq, struct vdo_waiter *w
waitq->last_waiter->next_waiter = waiter;
}
- /* In both cases, the waiter we added to the ring becomes the last waiter. */
+ /* In both cases, the waiter we added to the list becomes the last waiter. */
waitq->last_waiter = waiter;
waitq->length += 1;
}
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index e86c1431b108..3c427f18a04b 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -30,6 +30,7 @@
#define DM_VERITY_ENV_VAR_NAME "DM_VERITY_ERR_BLOCK_NR"
#define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144
+#define DM_VERITY_USE_BH_DEFAULT_BYTES 8192
#define DM_VERITY_MAX_CORRUPTED_ERRS 100
@@ -49,6 +50,15 @@ static unsigned int dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE
module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, 0644);
+static unsigned int dm_verity_use_bh_bytes[4] = {
+ DM_VERITY_USE_BH_DEFAULT_BYTES, // IOPRIO_CLASS_NONE
+ DM_VERITY_USE_BH_DEFAULT_BYTES, // IOPRIO_CLASS_RT
+ DM_VERITY_USE_BH_DEFAULT_BYTES, // IOPRIO_CLASS_BE
+ 0 // IOPRIO_CLASS_IDLE
+};
+
+module_param_array_named(use_bh_bytes, dm_verity_use_bh_bytes, uint, NULL, 0644);
+
static DEFINE_STATIC_KEY_FALSE(use_bh_wq_enabled);
/* Is at least one dm-verity instance using ahash_tfm instead of shash_tfm? */
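
Because the thresholds are exported with mode 0644, they should be tunable at runtime through the usual module-parameter path (e.g. /sys/module/dm_verity/parameters/use_bh_bytes), with the four values corresponding to IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, and IOPRIO_CLASS_IDLE, in that order.
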
@@ -311,7 +321,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) {
data = dm_bufio_get(v->bufio, hash_block, &buf);
- if (data == NULL) {
+ if (IS_ERR_OR_NULL(data)) {
/*
* In tasklet and the hash was not in the bufio cache.
* Return early and resume execution from a work-queue
@@ -324,8 +334,24 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
&buf, bio->bi_ioprio);
}
- if (IS_ERR(data))
- return PTR_ERR(data);
+ if (IS_ERR(data)) {
+ if (skip_unverified)
+ return 1;
+ r = PTR_ERR(data);
+ data = dm_bufio_new(v->bufio, hash_block, &buf);
+ if (IS_ERR(data))
+ return r;
+ if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_METADATA,
+ hash_block, data) == 0) {
+ aux = dm_bufio_get_aux_data(buf);
+ aux->hash_verified = 1;
+ goto release_ok;
+ } else {
+ dm_bufio_release(buf);
+ dm_bufio_forget(v->bufio, hash_block);
+ return r;
+ }
+ }
aux = dm_bufio_get_aux_data(buf);
@@ -366,6 +392,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
}
}
+release_ok:
data += offset;
memcpy(want_digest, data, v->digest_size);
r = 0;
@@ -652,9 +679,17 @@ static void verity_bh_work(struct work_struct *w)
verity_finish_io(io, errno_to_blk_status(err));
}
+static inline bool verity_use_bh(unsigned int bytes, unsigned short ioprio)
+{
+ return ioprio <= IOPRIO_CLASS_IDLE &&
+ bytes <= READ_ONCE(dm_verity_use_bh_bytes[ioprio]);
+}
+
static void verity_end_io(struct bio *bio)
{
struct dm_verity_io *io = bio->bi_private;
+ unsigned short ioprio = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
+ unsigned int bytes = io->n_blocks << io->v->data_dev_block_bits;
if (bio->bi_status &&
(!verity_fec_is_enabled(io->v) ||
@@ -664,9 +699,14 @@ static void verity_end_io(struct bio *bio)
return;
}
- if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq) {
- INIT_WORK(&io->bh_work, verity_bh_work);
- queue_work(system_bh_wq, &io->bh_work);
+ if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq &&
+ verity_use_bh(bytes, ioprio)) {
+ if (in_hardirq() || irqs_disabled()) {
+ INIT_WORK(&io->bh_work, verity_bh_work);
+ queue_work(system_bh_wq, &io->bh_work);
+ } else {
+ verity_bh_work(&io->bh_work);
+ }
} else {
INIT_WORK(&io->work, verity_work);
queue_work(io->v->verify_wq, &io->work);
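
For illustration, with the default thresholds above and 4k data blocks, the new gate behaves as follows (a sketch, not taken from the patch):

    /* Outcomes of verity_use_bh() with the default table. */
    verity_use_bh(4096, IOPRIO_CLASS_BE);    /* true:  4096 <= 8192 */
    verity_use_bh(16384, IOPRIO_CLASS_BE);   /* false: 16384 > 8192 */
    verity_use_bh(4096, IOPRIO_CLASS_IDLE);  /* false: idle threshold is 0 */
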
@@ -796,6 +836,13 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_SUBMITTED;
}
+static void verity_postsuspend(struct dm_target *ti)
+{
+ struct dm_verity *v = ti->private;
+ flush_workqueue(v->verify_wq);
+ dm_bufio_client_reset(v->bufio);
+}
+
/*
* Status: V (valid) or C (corruption found)
*/
@@ -1761,11 +1808,12 @@ static struct target_type verity_target = {
.name = "verity",
/* Note: the LSMs depend on the singleton and immutable features */
.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE,
- .version = {1, 10, 0},
+ .version = {1, 11, 0},
.module = THIS_MODULE,
.ctr = verity_ctr,
.dtr = verity_dtr,
.map = verity_map,
+ .postsuspend = verity_postsuspend,
.status = verity_status,
.prepare_ioctl = verity_prepare_ioctl,
.iterate_devices = verity_iterate_devices,
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 7ce8847b3404..d6a04a57472d 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -797,7 +797,7 @@ static void writecache_flush(struct dm_writecache *wc)
bool need_flush_after_free;
wc->uncommitted_blocks = 0;
- del_timer(&wc->autocommit_timer);
+ timer_delete(&wc->autocommit_timer);
if (list_empty(&wc->lru))
return;
@@ -927,8 +927,8 @@ static void writecache_suspend(struct dm_target *ti)
struct dm_writecache *wc = ti->private;
bool flush_on_suspend;
- del_timer_sync(&wc->autocommit_timer);
- del_timer_sync(&wc->max_age_timer);
+ timer_delete_sync(&wc->autocommit_timer);
+ timer_delete_sync(&wc->max_age_timer);
wc_lock(wc);
writecache_flush(wc);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4d1e42891d24..5ab7574c0c76 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1540,14 +1540,18 @@ static void __send_empty_flush(struct clone_info *ci)
{
struct dm_table *t = ci->map;
struct bio flush_bio;
+ blk_opf_t opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
+
+ if ((ci->io->orig_bio->bi_opf & (REQ_IDLE | REQ_SYNC)) ==
+ (REQ_IDLE | REQ_SYNC))
+ opf |= REQ_IDLE;
/*
* Use an on-stack bio for this, it's safe since we don't
* need to reference it after submit. It's just used as
* the basis for the clone(s).
*/
- bio_init(&flush_bio, ci->io->md->disk->part0, NULL, 0,
- REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC);
+ bio_init(&flush_bio, ci->io->md->disk->part0, NULL, 0, opf);
ci->bio = &flush_bio;
ci->sector_count = 0;
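
In effect, the empty flush inherits REQ_IDLE only when the original bio carried both REQ_SYNC and REQ_IDLE. A minimal sketch of the rule (flag types from linux/blk_types.h):

    static bool flush_inherits_idle(blk_opf_t orig_opf)
    {
            return (orig_opf & (REQ_IDLE | REQ_SYNC)) == (REQ_IDLE | REQ_SYNC);
    }
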
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 438e71e45c16..9daa78c5fe33 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4064,7 +4064,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
* it must always be in_sync
*/
mddev->in_sync = 1;
- del_timer_sync(&mddev->safemode_timer);
+ timer_delete_sync(&mddev->safemode_timer);
}
pers->run(mddev);
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
@@ -6405,7 +6405,7 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
{
- del_timer_sync(&mddev->safemode_timer);
+ timer_delete_sync(&mddev->safemode_timer);
if (mddev->pers && mddev->pers->quiesce) {
mddev->pers->quiesce(mddev, 1);
diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig
index f4f948b0e173..dbb97a7233ab 100644
--- a/drivers/md/persistent-data/Kconfig
+++ b/drivers/md/persistent-data/Kconfig
@@ -2,7 +2,7 @@
config DM_PERSISTENT_DATA
tristate
depends on BLK_DEV_DM
- select LIBCRC32C
+ select CRC32
select DM_BUFIO
help
Library providing immutable on-disk data structure support for