summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-13 10:58:09 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-13 10:58:09 -0800
commit8715c6d3100fc7c6edddf29af4a399a1c12d028c (patch)
tree084555960802704156e231313a143837a4e86098
parent8ecd28b7a3a4c43a875a8840851f72468a2ca1d7 (diff)
parent7991dbff6849f67e823b7cc0c15e5a90b0549b9f (diff)
downloadlwn-8715c6d3100fc7c6edddf29af4a399a1c12d028c.tar.gz
lwn-8715c6d3100fc7c6edddf29af4a399a1c12d028c.zip
Merge tag 'for-6.2/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer: - Fix use-after-free races due to missing resource cleanup during DM target destruction in DM targets: thin-pool, cache, integrity and clone. - Fix ABBA deadlocks in DM thin-pool and cache targets due to their use of a bufio client (that has a shrinker whose locking can cause the incorrect locking order). - Fix DM cache target to set its needs_check flag after first aborting the metadata (whereby using reset persistent-data objects to update the superblock with, otherwise the superblock update could be dropped due to aborting metadata). This was found with code-inspection when comparing with the equivalent in DM thinp code. - Fix DM thin-pool's presume to continue resuming the device even if the pool in is fail mode -- otherwise bios may never be failed up the IO stack (which will prevent resetting the thin-pool target via table reload) - Fix DM thin-pool's metadata to use proper btree root (from previous transaction) if metadata commit failed. - Add 'waitfor' module param to DM module (dm_mod) to allow dm-init to wait for the specified device before continuing with its DM target initialization. * tag 'for-6.2/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: dm thin: Use last transaction's pmd->root when commit failed dm init: add dm-mod.waitfor to wait for asynchronously probed block devices dm ioctl: fix a couple ioctl codes dm ioctl: a small code cleanup in list_version_get_info dm thin: resume even if in FAIL mode dm cache: set needs_check flag after aborting metadata dm cache: Fix ABBA deadlock between shrink_slab and dm_cache_metadata_abort dm thin: Fix ABBA deadlock between shrink_slab and dm_pool_abort_metadata dm integrity: Fix UAF in dm_integrity_dtr() dm cache: Fix UAF in destroy() dm clone: Fix UAF in clone_dtr() dm thin: Fix UAF in run_timer_softirq()
-rw-r--r--Documentation/admin-guide/device-mapper/dm-init.rst8
-rw-r--r--drivers/md/dm-cache-metadata.c54
-rw-r--r--drivers/md/dm-cache-target.c11
-rw-r--r--drivers/md/dm-clone-target.c1
-rw-r--r--drivers/md/dm-init.c22
-rw-r--r--drivers/md/dm-integrity.c2
-rw-r--r--drivers/md/dm-ioctl.c6
-rw-r--r--drivers/md/dm-thin-metadata.c60
-rw-r--r--drivers/md/dm-thin.c18
9 files changed, 154 insertions, 28 deletions
diff --git a/Documentation/admin-guide/device-mapper/dm-init.rst b/Documentation/admin-guide/device-mapper/dm-init.rst
index e5242ff17e9b..981d6a907699 100644
--- a/Documentation/admin-guide/device-mapper/dm-init.rst
+++ b/Documentation/admin-guide/device-mapper/dm-init.rst
@@ -123,3 +123,11 @@ Other examples (per target):
0 1638400 verity 1 8:1 8:2 4096 4096 204800 1 sha256
fb1a5a0f00deb908d8b53cb270858975e76cf64105d412ce764225d53b8f3cfd
51934789604d1b92399c52e7cb149d1b3a1b74bbbcb103b2a0aaacbed5c08584
+
+For setups using device-mapper on top of asynchronously probed block
+devices (MMC, USB, ..), it may be necessary to tell dm-init to
+explicitly wait for them to become available before setting up the
+device-mapper tables. This can be done with the "dm-mod.waitfor="
+module parameter, which takes a list of devices to wait for::
+
+ dm-mod.waitfor=<device1>[,..,<deviceN>]
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index ab13b7380265..83a5975bcc72 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -551,11 +551,13 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
return r;
}
-static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
+static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd,
+ bool destroy_bm)
{
dm_sm_destroy(cmd->metadata_sm);
dm_tm_destroy(cmd->tm);
- dm_block_manager_destroy(cmd->bm);
+ if (destroy_bm)
+ dm_block_manager_destroy(cmd->bm);
}
typedef unsigned long (*flags_mutator)(unsigned long);
@@ -826,7 +828,7 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
cmd2 = lookup(bdev);
if (cmd2) {
mutex_unlock(&table_lock);
- __destroy_persistent_data_objects(cmd);
+ __destroy_persistent_data_objects(cmd, true);
kfree(cmd);
return cmd2;
}
@@ -874,7 +876,7 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
mutex_unlock(&table_lock);
if (!cmd->fail_io)
- __destroy_persistent_data_objects(cmd);
+ __destroy_persistent_data_objects(cmd, true);
kfree(cmd);
}
}
@@ -1807,14 +1809,52 @@ int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result)
int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
{
- int r;
+ int r = -EINVAL;
+ struct dm_block_manager *old_bm = NULL, *new_bm = NULL;
+
+ /* fail_io is double-checked with cmd->root_lock held below */
+ if (unlikely(cmd->fail_io))
+ return r;
+
+ /*
+ * Replacement block manager (new_bm) is created and old_bm destroyed outside of
+ * cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
+ * shrinker associated with the block manager's bufio client vs cmd root_lock).
+ * - must take shrinker_rwsem without holding cmd->root_lock
+ */
+ new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
+ CACHE_MAX_CONCURRENT_LOCKS);
WRITE_LOCK(cmd);
- __destroy_persistent_data_objects(cmd);
- r = __create_persistent_data_objects(cmd, false);
+ if (cmd->fail_io) {
+ WRITE_UNLOCK(cmd);
+ goto out;
+ }
+
+ __destroy_persistent_data_objects(cmd, false);
+ old_bm = cmd->bm;
+ if (IS_ERR(new_bm)) {
+ DMERR("could not create block manager during abort");
+ cmd->bm = NULL;
+ r = PTR_ERR(new_bm);
+ goto out_unlock;
+ }
+
+ cmd->bm = new_bm;
+ r = __open_or_format_metadata(cmd, false);
+ if (r) {
+ cmd->bm = NULL;
+ goto out_unlock;
+ }
+ new_bm = NULL;
+out_unlock:
if (r)
cmd->fail_io = true;
WRITE_UNLOCK(cmd);
+ dm_block_manager_destroy(old_bm);
+out:
+ if (new_bm && !IS_ERR(new_bm))
+ dm_block_manager_destroy(new_bm);
return r;
}
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 54a8d5c9a44e..5e92fac90b67 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -907,16 +907,16 @@ static void abort_transaction(struct cache *cache)
if (get_cache_mode(cache) >= CM_READ_ONLY)
return;
- if (dm_cache_metadata_set_needs_check(cache->cmd)) {
- DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
- set_cache_mode(cache, CM_FAIL);
- }
-
DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
if (dm_cache_metadata_abort(cache->cmd)) {
DMERR("%s: failed to abort metadata transaction", dev_name);
set_cache_mode(cache, CM_FAIL);
}
+
+ if (dm_cache_metadata_set_needs_check(cache->cmd)) {
+ DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
+ set_cache_mode(cache, CM_FAIL);
+ }
}
static void metadata_operation_failed(struct cache *cache, const char *op, int r)
@@ -1887,6 +1887,7 @@ static void destroy(struct cache *cache)
if (cache->prison)
dm_bio_prison_destroy_v2(cache->prison);
+ cancel_delayed_work_sync(&cache->waker);
if (cache->wq)
destroy_workqueue(cache->wq);
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index 2f1cc66d2641..29e0b85eeaf0 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -1958,6 +1958,7 @@ static void clone_dtr(struct dm_target *ti)
mempool_exit(&clone->hydration_pool);
dm_kcopyd_client_destroy(clone->kcopyd_client);
+ cancel_delayed_work_sync(&clone->waker);
destroy_workqueue(clone->wq);
hash_table_exit(clone);
dm_clone_metadata_close(clone->cmd);
diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c
index b0c45c6ebe0b..dc4381d68313 100644
--- a/drivers/md/dm-init.c
+++ b/drivers/md/dm-init.c
@@ -8,6 +8,7 @@
*/
#include <linux/ctype.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/device-mapper.h>
#include <linux/init.h>
@@ -18,12 +19,17 @@
#define DM_MAX_DEVICES 256
#define DM_MAX_TARGETS 256
#define DM_MAX_STR_SIZE 4096
+#define DM_MAX_WAITFOR 256
static char *create;
+static char *waitfor[DM_MAX_WAITFOR];
+
/*
* Format: dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]
* Table format: <start_sector> <num_sectors> <target_type> <target_args>
+ * Block devices to wait for to become available before setting up tables:
+ * dm-mod.waitfor=<device1>[,..,<deviceN>]
*
* See Documentation/admin-guide/device-mapper/dm-init.rst for dm-mod.create="..." format
* details.
@@ -266,7 +272,7 @@ static int __init dm_init_init(void)
struct dm_device *dev;
LIST_HEAD(devices);
char *str;
- int r;
+ int i, r;
if (!create)
return 0;
@@ -286,6 +292,17 @@ static int __init dm_init_init(void)
DMINFO("waiting for all devices to be available before creating mapped devices");
wait_for_device_probe();
+ for (i = 0; i < ARRAY_SIZE(waitfor); i++) {
+ if (waitfor[i]) {
+ DMINFO("waiting for device %s ...", waitfor[i]);
+ while (!dm_get_dev_t(waitfor[i]))
+ msleep(5);
+ }
+ }
+
+ if (waitfor[0])
+ DMINFO("all devices available");
+
list_for_each_entry(dev, &devices, list) {
if (dm_early_create(&dev->dmi, dev->table,
dev->target_args_array))
@@ -301,3 +318,6 @@ late_initcall(dm_init_init);
module_param(create, charp, 0);
MODULE_PARM_DESC(create, "Create a mapped device in early boot");
+
+module_param_array(waitfor, charp, NULL, 0);
+MODULE_PARM_DESC(waitfor, "Devices to wait for before setting up tables");
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index e97e9f97456d..1388ee35571e 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -4558,6 +4558,8 @@ static void dm_integrity_dtr(struct dm_target *ti)
BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
BUG_ON(!list_empty(&ic->wait_list));
+ if (ic->mode == 'B')
+ cancel_delayed_work_sync(&ic->bitmap_flush_work);
if (ic->metadata_wq)
destroy_workqueue(ic->metadata_wq);
if (ic->wait_wq)
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 3bfc1583c20a..36fc6ae4737a 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -681,7 +681,7 @@ static void list_version_get_info(struct target_type *tt, void *param)
strcpy(info->vers->name, tt->name);
info->old_vers = info->vers;
- info->vers = align_ptr(((void *) ++info->vers) + strlen(tt->name) + 1);
+ info->vers = align_ptr((void *)(info->vers + 1) + strlen(tt->name) + 1);
}
static int __list_versions(struct dm_ioctl *param, size_t param_size, const char *name)
@@ -1788,8 +1788,8 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags)
{DM_TARGET_MSG_CMD, 0, target_message},
{DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry},
- {DM_DEV_ARM_POLL, IOCTL_FLAGS_NO_PARAMS, dev_arm_poll},
- {DM_GET_TARGET_VERSION, 0, get_target_version},
+ {DM_DEV_ARM_POLL_CMD, IOCTL_FLAGS_NO_PARAMS, dev_arm_poll},
+ {DM_GET_TARGET_VERSION_CMD, 0, get_target_version},
};
if (unlikely(cmd >= ARRAY_SIZE(_ioctls)))
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index a27395c8621f..6bcc4c4786d8 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -724,6 +724,15 @@ static int __open_metadata(struct dm_pool_metadata *pmd)
goto bad_cleanup_data_sm;
}
+ /*
+ * For pool metadata opening process, root setting is redundant
+ * because it will be set again in __begin_transaction(). But dm
+ * pool aborting process really needs to get last transaction's
+ * root to avoid accessing broken btree.
+ */
+ pmd->root = le64_to_cpu(disk_super->data_mapping_root);
+ pmd->details_root = le64_to_cpu(disk_super->device_details_root);
+
__setup_btree_details(pmd);
dm_bm_unlock(sblock);
@@ -776,13 +785,15 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool f
return r;
}
-static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd)
+static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd,
+ bool destroy_bm)
{
dm_sm_destroy(pmd->data_sm);
dm_sm_destroy(pmd->metadata_sm);
dm_tm_destroy(pmd->nb_tm);
dm_tm_destroy(pmd->tm);
- dm_block_manager_destroy(pmd->bm);
+ if (destroy_bm)
+ dm_block_manager_destroy(pmd->bm);
}
static int __begin_transaction(struct dm_pool_metadata *pmd)
@@ -989,7 +1000,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
}
pmd_write_unlock(pmd);
if (!pmd->fail_io)
- __destroy_persistent_data_objects(pmd);
+ __destroy_persistent_data_objects(pmd, true);
kfree(pmd);
return 0;
@@ -1860,19 +1871,52 @@ static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd)
int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
{
int r = -EINVAL;
+ struct dm_block_manager *old_bm = NULL, *new_bm = NULL;
+
+ /* fail_io is double-checked with pmd->root_lock held below */
+ if (unlikely(pmd->fail_io))
+ return r;
+
+ /*
+ * Replacement block manager (new_bm) is created and old_bm destroyed outside of
+ * pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
+ * shrinker associated with the block manager's bufio client vs pmd root_lock).
+ * - must take shrinker_rwsem without holding pmd->root_lock
+ */
+ new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
+ THIN_MAX_CONCURRENT_LOCKS);
pmd_write_lock(pmd);
- if (pmd->fail_io)
+ if (pmd->fail_io) {
+ pmd_write_unlock(pmd);
goto out;
+ }
__set_abort_with_changes_flags(pmd);
- __destroy_persistent_data_objects(pmd);
- r = __create_persistent_data_objects(pmd, false);
+ __destroy_persistent_data_objects(pmd, false);
+ old_bm = pmd->bm;
+ if (IS_ERR(new_bm)) {
+ DMERR("could not create block manager during abort");
+ pmd->bm = NULL;
+ r = PTR_ERR(new_bm);
+ goto out_unlock;
+ }
+
+ pmd->bm = new_bm;
+ r = __open_or_format_metadata(pmd, false);
+ if (r) {
+ pmd->bm = NULL;
+ goto out_unlock;
+ }
+ new_bm = NULL;
+out_unlock:
if (r)
pmd->fail_io = true;
-
-out:
pmd_write_unlock(pmd);
+ dm_block_manager_destroy(old_bm);
+out:
+ if (new_bm && !IS_ERR(new_bm))
+ dm_block_manager_destroy(new_bm);
return r;
}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index c2b5a537f5b8..64cfcf46881d 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2889,6 +2889,8 @@ static void __pool_destroy(struct pool *pool)
dm_bio_prison_destroy(pool->prison);
dm_kcopyd_client_destroy(pool->copier);
+ cancel_delayed_work_sync(&pool->waker);
+ cancel_delayed_work_sync(&pool->no_space_timeout);
if (pool->wq)
destroy_workqueue(pool->wq);
@@ -3540,20 +3542,28 @@ static int pool_preresume(struct dm_target *ti)
*/
r = bind_control_target(pool, ti);
if (r)
- return r;
+ goto out;
r = maybe_resize_data_dev(ti, &need_commit1);
if (r)
- return r;
+ goto out;
r = maybe_resize_metadata_dev(ti, &need_commit2);
if (r)
- return r;
+ goto out;
if (need_commit1 || need_commit2)
(void) commit(pool);
+out:
+ /*
+ * When a thin-pool is PM_FAIL, it cannot be rebuilt if
+ * bio is in deferred list. Therefore need to return 0
+ * to allow pool_resume() to flush IO.
+ */
+ if (r && get_pool_mode(pool) == PM_FAIL)
+ r = 0;
- return 0;
+ return r;
}
static void pool_suspend_active_thins(struct pool *pool)