summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Snitzer <snitzer@redhat.com>2014-10-28 18:34:52 -0400
committerMike Snitzer <snitzer@redhat.com>2014-11-19 12:31:17 -0500
commitffcc39364160663cda1a3c358f4537302a92459b (patch)
treebc7defa66e5c79b036ed21092a8005b72879559b
parent80e96c5484be788f277eead9cabf88cf8e430419 (diff)
downloadlwn-ffcc39364160663cda1a3c358f4537302a92459b.tar.gz
lwn-ffcc39364160663cda1a3c358f4537302a92459b.zip
dm: enhance internal suspend and resume interface
Rename dm_internal_{suspend,resume} to dm_internal_{suspend,resume}_fast -- dm-stats will continue using these methods to avoid all the extra suspend/resume logic that is not needed in order to quickly flush IO. Introduce dm_internal_suspend_noflush() variant that actually calls the mapped_device's target callbacks -- otherwise target-specific hooks are avoided (e.g. dm-thin's thin_presuspend and thin_postsuspend). Common code between dm_internal_{suspend_noflush,resume} and dm_{suspend,resume} was factored out as __dm_{suspend,resume}. Update dm_internal_{suspend_noflush,resume} to always take and release the mapped_device's suspend_lock. Also update dm_{suspend,resume} to be aware of potential for DM_INTERNAL_SUSPEND_FLAG to be set and respond accordingly by interruptibly waiting for the DM_INTERNAL_SUSPEND_FLAG to be cleared. Add lockdep annotation to dm_suspend() and dm_resume(). The existing DM_SUSPEND_FLAG remains unchanged. DM_INTERNAL_SUSPEND_FLAG is set by dm_internal_suspend_noflush() and cleared by dm_internal_resume(). Both DM_SUSPEND_FLAG and DM_INTERNAL_SUSPEND_FLAG may be set if a device was already suspended when dm_internal_suspend_noflush() was called -- this can be thought of as a "nested suspend". A "nested suspend" can occur with legacy userspace dm-thin code that might suspend all active thin volumes before suspending the pool for resize. But otherwise, in the normal dm-thin-pool suspend case moving forward: the thin-pool will have DM_SUSPEND_FLAG set and all active thins from that thin-pool will have DM_INTERNAL_SUSPEND_FLAG set. Also add DM_INTERNAL_SUSPEND_FLAG to status report. This new DM_INTERNAL_SUSPEND_FLAG state is being reported to assist with debugging (e.g. 'dmsetup info' will report an internally suspended device accordingly). Signed-off-by: Mike Snitzer <snitzer@redhat.com> Acked-by: Joe Thornber <ejt@redhat.com>
-rw-r--r--drivers/md/dm-ioctl.c5
-rw-r--r--drivers/md/dm-stats.c2
-rw-r--r--drivers/md/dm.c229
-rw-r--r--drivers/md/dm.h9
-rw-r--r--include/uapi/linux/dm-ioctl.h5
5 files changed, 192 insertions, 58 deletions
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 0be9381365d7..73f791bb9ea4 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -684,11 +684,14 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
int srcu_idx;
param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
- DM_ACTIVE_PRESENT_FLAG);
+ DM_ACTIVE_PRESENT_FLAG | DM_INTERNAL_SUSPEND_FLAG);
if (dm_suspended_md(md))
param->flags |= DM_SUSPEND_FLAG;
+ if (dm_suspended_internally_md(md))
+ param->flags |= DM_INTERNAL_SUSPEND_FLAG;
+
if (dm_test_deferred_remove_flag(md))
param->flags |= DM_DEFERRED_REMOVE;
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
index 28a90122a5a8..42a057aa3811 100644
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -824,7 +824,7 @@ static int message_stats_create(struct mapped_device *md,
return 1;
id = dm_stats_create(dm_get_stats(md), start, end, step, program_id, aux_data,
- dm_internal_suspend, dm_internal_resume, md);
+ dm_internal_suspend_fast, dm_internal_resume_fast, md);
if (id < 0)
return id;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f84de3215982..a0ece87ad426 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -19,6 +19,7 @@
#include <linux/idr.h>
#include <linux/hdreg.h>
#include <linux/delay.h>
+#include <linux/wait.h>
#include <trace/events/block.h>
@@ -117,6 +118,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
#define DMF_NOFLUSH_SUSPENDING 5
#define DMF_MERGE_IS_OPTIONAL 6
#define DMF_DEFERRED_REMOVE 7
+#define DMF_SUSPENDED_INTERNALLY 8
/*
* A dummy definition to make RCU happy.
@@ -2718,36 +2720,18 @@ static void unlock_fs(struct mapped_device *md)
}
/*
- * We need to be able to change a mapping table under a mounted
- * filesystem. For example we might want to move some data in
- * the background. Before the table can be swapped with
- * dm_bind_table, dm_suspend must be called to flush any in
- * flight bios and ensure that any further io gets deferred.
- */
-/*
- * Suspend mechanism in request-based dm.
- *
- * 1. Flush all I/Os by lock_fs() if needed.
- * 2. Stop dispatching any I/O by stopping the request_queue.
- * 3. Wait for all in-flight I/Os to be completed or requeued.
+ * If __dm_suspend returns 0, the device is completely quiescent
+ * now. There is no request-processing activity. All new requests
+ * are being added to md->deferred list.
*
- * To abort suspend, start the request_queue.
+ * Caller must hold md->suspend_lock
*/
-int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
+static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
+ unsigned suspend_flags, int interruptible)
{
- struct dm_table *map = NULL;
- int r = 0;
- int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
- int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
-
- mutex_lock(&md->suspend_lock);
-
- if (dm_suspended_md(md)) {
- r = -EINVAL;
- goto out_unlock;
- }
-
- map = rcu_dereference(md->map);
+ bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
+ bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
+ int r;
/*
* DMF_NOFLUSH_SUSPENDING must be set before presuspend.
@@ -2772,7 +2756,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
r = lock_fs(md);
if (r) {
dm_table_presuspend_undo_targets(map);
- goto out_unlock;
+ return r;
}
}
@@ -2806,7 +2790,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
* We call dm_wait_for_completion to wait for all existing requests
* to finish.
*/
- r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
+ r = dm_wait_for_completion(md, interruptible);
if (noflush)
clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
@@ -2822,14 +2806,55 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
unlock_fs(md);
dm_table_presuspend_undo_targets(map);
- goto out_unlock; /* pushback list is already flushed, so skip flush */
+ /* pushback list is already flushed, so skip flush */
}
- /*
- * If dm_wait_for_completion returned 0, the device is completely
- * quiescent now. There is no request-processing activity. All new
- * requests are being added to md->deferred list.
- */
+ return r;
+}
+
+/*
+ * We need to be able to change a mapping table under a mounted
+ * filesystem. For example we might want to move some data in
+ * the background. Before the table can be swapped with
+ * dm_bind_table, dm_suspend must be called to flush any in
+ * flight bios and ensure that any further io gets deferred.
+ */
+/*
+ * Suspend mechanism in request-based dm.
+ *
+ * 1. Flush all I/Os by lock_fs() if needed.
+ * 2. Stop dispatching any I/O by stopping the request_queue.
+ * 3. Wait for all in-flight I/Os to be completed or requeued.
+ *
+ * To abort suspend, start the request_queue.
+ */
+int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
+{
+ struct dm_table *map = NULL;
+ int r = 0;
+
+retry:
+ mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
+
+ if (dm_suspended_md(md)) {
+ r = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (dm_suspended_internally_md(md)) {
+ /* already internally suspended, wait for internal resume */
+ mutex_unlock(&md->suspend_lock);
+ r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
+ if (r)
+ return r;
+ goto retry;
+ }
+
+ map = rcu_dereference(md->map);
+
+ r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE);
+ if (r)
+ goto out_unlock;
set_bit(DMF_SUSPENDED, &md->flags);
@@ -2840,35 +2865,57 @@ out_unlock:
return r;
}
+static int __dm_resume(struct mapped_device *md, struct dm_table *map)
+{
+ if (map) {
+ int r = dm_table_resume_targets(map);
+ if (r)
+ return r;
+ }
+
+ dm_queue_flush(md);
+
+ /*
+ * Flushing deferred I/Os must be done after targets are resumed
+ * so that mapping of targets can work correctly.
+ * Request-based dm is queueing the deferred I/Os in its request_queue.
+ */
+ if (dm_request_based(md))
+ start_queue(md->queue);
+
+ unlock_fs(md);
+
+ return 0;
+}
+
int dm_resume(struct mapped_device *md)
{
int r = -EINVAL;
struct dm_table *map = NULL;
- mutex_lock(&md->suspend_lock);
+retry:
+ mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
+
if (!dm_suspended_md(md))
goto out;
+ if (dm_suspended_internally_md(md)) {
+ /* already internally suspended, wait for internal resume */
+ mutex_unlock(&md->suspend_lock);
+ r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
+ if (r)
+ return r;
+ goto retry;
+ }
+
map = rcu_dereference(md->map);
if (!map || !dm_table_get_size(map))
goto out;
- r = dm_table_resume_targets(map);
+ r = __dm_resume(md, map);
if (r)
goto out;
- dm_queue_flush(md);
-
- /*
- * Flushing deferred I/Os must be done after targets are resumed
- * so that mapping of targets can work correctly.
- * Request-based dm is queueing the deferred I/Os in its request_queue.
- */
- if (dm_request_based(md))
- start_queue(md->queue);
-
- unlock_fs(md);
-
clear_bit(DMF_SUSPENDED, &md->flags);
r = 0;
@@ -2882,15 +2929,80 @@ out:
* Internal suspend/resume works like userspace-driven suspend. It waits
* until all bios finish and prevents issuing new bios to the target drivers.
* It may be used only from the kernel.
- *
- * Internal suspend holds md->suspend_lock, which prevents interaction with
- * userspace-driven suspend.
*/
-void dm_internal_suspend(struct mapped_device *md)
+static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags)
{
- mutex_lock(&md->suspend_lock);
+ struct dm_table *map = NULL;
+
+ if (dm_suspended_internally_md(md))
+ return; /* nested internal suspend */
+
+ if (dm_suspended_md(md)) {
+ set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+ return; /* nest suspend */
+ }
+
+ map = rcu_dereference(md->map);
+
+ /*
+ * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is
+ * supported. Properly supporting a TASK_INTERRUPTIBLE internal suspend
+ * would require changing .presuspend to return an error -- avoid this
+ * until there is a need for more elaborate variants of internal suspend.
+ */
+ (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE);
+
+ set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+
+ dm_table_postsuspend_targets(map);
+}
+
+static void __dm_internal_resume(struct mapped_device *md)
+{
+ if (!dm_suspended_internally_md(md))
+ return; /* resume from nested internal suspend */
+
if (dm_suspended_md(md))
+ goto done; /* resume from nested suspend */
+
+ /*
+ * NOTE: existing callers don't need to call dm_table_resume_targets
+ * (which may fail -- so best to avoid it for now by passing NULL map)
+ */
+ (void) __dm_resume(md, NULL);
+
+done:
+ clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+ smp_mb__after_atomic();
+ wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY);
+}
+
+void dm_internal_suspend_noflush(struct mapped_device *md)
+{
+ mutex_lock(&md->suspend_lock);
+ __dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);
+ mutex_unlock(&md->suspend_lock);
+}
+EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush);
+
+void dm_internal_resume(struct mapped_device *md)
+{
+ mutex_lock(&md->suspend_lock);
+ __dm_internal_resume(md);
+ mutex_unlock(&md->suspend_lock);
+}
+EXPORT_SYMBOL_GPL(dm_internal_resume);
+
+/*
+ * Fast variants of internal suspend/resume hold md->suspend_lock,
+ * which prevents interaction with userspace-driven suspend.
+ */
+
+void dm_internal_suspend_fast(struct mapped_device *md)
+{
+ mutex_lock(&md->suspend_lock);
+ if (dm_suspended_md(md) || dm_suspended_internally_md(md))
return;
set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
@@ -2899,9 +3011,9 @@ void dm_internal_suspend(struct mapped_device *md)
dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
}
-void dm_internal_resume(struct mapped_device *md)
+void dm_internal_resume_fast(struct mapped_device *md)
{
- if (dm_suspended_md(md))
+ if (dm_suspended_md(md) || dm_suspended_internally_md(md))
goto done;
dm_queue_flush(md);
@@ -2987,6 +3099,11 @@ int dm_suspended_md(struct mapped_device *md)
return test_bit(DMF_SUSPENDED, &md->flags);
}
+int dm_suspended_internally_md(struct mapped_device *md)
+{
+ return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+}
+
int dm_test_deferred_remove_flag(struct mapped_device *md)
{
return test_bit(DMF_DEFERRED_REMOVE, &md->flags);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 781994093bf5..84b0f9e4ba6c 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -130,6 +130,15 @@ int dm_deleting_md(struct mapped_device *md);
int dm_suspended_md(struct mapped_device *md);
/*
+ * Internal suspend and resume methods.
+ */
+int dm_suspended_internally_md(struct mapped_device *md);
+void dm_internal_suspend_fast(struct mapped_device *md);
+void dm_internal_resume_fast(struct mapped_device *md);
+void dm_internal_suspend_noflush(struct mapped_device *md);
+void dm_internal_resume(struct mapped_device *md);
+
+/*
* Test if the device is scheduled for deferred remove.
*/
int dm_test_deferred_remove_flag(struct mapped_device *md);
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 2be66f4be2f9..a570d7b5796c 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -352,4 +352,9 @@ enum {
*/
#define DM_DEFERRED_REMOVE (1 << 17) /* In/Out */
+/*
+ * If set, the device is suspended internally.
+ */
+#define DM_INTERNAL_SUSPEND_FLAG (1 << 18) /* Out */
+
#endif /* _LINUX_DM_IOCTL_H */