summaryrefslogtreecommitdiff
path: root/drivers/block/drbd/drbd_int.h
diff options
context:
space:
mode:
authorLars Ellenberg <lars.ellenberg@linbit.com>2010-09-14 20:26:27 +0200
committerPhilipp Reisner <philipp.reisner@linbit.com>2010-10-14 18:38:50 +0200
commite9e6f3ec535d7b7c9e2ca64ad691e743e7d3c2f0 (patch)
treecbc17d81b9d937b4fc515548f30f5ed00be193ee /drivers/block/drbd/drbd_int.h
parent22cc37a943832c948808884604ec6f5ff2594c1d (diff)
downloadlwn-e9e6f3ec535d7b7c9e2ca64ad691e743e7d3c2f0.tar.gz
lwn-e9e6f3ec535d7b7c9e2ca64ad691e743e7d3c2f0.zip
drbd: fix for possible deadlock on IO error during resync
Scenario: Something (say, flush-147:0) is in drbd_al_begin_io, holding a local_cnt, waiting for the resync to make progress. Disk fails, worker in after_state_ch does drbd_rs_cancel_all, then waits for local_cnt to drop to zero. flush-147:0 is woken by drbd_rs_cancel_all, needs to write an AL transaction, and queues that on the worker. Deadlock. Fix: do not wait in the worker, have put_ldev() trigger the state change D_FAILED -> D_DISKLESS when necessary. put_ldev() cannot do the state change directly, as it may or may not already hold various spinlocks. We queue a short work instead. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd/drbd_int.h')
-rw-r--r--drivers/block/drbd/drbd_int.h8
1 files changed, 7 insertions, 1 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 8ab6fed39539..c07c370c4c82 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -852,6 +852,7 @@ enum {
BITMAP_IO, /* suspend application io;
once no more io in flight, start bitmap io */
BITMAP_IO_QUEUED, /* Started bitmap IO */
+ GO_DISKLESS, /* Disk failed, local_cnt reached zero, we are going diskless */
RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
NET_CONGESTED, /* The data socket is congested */
@@ -976,6 +977,7 @@ struct drbd_conf {
unsigned int ko_count;
struct drbd_work resync_work,
unplug_work,
+ go_diskless,
md_sync_work;
struct timer_list resync_timer;
struct timer_list md_sync_timer;
@@ -1278,6 +1280,7 @@ extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
+extern void drbd_go_diskless(struct drbd_conf *mdev);
/* Meta data layout
@@ -2123,8 +2126,11 @@ static inline void put_ldev(struct drbd_conf *mdev)
int i = atomic_dec_return(&mdev->local_cnt);
__release(local);
D_ASSERT(i >= 0);
- if (i == 0)
+ if (i == 0) {
+ if (mdev->state.disk == D_FAILED)
+ drbd_go_diskless(mdev);
wake_up(&mdev->misc_wait);
+ }
}
#ifndef __CHECKER__