md: avoid races when stopping resync.

commit 73d5c38a9536142e062c35997b044e89166e063b upstream. There has been a race in raid10 and raid1 for a long time which has only recently started showing up due to a scheduler change. When a sync_read request finishes, as soon as reschedule_retry is called, another thread can mark the resync request as having completed, so md_do_sync can finish, ->stop can be called, and ->conf can be freed. So using conf after reschedule_retry is not safe. Similarly, when finishing a sync_write, calling md_done_sync must be the last thing we do, as it allows a chain of events which will free conf and other data structures. The first of these requires action in raid10.c. The second requires action in raid1.c and raid10.c. Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
author: NeilBrown <neilb@suse.de> 2009-02-25 13:18:47 +1100
committer: Greg Kroah-Hartman <gregkh@suse.de> 2009-03-16 17:52:54 -0700
commit: a9785b107920670a8ec8ef1744e0e0c59dde9eb4 (patch)
tree: 37cb0a3b2a75543918226ec892fa2d0801ffe7b8
parent: 285ed5b1cedb3f043fa90f31deaa11f7c3ff4c76 (diff)
download: lwn-a9785b107920670a8ec8ef1744e0e0c59dde9eb4.tar.gz
lwn-a9785b107920670a8ec8ef1744e0e0c59dde9eb4.zip
2 files changed, 6 insertions, 4 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 03a5ab705c20..2b510a305082 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1229,8 +1229,9 @@ static void end_sync_write(struct bio *bio, int error)
 	update_head_pos(mirror, r1_bio);
 
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
-		md_done_sync(mddev, r1_bio->sectors, uptodate);
+		sector_t s = r1_bio->sectors;
 		put_buf(r1_bio);
+		md_done_sync(mddev, s, uptodate);
 	}
 }
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 941576d4eceb..310493ff55ff 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1231,6 +1231,7 @@ static void end_sync_read(struct bio *bio, int error)
 	/* for reconstruct, we always reschedule after a read.
 	 * for resync, only after all reads
 	 */
+	rdev_dec_pending(conf->mirrors[d].rdev, conf->mddev);
 	if (test_bit(R10BIO_IsRecover, &r10_bio->state) ||
 	    atomic_dec_and_test(&r10_bio->remaining)) {
 		/* we have read all the blocks,
@@ -1238,7 +1239,6 @@ static void end_sync_read(struct bio *bio, int error)
 		 */
 		reschedule_retry(r10_bio);
 	}
-	rdev_dec_pending(conf->mirrors[d].rdev, conf->mddev);
 }
 
 static void end_sync_write(struct bio *bio, int error)
@@ -1259,11 +1259,13 @@ static void end_sync_write(struct bio *bio, int error)
 
 	update_head_pos(i, r10_bio);
 
+	rdev_dec_pending(conf->mirrors[d].rdev, mddev);
 	while (atomic_dec_and_test(&r10_bio->remaining)) {
 		if (r10_bio->master_bio == NULL) {
 			/* the primary of several recovery bios */
-			md_done_sync(mddev, r10_bio->sectors, 1);
+			sector_t s = r10_bio->sectors;
 			put_buf(r10_bio);
+			md_done_sync(mddev, s, 1);
 			break;
 		} else {
 			r10bio_t *r10_bio2 = (r10bio_t *)r10_bio->master_bio;
@@ -1271,7 +1273,6 @@ static void end_sync_write(struct bio *bio, int error)
 			r10_bio = r10_bio2;
 		}
 	}
-	rdev_dec_pending(conf->mirrors[d].rdev, mddev);
 }
 
 /*
author	NeilBrown <neilb@suse.de>	2009-02-25 13:18:47 +1100
committer	Greg Kroah-Hartman <gregkh@suse.de>	2009-03-16 17:52:54 -0700
commit	a9785b107920670a8ec8ef1744e0e0c59dde9eb4 (patch)
tree	37cb0a3b2a75543918226ec892fa2d0801ffe7b8
parent	285ed5b1cedb3f043fa90f31deaa11f7c3ff4c76 (diff)
download	lwn-a9785b107920670a8ec8ef1744e0e0c59dde9eb4.tar.gz lwn-a9785b107920670a8ec8ef1744e0e0c59dde9eb4.zip