summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/md/raid5.c69
1 files changed, 62 insertions, 7 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 516baf49a1fa..b443cd2459df 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -532,13 +532,21 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
bi->bi_end_io = raid5_end_read_request;
rcu_read_lock();
- rdev = rcu_dereference(conf->disks[i].rdev);
rrdev = rcu_dereference(conf->disks[i].replacement);
+ smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */
+ rdev = rcu_dereference(conf->disks[i].rdev);
+ if (!rdev) {
+ rdev = rrdev;
+ rrdev = NULL;
+ }
if (rw & WRITE) {
if (replace_only)
rdev = NULL;
+ if (rdev == rrdev)
+ /* We raced and saw duplicates */
+ rrdev = NULL;
} else {
- if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
+ if (test_bit(R5_ReadRepl, &sh->dev[i].flags) && rrdev)
rdev = rrdev;
rrdev = NULL;
}
@@ -1640,7 +1648,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
int disks = sh->disks, i;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
char b[BDEVNAME_SIZE];
- struct md_rdev *rdev;
+ struct md_rdev *rdev = NULL;
for (i=0 ; i<disks; i++)
@@ -1655,8 +1663,13 @@ static void raid5_end_read_request(struct bio * bi, int error)
return;
}
if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
+ /* If replacement finished while this request was outstanding,
+ * 'replacement' might be NULL already.
+ * In that case it moved down to 'rdev'.
+ * rdev is not removed until all requests are finished.
+ */
rdev = conf->disks[i].replacement;
- else
+ if (!rdev)
rdev = conf->disks[i].rdev;
if (uptodate) {
@@ -1753,7 +1766,14 @@ static void raid5_end_write_request(struct bio *bi, int error)
}
if (bi == &sh->dev[i].rreq) {
rdev = conf->disks[i].replacement;
- replacement = 1;
+ if (rdev)
+ replacement = 1;
+ else
+ /* rdev was removed and 'replacement'
+ * replaced it. rdev is not removed
+ * until all requests are finished.
+ */
+ rdev = conf->disks[i].rdev;
break;
}
}
@@ -3539,6 +3559,9 @@ finish:
}
if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
rdev = conf->disks[i].replacement;
+ if (!rdev)
+ /* rdev have been moved down */
+ rdev = conf->disks[i].rdev;
rdev_clear_badblocks(rdev, sh->sector,
STRIPE_SECTORS);
rdev_dec_pending(rdev, conf->mddev);
@@ -5204,7 +5227,25 @@ static int raid5_spare_active(struct mddev *mddev)
for (i = 0; i < conf->raid_disks; i++) {
tmp = conf->disks + i;
- if (tmp->rdev
+ if (tmp->replacement
+ && tmp->replacement->recovery_offset == MaxSector
+ && !test_bit(Faulty, &tmp->replacement->flags)
+ && !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
+ /* Replacement has just become active. */
+ if (!tmp->rdev
+ || !test_and_clear_bit(In_sync, &tmp->rdev->flags))
+ count++;
+ if (tmp->rdev) {
+ /* Replaced device not technically faulty,
+ * but we need to be sure it gets removed
+ * and never re-added.
+ */
+ set_bit(Faulty, &tmp->rdev->flags);
+ sysfs_notify_dirent_safe(
+ tmp->rdev->sysfs_state);
+ }
+ sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
+ } else if (tmp->rdev
&& tmp->rdev->recovery_offset == MaxSector
&& !test_bit(Faulty, &tmp->rdev->flags)
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
@@ -5250,6 +5291,7 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
if (!test_bit(Faulty, &rdev->flags) &&
mddev->recovery_disabled != conf->recovery_disabled &&
!has_failed(conf) &&
+ (!p->replacement || p->replacement == rdev) &&
number < conf->raid_disks) {
err = -EBUSY;
goto abort;
@@ -5260,7 +5302,20 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
/* lost the race, try later */
err = -EBUSY;
*rdevp = rdev;
- }
+ } else if (p->replacement) {
+ /* We must have just cleared 'rdev' */
+ p->rdev = p->replacement;
+ clear_bit(Replacement, &p->replacement->flags);
+ smp_mb(); /* Make sure other CPUs may see both as identical
+ * but will never see neither - if they are careful
+ */
+ p->replacement = NULL;
+ clear_bit(WantReplacement, &rdev->flags);
+ } else
+ /* We might have just removed the Replacement as faulty-
+ * clear the bit just in case
+ */
+ clear_bit(WantReplacement, &rdev->flags);
abort:
print_raid5_conf(conf);