diff options
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 320 |
1 files changed, 187 insertions, 133 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index c7de2a53e625..be1a9fca3b2d 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -707,7 +707,6 @@ static struct md_rdev *read_balance(struct r10conf *conf, raid10_find_phys(conf, r10_bio); rcu_read_lock(); -retry: sectors = r10_bio->sectors; best_slot = -1; best_rdev = NULL; @@ -804,13 +803,6 @@ retry: if (slot >= 0) { atomic_inc(&rdev->nr_pending); - if (test_bit(Faulty, &rdev->flags)) { - /* Cannot risk returning a device that failed - * before we inc'ed nr_pending - */ - rdev_dec_pending(rdev, conf->mddev); - goto retry; - } r10_bio->read_slot = slot; } else rdev = NULL; @@ -865,7 +857,7 @@ static void flush_pending_writes(struct r10conf *conf) while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; bio->bi_next = NULL; - if (unlikely((bio->bi_rw & REQ_DISCARD) && + if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ bio_endio(bio); @@ -913,7 +905,7 @@ static void raise_barrier(struct r10conf *conf, int force) /* Now wait for all pending IO to complete */ wait_event_lock_irq(conf->wait_barrier, - !conf->nr_pending && conf->barrier < RESYNC_DEPTH, + !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH, conf->resync_lock); spin_unlock_irq(&conf->resync_lock); @@ -944,23 +936,23 @@ static void wait_barrier(struct r10conf *conf) */ wait_event_lock_irq(conf->wait_barrier, !conf->barrier || - (conf->nr_pending && + (atomic_read(&conf->nr_pending) && current->bio_list && !bio_list_empty(current->bio_list)), conf->resync_lock); conf->nr_waiting--; + if (!conf->nr_waiting) + wake_up(&conf->wait_barrier); } - conf->nr_pending++; + atomic_inc(&conf->nr_pending); spin_unlock_irq(&conf->resync_lock); } static void allow_barrier(struct r10conf *conf) { - unsigned long flags; - spin_lock_irqsave(&conf->resync_lock, flags); - conf->nr_pending--; - spin_unlock_irqrestore(&conf->resync_lock, flags); - wake_up(&conf->wait_barrier); + if ((atomic_dec_and_test(&conf->nr_pending)) || + (conf->array_freeze_pending)) + wake_up(&conf->wait_barrier); } static void freeze_array(struct r10conf *conf, int extra) @@ -978,13 +970,15 @@ static void freeze_array(struct r10conf *conf, int extra) * we continue. */ spin_lock_irq(&conf->resync_lock); + conf->array_freeze_pending++; conf->barrier++; conf->nr_waiting++; wait_event_lock_irq_cmd(conf->wait_barrier, - conf->nr_pending == conf->nr_queued+extra, + atomic_read(&conf->nr_pending) == conf->nr_queued+extra, conf->resync_lock, flush_pending_writes(conf)); + conf->array_freeze_pending--; spin_unlock_irq(&conf->resync_lock); } @@ -1041,7 +1035,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; bio->bi_next = NULL; - if (unlikely((bio->bi_rw & REQ_DISCARD) && + if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ bio_endio(bio); @@ -1058,12 +1052,10 @@ static void __make_request(struct mddev *mddev, struct bio *bio) struct r10bio *r10_bio; struct bio *read_bio; int i; + const int op = bio_op(bio); const int rw = bio_data_dir(bio); - const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); - const unsigned long do_fua = (bio->bi_rw & REQ_FUA); - const unsigned long do_discard = (bio->bi_rw - & (REQ_DISCARD | REQ_SECURE)); - const unsigned long do_same = (bio->bi_rw & REQ_WRITE_SAME); + const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); + const unsigned long do_fua = (bio->bi_opf & REQ_FUA); unsigned long flags; struct md_rdev *blocked_rdev; struct blk_plug_cb *cb; @@ -1072,6 +1064,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio) int max_sectors; int sectors; + md_write_start(mddev, bio); + /* * Register the new request and wait if the reconstruction * thread has put up a bar for new requests. @@ -1156,7 +1150,7 @@ read_again: choose_data_offset(r10_bio, rdev); read_bio->bi_bdev = rdev->bdev; read_bio->bi_end_io = raid10_end_read_request; - read_bio->bi_rw = READ | do_sync; + bio_set_op_attrs(read_bio, op, do_sync); read_bio->bi_private = r10_bio; if (max_sectors < r10_bio->sectors) { @@ -1363,8 +1357,7 @@ retry_write: rdev)); mbio->bi_bdev = rdev->bdev; mbio->bi_end_io = raid10_end_write_request; - mbio->bi_rw = - WRITE | do_sync | do_fua | do_discard | do_same; + bio_set_op_attrs(mbio, op, do_sync | do_fua); mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); @@ -1406,8 +1399,7 @@ retry_write: r10_bio, rdev)); mbio->bi_bdev = rdev->bdev; mbio->bi_end_io = raid10_end_write_request; - mbio->bi_rw = - WRITE | do_sync | do_fua | do_discard | do_same; + bio_set_op_attrs(mbio, op, do_sync | do_fua); mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); @@ -1450,13 +1442,11 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio) struct bio *split; - if (unlikely(bio->bi_rw & REQ_FLUSH)) { + if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { md_flush_request(mddev, bio); return; } - md_write_start(mddev, bio); - do { /* @@ -1503,10 +1493,12 @@ static void raid10_status(struct seq_file *seq, struct mddev *mddev) } seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks, conf->geo.raid_disks - mddev->degraded); - for (i = 0; i < conf->geo.raid_disks; i++) - seq_printf(seq, "%s", - conf->mirrors[i].rdev && - test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? "U" : "_"); + rcu_read_lock(); + for (i = 0; i < conf->geo.raid_disks; i++) { + struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); + seq_printf(seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); + } + rcu_read_unlock(); seq_printf(seq, "]"); } @@ -1604,7 +1596,7 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev) static void print_conf(struct r10conf *conf) { int i; - struct raid10_info *tmp; + struct md_rdev *rdev; printk(KERN_DEBUG "RAID10 conf printout:\n"); if (!conf) { @@ -1614,14 +1606,16 @@ static void print_conf(struct r10conf *conf) printk(KERN_DEBUG " --- wd:%d rd:%d\n", conf->geo.raid_disks - conf->mddev->degraded, conf->geo.raid_disks); + /* This is only called with ->reconfix_mutex held, so + * rcu protection of rdev is not needed */ for (i = 0; i < conf->geo.raid_disks; i++) { char b[BDEVNAME_SIZE]; - tmp = conf->mirrors + i; - if (tmp->rdev) + rdev = conf->mirrors[i].rdev; + if (rdev) printk(KERN_DEBUG " disk %d, wo:%d, o:%d, dev:%s\n", - i, !test_bit(In_sync, &tmp->rdev->flags), - !test_bit(Faulty, &tmp->rdev->flags), - bdevname(tmp->rdev->bdev,b)); + i, !test_bit(In_sync, &rdev->flags), + !test_bit(Faulty, &rdev->flags), + bdevname(rdev->bdev,b)); } } @@ -1770,7 +1764,7 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) err = -EBUSY; goto abort; } - /* Only remove faulty devices if recovery + /* Only remove non-faulty devices if recovery * is not possible. */ if (!test_bit(Faulty, &rdev->flags) && @@ -1782,13 +1776,16 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) goto abort; } *rdevp = NULL; - synchronize_rcu(); - if (atomic_read(&rdev->nr_pending)) { - /* lost the race, try later */ - err = -EBUSY; - *rdevp = rdev; - goto abort; - } else if (p->replacement) { + if (!test_bit(RemoveSynchronized, &rdev->flags)) { + synchronize_rcu(); + if (atomic_read(&rdev->nr_pending)) { + /* lost the race, try later */ + err = -EBUSY; + *rdevp = rdev; + goto abort; + } + } + if (p->replacement) { /* We must have just cleared 'rdev' */ p->rdev = p->replacement; clear_bit(Replacement, &p->replacement->flags); @@ -1992,10 +1989,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) tbio->bi_vcnt = vcnt; tbio->bi_iter.bi_size = fbio->bi_iter.bi_size; - tbio->bi_rw = WRITE; tbio->bi_private = r10_bio; tbio->bi_iter.bi_sector = r10_bio->devs[i].addr; tbio->bi_end_io = end_sync_write; + bio_set_op_attrs(tbio, REQ_OP_WRITE, 0); bio_copy_data(tbio, fbio); @@ -2078,7 +2075,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio) addr, s << 9, bio->bi_io_vec[idx].bv_page, - READ, false); + REQ_OP_READ, 0, false); if (ok) { rdev = conf->mirrors[dw].rdev; addr = r10_bio->devs[1].addr + sect; @@ -2086,7 +2083,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio) addr, s << 9, bio->bi_io_vec[idx].bv_page, - WRITE, false); + REQ_OP_WRITE, 0, false); if (!ok) { set_bit(WriteErrorSeen, &rdev->flags); if (!test_and_set_bit(WantReplacement, @@ -2175,21 +2172,20 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) */ static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev) { - struct timespec cur_time_mon; + long cur_time_mon; unsigned long hours_since_last; unsigned int read_errors = atomic_read(&rdev->read_errors); - ktime_get_ts(&cur_time_mon); + cur_time_mon = ktime_get_seconds(); - if (rdev->last_read_error.tv_sec == 0 && - rdev->last_read_error.tv_nsec == 0) { + if (rdev->last_read_error == 0) { /* first time we've seen a read error */ rdev->last_read_error = cur_time_mon; return; } - hours_since_last = (cur_time_mon.tv_sec - - rdev->last_read_error.tv_sec) / 3600; + hours_since_last = (long)(cur_time_mon - + rdev->last_read_error) / 3600; rdev->last_read_error = cur_time_mon; @@ -2213,7 +2209,7 @@ static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector, if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors) && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags))) return -1; - if (sync_page_io(rdev, sector, sectors << 9, page, rw, false)) + if (sync_page_io(rdev, sector, sectors << 9, page, rw, 0, false)) /* success */ return 1; if (rw == WRITE) { @@ -2268,7 +2264,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 printk(KERN_NOTICE "md/raid10:%s: %s: Failing raid device\n", mdname(mddev), b); - md_error(mddev, conf->mirrors[d].rdev); + md_error(mddev, rdev); r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED; return; } @@ -2291,6 +2287,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 rdev = rcu_dereference(conf->mirrors[d].rdev); if (rdev && test_bit(In_sync, &rdev->flags) && + !test_bit(Faulty, &rdev->flags) && is_badblock(rdev, r10_bio->devs[sl].addr + sect, s, &first_bad, &bad_sectors) == 0) { atomic_inc(&rdev->nr_pending); @@ -2299,7 +2296,8 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 r10_bio->devs[sl].addr + sect, s<<9, - conf->tmppage, READ, false); + conf->tmppage, + REQ_OP_READ, 0, false); rdev_dec_pending(rdev, mddev); rcu_read_lock(); if (success) @@ -2343,6 +2341,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 d = r10_bio->devs[sl].devnum; rdev = rcu_dereference(conf->mirrors[d].rdev); if (!rdev || + test_bit(Faulty, &rdev->flags) || !test_bit(In_sync, &rdev->flags)) continue; @@ -2382,6 +2381,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 d = r10_bio->devs[sl].devnum; rdev = rcu_dereference(conf->mirrors[d].rdev); if (!rdev || + test_bit(Faulty, &rdev->flags) || !test_bit(In_sync, &rdev->flags)) continue; @@ -2465,18 +2465,21 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) while (sect_to_write) { struct bio *wbio; + sector_t wsector; if (sectors > sect_to_write) sectors = sect_to_write; /* Write at 'sector' for 'sectors' */ wbio = bio_clone_mddev(bio, GFP_NOIO, mddev); bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors); - wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+ - choose_data_offset(r10_bio, rdev) + - (sector - r10_bio->sector)); + wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector); + wbio->bi_iter.bi_sector = wsector + + choose_data_offset(r10_bio, rdev); wbio->bi_bdev = rdev->bdev; - if (submit_bio_wait(WRITE, wbio) < 0) + bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); + + if (submit_bio_wait(wbio) < 0) /* Failure! */ - ok = rdev_set_badblocks(rdev, sector, + ok = rdev_set_badblocks(rdev, wsector, sectors, 0) && ok; @@ -2531,7 +2534,7 @@ read_more: return; } - do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); + do_sync = (r10_bio->master_bio->bi_opf & REQ_SYNC); slot = r10_bio->read_slot; printk_ratelimited( KERN_ERR @@ -2548,7 +2551,7 @@ read_more: bio->bi_iter.bi_sector = r10_bio->devs[slot].addr + choose_data_offset(r10_bio, rdev); bio->bi_bdev = rdev->bdev; - bio->bi_rw = READ | do_sync; + bio_set_op_attrs(bio, REQ_OP_READ, do_sync); bio->bi_private = r10_bio; bio->bi_end_io = raid10_end_read_request; if (max_sectors < r10_bio->sectors) { @@ -2877,11 +2880,14 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, /* Completed a full sync so the replacements * are now fully recovered. */ - for (i = 0; i < conf->geo.raid_disks; i++) - if (conf->mirrors[i].replacement) - conf->mirrors[i].replacement - ->recovery_offset - = MaxSector; + rcu_read_lock(); + for (i = 0; i < conf->geo.raid_disks; i++) { + struct md_rdev *rdev = + rcu_dereference(conf->mirrors[i].replacement); + if (rdev) + rdev->recovery_offset = MaxSector; + } + rcu_read_unlock(); } conf->fullsync = 0; } @@ -2912,6 +2918,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, max_sector > (sector_nr | chunk_mask)) max_sector = (sector_nr | chunk_mask) + 1; + /* + * If there is non-resync activity waiting for a turn, then let it + * though before starting on this new sync request. + */ + if (conf->nr_waiting) + schedule_timeout_uninterruptible(1); + /* Again, very different code for resync and recovery. * Both must result in an r10bio with a list of bios that * have bi_end_io, bi_sector, bi_bdev set, @@ -2940,14 +2953,20 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, int must_sync; int any_working; struct raid10_info *mirror = &conf->mirrors[i]; + struct md_rdev *mrdev, *mreplace; - if ((mirror->rdev == NULL || - test_bit(In_sync, &mirror->rdev->flags)) - && - (mirror->replacement == NULL || - test_bit(Faulty, - &mirror->replacement->flags))) + rcu_read_lock(); + mrdev = rcu_dereference(mirror->rdev); + mreplace = rcu_dereference(mirror->replacement); + + if ((mrdev == NULL || + test_bit(Faulty, &mrdev->flags) || + test_bit(In_sync, &mrdev->flags)) && + (mreplace == NULL || + test_bit(Faulty, &mreplace->flags))) { + rcu_read_unlock(); continue; + } still_degraded = 0; /* want to reconstruct this device */ @@ -2957,8 +2976,11 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, /* last stripe is not complete - don't * try to recover this sector. */ + rcu_read_unlock(); continue; } + if (mreplace && test_bit(Faulty, &mreplace->flags)) + mreplace = NULL; /* Unless we are doing a full sync, or a replacement * we only need to recover the block if it is set in * the bitmap @@ -2968,14 +2990,19 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, if (sync_blocks < max_sync) max_sync = sync_blocks; if (!must_sync && - mirror->replacement == NULL && + mreplace == NULL && !conf->fullsync) { /* yep, skip the sync_blocks here, but don't assume * that there will never be anything to do here */ chunks_skipped = -1; + rcu_read_unlock(); continue; } + atomic_inc(&mrdev->nr_pending); + if (mreplace) + atomic_inc(&mreplace->nr_pending); + rcu_read_unlock(); r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); r10_bio->state = 0; @@ -2994,12 +3021,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, /* Need to check if the array will still be * degraded */ - for (j = 0; j < conf->geo.raid_disks; j++) - if (conf->mirrors[j].rdev == NULL || - test_bit(Faulty, &conf->mirrors[j].rdev->flags)) { + rcu_read_lock(); + for (j = 0; j < conf->geo.raid_disks; j++) { + struct md_rdev *rdev = rcu_dereference( + conf->mirrors[j].rdev); + if (rdev == NULL || test_bit(Faulty, &rdev->flags)) { still_degraded = 1; break; } + } must_sync = bitmap_start_sync(mddev->bitmap, sect, &sync_blocks, still_degraded); @@ -3009,15 +3039,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, int k; int d = r10_bio->devs[j].devnum; sector_t from_addr, to_addr; - struct md_rdev *rdev; + struct md_rdev *rdev = + rcu_dereference(conf->mirrors[d].rdev); sector_t sector, first_bad; int bad_sectors; - if (!conf->mirrors[d].rdev || - !test_bit(In_sync, &conf->mirrors[d].rdev->flags)) + if (!rdev || + !test_bit(In_sync, &rdev->flags)) continue; /* This is where we read from */ any_working = 1; - rdev = conf->mirrors[d].rdev; sector = r10_bio->devs[j].addr; if (is_badblock(rdev, sector, max_sync, @@ -3038,7 +3068,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, biolist = bio; bio->bi_private = r10_bio; bio->bi_end_io = end_sync_read; - bio->bi_rw = READ; + bio_set_op_attrs(bio, REQ_OP_READ, 0); from_addr = r10_bio->devs[j].addr; bio->bi_iter.bi_sector = from_addr + rdev->data_offset; @@ -3056,18 +3086,17 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, r10_bio->devs[1].devnum = i; r10_bio->devs[1].addr = to_addr; - rdev = mirror->rdev; - if (!test_bit(In_sync, &rdev->flags)) { + if (!test_bit(In_sync, &mrdev->flags)) { bio = r10_bio->devs[1].bio; bio_reset(bio); bio->bi_next = biolist; biolist = bio; bio->bi_private = r10_bio; bio->bi_end_io = end_sync_write; - bio->bi_rw = WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_iter.bi_sector = to_addr - + rdev->data_offset; - bio->bi_bdev = rdev->bdev; + + mrdev->data_offset; + bio->bi_bdev = mrdev->bdev; atomic_inc(&r10_bio->remaining); } else r10_bio->devs[1].bio->bi_end_io = NULL; @@ -3076,8 +3105,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio = r10_bio->devs[1].repl_bio; if (bio) bio->bi_end_io = NULL; - rdev = mirror->replacement; - /* Note: if rdev != NULL, then bio + /* Note: if mreplace != NULL, then bio * cannot be NULL as r10buf_pool_alloc will * have allocated it. * So the second test here is pointless. @@ -3085,21 +3113,22 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, * this comment keeps human reviewers * happy. */ - if (rdev == NULL || bio == NULL || - test_bit(Faulty, &rdev->flags)) + if (mreplace == NULL || bio == NULL || + test_bit(Faulty, &mreplace->flags)) break; bio_reset(bio); bio->bi_next = biolist; biolist = bio; bio->bi_private = r10_bio; bio->bi_end_io = end_sync_write; - bio->bi_rw = WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_iter.bi_sector = to_addr + - rdev->data_offset; - bio->bi_bdev = rdev->bdev; + mreplace->data_offset; + bio->bi_bdev = mreplace->bdev; atomic_inc(&r10_bio->remaining); break; } + rcu_read_unlock(); if (j == conf->copies) { /* Cannot recover, so abort the recovery or * record a bad block */ @@ -3112,15 +3141,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, if (r10_bio->devs[k].devnum == i) break; if (!test_bit(In_sync, - &mirror->rdev->flags) + &mrdev->flags) && !rdev_set_badblocks( - mirror->rdev, + mrdev, r10_bio->devs[k].addr, max_sync, 0)) any_working = 0; - if (mirror->replacement && + if (mreplace && !rdev_set_badblocks( - mirror->replacement, + mreplace, r10_bio->devs[k].addr, max_sync, 0)) any_working = 0; @@ -3138,8 +3167,14 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, if (rb2) atomic_dec(&rb2->remaining); r10_bio = rb2; + rdev_dec_pending(mrdev, mddev); + if (mreplace) + rdev_dec_pending(mreplace, mddev); break; } + rdev_dec_pending(mrdev, mddev); + if (mreplace) + rdev_dec_pending(mreplace, mddev); } if (biolist == NULL) { while (r10_bio) { @@ -3184,6 +3219,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, int d = r10_bio->devs[i].devnum; sector_t first_bad, sector; int bad_sectors; + struct md_rdev *rdev; if (r10_bio->devs[i].repl_bio) r10_bio->devs[i].repl_bio->bi_end_io = NULL; @@ -3191,12 +3227,14 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio = r10_bio->devs[i].bio; bio_reset(bio); bio->bi_error = -EIO; - if (conf->mirrors[d].rdev == NULL || - test_bit(Faulty, &conf->mirrors[d].rdev->flags)) + rcu_read_lock(); + rdev = rcu_dereference(conf->mirrors[d].rdev); + if (rdev == NULL || test_bit(Faulty, &rdev->flags)) { + rcu_read_unlock(); continue; + } sector = r10_bio->devs[i].addr; - if (is_badblock(conf->mirrors[d].rdev, - sector, max_sync, + if (is_badblock(rdev, sector, max_sync, &first_bad, &bad_sectors)) { if (first_bad > sector) max_sync = first_bad - sector; @@ -3204,25 +3242,28 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bad_sectors -= (sector - first_bad); if (max_sync > bad_sectors) max_sync = bad_sectors; + rcu_read_unlock(); continue; } } - atomic_inc(&conf->mirrors[d].rdev->nr_pending); + atomic_inc(&rdev->nr_pending); atomic_inc(&r10_bio->remaining); bio->bi_next = biolist; biolist = bio; bio->bi_private = r10_bio; bio->bi_end_io = end_sync_read; - bio->bi_rw = READ; - bio->bi_iter.bi_sector = sector + - conf->mirrors[d].rdev->data_offset; - bio->bi_bdev = conf->mirrors[d].rdev->bdev; + bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_iter.bi_sector = sector + rdev->data_offset; + bio->bi_bdev = rdev->bdev; count++; - if (conf->mirrors[d].replacement == NULL || - test_bit(Faulty, - &conf->mirrors[d].replacement->flags)) + rdev = rcu_dereference(conf->mirrors[d].replacement); + if (rdev == NULL || test_bit(Faulty, &rdev->flags)) { + rcu_read_unlock(); continue; + } + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); /* Need to set up for writing to the replacement */ bio = r10_bio->devs[i].repl_bio; @@ -3230,15 +3271,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio->bi_error = -EIO; sector = r10_bio->devs[i].addr; - atomic_inc(&conf->mirrors[d].rdev->nr_pending); bio->bi_next = biolist; biolist = bio; bio->bi_private = r10_bio; bio->bi_end_io = end_sync_write; - bio->bi_rw = WRITE; - bio->bi_iter.bi_sector = sector + - conf->mirrors[d].replacement->data_offset; - bio->bi_bdev = conf->mirrors[d].replacement->bdev; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_iter.bi_sector = sector + rdev->data_offset; + bio->bi_bdev = rdev->bdev; count++; } @@ -3505,6 +3544,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) spin_lock_init(&conf->resync_lock); init_waitqueue_head(&conf->wait_barrier); + atomic_set(&conf->nr_pending, 0); conf->thread = md_register_thread(raid10d, mddev, "raid10"); if (!conf->thread) @@ -4320,7 +4360,7 @@ read_more: + rdev->data_offset); read_bio->bi_private = r10_bio; read_bio->bi_end_io = end_sync_read; - read_bio->bi_rw = READ; + bio_set_op_attrs(read_bio, REQ_OP_READ, 0); read_bio->bi_flags &= (~0UL << BIO_RESET_BITS); read_bio->bi_error = 0; read_bio->bi_vcnt = 0; @@ -4334,15 +4374,16 @@ read_more: blist = read_bio; read_bio->bi_next = NULL; + rcu_read_lock(); for (s = 0; s < conf->copies*2; s++) { struct bio *b; int d = r10_bio->devs[s/2].devnum; struct md_rdev *rdev2; if (s&1) { - rdev2 = conf->mirrors[d].replacement; + rdev2 = rcu_dereference(conf->mirrors[d].replacement); b = r10_bio->devs[s/2].repl_bio; } else { - rdev2 = conf->mirrors[d].rdev; + rdev2 = rcu_dereference(conf->mirrors[d].rdev); b = r10_bio->devs[s/2].bio; } if (!rdev2 || test_bit(Faulty, &rdev2->flags)) @@ -4354,7 +4395,7 @@ read_more: rdev2->new_data_offset; b->bi_private = r10_bio; b->bi_end_io = end_reshape_write; - b->bi_rw = WRITE; + bio_set_op_attrs(b, REQ_OP_WRITE, 0); b->bi_next = blist; blist = b; } @@ -4387,6 +4428,7 @@ read_more: nr_sectors += len >> 9; } bio_full: + rcu_read_unlock(); r10_bio->sectors = nr_sectors; /* Now submit the read */ @@ -4438,16 +4480,20 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio) struct bio *b; int d = r10_bio->devs[s/2].devnum; struct md_rdev *rdev; + rcu_read_lock(); if (s&1) { - rdev = conf->mirrors[d].replacement; + rdev = rcu_dereference(conf->mirrors[d].replacement); b = r10_bio->devs[s/2].repl_bio; } else { - rdev = conf->mirrors[d].rdev; + rdev = rcu_dereference(conf->mirrors[d].rdev); b = r10_bio->devs[s/2].bio; } - if (!rdev || test_bit(Faulty, &rdev->flags)) + if (!rdev || test_bit(Faulty, &rdev->flags)) { + rcu_read_unlock(); continue; + } atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); md_sync_acct(b->bi_bdev, r10_bio->sectors); atomic_inc(&r10_bio->remaining); b->bi_next = NULL; @@ -4508,9 +4554,10 @@ static int handle_reshape_read_error(struct mddev *mddev, if (s > (PAGE_SIZE >> 9)) s = PAGE_SIZE >> 9; + rcu_read_lock(); while (!success) { int d = r10b->devs[slot].devnum; - struct md_rdev *rdev = conf->mirrors[d].rdev; + struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev); sector_t addr; if (rdev == NULL || test_bit(Faulty, &rdev->flags) || @@ -4518,11 +4565,15 @@ static int handle_reshape_read_error(struct mddev *mddev, goto failed; addr = r10b->devs[slot].addr + idx * PAGE_SIZE; + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); success = sync_page_io(rdev, addr, s << 9, bvec[idx].bv_page, - READ, false); + REQ_OP_READ, 0, false); + rdev_dec_pending(rdev, mddev); + rcu_read_lock(); if (success) break; failed: @@ -4532,6 +4583,7 @@ static int handle_reshape_read_error(struct mddev *mddev, if (slot == first_slot) break; } + rcu_read_unlock(); if (!success) { /* couldn't read this block, must give up */ set_bit(MD_RECOVERY_INTR, @@ -4601,16 +4653,18 @@ static void raid10_finish_reshape(struct mddev *mddev) } } else { int d; + rcu_read_lock(); for (d = conf->geo.raid_disks ; d < conf->geo.raid_disks - mddev->delta_disks; d++) { - struct md_rdev *rdev = conf->mirrors[d].rdev; + struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev); if (rdev) clear_bit(In_sync, &rdev->flags); - rdev = conf->mirrors[d].replacement; + rdev = rcu_dereference(conf->mirrors[d].replacement); if (rdev) clear_bit(In_sync, &rdev->flags); } + rcu_read_unlock(); } mddev->layout = mddev->new_layout; mddev->chunk_sectors = 1 << conf->geo.chunk_shift; |