author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2012-05-24 01:13:01 -0700
---|---|---
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2012-05-24 01:13:01 -0700
commit | e644dae645e167d154c0526358940986682a72b0 (patch) |
tree | 972993c6568085b8d407fc7e13de10f4b93c651d /drivers/md/raid1.c |
parent | 899c612d74d4a242158a4db20367388d6299c028 (diff) |
parent | 86809173ce32ef03bd4d0389dfc72df0c805e9c4 (diff) |
Merge branch 'next' into for-linus
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r-- | drivers/md/raid1.c | 116
1 file changed, 85 insertions, 31 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a368db2431a5..15dd59b84e94 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -523,6 +523,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
                 rdev = rcu_dereference(conf->mirrors[disk].rdev);
                 if (r1_bio->bios[disk] == IO_BLOCKED
                     || rdev == NULL
+                    || test_bit(Unmerged, &rdev->flags)
                     || test_bit(Faulty, &rdev->flags))
                         continue;
                 if (!test_bit(In_sync, &rdev->flags) &&
@@ -614,6 +615,39 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
         return best_disk;
 }
 
+static int raid1_mergeable_bvec(struct request_queue *q,
+                                struct bvec_merge_data *bvm,
+                                struct bio_vec *biovec)
+{
+        struct mddev *mddev = q->queuedata;
+        struct r1conf *conf = mddev->private;
+        sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
+        int max = biovec->bv_len;
+
+        if (mddev->merge_check_needed) {
+                int disk;
+                rcu_read_lock();
+                for (disk = 0; disk < conf->raid_disks * 2; disk++) {
+                        struct md_rdev *rdev = rcu_dereference(
+                                conf->mirrors[disk].rdev);
+                        if (rdev && !test_bit(Faulty, &rdev->flags)) {
+                                struct request_queue *q =
+                                        bdev_get_queue(rdev->bdev);
+                                if (q->merge_bvec_fn) {
+                                        bvm->bi_sector = sector +
+                                                rdev->data_offset;
+                                        bvm->bi_bdev = rdev->bdev;
+                                        max = min(max, q->merge_bvec_fn(
+                                                          q, bvm, biovec));
+                                }
+                        }
+                }
+                rcu_read_unlock();
+        }
+        return max;
+
+}
+
 int md_raid1_congested(struct mddev *mddev, int bits)
 {
         struct r1conf *conf = mddev->private;
@@ -624,7 +658,7 @@ int md_raid1_congested(struct mddev *mddev, int bits)
                 return 1;
 
         rcu_read_lock();
-        for (i = 0; i < conf->raid_disks; i++) {
+        for (i = 0; i < conf->raid_disks * 2; i++) {
                 struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
                 if (rdev && !test_bit(Faulty, &rdev->flags)) {
                         struct request_queue *q = bdev_get_queue(rdev->bdev);
@@ -737,9 +771,22 @@ static void wait_barrier(struct r1conf *conf)
         spin_lock_irq(&conf->resync_lock);
         if (conf->barrier) {
                 conf->nr_waiting++;
-                wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+                /* Wait for the barrier to drop.
+                 * However if there are already pending
+                 * requests (preventing the barrier from
+                 * rising completely), and the
+                 * pre-process bio queue isn't empty,
+                 * then don't wait, as we need to empty
+                 * that queue to get the nr_pending
+                 * count down.
+                 */
+                wait_event_lock_irq(conf->wait_barrier,
+                                    !conf->barrier ||
+                                    (conf->nr_pending &&
+                                     current->bio_list &&
+                                     !bio_list_empty(current->bio_list)),
                                     conf->resync_lock,
-                                    );
+                        );
                 conf->nr_waiting--;
         }
         conf->nr_pending++;
@@ -1002,7 +1049,8 @@ read_again:
                         break;
                 }
                 r1_bio->bios[i] = NULL;
-                if (!rdev || test_bit(Faulty, &rdev->flags)) {
+                if (!rdev || test_bit(Faulty, &rdev->flags)
+                    || test_bit(Unmerged, &rdev->flags)) {
                         if (i < conf->raid_disks)
                                 set_bit(R1BIO_Degraded, &r1_bio->state);
                         continue;
@@ -1322,6 +1370,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
         struct mirror_info *p;
         int first = 0;
         int last = conf->raid_disks - 1;
+        struct request_queue *q = bdev_get_queue(rdev->bdev);
 
         if (mddev->recovery_disabled == conf->recovery_disabled)
                 return -EBUSY;
@@ -1329,23 +1378,17 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
         if (rdev->raid_disk >= 0)
                 first = last = rdev->raid_disk;
 
+        if (q->merge_bvec_fn) {
+                set_bit(Unmerged, &rdev->flags);
+                mddev->merge_check_needed = 1;
+        }
+
         for (mirror = first; mirror <= last; mirror++) {
                 p = conf->mirrors+mirror;
                 if (!p->rdev) {
 
                         disk_stack_limits(mddev->gendisk, rdev->bdev,
                                           rdev->data_offset << 9);
-                        /* as we don't honour merge_bvec_fn, we must
-                         * never risk violating it, so limit
-                         * ->max_segments to one lying with a single
-                         * page, as a one page request is never in
-                         * violation.
-                         */
-                        if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-                                blk_queue_max_segments(mddev->queue, 1);
-                                blk_queue_segment_boundary(mddev->queue,
-                                                           PAGE_CACHE_SIZE - 1);
-                        }
 
                         p->head_position = 0;
                         rdev->raid_disk = mirror;
@@ -1370,6 +1413,19 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                         break;
                 }
         }
+        if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
+                /* Some requests might not have seen this new
+                 * merge_bvec_fn. We must wait for them to complete
+                 * before merging the device fully.
+                 * First we make sure any code which has tested
+                 * our function has submitted the request, then
+                 * we wait for all outstanding requests to complete.
+                 */
+                synchronize_sched();
+                raise_barrier(conf);
+                lower_barrier(conf);
+                clear_bit(Unmerged, &rdev->flags);
+        }
         md_integrity_add_rdev(rdev, mddev);
         print_conf(conf);
         return err;
@@ -1656,6 +1712,7 @@ static int process_checks(struct r1bio *r1_bio)
         struct r1conf *conf = mddev->private;
         int primary;
         int i;
+        int vcnt;
 
         for (primary = 0; primary < conf->raid_disks * 2; primary++)
                 if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
@@ -1665,9 +1722,9 @@ static int process_checks(struct r1bio *r1_bio)
                         break;
                 }
         r1_bio->read_disk = primary;
+        vcnt = (r1_bio->sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9);
         for (i = 0; i < conf->raid_disks * 2; i++) {
                 int j;
-                int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
                 struct bio *pbio = r1_bio->bios[primary];
                 struct bio *sbio = r1_bio->bios[i];
                 int size;
@@ -1682,7 +1739,7 @@ static int process_checks(struct r1bio *r1_bio)
                                 s = sbio->bi_io_vec[j].bv_page;
                                 if (memcmp(page_address(p),
                                            page_address(s),
-                                           PAGE_SIZE))
+                                           sbio->bi_io_vec[j].bv_len))
                                         break;
                         }
                 } else
@@ -2330,8 +2387,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
                         int ok = 1;
                         for (i = 0 ; i < conf->raid_disks * 2 ; i++)
                                 if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
-                                        struct md_rdev *rdev =
-                                                rcu_dereference(conf->mirrors[i].rdev);
+                                        struct md_rdev *rdev = conf->mirrors[i].rdev;
                                         ok = rdev_set_badblocks(rdev, sector_nr,
                                                                 min_bad, 0
                                                 ) && ok;
@@ -2491,7 +2547,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 
         err = -EINVAL;
         spin_lock_init(&conf->device_lock);
-        list_for_each_entry(rdev, &mddev->disks, same_set) {
+        rdev_for_each(rdev, mddev) {
                 int disk_idx = rdev->raid_disk;
                 if (disk_idx >= mddev->raid_disks
                     || disk_idx < 0)
@@ -2580,11 +2636,13 @@ static struct r1conf *setup_conf(struct mddev *mddev)
         return ERR_PTR(err);
 }
 
+static int stop(struct mddev *mddev);
 static int run(struct mddev *mddev)
 {
         struct r1conf *conf;
         int i;
         struct md_rdev *rdev;
+        int ret;
 
         if (mddev->level != 1) {
                 printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n",
@@ -2609,20 +2667,11 @@ static int run(struct mddev *mddev)
         if (IS_ERR(conf))
                 return PTR_ERR(conf);
 
-        list_for_each_entry(rdev, &mddev->disks, same_set) {
+        rdev_for_each(rdev, mddev) {
                 if (!mddev->gendisk)
                         continue;
                 disk_stack_limits(mddev->gendisk, rdev->bdev,
                                   rdev->data_offset << 9);
-                /* as we don't honour merge_bvec_fn, we must never risk
-                 * violating it, so limit ->max_segments to 1 lying within
-                 * a single page, as a one page request is never in violation.
-                 */
-                if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-                        blk_queue_max_segments(mddev->queue, 1);
-                        blk_queue_segment_boundary(mddev->queue,
-                                                   PAGE_CACHE_SIZE - 1);
-                }
         }
 
         mddev->degraded = 0;
@@ -2656,8 +2705,13 @@ static int run(struct mddev *mddev)
         if (mddev->queue) {
                 mddev->queue->backing_dev_info.congested_fn = raid1_congested;
                 mddev->queue->backing_dev_info.congested_data = mddev;
+                blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec);
         }
-        return md_integrity_register(mddev);
+
+        ret = md_integrity_register(mddev);
+        if (ret)
+                stop(mddev);
+        return ret;
 }
 
 static int stop(struct mddev *mddev)