diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-04 20:28:10 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-04 20:28:10 -0800 |
commit | d9734e0d1ccf87e828ad172c58a96dff97cfc0ba (patch) | |
tree | fcce824c45f17dbcc954bc55b8b642a967b962e0 /block | |
parent | 0d51ce9ca1116e8f4dc87cb51db8dd250327e9bb (diff) | |
parent | 2404e607a9ee36db361bebe32787dafa1f7d6c00 (diff) | |
download | lwn-d9734e0d1ccf87e828ad172c58a96dff97cfc0ba.tar.gz lwn-d9734e0d1ccf87e828ad172c58a96dff97cfc0ba.zip |
Merge branch 'for-4.4/core' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe:
"This is the core block pull request for 4.4. I've got a few more
topic branches this time around, some of them will layer on top of the
core+drivers changes and will come in a separate round. So not a huge
chunk of changes in this round.
This pull request contains:
- Enable blk-mq page allocation tracking with kmemleak, from Catalin.
- Unused prototype removal in blk-mq from Christoph.
- Cleanup of the q->blk_trace exchange, using cmpxchg instead of two
xchg()'s, from Davidlohr.
- A plug flush fix from Jeff.
- Also from Jeff, a fix that means we don't have to update shared tag
sets at init time unless we do a state change. This cuts down boot
times on thousands of devices a lot with scsi/blk-mq.
- blk-mq waitqueue barrier fix from Kosuke.
- Various fixes from Ming:
- Fixes for segment merging and splitting, and checks, for
the old core and blk-mq.
- Potential blk-mq speedup by marking ctx pending at the end
of a plug insertion batch in blk-mq.
- direct-io no page dirty on kernel direct reads.
- A WRITE_SYNC fix for mpage from Roman"
* 'for-4.4/core' of git://git.kernel.dk/linux-block:
blk-mq: avoid excessive boot delays with large lun counts
blktrace: re-write setting q->blk_trace
blk-mq: mark ctx as pending at batch in flush plug path
blk-mq: fix for trace_block_plug()
block: check bio_mergeable() early before merging
blk-mq: check bio_mergeable() early before merging
block: avoid to merge splitted bio
block: setup bi_phys_segments after splitting
block: fix plug list flushing for nomerge queues
blk-mq: remove unused blk_mq_clone_flush_request prototype
blk-mq: fix waitqueue_active without memory barrier in block/blk-mq-tag.c
fs: direct-io: don't dirtying pages for ITER_BVEC/ITER_KVEC direct read
fs/mpage.c: forgotten WRITE_SYNC in case of data integrity write
block: kmemleak: Track the page allocations for struct request
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-core.c | 32 | ||||
-rw-r--r-- | block/blk-merge.c | 32 | ||||
-rw-r--r-- | block/blk-mq-tag.c | 4 | ||||
-rw-r--r-- | block/blk-mq.c | 89 | ||||
-rw-r--r-- | block/blk-mq.h | 2 | ||||
-rw-r--r-- | block/blk.h | 1 | ||||
-rw-r--r-- | block/elevator.c | 2 |
7 files changed, 122 insertions, 40 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 18e92a6645e2..f888f23d796f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1594,6 +1594,30 @@ out: return ret; } +unsigned int blk_plug_queued_count(struct request_queue *q) +{ + struct blk_plug *plug; + struct request *rq; + struct list_head *plug_list; + unsigned int ret = 0; + + plug = current->plug; + if (!plug) + goto out; + + if (q->mq_ops) + plug_list = &plug->mq_list; + else + plug_list = &plug->list; + + list_for_each_entry(rq, plug_list, queuelist) { + if (rq->q == q) + ret++; + } +out: + return ret; +} + void init_request_from_bio(struct request *req, struct bio *bio) { req->cmd_type = REQ_TYPE_FS; @@ -1641,9 +1665,11 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio) * Check if we can merge with the plugged list before grabbing * any locks. */ - if (!blk_queue_nomerges(q) && - blk_attempt_plug_merge(q, bio, &request_count, NULL)) - return; + if (!blk_queue_nomerges(q)) { + if (blk_attempt_plug_merge(q, bio, &request_count, NULL)) + return; + } else + request_count = blk_plug_queued_count(q); spin_lock_irq(q->queue_lock); diff --git a/block/blk-merge.c b/block/blk-merge.c index c4e9c37f3e38..de5716d8e525 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -11,13 +11,16 @@ static struct bio *blk_bio_discard_split(struct request_queue *q, struct bio *bio, - struct bio_set *bs) + struct bio_set *bs, + unsigned *nsegs) { unsigned int max_discard_sectors, granularity; int alignment; sector_t tmp; unsigned split_sectors; + *nsegs = 1; + /* Zero-sector (unknown) and one-sector granularities are the same. */ granularity = max(q->limits.discard_granularity >> 9, 1U); @@ -51,8 +54,11 @@ static struct bio *blk_bio_discard_split(struct request_queue *q, static struct bio *blk_bio_write_same_split(struct request_queue *q, struct bio *bio, - struct bio_set *bs) + struct bio_set *bs, + unsigned *nsegs) { + *nsegs = 1; + if (!q->limits.max_write_same_sectors) return NULL; @@ -64,7 +70,8 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q, static struct bio *blk_bio_segment_split(struct request_queue *q, struct bio *bio, - struct bio_set *bs) + struct bio_set *bs, + unsigned *segs) { struct bio_vec bv, bvprv, *bvprvp = NULL; struct bvec_iter iter; @@ -106,24 +113,35 @@ new_segment: sectors += bv.bv_len >> 9; } + *segs = nsegs; return NULL; split: + *segs = nsegs; return bio_split(bio, sectors, GFP_NOIO, bs); } void blk_queue_split(struct request_queue *q, struct bio **bio, struct bio_set *bs) { - struct bio *split; + struct bio *split, *res; + unsigned nsegs; if ((*bio)->bi_rw & REQ_DISCARD) - split = blk_bio_discard_split(q, *bio, bs); + split = blk_bio_discard_split(q, *bio, bs, &nsegs); else if ((*bio)->bi_rw & REQ_WRITE_SAME) - split = blk_bio_write_same_split(q, *bio, bs); + split = blk_bio_write_same_split(q, *bio, bs, &nsegs); else - split = blk_bio_segment_split(q, *bio, q->bio_split); + split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs); + + /* physical segments can be figured out during splitting */ + res = split ? split : *bio; + res->bi_phys_segments = nsegs; + bio_set_flag(res, BIO_SEG_VALID); if (split) { + /* there isn't chance to merge the splitted bio */ + split->bi_rw |= REQ_NOMERGE; + bio_chain(split, *bio); generic_make_request(*bio); *bio = split; diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index ec2d11915142..60ac684c8b8c 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -75,6 +75,10 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve) struct blk_mq_bitmap_tags *bt; int i, wake_index; + /* + * Make sure all changes prior to this are visible from other CPUs. + */ + smp_mb(); bt = &tags->bitmap_tags; wake_index = atomic_read(&bt->wake_index); for (i = 0; i < BT_WAIT_QUEUES; i++) { diff --git a/block/blk-mq.c b/block/blk-mq.c index 85f014327342..309e41087e6b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -9,6 +9,7 @@ #include <linux/backing-dev.h> #include <linux/bio.h> #include <linux/blkdev.h> +#include <linux/kmemleak.h> #include <linux/mm.h> #include <linux/init.h> #include <linux/slab.h> @@ -989,18 +990,25 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) } EXPORT_SYMBOL(blk_mq_delay_queue); -static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, - struct request *rq, bool at_head) +static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx, + struct blk_mq_ctx *ctx, + struct request *rq, + bool at_head) { - struct blk_mq_ctx *ctx = rq->mq_ctx; - trace_block_rq_insert(hctx->queue, rq); if (at_head) list_add(&rq->queuelist, &ctx->rq_list); else list_add_tail(&rq->queuelist, &ctx->rq_list); +} +static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, + struct request *rq, bool at_head) +{ + struct blk_mq_ctx *ctx = rq->mq_ctx; + + __blk_mq_insert_req_list(hctx, ctx, rq, at_head); blk_mq_hctx_mark_pending(hctx, ctx); } @@ -1056,8 +1064,9 @@ static void blk_mq_insert_requests(struct request_queue *q, rq = list_first_entry(list, struct request, queuelist); list_del_init(&rq->queuelist); rq->mq_ctx = ctx; - __blk_mq_insert_request(hctx, rq, false); + __blk_mq_insert_req_list(hctx, ctx, rq, false); } + blk_mq_hctx_mark_pending(hctx, ctx); spin_unlock(&ctx->lock); blk_mq_run_hw_queue(hctx, from_schedule); @@ -1139,7 +1148,7 @@ static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct request *rq, struct bio *bio) { - if (!hctx_allow_merges(hctx)) { + if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) { blk_mq_bio_to_request(rq, bio); spin_lock(&ctx->lock); insert_rq: @@ -1267,9 +1276,12 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_queue_split(q, &bio, q->bio_split); - if (!is_flush_fua && !blk_queue_nomerges(q) && - blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) - return; + if (!is_flush_fua && !blk_queue_nomerges(q)) { + if (blk_attempt_plug_merge(q, bio, &request_count, + &same_queue_rq)) + return; + } else + request_count = blk_plug_queued_count(q); rq = blk_mq_map_request(q, bio, &data); if (unlikely(!rq)) @@ -1376,7 +1388,7 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) plug = current->plug; if (plug) { blk_mq_bio_to_request(rq, bio); - if (list_empty(&plug->mq_list)) + if (!request_count) trace_block_plug(q); else if (request_count >= BLK_MAX_REQUEST_COUNT) { blk_flush_plug_list(plug, false); @@ -1430,6 +1442,11 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, while (!list_empty(&tags->page_list)) { page = list_first_entry(&tags->page_list, struct page, lru); list_del_init(&page->lru); + /* + * Remove kmemleak object previously allocated in + * blk_mq_init_rq_map(). + */ + kmemleak_free(page_address(page)); __free_pages(page, page->private); } @@ -1502,6 +1519,11 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, list_add_tail(&page->lru, &tags->page_list); p = page_address(page); + /* + * Allow kmemleak to scan these pages as they contain pointers + * to additional allocations like via ops->init_request(). + */ + kmemleak_alloc(p, order_to_size(this_order), 1, GFP_KERNEL); entries_per_page = order_to_size(this_order) / rq_size; to_do = min(entries_per_page, set->queue_depth - i); left -= to_do * rq_size; @@ -1673,7 +1695,7 @@ static int blk_mq_init_hctx(struct request_queue *q, INIT_LIST_HEAD(&hctx->dispatch); hctx->queue = q; hctx->queue_num = hctx_idx; - hctx->flags = set->flags; + hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; blk_mq_init_cpu_notifier(&hctx->cpu_notifier, blk_mq_hctx_notify, hctx); @@ -1860,27 +1882,26 @@ static void blk_mq_map_swqueue(struct request_queue *q, } } -static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set) +static void queue_set_hctx_shared(struct request_queue *q, bool shared) { struct blk_mq_hw_ctx *hctx; - struct request_queue *q; - bool shared; int i; - if (set->tag_list.next == set->tag_list.prev) - shared = false; - else - shared = true; + queue_for_each_hw_ctx(q, hctx, i) { + if (shared) + hctx->flags |= BLK_MQ_F_TAG_SHARED; + else + hctx->flags &= ~BLK_MQ_F_TAG_SHARED; + } +} + +static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared) +{ + struct request_queue *q; list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_freeze_queue(q); - - queue_for_each_hw_ctx(q, hctx, i) { - if (shared) - hctx->flags |= BLK_MQ_F_TAG_SHARED; - else - hctx->flags &= ~BLK_MQ_F_TAG_SHARED; - } + queue_set_hctx_shared(q, shared); blk_mq_unfreeze_queue(q); } } @@ -1891,7 +1912,12 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) mutex_lock(&set->tag_list_lock); list_del_init(&q->tag_set_list); - blk_mq_update_tag_set_depth(set); + if (list_is_singular(&set->tag_list)) { + /* just transitioned to unshared */ + set->flags &= ~BLK_MQ_F_TAG_SHARED; + /* update existing queue */ + blk_mq_update_tag_set_depth(set, false); + } mutex_unlock(&set->tag_list_lock); } @@ -1901,8 +1927,17 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, q->tag_set = set; mutex_lock(&set->tag_list_lock); + + /* Check to see if we're transitioning to shared (from 1 to 2 queues). */ + if (!list_empty(&set->tag_list) && !(set->flags & BLK_MQ_F_TAG_SHARED)) { + set->flags |= BLK_MQ_F_TAG_SHARED; + /* update existing queue */ + blk_mq_update_tag_set_depth(set, true); + } + if (set->flags & BLK_MQ_F_TAG_SHARED) + queue_set_hctx_shared(q, true); list_add_tail(&q->tag_set_list, &set->tag_list); - blk_mq_update_tag_set_depth(set); + mutex_unlock(&set->tag_list_lock); } diff --git a/block/blk-mq.h b/block/blk-mq.h index f4fea7964910..b44dce165761 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -29,8 +29,6 @@ void __blk_mq_complete_request(struct request *rq); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); -void blk_mq_clone_flush_request(struct request *flush_rq, - struct request *orig_rq); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); diff --git a/block/blk.h b/block/blk.h index 98614ad37c81..aa27d0292af1 100644 --- a/block/blk.h +++ b/block/blk.h @@ -86,6 +86,7 @@ bool bio_attempt_back_merge(struct request_queue *q, struct request *req, bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, unsigned int *request_count, struct request **same_queue_rq); +unsigned int blk_plug_queued_count(struct request_queue *q); void blk_account_io_start(struct request *req, bool new_io); void blk_account_io_completion(struct request *req, unsigned int bytes); diff --git a/block/elevator.c b/block/elevator.c index 84d63943f2de..c3555c9c672f 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -420,7 +420,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) * noxmerges: Only simple one-hit cache try * merges: All merge tries attempted */ - if (blk_queue_nomerges(q)) + if (blk_queue_nomerges(q) || !bio_mergeable(bio)) return ELEVATOR_NO_MERGE; /* |