diff options
author | Tejun Heo <tj@kernel.org> | 2012-03-05 13:15:27 -0800 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2012-03-06 21:27:24 +0100 |
commit | 852c788f8365062c8a383c5a93f7f7289977cb50 (patch) | |
tree | 561b69e7f2b6bcc16de165b3b988990de7913615 /block | |
parent | f6e8d01bee036460e03bd4f6a79d014f98ba712e (diff) | |
download | lwn-852c788f8365062c8a383c5a93f7f7289977cb50.tar.gz lwn-852c788f8365062c8a383c5a93f7f7289977cb50.zip |
block: implement bio_associate_current()
IO scheduling and cgroup are tied to the issuing task via io_context
and cgroup of %current. Unfortunately, there are cases where IOs need
to be routed via a different task which makes scheduling and cgroup
limit enforcement applied completely incorrectly.
For example, all bios delayed by blk-throttle end up being issued by a
delayed work item and get assigned the io_context of the worker task
which happens to serve the work item and dumped to the default block
cgroup. This is double confusing as bios which aren't delayed end up
in the correct cgroup and makes using blk-throttle and cfq propio
together impossible.
Any code which punts IO issuing to another task is affected which is
getting more and more common (e.g. btrfs). As both io_context and
cgroup are firmly tied to task including userland visible APIs to
manipulate them, it makes a lot of sense to match up tasks to bios.
This patch implements bio_associate_current() which associates the
specified bio with %current. The bio will record the associated ioc
and blkcg at that point and block layer will use the recorded ones
regardless of which task actually ends up issuing the bio. bio
release puts the associated ioc and blkcg.
It grabs and remembers ioc and blkcg instead of the task itself
because task may already be dead by the time the bio is issued making
ioc and blkcg inaccessible and those are all block layer cares about.
elevator_set_req_fn() is updated such that the bio elvdata is being
allocated for is available to the elevator.
This doesn't update block cgroup policies yet. Further patches will
implement the support.
-v2: #ifdef CONFIG_BLK_CGROUP added around bio->bi_ioc dereference in
rq_ioc() to fix build breakage.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Kent Overstreet <koverstreet@google.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-core.c | 32 | ||||
-rw-r--r-- | block/cfq-iosched.c | 3 | ||||
-rw-r--r-- | block/elevator.c | 5 |
3 files changed, 30 insertions, 10 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index b2d0fcd8f87f..991c1d6ef245 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -696,7 +696,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq) } static struct request * -blk_alloc_request(struct request_queue *q, struct io_cq *icq, +blk_alloc_request(struct request_queue *q, struct bio *bio, struct io_cq *icq, unsigned int flags, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -710,7 +710,7 @@ blk_alloc_request(struct request_queue *q, struct io_cq *icq, if (flags & REQ_ELVPRIV) { rq->elv.icq = icq; - if (unlikely(elv_set_request(q, rq, gfp_mask))) { + if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { mempool_free(rq, q->rq.rq_pool); return NULL; } @@ -810,6 +810,22 @@ static bool blk_rq_should_init_elevator(struct bio *bio) } /** + * rq_ioc - determine io_context for request allocation + * @bio: request being allocated is for this bio (can be %NULL) + * + * Determine io_context to use for request allocation for @bio. May return + * %NULL if %current->io_context doesn't exist. + */ +static struct io_context *rq_ioc(struct bio *bio) +{ +#ifdef CONFIG_BLK_CGROUP + if (bio && bio->bi_ioc) + return bio->bi_ioc; +#endif + return current->io_context; +} + +/** * get_request - get a free request * @q: request_queue to allocate request from * @rw_flags: RW and SYNC flags @@ -836,7 +852,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags, int may_queue; retry: et = q->elevator->type; - ioc = current->io_context; + ioc = rq_ioc(bio); if (unlikely(blk_queue_dead(q))) return NULL; @@ -919,14 +935,16 @@ retry: /* create icq if missing */ if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) { - ioc = create_io_context(gfp_mask, q->node); - if (ioc) - icq = ioc_create_icq(ioc, q, gfp_mask); + create_io_context(gfp_mask, q->node); + ioc = rq_ioc(bio); + if (!ioc) + goto fail_alloc; + icq = ioc_create_icq(ioc, q, gfp_mask); if (!icq) goto fail_alloc; } - rq = blk_alloc_request(q, icq, rw_flags, gfp_mask); + rq = blk_alloc_request(q, bio, icq, rw_flags, gfp_mask); if (unlikely(!rq)) goto fail_alloc; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 9a4eac490e0b..abac87337d70 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3299,7 +3299,8 @@ split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq) * Allocate cfq data structures associated with this request. */ static int -cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) +cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio, + gfp_t gfp_mask) { struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq); diff --git a/block/elevator.c b/block/elevator.c index 451654fadab0..be3ab6df0fea 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -663,12 +663,13 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq) return NULL; } -int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) +int elv_set_request(struct request_queue *q, struct request *rq, + struct bio *bio, gfp_t gfp_mask) { struct elevator_queue *e = q->elevator; if (e->type->ops.elevator_set_req_fn) - return e->type->ops.elevator_set_req_fn(q, rq, gfp_mask); + return e->type->ops.elevator_set_req_fn(q, rq, bio, gfp_mask); return 0; } |