blkcg: factor out blkio_group creation

Currently both blk-throttle and cfq-iosched implement their own blkio_group creation code in throtl_get_tg() and cfq_get_cfqg(). This patch factors out the common code into blkg_lookup_create(), which returns ERR_PTR value so that transitional failures due to queue bypass can be distinguished from other failures. * New plkio_policy_ops methods blkio_alloc_group_fn() and blkio_link_group_fn added. Both are transitional and will be removed once the blkg management code is fully moved into blk-cgroup.c. * blkio_alloc_group_fn() allocates policy-specific blkg which is usually a larger data structure with blkg as the first entry and intiailizes it. Note that initialization of blkg proper, including percpu stats, is responsibility of blk-cgroup proper. Note that default config (weight, bps...) initialization is done from this method; otherwise, we end up violating locking order between blkcg and q locks via blkcg_get_CONF() functions. * blkio_link_group_fn() is called under queue_lock and responsible for linking the blkg to the queue. blkcg side is handled by blk-cgroup proper. * The common blkg creation function is named blkg_lookup_create() and blkiocg_lookup_group() is renamed to blkg_lookup() for consistency. Also, throtl / cfq related functions are similarly [re]named for consistency. This simplifies blkcg policy implementations and enables further cleanup. -v2: Vivek noticed that blkg_lookup_create() incorrectly tested blk_queue_dead() instead of blk_queue_bypass() leading a user of the function ending up creating a new blkg on bypassing queue. This is a bug introduced while relocating bypass patches before this one. Fixed. -v3: ERR_PTR patch folded into this one. @for_root added to blkg_lookup_create() to allow creating root group on a bypassed queue during elevator switch. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
author: Tejun Heo <tj@kernel.org> 2012-03-05 13:15:06 -0800
committer: Jens Axboe <axboe@kernel.dk> 2012-03-06 21:27:22 +0100
commit: cd1604fab4f95f7cfc227d3955fd7ae14da61f38 (patch)
tree: 021881faedc1c2468730f9f54d364083e70dce76 /block/blk-throttle.c
parent: f51b802c17e2a21926b29911493f5e7ddf6eee87 (diff)
download: lwn-cd1604fab4f95f7cfc227d3955fd7ae14da61f38.tar.gz
lwn-cd1604fab4f95f7cfc227d3955fd7ae14da61f38.zip
1 files changed, 52 insertions, 103 deletions
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index aeeb798d1cda..2ae637b9e80c 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -181,17 +181,25 @@ static void throtl_put_tg(struct throtl_grp *tg)
 	call_rcu(&tg->rcu_head, throtl_free_tg);
 }
 
-static void throtl_init_group(struct throtl_grp *tg)
+static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q,
+						    struct blkio_cgroup *blkcg)
 {
+	struct throtl_grp *tg;
+
+	tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, q->node);
+	if (!tg)
+		return NULL;
+
 	INIT_HLIST_NODE(&tg->tg_node);
 	RB_CLEAR_NODE(&tg->rb_node);
 	bio_list_init(&tg->bio_lists[0]);
 	bio_list_init(&tg->bio_lists[1]);
 	tg->limits_changed = false;
 
-	/* Practically unlimited BW */
-	tg->bps[0] = tg->bps[1] = -1;
-	tg->iops[0] = tg->iops[1] = -1;
+	tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
+	tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
+	tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
+	tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
 
 	/*
 	 * Take the initial reference that will be released on destroy
@@ -200,14 +208,8 @@ static void throtl_init_group(struct throtl_grp *tg)
 	 * exit or cgroup deletion path depending on who is exiting first.
 	 */
 	atomic_set(&tg->ref, 1);
-}
 
-/* Should be called with rcu read lock held (needed for blkcg) */
-static void
-throtl_add_group_to_td_list(struct throtl_data *td, struct throtl_grp *tg)
-{
-	hlist_add_head(&tg->tg_node, &td->tg_list);
-	td->nr_undestroyed_grps++;
+	return &tg->blkg;
 }
 
 static void
@@ -246,119 +248,62 @@ throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg)
 	spin_unlock_irq(td->queue->queue_lock);
 }
 
-static void throtl_init_add_tg_lists(struct throtl_data *td,
-			struct throtl_grp *tg, struct blkio_cgroup *blkcg)
+static void throtl_link_blkio_group(struct request_queue *q,
+				    struct blkio_group *blkg)
 {
-	__throtl_tg_fill_dev_details(td, tg);
-
-	/* Add group onto cgroup list */
-	blkiocg_add_blkio_group(blkcg, &tg->blkg, td->queue,
-				tg->blkg.dev, BLKIO_POLICY_THROTL);
-
-	tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
-	tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
-	tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
-	tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
-
-	throtl_add_group_to_td_list(td, tg);
-}
-
-/* Should be called without queue lock and outside of rcu period */
-static struct throtl_grp *throtl_alloc_tg(struct throtl_data *td)
-{
-	struct throtl_grp *tg = NULL;
-	int ret;
-
-	tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
-	if (!tg)
-		return NULL;
-
-	ret = blkio_alloc_blkg_stats(&tg->blkg);
+	struct throtl_data *td = q->td;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	if (ret) {
-		kfree(tg);
-		return NULL;
-	}
+	__throtl_tg_fill_dev_details(td, tg);
 
-	throtl_init_group(tg);
-	return tg;
+	hlist_add_head(&tg->tg_node, &td->tg_list);
+	td->nr_undestroyed_grps++;
 }
 
 static struct
-throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
+throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
 {
 	struct throtl_grp *tg = NULL;
 
 	/*
 	 * This is the common case when there are no blkio cgroups.
- 	 * Avoid lookup in this case
- 	 */
+	 * Avoid lookup in this case
+	 */
 	if (blkcg == &blkio_root_cgroup)
 		tg = td->root_tg;
 	else
-		tg = tg_of_blkg(blkiocg_lookup_group(blkcg, td->queue,
-						     BLKIO_POLICY_THROTL));
+		tg = tg_of_blkg(blkg_lookup(blkcg, td->queue,
+					    BLKIO_POLICY_THROTL));
 
 	__throtl_tg_fill_dev_details(td, tg);
 	return tg;
 }
 
-static struct throtl_grp *throtl_get_tg(struct throtl_data *td,
-					struct blkio_cgroup *blkcg)
+static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td,
+						  struct blkio_cgroup *blkcg)
 {
-	struct throtl_grp *tg = NULL, *__tg = NULL;
 	struct request_queue *q = td->queue;
-
-	/* no throttling for dead queue */
-	if (unlikely(blk_queue_bypass(q)))
-		return NULL;
-
-	tg = throtl_find_tg(td, blkcg);
-	if (tg)
-		return tg;
-
-	if (!css_tryget(&blkcg->css))
-		return NULL;
-
-	/*
-	 * Need to allocate a group. Allocation of group also needs allocation
-	 * of per cpu stats which in-turn takes a mutex() and can block. Hence
-	 * we need to drop rcu lock and queue_lock before we call alloc.
-	 */
-	spin_unlock_irq(q->queue_lock);
-	rcu_read_unlock();
-
-	tg = throtl_alloc_tg(td);
-
-	/* Group allocated and queue is still alive. take the lock */
-	rcu_read_lock();
-	spin_lock_irq(q->queue_lock);
-	css_put(&blkcg->css);
-
-	/* Make sure @q is still alive */
-	if (unlikely(blk_queue_bypass(q))) {
-		kfree(tg);
-		return NULL;
-	}
+	struct throtl_grp *tg = NULL;
 
 	/*
-	 * If some other thread already allocated the group while we were
-	 * not holding queue lock, free up the group
+	 * This is the common case when there are no blkio cgroups.
+	 * Avoid lookup in this case
 	 */
-	__tg = throtl_find_tg(td, blkcg);
+	if (blkcg == &blkio_root_cgroup) {
+		tg = td->root_tg;
+	} else {
+		struct blkio_group *blkg;
 
-	if (__tg) {
-		kfree(tg);
-		return __tg;
-	}
+		blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_THROTL, false);
 
-	/* Group allocation failed. Account the IO to root group */
-	if (!tg) {
-		tg = td->root_tg;
-		return tg;
+		/* if %NULL and @q is alive, fall back to root_tg */
+		if (!IS_ERR(blkg))
+			tg = tg_of_blkg(blkg);
+		else if (!blk_queue_dead(q))
+			tg = td->root_tg;
 	}
 
-	throtl_init_add_tg_lists(td, tg, blkcg);
+	__throtl_tg_fill_dev_details(td, tg);
 	return tg;
 }
 
@@ -1107,6 +1052,8 @@ static void throtl_shutdown_wq(struct request_queue *q)
 
 static struct blkio_policy_type blkio_policy_throtl = {
 	.ops = {
+		.blkio_alloc_group_fn = throtl_alloc_blkio_group,
+		.blkio_link_group_fn = throtl_link_blkio_group,
 		.blkio_unlink_group_fn = throtl_unlink_blkio_group,
 		.blkio_clear_queue_fn = throtl_clear_queue,
 		.blkio_update_group_read_bps_fn =
@@ -1141,7 +1088,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
 	 */
 	rcu_read_lock();
 	blkcg = task_blkio_cgroup(current);
-	tg = throtl_find_tg(td, blkcg);
+	tg = throtl_lookup_tg(td, blkcg);
 	if (tg) {
 		throtl_tg_fill_dev_details(td, tg);
 
@@ -1157,7 +1104,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
 	 * IO group
 	 */
 	spin_lock_irq(q->queue_lock);
-	tg = throtl_get_tg(td, blkcg);
+	tg = throtl_lookup_create_tg(td, blkcg);
 	if (unlikely(!tg))
 		goto out_unlock;
 
@@ -1252,6 +1199,7 @@ void blk_throtl_drain(struct request_queue *q)
 int blk_throtl_init(struct request_queue *q)
 {
 	struct throtl_data *td;
+	struct blkio_group *blkg;
 
 	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
 	if (!td)
@@ -1262,13 +1210,17 @@ int blk_throtl_init(struct request_queue *q)
 	td->limits_changed = false;
 	INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work);
 
-	/* alloc and Init root group. */
+	q->td = td;
 	td->queue = q;
 
+	/* alloc and init root group. */
 	rcu_read_lock();
 	spin_lock_irq(q->queue_lock);
 
-	td->root_tg = throtl_get_tg(td, &blkio_root_cgroup);
+	blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_THROTL,
+				  true);
+	if (!IS_ERR(blkg))
+		td->root_tg = tg_of_blkg(blkg);
 
 	spin_unlock_irq(q->queue_lock);
 	rcu_read_unlock();
@@ -1277,9 +1229,6 @@ int blk_throtl_init(struct request_queue *q)
 		kfree(td);
 		return -ENOMEM;
 	}
-
-	/* Attach throtl data to request queue */
-	q->td = td;
 	return 0;
 }
author	Tejun Heo <tj@kernel.org>	2012-03-05 13:15:06 -0800
committer	Jens Axboe <axboe@kernel.dk>	2012-03-06 21:27:22 +0100
commit	cd1604fab4f95f7cfc227d3955fd7ae14da61f38 (patch)
tree	021881faedc1c2468730f9f54d364083e70dce76 /block/blk-throttle.c
parent	f51b802c17e2a21926b29911493f5e7ddf6eee87 (diff)
download	lwn-cd1604fab4f95f7cfc227d3955fd7ae14da61f38.tar.gz lwn-cd1604fab4f95f7cfc227d3955fd7ae14da61f38.zip