blk-mq: Add a polling specific stats function

Rather than bucketing IO statisics based on direction only we also bucket based on the IO size. This leads to improved polling performance. Update the bucket callback function and use it in the polling latency estimation. Signed-off-by: Stephen Bates <sbates@raithlin.com> Signed-off-by: Jens Axboe <axboe@fb.com>
author: Stephen Bates <sbates@raithlin.com> 2017-04-07 06:24:03 -0600
committer: Jens Axboe <axboe@fb.com> 2017-04-20 15:29:40 -0600
commit: 720b8ccc45002708c7db1165f72a2d3c6996dba0 (patch)
tree: 6021454aba3b5323981ddacdd3fe1b77c6f33f8f /block
parent: a37244e4cc5766af68004ad6249fcd0e4694b441 (diff)
download: lwn-720b8ccc45002708c7db1165f72a2d3c6996dba0.tar.gz
lwn-720b8ccc45002708c7db1165f72a2d3c6996dba0.zip
1 files changed, 35 insertions, 10 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f2f6c59c5b05..9d7645f24b05 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -42,6 +42,25 @@ static LIST_HEAD(all_q_list);
 static void blk_mq_poll_stats_start(struct request_queue *q);
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
 
+/* Must be consisitent with function below */
+#define BLK_MQ_POLL_STATS_BKTS 16
+static int blk_mq_poll_stats_bkt(const struct request *rq)
+{
+	int ddir, bytes, bucket;
+
+	ddir = blk_stat_rq_ddir(rq);
+	bytes = blk_rq_bytes(rq);
+
+	bucket = ddir + 2*(ilog2(bytes) - 9);
+
+	if (bucket < 0)
+		return -1;
+	else if (bucket >= BLK_MQ_POLL_STATS_BKTS)
+		return ddir + BLK_MQ_POLL_STATS_BKTS - 2;
+
+	return bucket;
+}
+
 /*
  * Check if any of the ctx's have pending work in this hardware queue
  */
@@ -2257,7 +2276,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	q->mq_ops = set->ops;
 
 	q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn,
-					     blk_stat_rq_ddir, 2, q);
+					     blk_mq_poll_stats_bkt,
+					     BLK_MQ_POLL_STATS_BKTS, q);
 	if (!q->poll_cb)
 		goto err_exit;
 
@@ -2683,11 +2703,12 @@ static void blk_mq_poll_stats_start(struct request_queue *q)
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
 {
 	struct request_queue *q = cb->data;
+	int bucket;
 
-	if (cb->stat[READ].nr_samples)
-		q->poll_stat[READ] = cb->stat[READ];
-	if (cb->stat[WRITE].nr_samples)
-		q->poll_stat[WRITE] = cb->stat[WRITE];
+	for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) {
+		if (cb->stat[bucket].nr_samples)
+			q->poll_stat[bucket] = cb->stat[bucket];
+	}
 }
 
 static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
@@ -2695,6 +2716,7 @@ static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
 				       struct request *rq)
 {
 	unsigned long ret = 0;
+	int bucket;
 
 	/*
 	 * If stats collection isn't on, don't sleep but turn it on for
@@ -2709,12 +2731,15 @@ static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
 	 * For instance, if the completion latencies are tight, we can
 	 * get closer than just half the mean. This is especially
 	 * important on devices where the completion latencies are longer
-	 * than ~10 usec.
+	 * than ~10 usec. We do use the stats for the relevant IO size
+	 * if available which does lead to better estimates.
 	 */
-	if (req_op(rq) == REQ_OP_READ && q->poll_stat[READ].nr_samples)
-		ret = (q->poll_stat[READ].mean + 1) / 2;
-	else if (req_op(rq) == REQ_OP_WRITE && q->poll_stat[WRITE].nr_samples)
-		ret = (q->poll_stat[WRITE].mean + 1) / 2;
+	bucket = blk_mq_poll_stats_bkt(rq);
+	if (bucket < 0)
+		return ret;
+
+	if (q->poll_stat[bucket].nr_samples)
+		ret = (q->poll_stat[bucket].mean + 1) / 2;
 
 	return ret;
 }
author	Stephen Bates <sbates@raithlin.com>	2017-04-07 06:24:03 -0600
committer	Jens Axboe <axboe@fb.com>	2017-04-20 15:29:40 -0600
commit	720b8ccc45002708c7db1165f72a2d3c6996dba0 (patch)
tree	6021454aba3b5323981ddacdd3fe1b77c6f33f8f /block
parent	a37244e4cc5766af68004ad6249fcd0e4694b441 (diff)
download	lwn-720b8ccc45002708c7db1165f72a2d3c6996dba0.tar.gz lwn-720b8ccc45002708c7db1165f72a2d3c6996dba0.zip