author	Rusty Russell <rusty@rustcorp.com.au>	2008-12-30 08:02:35 +1030
committer	Rusty Russell <rusty@rustcorp.com.au>	2008-12-30 08:02:35 +1030
commit	33edcf133ba93ecba2e4b6472e97b689895d805c (patch)
tree	327d7a20acef64005e7c5ccbfa1265be28aeb6ac /block
parent	be4d638c1597580ed2294d899d9f1a2cd10e462c (diff)
parent	3c92ec8ae91ecf59d88c798301833d7cf83f2179 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'block')
-rw-r--r--	block/Kconfig	|   1
-rw-r--r--	block/blk-core.c	|  46
-rw-r--r--	block/blktrace.c	| 332
-rw-r--r--	block/elevator.c	|  12
4 files changed, 366 insertions(+), 25 deletions(-)
diff --git a/block/Kconfig b/block/Kconfig
index 1ab7c15c8d7a..290b219fad9c 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -47,6 +47,7 @@ config BLK_DEV_IO_TRACE
depends on SYSFS
select RELAY
select DEBUG_FS
+ select TRACEPOINTS
help
Say Y here if you want to be able to trace the block layer actions
on a given queue. Tracing allows you to see any traffic happening
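
[The trace_block_*() calls in the diffs that follow are generated by DECLARE_TRACE() stubs in the new <trace/block.h> header, which is not part of this diff (hence the added "select TRACEPOINTS" above). A minimal sketch of what those declarations presumably look like, using the TPPROTO/TPARGS macros of the tracepoint API of this era — the real header may differ:

	#include <linux/tracepoint.h>
	#include <linux/blkdev.h>

	/* Sketch only: declarations matching the probes registered below. */
	DECLARE_TRACE(block_plug,
		TPPROTO(struct request_queue *q),
		TPARGS(q));
	DECLARE_TRACE(block_getrq,
		TPPROTO(struct request_queue *q, struct bio *bio, int rw),
		TPARGS(q, bio, rw));
	DECLARE_TRACE(block_rq_complete,
		TPPROTO(struct request_queue *q, struct request *rq),
		TPARGS(q, rq));
]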
diff --git a/block/blk-core.c b/block/blk-core.c
index c36aa98fafa3..561e8a1b43a4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -28,9 +28,23 @@
#include <linux/task_io_accounting_ops.h>
#include <linux/blktrace_api.h>
#include <linux/fault-inject.h>
+#include <trace/block.h>
#include "blk.h"
+DEFINE_TRACE(block_plug);
+DEFINE_TRACE(block_unplug_io);
+DEFINE_TRACE(block_unplug_timer);
+DEFINE_TRACE(block_getrq);
+DEFINE_TRACE(block_sleeprq);
+DEFINE_TRACE(block_rq_requeue);
+DEFINE_TRACE(block_bio_backmerge);
+DEFINE_TRACE(block_bio_frontmerge);
+DEFINE_TRACE(block_bio_queue);
+DEFINE_TRACE(block_rq_complete);
+DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+
static int __make_request(struct request_queue *q, struct bio *bio);
/*
@@ -205,7 +219,7 @@ void blk_plug_device(struct request_queue *q)
if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
- blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
+ trace_block_plug(q);
}
}
EXPORT_SYMBOL(blk_plug_device);
@@ -292,9 +306,7 @@ void blk_unplug_work(struct work_struct *work)
struct request_queue *q =
container_of(work, struct request_queue, unplug_work);
- blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
- q->rq.count[READ] + q->rq.count[WRITE]);
-
+ trace_block_unplug_io(q);
q->unplug_fn(q);
}
@@ -302,9 +314,7 @@ void blk_unplug_timeout(unsigned long data)
{
struct request_queue *q = (struct request_queue *)data;
- blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
- q->rq.count[READ] + q->rq.count[WRITE]);
-
+ trace_block_unplug_timer(q);
kblockd_schedule_work(q, &q->unplug_work);
}
@@ -314,9 +324,7 @@ void blk_unplug(struct request_queue *q)
* devices don't necessarily have an ->unplug_fn defined
*/
if (q->unplug_fn) {
- blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
- q->rq.count[READ] + q->rq.count[WRITE]);
-
+ trace_block_unplug_io(q);
q->unplug_fn(q);
}
}
@@ -822,7 +830,7 @@ rq_starved:
if (ioc_batching(q, ioc))
ioc->nr_batch_requests--;
- blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
+ trace_block_getrq(q, bio, rw);
out:
return rq;
}
@@ -848,7 +856,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
prepare_to_wait_exclusive(&rl->wait[rw], &wait,
TASK_UNINTERRUPTIBLE);
- blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
+ trace_block_sleeprq(q, bio, rw);
__generic_unplug_device(q);
spin_unlock_irq(q->queue_lock);
@@ -928,7 +936,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
{
blk_delete_timer(rq);
blk_clear_rq_complete(rq);
- blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
+ trace_block_rq_requeue(q, rq);
if (blk_rq_tagged(rq))
blk_queue_end_tag(q, rq);
@@ -1167,7 +1175,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
if (!ll_back_merge_fn(q, req, bio))
break;
- blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
+ trace_block_bio_backmerge(q, bio);
req->biotail->bi_next = bio;
req->biotail = bio;
@@ -1186,7 +1194,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
if (!ll_front_merge_fn(q, req, bio))
break;
- blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
+ trace_block_bio_frontmerge(q, bio);
bio->bi_next = req->bio;
req->bio = bio;
@@ -1269,7 +1277,7 @@ static inline void blk_partition_remap(struct bio *bio)
bio->bi_sector += p->start_sect;
bio->bi_bdev = bdev->bd_contains;
- blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio,
+ trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
bdev->bd_dev, bio->bi_sector,
bio->bi_sector - p->start_sect);
}
@@ -1441,10 +1449,10 @@ end_io:
goto end_io;
if (old_sector != -1)
- blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
+ trace_block_remap(q, bio, old_dev, bio->bi_sector,
old_sector);
- blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
+ trace_block_bio_queue(q, bio);
old_sector = bio->bi_sector;
old_dev = bio->bi_bdev->bd_dev;
@@ -1678,7 +1686,7 @@ static int __end_that_request_first(struct request *req, int error,
int total_bytes, bio_nbytes, next_idx = 0;
struct bio *bio;
- blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
+ trace_block_rq_complete(req->q, req);
/*
* for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
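
[Each DEFINE_TRACE(name) above instantiates the tracepoint object that the corresponding DECLARE_TRACE() stub references, so a disabled event costs only one predicted branch at each call site. Roughly what the inline stub expands to — a simplified sketch, not the literal kernel macro output:

	extern struct tracepoint __tracepoint_block_plug;

	static inline void trace_block_plug(struct request_queue *q)
	{
		/* With no probe attached, this is a single unlikely() test. */
		if (unlikely(__tracepoint_block_plug.state))
			__DO_TRACE(&__tracepoint_block_plug,
				   TPPROTO(struct request_queue *q), TPARGS(q));
	}
]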
diff --git a/block/blktrace.c b/block/blktrace.c
index 85049a7e7a17..b0a2cae886db 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -23,10 +23,18 @@
#include <linux/mutex.h>
#include <linux/debugfs.h>
#include <linux/time.h>
+#include <trace/block.h>
#include <asm/uaccess.h>
static unsigned int blktrace_seq __read_mostly = 1;
+/* Global reference count of probes */
+static DEFINE_MUTEX(blk_probe_mutex);
+static atomic_t blk_probes_ref = ATOMIC_INIT(0);
+
+static int blk_register_tracepoints(void);
+static void blk_unregister_tracepoints(void);
+
/*
* Send out a notify message.
*/
@@ -119,7 +127,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK
* The worker for the various blk_add_trace*() types. Fills out a
* blk_io_trace structure and places it in a per-cpu subbuffer.
*/
-void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
+static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
int rw, u32 what, int error, int pdu_len, void *pdu_data)
{
struct task_struct *tsk = current;
@@ -177,8 +185,6 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
local_irq_restore(flags);
}
-EXPORT_SYMBOL_GPL(__blk_add_trace);
-
static struct dentry *blk_tree_root;
static DEFINE_MUTEX(blk_tree_mutex);
static unsigned int root_users;
@@ -237,6 +243,10 @@ static void blk_trace_cleanup(struct blk_trace *bt)
free_percpu(bt->sequence);
free_percpu(bt->msg_data);
kfree(bt);
+ mutex_lock(&blk_probe_mutex);
+ if (atomic_dec_and_test(&blk_probes_ref))
+ blk_unregister_tracepoints();
+ mutex_unlock(&blk_probe_mutex);
}
int blk_trace_remove(struct request_queue *q)
@@ -428,6 +438,14 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
bt->pid = buts->pid;
bt->trace_state = Blktrace_setup;
+ mutex_lock(&blk_probe_mutex);
+ if (atomic_add_return(1, &blk_probes_ref) == 1) {
+ ret = blk_register_tracepoints();
+ if (ret)
+ goto probe_err;
+ }
+ mutex_unlock(&blk_probe_mutex);
+
ret = -EBUSY;
old_bt = xchg(&q->blk_trace, bt);
if (old_bt) {
@@ -436,6 +454,9 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
}
return 0;
+probe_err:
+ atomic_dec(&blk_probes_ref);
+ mutex_unlock(&blk_probe_mutex);
err:
if (dir)
blk_remove_tree(dir);
@@ -562,3 +583,308 @@ void blk_trace_shutdown(struct request_queue *q)
blk_trace_remove(q);
}
}
+
+/*
+ * blktrace probes
+ */
+
+/**
+ * blk_add_trace_rq - Add a trace for a request oriented action
+ * @q: queue the io is for
+ * @rq: the source request
+ * @what: the action
+ *
+ * Description:
+ * Records an action against a request. Will log the bio offset + size.
+ *
+ **/
+static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
+ u32 what)
+{
+ struct blk_trace *bt = q->blk_trace;
+ int rw = rq->cmd_flags & 0x03;
+
+ if (likely(!bt))
+ return;
+
+ if (blk_discard_rq(rq))
+ rw |= (1 << BIO_RW_DISCARD);
+
+ if (blk_pc_request(rq)) {
+ what |= BLK_TC_ACT(BLK_TC_PC);
+ __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
+ sizeof(rq->cmd), rq->cmd);
+ } else {
+ what |= BLK_TC_ACT(BLK_TC_FS);
+ __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
+ rw, what, rq->errors, 0, NULL);
+ }
+}
+
+static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq)
+{
+ blk_add_trace_rq(q, rq, BLK_TA_ABORT);
+}
+
+static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq)
+{
+ blk_add_trace_rq(q, rq, BLK_TA_INSERT);
+}
+
+static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
+{
+ blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
+}
+
+static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq)
+{
+ blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
+}
+
+static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq)
+{
+ blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
+}
+
+/**
+ * blk_add_trace_bio - Add a trace for a bio oriented action
+ * @q: queue the io is for
+ * @bio: the source bio
+ * @what: the action
+ *
+ * Description:
+ * Records an action against a bio. Will log the bio offset + size.
+ *
+ **/
+static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
+ u32 what)
+{
+ struct blk_trace *bt = q->blk_trace;
+
+ if (likely(!bt))
+ return;
+
+ __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
+ !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
+}
+
+static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio)
+{
+ blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
+}
+
+static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
+{
+ blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
+}
+
+static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio)
+{
+ blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
+}
+
+static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio)
+{
+ blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
+}
+
+static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
+{
+ blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
+}
+
+static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw)
+{
+ if (bio)
+ blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
+ else {
+ struct blk_trace *bt = q->blk_trace;
+
+ if (bt)
+ __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
+ }
+}
+
+
+static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw)
+{
+ if (bio)
+ blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
+ else {
+ struct blk_trace *bt = q->blk_trace;
+
+ if (bt)
+ __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL);
+ }
+}
+
+static void blk_add_trace_plug(struct request_queue *q)
+{
+ struct blk_trace *bt = q->blk_trace;
+
+ if (bt)
+ __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
+}
+
+static void blk_add_trace_unplug_io(struct request_queue *q)
+{
+ struct blk_trace *bt = q->blk_trace;
+
+ if (bt) {
+ unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
+ __be64 rpdu = cpu_to_be64(pdu);
+
+ __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0,
+ sizeof(rpdu), &rpdu);
+ }
+}
+
+static void blk_add_trace_unplug_timer(struct request_queue *q)
+{
+ struct blk_trace *bt = q->blk_trace;
+
+ if (bt) {
+ unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
+ __be64 rpdu = cpu_to_be64(pdu);
+
+ __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0,
+ sizeof(rpdu), &rpdu);
+ }
+}
+
+static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
+ unsigned int pdu)
+{
+ struct blk_trace *bt = q->blk_trace;
+
+ if (bt) {
+ __be64 rpdu = cpu_to_be64(pdu);
+
+ __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
+ BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
+ sizeof(rpdu), &rpdu);
+ }
+}
+
+/**
+ * blk_add_trace_remap - Add a trace for a remap operation
+ * @q: queue the io is for
+ * @bio: the source bio
+ * @dev: target device
+ * @from: source sector
+ * @to: target sector
+ *
+ * Description:
+ * Device mapper or raid target sometimes need to split a bio because
+ * it spans a stripe (or similar). Add a trace for that action.
+ *
+ **/
+static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
+ dev_t dev, sector_t from, sector_t to)
+{
+ struct blk_trace *bt = q->blk_trace;
+ struct blk_io_trace_remap r;
+
+ if (likely(!bt))
+ return;
+
+ r.device = cpu_to_be32(dev);
+ r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
+ r.sector = cpu_to_be64(to);
+
+ __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
+ !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
+}
+
+/**
+ * blk_add_driver_data - Add binary message with driver-specific data
+ * @q: queue the io is for
+ * @rq: io request
+ * @data: driver-specific data
+ * @len: length of driver-specific data
+ *
+ * Description:
+ * Some drivers might want to write driver-specific data per request.
+ *
+ **/
+void blk_add_driver_data(struct request_queue *q,
+ struct request *rq,
+ void *data, size_t len)
+{
+ struct blk_trace *bt = q->blk_trace;
+
+ if (likely(!bt))
+ return;
+
+ if (blk_pc_request(rq))
+ __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
+ rq->errors, len, data);
+ else
+ __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
+ 0, BLK_TA_DRV_DATA, rq->errors, len, data);
+}
+EXPORT_SYMBOL_GPL(blk_add_driver_data);
+
+static int blk_register_tracepoints(void)
+{
+ int ret;
+
+ ret = register_trace_block_rq_abort(blk_add_trace_rq_abort);
+ WARN_ON(ret);
+ ret = register_trace_block_rq_insert(blk_add_trace_rq_insert);
+ WARN_ON(ret);
+ ret = register_trace_block_rq_issue(blk_add_trace_rq_issue);
+ WARN_ON(ret);
+ ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue);
+ WARN_ON(ret);
+ ret = register_trace_block_rq_complete(blk_add_trace_rq_complete);
+ WARN_ON(ret);
+ ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce);
+ WARN_ON(ret);
+ ret = register_trace_block_bio_complete(blk_add_trace_bio_complete);
+ WARN_ON(ret);
+ ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
+ WARN_ON(ret);
+ ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
+ WARN_ON(ret);
+ ret = register_trace_block_bio_queue(blk_add_trace_bio_queue);
+ WARN_ON(ret);
+ ret = register_trace_block_getrq(blk_add_trace_getrq);
+ WARN_ON(ret);
+ ret = register_trace_block_sleeprq(blk_add_trace_sleeprq);
+ WARN_ON(ret);
+ ret = register_trace_block_plug(blk_add_trace_plug);
+ WARN_ON(ret);
+ ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer);
+ WARN_ON(ret);
+ ret = register_trace_block_unplug_io(blk_add_trace_unplug_io);
+ WARN_ON(ret);
+ ret = register_trace_block_split(blk_add_trace_split);
+ WARN_ON(ret);
+ ret = register_trace_block_remap(blk_add_trace_remap);
+ WARN_ON(ret);
+ return 0;
+}
+
+static void blk_unregister_tracepoints(void)
+{
+ unregister_trace_block_remap(blk_add_trace_remap);
+ unregister_trace_block_split(blk_add_trace_split);
+ unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
+ unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
+ unregister_trace_block_plug(blk_add_trace_plug);
+ unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
+ unregister_trace_block_getrq(blk_add_trace_getrq);
+ unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
+ unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
+ unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
+ unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
+ unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
+ unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
+ unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
+ unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
+ unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
+ unregister_trace_block_rq_abort(blk_add_trace_rq_abort);
+
+ tracepoint_synchronize_unregister();
+}
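
[Note that blk_add_driver_data() stays a plain GPL export rather than becoming a tracepoint, so low-level drivers can keep attaching per-request binary payloads directly. A hypothetical caller — the driver function and status word below are invented for illustration:

	#include <linux/blkdev.h>
	#include <linux/blktrace_api.h>

	/* Hypothetical completion path in a low-level driver. */
	static void mydrv_trace_completion(struct request *rq, u32 hw_status)
	{
		/* Emitted as a BLK_TA_DRV_DATA record if tracing is active. */
		blk_add_driver_data(rq->q, rq, &hw_status, sizeof(hw_status));
	}
]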
diff --git a/block/elevator.c b/block/elevator.c
index a6951f76ba0c..86836dd179c0 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -33,6 +33,7 @@
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace_api.h>
+#include <trace/block.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
@@ -41,6 +42,8 @@
static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);
+DEFINE_TRACE(block_rq_abort);
+
/*
* Merge hash stuff.
*/
@@ -52,6 +55,9 @@ static const int elv_hash_shift = 6;
#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
+DEFINE_TRACE(block_rq_insert);
+DEFINE_TRACE(block_rq_issue);
+
/*
* Query io scheduler to see if the current process issuing bio may be
* merged with rq.
@@ -586,7 +592,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
unsigned ordseq;
int unplug_it = 1;
- blk_add_trace_rq(q, rq, BLK_TA_INSERT);
+ trace_block_rq_insert(q, rq);
rq->q = q;
@@ -772,7 +778,7 @@ struct request *elv_next_request(struct request_queue *q)
* not be passed by new incoming requests
*/
rq->cmd_flags |= REQ_STARTED;
- blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
+ trace_block_rq_issue(q, rq);
}
if (!q->boundary_rq || q->boundary_rq == rq) {
@@ -914,7 +920,7 @@ void elv_abort_queue(struct request_queue *q)
while (!list_empty(&q->queue_head)) {
rq = list_entry_rq(q->queue_head.next);
rq->cmd_flags |= REQ_QUIET;
- blk_add_trace_rq(q, rq, BLK_TA_ABORT);
+ trace_block_rq_abort(q, rq);
__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
}
}
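
[The practical payoff of the conversion: blktrace becomes just one consumer of these hooks, and any GPL module can attach its own probe to the same events. A minimal sketch of an out-of-tree probe on block_rq_complete (module and message text invented), mirroring the register/unregister discipline that blk_register_tracepoints() and blk_unregister_tracepoints() use above:

	#include <linux/module.h>
	#include <linux/blkdev.h>
	#include <trace/block.h>

	/* Probe signature must match the tracepoint: (q, rq). */
	static void my_rq_complete_probe(struct request_queue *q,
					 struct request *rq)
	{
		printk(KERN_DEBUG "rq complete: sector %llu\n",
		       (unsigned long long)rq->hard_sector);
	}

	static int __init myprobe_init(void)
	{
		return register_trace_block_rq_complete(my_rq_complete_probe);
	}

	static void __exit myprobe_exit(void)
	{
		unregister_trace_block_rq_complete(my_rq_complete_probe);
		/* Wait for in-flight probe calls to finish before unload. */
		tracepoint_synchronize_unregister();
	}

	module_init(myprobe_init);
	module_exit(myprobe_exit);
	MODULE_LICENSE("GPL");
]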