diff options
Diffstat (limited to 'block/ll_rw_blk.c')
-rw-r--r-- | block/ll_rw_blk.c | 182 |
1 files changed, 133 insertions, 49 deletions
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 03d9c82b0fe7..062067fa7ead 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -28,6 +28,7 @@ #include <linux/writeback.h> #include <linux/interrupt.h> #include <linux/cpu.h> +#include <linux/blktrace_api.h> /* * for max sense size @@ -625,26 +626,31 @@ static inline int ordered_bio_endio(struct request *rq, struct bio *bio, * Different hardware can have different requirements as to what pages * it can do I/O directly to. A low level driver can call * blk_queue_bounce_limit to have lower memory pages allocated as bounce - * buffers for doing I/O to pages residing above @page. By default - * the block layer sets this to the highest numbered "low" memory page. + * buffers for doing I/O to pages residing above @page. **/ void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) { unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; - - /* - * set appropriate bounce gfp mask -- unfortunately we don't have a - * full 4GB zone, so we have to resort to low memory for any bounces. - * ISA has its own < 16MB zone. - */ - if (bounce_pfn < blk_max_low_pfn) { - BUG_ON(dma_addr < BLK_BOUNCE_ISA); + int dma = 0; + + q->bounce_gfp = GFP_NOIO; +#if BITS_PER_LONG == 64 + /* Assume anything <= 4GB can be handled by IOMMU. + Actually some IOMMUs can handle everything, but I don't + know of a way to test this here. */ + if (bounce_pfn < (0xffffffff>>PAGE_SHIFT)) + dma = 1; + q->bounce_pfn = max_low_pfn; +#else + if (bounce_pfn < blk_max_low_pfn) + dma = 1; + q->bounce_pfn = bounce_pfn; +#endif + if (dma) { init_emergency_isa_pool(); q->bounce_gfp = GFP_NOIO | GFP_DMA; - } else - q->bounce_gfp = GFP_NOIO; - - q->bounce_pfn = bounce_pfn; + q->bounce_pfn = bounce_pfn; + } } EXPORT_SYMBOL(blk_queue_bounce_limit); @@ -1551,8 +1557,10 @@ void blk_plug_device(request_queue_t *q) if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) return; - if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) + if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); + blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); + } } EXPORT_SYMBOL(blk_plug_device); @@ -1616,14 +1624,21 @@ static void blk_backing_dev_unplug(struct backing_dev_info *bdi, /* * devices don't necessarily have an ->unplug_fn defined */ - if (q->unplug_fn) + if (q->unplug_fn) { + blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, + q->rq.count[READ] + q->rq.count[WRITE]); + q->unplug_fn(q); + } } static void blk_unplug_work(void *data) { request_queue_t *q = data; + blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, + q->rq.count[READ] + q->rq.count[WRITE]); + q->unplug_fn(q); } @@ -1631,6 +1646,9 @@ static void blk_unplug_timeout(unsigned long data) { request_queue_t *q = (request_queue_t *)data; + blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, + q->rq.count[READ] + q->rq.count[WRITE]); + kblockd_schedule_work(&q->unplug_work); } @@ -1735,16 +1753,11 @@ EXPORT_SYMBOL(blk_run_queue); * Hopefully the low level driver will have finished any * outstanding requests first... **/ -void blk_cleanup_queue(request_queue_t * q) +static void blk_release_queue(struct kobject *kobj) { + request_queue_t *q = container_of(kobj, struct request_queue, kobj); struct request_list *rl = &q->rq; - if (!atomic_dec_and_test(&q->refcnt)) - return; - - if (q->elevator) - elevator_exit(q->elevator); - blk_sync_queue(q); if (rl->rq_pool) @@ -1753,9 +1766,30 @@ void blk_cleanup_queue(request_queue_t * q) if (q->queue_tags) __blk_queue_free_tags(q); + if (q->blk_trace) + blk_trace_shutdown(q); + kmem_cache_free(requestq_cachep, q); } +void blk_put_queue(request_queue_t *q) +{ + kobject_put(&q->kobj); +} +EXPORT_SYMBOL(blk_put_queue); + +void blk_cleanup_queue(request_queue_t * q) +{ + mutex_lock(&q->sysfs_lock); + set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); + mutex_unlock(&q->sysfs_lock); + + if (q->elevator) + elevator_exit(q->elevator); + + blk_put_queue(q); +} + EXPORT_SYMBOL(blk_cleanup_queue); static int blk_init_free_list(request_queue_t *q) @@ -1783,6 +1817,8 @@ request_queue_t *blk_alloc_queue(gfp_t gfp_mask) } EXPORT_SYMBOL(blk_alloc_queue); +static struct kobj_type queue_ktype; + request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) { request_queue_t *q; @@ -1793,11 +1829,16 @@ request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) memset(q, 0, sizeof(*q)); init_timer(&q->unplug_timer); - atomic_set(&q->refcnt, 1); + + snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue"); + q->kobj.ktype = &queue_ktype; + kobject_init(&q->kobj); q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; q->backing_dev_info.unplug_io_data = q; + mutex_init(&q->sysfs_lock); + return q; } EXPORT_SYMBOL(blk_alloc_queue_node); @@ -1849,8 +1890,10 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) return NULL; q->node = node_id; - if (blk_init_free_list(q)) - goto out_init; + if (blk_init_free_list(q)) { + kmem_cache_free(requestq_cachep, q); + return NULL; + } /* * if caller didn't supply a lock, they get per-queue locking with @@ -1886,9 +1929,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) return q; } - blk_cleanup_queue(q); -out_init: - kmem_cache_free(requestq_cachep, q); + blk_put_queue(q); return NULL; } EXPORT_SYMBOL(blk_init_queue_node); @@ -1896,7 +1937,7 @@ EXPORT_SYMBOL(blk_init_queue_node); int blk_get_queue(request_queue_t *q) { if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { - atomic_inc(&q->refcnt); + kobject_get(&q->kobj); return 0; } @@ -2104,6 +2145,8 @@ rq_starved: rq_init(q, rq); rq->rl = rl; + + blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); out: return rq; } @@ -2132,6 +2175,8 @@ static struct request *get_request_wait(request_queue_t *q, int rw, if (!rq) { struct io_context *ioc; + blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); + __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); io_schedule(); @@ -2185,6 +2230,8 @@ EXPORT_SYMBOL(blk_get_request); */ void blk_requeue_request(request_queue_t *q, struct request *rq) { + blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); + if (blk_rq_tagged(rq)) blk_queue_end_tag(q, rq); @@ -2819,6 +2866,8 @@ static int __make_request(request_queue_t *q, struct bio *bio) if (!q->back_merge_fn(q, req, bio)) break; + blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); + req->biotail->bi_next = bio; req->biotail = bio; req->nr_sectors = req->hard_nr_sectors += nr_sectors; @@ -2834,6 +2883,8 @@ static int __make_request(request_queue_t *q, struct bio *bio) if (!q->front_merge_fn(q, req, bio)) break; + blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); + bio->bi_next = req->bio; req->bio = bio; @@ -2951,6 +3002,7 @@ void generic_make_request(struct bio *bio) request_queue_t *q; sector_t maxsector; int ret, nr_sectors = bio_sectors(bio); + dev_t old_dev; might_sleep(); /* Test device or partition size, when known. */ @@ -2977,6 +3029,8 @@ void generic_make_request(struct bio *bio) * NOTE: we don't repeat the blk_size check for each new device. * Stacking drivers are expected to know what they are doing. */ + maxsector = -1; + old_dev = 0; do { char b[BDEVNAME_SIZE]; @@ -3009,6 +3063,15 @@ end_io: */ blk_partition_remap(bio); + if (maxsector != -1) + blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, + maxsector); + + blk_add_trace_bio(q, bio, BLK_TA_QUEUE); + + maxsector = bio->bi_sector; + old_dev = bio->bi_bdev->bd_dev; + ret = q->make_request_fn(q, bio); } while (ret); } @@ -3128,6 +3191,8 @@ static int __end_that_request_first(struct request *req, int uptodate, int total_bytes, bio_nbytes, error, next_idx = 0; struct bio *bio; + blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); + /* * extend uptodate bool to allow < 0 value to be direct io error */ @@ -3472,10 +3537,12 @@ void put_io_context(struct io_context *ioc) BUG_ON(atomic_read(&ioc->refcount) == 0); if (atomic_dec_and_test(&ioc->refcount)) { + rcu_read_lock(); if (ioc->aic && ioc->aic->dtor) ioc->aic->dtor(ioc->aic); if (ioc->cic && ioc->cic->dtor) ioc->cic->dtor(ioc->cic); + rcu_read_unlock(); kmem_cache_free(iocontext_cachep, ioc); } @@ -3609,10 +3676,13 @@ static ssize_t queue_requests_store(struct request_queue *q, const char *page, size_t count) { struct request_list *rl = &q->rq; + unsigned long nr; + int ret = queue_var_store(&nr, page, count); + if (nr < BLKDEV_MIN_RQ) + nr = BLKDEV_MIN_RQ; - int ret = queue_var_store(&q->nr_requests, page, count); - if (q->nr_requests < BLKDEV_MIN_RQ) - q->nr_requests = BLKDEV_MIN_RQ; + spin_lock_irq(q->queue_lock); + q->nr_requests = nr; blk_queue_congestion_threshold(q); if (rl->count[READ] >= queue_congestion_on_threshold(q)) @@ -3638,6 +3708,7 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) blk_clear_queue_full(q, WRITE); wake_up(&rl->wait[WRITE]); } + spin_unlock_irq(q->queue_lock); return ret; } @@ -3753,13 +3824,19 @@ static ssize_t queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { struct queue_sysfs_entry *entry = to_queue(attr); - struct request_queue *q; + request_queue_t *q = container_of(kobj, struct request_queue, kobj); + ssize_t res; - q = container_of(kobj, struct request_queue, kobj); if (!entry->show) return -EIO; - - return entry->show(q, page); + mutex_lock(&q->sysfs_lock); + if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { + mutex_unlock(&q->sysfs_lock); + return -ENOENT; + } + res = entry->show(q, page); + mutex_unlock(&q->sysfs_lock); + return res; } static ssize_t @@ -3767,13 +3844,20 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, const char *page, size_t length) { struct queue_sysfs_entry *entry = to_queue(attr); - struct request_queue *q; + request_queue_t *q = container_of(kobj, struct request_queue, kobj); + + ssize_t res; - q = container_of(kobj, struct request_queue, kobj); if (!entry->store) return -EIO; - - return entry->store(q, page, length); + mutex_lock(&q->sysfs_lock); + if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { + mutex_unlock(&q->sysfs_lock); + return -ENOENT; + } + res = entry->store(q, page, length); + mutex_unlock(&q->sysfs_lock); + return res; } static struct sysfs_ops queue_sysfs_ops = { @@ -3784,6 +3868,7 @@ static struct sysfs_ops queue_sysfs_ops = { static struct kobj_type queue_ktype = { .sysfs_ops = &queue_sysfs_ops, .default_attrs = default_attrs, + .release = blk_release_queue, }; int blk_register_queue(struct gendisk *disk) @@ -3796,19 +3881,17 @@ int blk_register_queue(struct gendisk *disk) return -ENXIO; q->kobj.parent = kobject_get(&disk->kobj); - if (!q->kobj.parent) - return -EBUSY; - snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue"); - q->kobj.ktype = &queue_ktype; - - ret = kobject_register(&q->kobj); + ret = kobject_add(&q->kobj); if (ret < 0) return ret; + kobject_uevent(&q->kobj, KOBJ_ADD); + ret = elv_register_queue(q); if (ret) { - kobject_unregister(&q->kobj); + kobject_uevent(&q->kobj, KOBJ_REMOVE); + kobject_del(&q->kobj); return ret; } @@ -3822,7 +3905,8 @@ void blk_unregister_queue(struct gendisk *disk) if (q && q->request_fn) { elv_unregister_queue(q); - kobject_unregister(&q->kobj); + kobject_uevent(&q->kobj, KOBJ_REMOVE); + kobject_del(&q->kobj); kobject_put(&disk->kobj); } } |