diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-10 17:23:49 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-10 17:23:49 -0800 |
commit | 3419b45039c6b799c974a8019361c045e7ca232c (patch) | |
tree | 36a63602036cc50f34fadcbd5d5d8fca94e44297 | |
parent | 01504f5e9e071f1dde1062e3be15f54d4555308f (diff) | |
parent | c1c534609fe8a859f9c8108a5591e6e8a97e34d1 (diff) | |
download | lwn-3419b45039c6b799c974a8019361c045e7ca232c.tar.gz lwn-3419b45039c6b799c974a8019361c045e7ca232c.zip |
Merge branch 'for-4.4/io-poll' of git://git.kernel.dk/linux-block
Pull block IO poll support from Jens Axboe:
"Various groups have been doing experimentation around IO polling for
(really) fast devices. The code has been reviewed and has been
sitting on the side for a few releases, but this is now good enough
for coordinated benchmarking and further experimentation.
Currently O_DIRECT sync read/write are supported. A framework is in
the works that allows scalable stats tracking so we can auto-tune
this. And we'll add libaio support as well soon. Fow now, it's an
opt-in feature for test purposes"
* 'for-4.4/io-poll' of git://git.kernel.dk/linux-block:
direct-io: be sure to assign dio->bio_bdev for both paths
directio: add block polling support
NVMe: add blk polling support
block: add block polling support
blk-mq: return tag/queue combo in the make_request_fn handlers
block: change ->make_request_fn() and users to return a queue cookie
33 files changed, 286 insertions, 90 deletions
diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index f2a00c591bf7..e9110b9b8bcd 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -59,7 +59,7 @@ struct nfhd_device { struct gendisk *disk; }; -static void nfhd_make_request(struct request_queue *queue, struct bio *bio) +static blk_qc_t nfhd_make_request(struct request_queue *queue, struct bio *bio) { struct nfhd_device *dev = queue->queuedata; struct bio_vec bvec; @@ -77,6 +77,7 @@ static void nfhd_make_request(struct request_queue *queue, struct bio *bio) sec += len; } bio_endio(bio); + return BLK_QC_T_NONE; } static int nfhd_getgeo(struct block_device *bdev, struct hd_geometry *geo) diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index d2b79bc336c1..7a399b4d60a0 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -103,7 +103,7 @@ axon_ram_irq_handler(int irq, void *dev) * axon_ram_make_request - make_request() method for block device * @queue, @bio: see blk_queue_make_request() */ -static void +static blk_qc_t axon_ram_make_request(struct request_queue *queue, struct bio *bio) { struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data; @@ -120,7 +120,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) bio_for_each_segment(vec, bio, iter) { if (unlikely(phys_mem + vec.bv_len > phys_end)) { bio_io_error(bio); - return; + return BLK_QC_T_NONE; } user_mem = page_address(vec.bv_page) + vec.bv_offset; @@ -133,6 +133,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) transfered += vec.bv_len; } bio_endio(bio); + return BLK_QC_T_NONE; } /** diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index fa84ca990caa..3c3ace2c46b6 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -101,7 +101,7 @@ static void simdisk_transfer(struct simdisk *dev, unsigned long sector, spin_unlock(&dev->lock); } -static void simdisk_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t simdisk_make_request(struct request_queue *q, struct bio *bio) { struct simdisk *dev = q->queuedata; struct bio_vec bvec; @@ -119,6 +119,7 @@ static void simdisk_make_request(struct request_queue *q, struct bio *bio) } bio_endio(bio); + return BLK_QC_T_NONE; } static int simdisk_open(struct block_device *bdev, fmode_t mode) diff --git a/block/blk-core.c b/block/blk-core.c index 590cca21c24a..2bbf08cd2900 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -809,7 +809,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) } EXPORT_SYMBOL(blk_init_queue_node); -static void blk_queue_bio(struct request_queue *q, struct bio *bio); +static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio); struct request_queue * blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, @@ -1678,7 +1678,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) blk_rq_bio_prep(req->q, req, bio); } -static void blk_queue_bio(struct request_queue *q, struct bio *bio) +static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) { const bool sync = !!(bio->bi_rw & REQ_SYNC); struct blk_plug *plug; @@ -1698,7 +1698,7 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio) if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { bio->bi_error = -EIO; bio_endio(bio); - return; + return BLK_QC_T_NONE; } if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { @@ -1713,7 +1713,7 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio) */ if (!blk_queue_nomerges(q)) { if (blk_attempt_plug_merge(q, bio, &request_count, NULL)) - return; + return BLK_QC_T_NONE; } else request_count = blk_plug_queued_count(q); @@ -1791,6 +1791,8 @@ get_rq: out_unlock: spin_unlock_irq(q->queue_lock); } + + return BLK_QC_T_NONE; } /* @@ -1996,12 +1998,13 @@ end_io: * a lower device by calling into generic_make_request recursively, which * means the bio should NOT be touched after the call to ->make_request_fn. */ -void generic_make_request(struct bio *bio) +blk_qc_t generic_make_request(struct bio *bio) { struct bio_list bio_list_on_stack; + blk_qc_t ret = BLK_QC_T_NONE; if (!generic_make_request_checks(bio)) - return; + goto out; /* * We only want one ->make_request_fn to be active at a time, else @@ -2015,7 +2018,7 @@ void generic_make_request(struct bio *bio) */ if (current->bio_list) { bio_list_add(current->bio_list, bio); - return; + goto out; } /* following loop may be a bit non-obvious, and so deserves some @@ -2040,7 +2043,7 @@ void generic_make_request(struct bio *bio) if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) { - q->make_request_fn(q, bio); + ret = q->make_request_fn(q, bio); blk_queue_exit(q); @@ -2053,6 +2056,9 @@ void generic_make_request(struct bio *bio) } } while (bio); current->bio_list = NULL; /* deactivate */ + +out: + return ret; } EXPORT_SYMBOL(generic_make_request); @@ -2066,7 +2072,7 @@ EXPORT_SYMBOL(generic_make_request); * interfaces; @bio must be presetup and ready for I/O. * */ -void submit_bio(int rw, struct bio *bio) +blk_qc_t submit_bio(int rw, struct bio *bio) { bio->bi_rw |= rw; @@ -2100,7 +2106,7 @@ void submit_bio(int rw, struct bio *bio) } } - generic_make_request(bio); + return generic_make_request(bio); } EXPORT_SYMBOL(submit_bio); @@ -3306,6 +3312,47 @@ void blk_finish_plug(struct blk_plug *plug) } EXPORT_SYMBOL(blk_finish_plug); +bool blk_poll(struct request_queue *q, blk_qc_t cookie) +{ + struct blk_plug *plug; + long state; + + if (!q->mq_ops || !q->mq_ops->poll || !blk_qc_t_valid(cookie) || + !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + return false; + + plug = current->plug; + if (plug) + blk_flush_plug_list(plug, false); + + state = current->state; + while (!need_resched()) { + unsigned int queue_num = blk_qc_t_to_queue_num(cookie); + struct blk_mq_hw_ctx *hctx = q->queue_hw_ctx[queue_num]; + int ret; + + hctx->poll_invoked++; + + ret = q->mq_ops->poll(hctx, blk_qc_t_to_tag(cookie)); + if (ret > 0) { + hctx->poll_success++; + set_current_state(TASK_RUNNING); + return true; + } + + if (signal_pending_state(state, current)) + set_current_state(TASK_RUNNING); + + if (current->state == TASK_RUNNING) + return true; + if (ret < 0) + break; + cpu_relax(); + } + + return false; +} + #ifdef CONFIG_PM /** * blk_pm_runtime_init - Block layer runtime PM initialization routine diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 6f57a110289c..1cf18784c5cf 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -174,6 +174,11 @@ static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page) return ret; } +static ssize_t blk_mq_hw_sysfs_poll_show(struct blk_mq_hw_ctx *hctx, char *page) +{ + return sprintf(page, "invoked=%lu, success=%lu\n", hctx->poll_invoked, hctx->poll_success); +} + static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx, char *page) { @@ -295,6 +300,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = { .attr = {.name = "cpu_list", .mode = S_IRUGO }, .show = blk_mq_hw_sysfs_cpus_show, }; +static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = { + .attr = {.name = "io_poll", .mode = S_IRUGO }, + .show = blk_mq_hw_sysfs_poll_show, +}; static struct attribute *default_hw_ctx_attrs[] = { &blk_mq_hw_sysfs_queued.attr, @@ -304,6 +313,7 @@ static struct attribute *default_hw_ctx_attrs[] = { &blk_mq_hw_sysfs_tags.attr, &blk_mq_hw_sysfs_cpus.attr, &blk_mq_hw_sysfs_active.attr, + &blk_mq_hw_sysfs_poll.attr, NULL, }; diff --git a/block/blk-mq.c b/block/blk-mq.c index 694f8703f83c..86bd5b25288e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1198,7 +1198,7 @@ static struct request *blk_mq_map_request(struct request_queue *q, return rq; } -static int blk_mq_direct_issue_request(struct request *rq) +static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie) { int ret; struct request_queue *q = rq->q; @@ -1209,6 +1209,7 @@ static int blk_mq_direct_issue_request(struct request *rq) .list = NULL, .last = 1 }; + blk_qc_t new_cookie = blk_tag_to_qc_t(rq->tag, hctx->queue_num); /* * For OK queue, we are done. For error, kill it. Any other @@ -1216,18 +1217,21 @@ static int blk_mq_direct_issue_request(struct request *rq) * would have done */ ret = q->mq_ops->queue_rq(hctx, &bd); - if (ret == BLK_MQ_RQ_QUEUE_OK) + if (ret == BLK_MQ_RQ_QUEUE_OK) { + *cookie = new_cookie; return 0; - else { - __blk_mq_requeue_request(rq); + } - if (ret == BLK_MQ_RQ_QUEUE_ERROR) { - rq->errors = -EIO; - blk_mq_end_request(rq, rq->errors); - return 0; - } - return -1; + __blk_mq_requeue_request(rq); + + if (ret == BLK_MQ_RQ_QUEUE_ERROR) { + *cookie = BLK_QC_T_NONE; + rq->errors = -EIO; + blk_mq_end_request(rq, rq->errors); + return 0; } + + return -1; } /* @@ -1235,7 +1239,7 @@ static int blk_mq_direct_issue_request(struct request *rq) * but will attempt to bypass the hctx queueing if we can go straight to * hardware for SYNC IO. */ -static void blk_mq_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) { const int is_sync = rw_is_sync(bio->bi_rw); const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); @@ -1244,12 +1248,13 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) unsigned int request_count = 0; struct blk_plug *plug; struct request *same_queue_rq = NULL; + blk_qc_t cookie; blk_queue_bounce(q, &bio); if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { bio_io_error(bio); - return; + return BLK_QC_T_NONE; } blk_queue_split(q, &bio, q->bio_split); @@ -1257,13 +1262,15 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) if (!is_flush_fua && !blk_queue_nomerges(q)) { if (blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) - return; + return BLK_QC_T_NONE; } else request_count = blk_plug_queued_count(q); rq = blk_mq_map_request(q, bio, &data); if (unlikely(!rq)) - return; + return BLK_QC_T_NONE; + + cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num); if (unlikely(is_flush_fua)) { blk_mq_bio_to_request(rq, bio); @@ -1302,11 +1309,11 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) old_rq = rq; blk_mq_put_ctx(data.ctx); if (!old_rq) - return; - if (!blk_mq_direct_issue_request(old_rq)) - return; + goto done; + if (!blk_mq_direct_issue_request(old_rq, &cookie)) + goto done; blk_mq_insert_request(old_rq, false, true, true); - return; + goto done; } if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { @@ -1320,13 +1327,15 @@ run_queue: blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); } blk_mq_put_ctx(data.ctx); +done: + return cookie; } /* * Single hardware queue variant. This will attempt to use any per-process * plug for merging and IO deferral. */ -static void blk_sq_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) { const int is_sync = rw_is_sync(bio->bi_rw); const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); @@ -1334,23 +1343,26 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) unsigned int request_count = 0; struct blk_map_ctx data; struct request *rq; + blk_qc_t cookie; blk_queue_bounce(q, &bio); if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { bio_io_error(bio); - return; + return BLK_QC_T_NONE; } blk_queue_split(q, &bio, q->bio_split); if (!is_flush_fua && !blk_queue_nomerges(q) && blk_attempt_plug_merge(q, bio, &request_count, NULL)) - return; + return BLK_QC_T_NONE; rq = blk_mq_map_request(q, bio, &data); if (unlikely(!rq)) - return; + return BLK_QC_T_NONE; + + cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num); if (unlikely(is_flush_fua)) { blk_mq_bio_to_request(rq, bio); @@ -1374,7 +1386,7 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) } list_add_tail(&rq->queuelist, &plug->mq_list); blk_mq_put_ctx(data.ctx); - return; + return cookie; } if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { @@ -1389,6 +1401,7 @@ run_queue: } blk_mq_put_ctx(data.ctx); + return cookie; } /* diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 31849e328b45..565b8dac5782 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -317,6 +317,34 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) return ret; } +static ssize_t queue_poll_show(struct request_queue *q, char *page) +{ + return queue_var_show(test_bit(QUEUE_FLAG_POLL, &q->queue_flags), page); +} + +static ssize_t queue_poll_store(struct request_queue *q, const char *page, + size_t count) +{ + unsigned long poll_on; + ssize_t ret; + + if (!q->mq_ops || !q->mq_ops->poll) + return -EINVAL; + + ret = queue_var_store(&poll_on, page, count); + if (ret < 0) + return ret; + + spin_lock_irq(q->queue_lock); + if (poll_on) + queue_flag_set(QUEUE_FLAG_POLL, q); + else + queue_flag_clear(QUEUE_FLAG_POLL, q); + spin_unlock_irq(q->queue_lock); + + return ret; +} + static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, .show = queue_requests_show, @@ -442,6 +470,12 @@ static struct queue_sysfs_entry queue_random_entry = { .store = queue_store_random, }; +static struct queue_sysfs_entry queue_poll_entry = { + .attr = {.name = "io_poll", .mode = S_IRUGO | S_IWUSR }, + .show = queue_poll_show, + .store = queue_poll_store, +}; + static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -466,6 +500,7 @@ static struct attribute *default_attrs[] = { &queue_rq_affinity_entry.attr, &queue_iostats_entry.attr, &queue_random_entry.attr, + &queue_poll_entry.attr, NULL, }; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index b9794aeeb878..c9f9c30d6467 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -323,7 +323,7 @@ out: return err; } -static void brd_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) { struct block_device *bdev = bio->bi_bdev; struct brd_device *brd = bdev->bd_disk->private_data; @@ -358,9 +358,10 @@ static void brd_make_request(struct request_queue *q, struct bio *bio) out: bio_endio(bio); - return; + return BLK_QC_T_NONE; io_error: bio_io_error(bio); + return BLK_QC_T_NONE; } static int brd_rw_page(struct block_device *bdev, sector_t sector, diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 015c6e91b756..e66d453a5f2b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1448,7 +1448,7 @@ extern int proc_details; /* drbd_req */ extern void do_submit(struct work_struct *ws); extern void __drbd_make_request(struct drbd_device *, struct bio *, unsigned long); -extern void drbd_make_request(struct request_queue *q, struct bio *bio); +extern blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio); extern int drbd_read_remote(struct drbd_device *device, struct drbd_request *req); extern int is_valid_ar_handle(struct drbd_request *, sector_t); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 211592682169..3ae2c0086563 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1494,7 +1494,7 @@ void do_submit(struct work_struct *ws) } } -void drbd_make_request(struct request_queue *q, struct bio *bio) +blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio) { struct drbd_device *device = (struct drbd_device *) q->queuedata; unsigned long start_jif; @@ -1510,6 +1510,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) inc_ap_bio(device); __drbd_make_request(device, bio, start_jif); + return BLK_QC_T_NONE; } void request_timer_fn(unsigned long data) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 1c9e4fe5aa44..6255d1c4bba4 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -321,7 +321,7 @@ static struct nullb_queue *nullb_to_queue(struct nullb *nullb) return &nullb->queues[index]; } -static void null_queue_bio(struct request_queue *q, struct bio *bio) +static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio) { struct nullb *nullb = q->queuedata; struct nullb_queue *nq = nullb_to_queue(nullb); @@ -331,6 +331,7 @@ static void null_queue_bio(struct request_queue *q, struct bio *bio) cmd->bio = bio; null_handle_cmd(cmd); + return BLK_QC_T_NONE; } static int null_rq_prep_fn(struct request_queue *q, struct request *req) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 2f477d45d6cf..d06c62eccdf0 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2441,7 +2441,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio) } } -static void pkt_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio) { struct pktcdvd_device *pd; char b[BDEVNAME_SIZE]; @@ -2467,7 +2467,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) */ if (bio_data_dir(bio) == READ) { pkt_make_request_read(pd, bio); - return; + return BLK_QC_T_NONE; } if (!test_bit(PACKET_WRITABLE, &pd->flags)) { @@ -2499,13 +2499,12 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) pkt_make_request_write(q, split); } while (split != bio); - return; + return BLK_QC_T_NONE; end_io: bio_io_error(bio); + return BLK_QC_T_NONE; } - - static void pkt_init_queue(struct pktcdvd_device *pd) { struct request_queue *q = pd->disk->queue; diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index d89fcac59515..56847fcda086 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -598,7 +598,7 @@ out: return next; } -static void ps3vram_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio) { struct ps3_system_bus_device *dev = q->queuedata; struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); @@ -614,11 +614,13 @@ static void ps3vram_make_request(struct request_queue *q, struct bio *bio) spin_unlock_irq(&priv->lock); if (busy) - return; + return BLK_QC_T_NONE; do { bio = ps3vram_do_bio(dev, bio); } while (bio); + + return BLK_QC_T_NONE; } static int ps3vram_probe(struct ps3_system_bus_device *dev) diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 3163e4cdc2cc..e1b8b7061d2f 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -145,7 +145,7 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card, } } -static void rsxx_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio) { struct rsxx_cardinfo *card = q->queuedata; struct rsxx_bio_meta *bio_meta; @@ -199,7 +199,7 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio) if (st) goto queue_err; - return; + return BLK_QC_T_NONE; queue_err: kmem_cache_free(bio_meta_pool, bio_meta); @@ -207,6 +207,7 @@ req_err: if (st) bio->bi_error = st; bio_endio(bio); + return BLK_QC_T_NONE; } /*----------------- Device Setup -------------------*/ diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 04d65790a886..7939b9f87441 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -524,7 +524,7 @@ static int mm_check_plugged(struct cardinfo *card) return !!blk_check_plugged(mm_unplug, card, sizeof(struct blk_plug_cb)); } -static void mm_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio) { struct cardinfo *card = q->queuedata; pr_debug("mm_make_request %llu %u\n", @@ -541,7 +541,7 @@ static void mm_make_request(struct request_queue *q, struct bio *bio) activate(card); spin_unlock_irq(&card->lock); - return; + return BLK_QC_T_NONE; } static irqreturn_t mm_interrupt(int irq, void *__card) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 81a557c33a1f..47915d736f8d 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -894,7 +894,7 @@ out: /* * Handler function for all zram I/O requests. */ -static void zram_make_request(struct request_queue *queue, struct bio *bio) +static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio) { struct zram *zram = queue->queuedata; @@ -911,11 +911,12 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) __zram_make_request(zram, bio); zram_meta_put(zram); - return; + return BLK_QC_T_NONE; put_zram: zram_meta_put(zram); error: bio_io_error(bio); + return BLK_QC_T_NONE; } static void zram_slot_free_notify(struct block_device *bdev, diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index 64a888a5e9b3..7ba64c87ba1c 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -803,7 +803,7 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio, return NVM_IO_OK; } -static void rrpc_make_rq(struct request_queue *q, struct bio *bio) +static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio) { struct rrpc *rrpc = q->queuedata; struct nvm_rq *rqd; @@ -811,21 +811,21 @@ static void rrpc_make_rq(struct request_queue *q, struct bio *bio) if (bio->bi_rw & REQ_DISCARD) { rrpc_discard(rrpc, bio); - return; + return BLK_QC_T_NONE; } rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL); if (!rqd) { pr_err_ratelimited("rrpc: not able to queue bio."); bio_io_error(bio); - return; + return BLK_QC_T_NONE; } memset(rqd, 0, sizeof(struct nvm_rq)); err = rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_NONE); switch (err) { case NVM_IO_OK: - return; + return BLK_QC_T_NONE; case NVM_IO_ERR: bio_io_error(bio); break; @@ -841,6 +841,7 @@ static void rrpc_make_rq(struct request_queue *q, struct bio *bio) } mempool_free(rqd, rrpc->rq_pool); + return BLK_QC_T_NONE; } static void rrpc_requeue(struct work_struct *work) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 8e9877b04637..25fa8445bb24 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -958,7 +958,8 @@ static void cached_dev_nodata(struct closure *cl) /* Cached devices - read & write stuff */ -static void cached_dev_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t cached_dev_make_request(struct request_queue *q, + struct bio *bio) { struct search *s; struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; @@ -997,6 +998,8 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio) else generic_make_request(bio); } + + return BLK_QC_T_NONE; } static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode, @@ -1070,7 +1073,8 @@ static void flash_dev_nodata(struct closure *cl) continue_at(cl, search_free, NULL); } -static void flash_dev_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t flash_dev_make_request(struct request_queue *q, + struct bio *bio) { struct search *s; struct closure *cl; @@ -1093,7 +1097,7 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio) continue_at_nobarrier(&s->cl, flash_dev_nodata, bcache_wq); - return; + return BLK_QC_T_NONE; } else if (rw) { bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &KEY(d->id, bio->bi_iter.bi_sector, 0), @@ -1109,6 +1113,7 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio) } continue_at(cl, search_free, NULL); + return BLK_QC_T_NONE; } static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 32440ad5f684..6e15f3565892 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1755,7 +1755,7 @@ static void __split_and_process_bio(struct mapped_device *md, * The request function that just remaps the bio built up by * dm_merge_bvec. */ -static void dm_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) { int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; @@ -1774,12 +1774,12 @@ static void dm_make_request(struct request_queue *q, struct bio *bio) queue_io(md, bio); else bio_io_error(bio); - return; + return BLK_QC_T_NONE; } __split_and_process_bio(md, map, bio); dm_put_live_table(md, srcu_idx); - return; + return BLK_QC_T_NONE; } int dm_request_based(struct mapped_device *md) diff --git a/drivers/md/md.c b/drivers/md/md.c index 3f9a514b5b9d..807095f4c793 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -250,7 +250,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock); * call has finished, the bio has been linked into some internal structure * and so is visible to ->quiesce(), so we don't need the refcount any more. */ -static void md_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) { const int rw = bio_data_dir(bio); struct mddev *mddev = q->queuedata; @@ -262,13 +262,13 @@ static void md_make_request(struct request_queue *q, struct bio *bio) if (mddev == NULL || mddev->pers == NULL || !mddev->ready) { bio_io_error(bio); - return; + return BLK_QC_T_NONE; } if (mddev->ro == 1 && unlikely(rw == WRITE)) { if (bio_sectors(bio) != 0) bio->bi_error = -EROFS; bio_endio(bio); - return; + return BLK_QC_T_NONE; } smp_rmb(); /* Ensure implications of 'active' are visible */ rcu_read_lock(); @@ -302,6 +302,8 @@ static void md_make_request(struct request_queue *q, struct bio *bio) if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) wake_up(&mddev->sb_wait); + + return BLK_QC_T_NONE; } /* mddev_suspend makes sure no new requests are submitted diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 0df77cb07df6..91a336ea8c4f 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -161,7 +161,7 @@ static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, return err; } -static void nd_blk_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio) { struct block_device *bdev = bio->bi_bdev; struct gendisk *disk = bdev->bd_disk; @@ -208,6 +208,7 @@ static void nd_blk_make_request(struct request_queue *q, struct bio *bio) out: bio_endio(bio); + return BLK_QC_T_NONE; } static int nd_blk_rw_bytes(struct nd_namespace_common *ndns, diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index eae93ab8ffcd..efb2c1ceef98 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1150,7 +1150,7 @@ static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, return ret; } -static void btt_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); struct btt *btt = q->queuedata; @@ -1198,6 +1198,7 @@ static void btt_make_request(struct request_queue *q, struct bio *bio) out: bio_endio(bio); + return BLK_QC_T_NONE; } static int btt_rw_page(struct block_device *bdev, sector_t sector, diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 349f03e7ed06..012e0649f1ac 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -64,7 +64,7 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, kunmap_atomic(mem); } -static void pmem_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) { bool do_acct; unsigned long start; @@ -84,6 +84,7 @@ static void pmem_make_request(struct request_queue *q, struct bio *bio) wmb_pmem(); bio_endio(bio); + return BLK_QC_T_NONE; } static int pmem_rw_page(struct block_device *bdev, sector_t sector, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 97b6640a3745..3dfc28875cc3 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -90,7 +90,7 @@ static struct class *nvme_class; static int __nvme_reset(struct nvme_dev *dev); static int nvme_reset(struct nvme_dev *dev); -static int nvme_process_cq(struct nvme_queue *nvmeq); +static void nvme_process_cq(struct nvme_queue *nvmeq); static void nvme_dead_ctrl(struct nvme_dev *dev); struct async_cmd_info { @@ -935,7 +935,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_MQ_RQ_QUEUE_BUSY; } -static int nvme_process_cq(struct nvme_queue *nvmeq) +static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) { u16 head, phase; @@ -953,6 +953,8 @@ static int nvme_process_cq(struct nvme_queue *nvmeq) head = 0; phase = !phase; } + if (tag && *tag == cqe.command_id) + *tag = -1; ctx = nvme_finish_cmd(nvmeq, cqe.command_id, &fn); fn(nvmeq, ctx, &cqe); } @@ -964,14 +966,18 @@ static int nvme_process_cq(struct nvme_queue *nvmeq) * a big problem. */ if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) - return 0; + return; writel(head, nvmeq->q_db + nvmeq->dev->db_stride); nvmeq->cq_head = head; nvmeq->cq_phase = phase; nvmeq->cqe_seen = 1; - return 1; +} + +static void nvme_process_cq(struct nvme_queue *nvmeq) +{ + __nvme_process_cq(nvmeq, NULL); } static irqreturn_t nvme_irq(int irq, void *data) @@ -995,6 +1001,23 @@ static irqreturn_t nvme_irq_check(int irq, void *data) return IRQ_WAKE_THREAD; } +static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) +{ + struct nvme_queue *nvmeq = hctx->driver_data; + + if ((le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == + nvmeq->cq_phase) { + spin_lock_irq(&nvmeq->q_lock); + __nvme_process_cq(nvmeq, &tag); + spin_unlock_irq(&nvmeq->q_lock); + + if (tag == -1) + return 1; + } + + return 0; +} + /* * Returns 0 on success. If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code @@ -1656,6 +1679,7 @@ static struct blk_mq_ops nvme_mq_ops = { .init_hctx = nvme_init_hctx, .init_request = nvme_init_request, .timeout = nvme_timeout, + .poll = nvme_poll, }; static void nvme_dev_remove_admin(struct nvme_dev *dev) diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 5ed44fe21380..94a8f4ab57bc 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -27,7 +27,8 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode); static void dcssblk_release(struct gendisk *disk, fmode_t mode); -static void dcssblk_make_request(struct request_queue *q, struct bio *bio); +static blk_qc_t dcssblk_make_request(struct request_queue *q, + struct bio *bio); static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum, void __pmem **kaddr, unsigned long *pfn); @@ -815,7 +816,7 @@ dcssblk_release(struct gendisk *disk, fmode_t mode) up_write(&dcssblk_devices_sem); } -static void +static blk_qc_t dcssblk_make_request(struct request_queue *q, struct bio *bio) { struct dcssblk_dev_info *dev_info; @@ -874,9 +875,10 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio) bytes_done += bvec.bv_len; } bio_endio(bio); - return; + return BLK_QC_T_NONE; fail: bio_io_error(bio); + return BLK_QC_T_NONE; } static long diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 02871f1db562..288f59a4147b 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -181,7 +181,7 @@ static unsigned long xpram_highest_page_index(void) /* * Block device make request function. */ -static void xpram_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio) { xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data; struct bio_vec bvec; @@ -223,9 +223,10 @@ static void xpram_make_request(struct request_queue *q, struct bio *bio) } } bio_endio(bio); - return; + return BLK_QC_T_NONE; fail: bio_io_error(bio); + return BLK_QC_T_NONE; } static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo) diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c index e6974c36276d..fed50d538a41 100644 --- a/drivers/staging/lustre/lustre/llite/lloop.c +++ b/drivers/staging/lustre/lustre/llite/lloop.c @@ -333,7 +333,7 @@ static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req) return count; } -static void loop_make_request(struct request_queue *q, struct bio *old_bio) +static blk_qc_t loop_make_request(struct request_queue *q, struct bio *old_bio) { struct lloop_device *lo = q->queuedata; int rw = bio_rw(old_bio); @@ -364,9 +364,10 @@ static void loop_make_request(struct request_queue *q, struct bio *old_bio) goto err; } loop_add_bio(lo, old_bio); - return; + return BLK_QC_T_NONE; err: bio_io_error(old_bio); + return BLK_QC_T_NONE; } static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio) diff --git a/fs/direct-io.c b/fs/direct-io.c index 18e7554cf94c..cb5337d8c273 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -109,6 +109,8 @@ struct dio_submit { struct dio { int flags; /* doesn't change */ int rw; + blk_qc_t bio_cookie; + struct block_device *bio_bdev; struct inode *inode; loff_t i_size; /* i_size when submitted */ dio_iodone_t *end_io; /* IO completion function */ @@ -397,11 +399,14 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) if (dio->is_async && dio->rw == READ && dio->should_dirty) bio_set_pages_dirty(bio); - if (sdio->submit_io) + dio->bio_bdev = bio->bi_bdev; + + if (sdio->submit_io) { sdio->submit_io(dio->rw, bio, dio->inode, sdio->logical_offset_in_bio); - else - submit_bio(dio->rw, bio); + dio->bio_cookie = BLK_QC_T_NONE; + } else + dio->bio_cookie = submit_bio(dio->rw, bio); sdio->bio = NULL; sdio->boundary = 0; @@ -440,7 +445,8 @@ static struct bio *dio_await_one(struct dio *dio) __set_current_state(TASK_UNINTERRUPTIBLE); dio->waiter = current; spin_unlock_irqrestore(&dio->bio_lock, flags); - io_schedule(); + if (!blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie)) + io_schedule(); /* wake up sets us TASK_RUNNING */ spin_lock_irqsave(&dio->bio_lock, flags); dio->waiter = NULL; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 83cc9d4e5455..daf17d70aeca 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -59,6 +59,9 @@ struct blk_mq_hw_ctx { struct blk_mq_cpu_notifier cpu_notifier; struct kobject kobj; + + unsigned long poll_invoked; + unsigned long poll_success; }; struct blk_mq_tag_set { @@ -97,6 +100,8 @@ typedef void (exit_request_fn)(void *, struct request *, unsigned int, typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); typedef void (busy_tag_iter_fn)(struct request *, void *, bool); +typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); + struct blk_mq_ops { /* @@ -114,6 +119,11 @@ struct blk_mq_ops { */ timeout_fn *timeout; + /* + * Called to poll for completion of a specific tag. + */ + poll_fn *poll; + softirq_done_fn *complete; /* diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e8130138f29d..641e5a3ed58c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -244,4 +244,28 @@ enum rq_flag_bits { #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) #define REQ_NO_TIMEOUT (1ULL << __REQ_NO_TIMEOUT) +typedef unsigned int blk_qc_t; +#define BLK_QC_T_NONE -1U +#define BLK_QC_T_SHIFT 16 + +static inline bool blk_qc_t_valid(blk_qc_t cookie) +{ + return cookie != BLK_QC_T_NONE; +} + +static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num) +{ + return tag | (queue_num << BLK_QC_T_SHIFT); +} + +static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) +{ + return cookie >> BLK_QC_T_SHIFT; +} + +static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) +{ + return cookie & 0xffff; +} + #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d045ca8487af..3fe27f8d91f0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -209,7 +209,7 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; typedef void (request_fn_proc) (struct request_queue *q); -typedef void (make_request_fn) (struct request_queue *q, struct bio *bio); +typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); @@ -487,6 +487,7 @@ struct request_queue { #define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */ #define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/ +#define QUEUE_FLAG_POLL 22 /* IO polling enabled if set */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -761,7 +762,7 @@ static inline void rq_flush_dcache_pages(struct request *rq) extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); -extern void generic_make_request(struct bio *bio); +extern blk_qc_t generic_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); @@ -814,6 +815,8 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *, extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); +bool blk_poll(struct request_queue *q, blk_qc_t cookie); + static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { return bdev->bd_disk->queue; /* this is never NULL */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 9a1cb8c605e0..6230eb2a9cca 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2613,7 +2613,7 @@ static inline void remove_inode_hash(struct inode *inode) extern void inode_sb_list_add(struct inode *inode); #ifdef CONFIG_BLOCK -extern void submit_bio(int, struct bio *); +extern blk_qc_t submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); #endif extern int set_blocksize(struct block_device *, int); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 5ebd70d12f35..69c9057e1ab8 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -426,7 +426,7 @@ static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev, return ppa; } -typedef void (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); +typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); typedef int (nvm_tgt_end_io_fn)(struct nvm_rq *, int); typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int); |