summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-10-22 17:07:18 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2010-10-22 17:07:18 -0700
commita2887097f25cd38cadfc11d10769e2b349fb5eca (patch)
treecd4adcb305365d6ba9acd2c02d4eb9d0125c6f8d /block
parent8abfc6e7a45eb74e51904bbae676fae008b11366 (diff)
parent005a1d15f5a6b2bb4ada80349513effbf22b4588 (diff)
downloadlwn-a2887097f25cd38cadfc11d10769e2b349fb5eca.tar.gz
lwn-a2887097f25cd38cadfc11d10769e2b349fb5eca.zip
Merge branch 'for-2.6.37/barrier' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.37/barrier' of git://git.kernel.dk/linux-2.6-block: (46 commits) xen-blkfront: disable barrier/flush write support Added blk-lib.c and blk-barrier.c was renamed to blk-flush.c block: remove BLKDEV_IFL_WAIT aic7xxx_old: removed unused 'req' variable block: remove the BH_Eopnotsupp flag block: remove the BLKDEV_IFL_BARRIER flag block: remove the WRITE_BARRIER flag swap: do not send discards as barriers fat: do not send discards as barriers ext4: do not send discards as barriers jbd2: replace barriers with explicit flush / FUA usage jbd2: Modify ASYNC_COMMIT code to not rely on queue draining on barrier jbd: replace barriers with explicit flush / FUA usage nilfs2: replace barriers with explicit flush / FUA usage reiserfs: replace barriers with explicit flush / FUA usage gfs2: replace barriers with explicit flush / FUA usage btrfs: replace barriers with explicit flush / FUA usage xfs: replace barriers with explicit flush / FUA usage block: pass gfp_mask and flags to sb_issue_discard dm: convey that all flushes are processed as empty ...
Diffstat (limited to 'block')
-rw-r--r--block/Makefile2
-rw-r--r--block/blk-barrier.c350
-rw-r--r--block/blk-core.c72
-rw-r--r--block/blk-flush.c262
-rw-r--r--block/blk-lib.c39
-rw-r--r--block/blk-settings.c20
-rw-r--r--block/blk.h8
-rw-r--r--block/elevator.c79
-rw-r--r--block/ioctl.c4
9 files changed, 350 insertions, 486 deletions
diff --git a/block/Makefile b/block/Makefile
index c850d5ef80a2..0fec4b3fab51 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -3,7 +3,7 @@
#
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
- blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
+ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
deleted file mode 100644
index f0faefca032f..000000000000
--- a/block/blk-barrier.c
+++ /dev/null
@@ -1,350 +0,0 @@
-/*
- * Functions related to barrier IO handling
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/gfp.h>
-
-#include "blk.h"
-
-/**
- * blk_queue_ordered - does this queue support ordered writes
- * @q: the request queue
- * @ordered: one of QUEUE_ORDERED_*
- *
- * Description:
- * For journalled file systems, doing ordered writes on a commit
- * block instead of explicitly doing wait_on_buffer (which is bad
- * for performance) can be a big win. Block drivers supporting this
- * feature should call this function and indicate so.
- *
- **/
-int blk_queue_ordered(struct request_queue *q, unsigned ordered)
-{
- if (ordered != QUEUE_ORDERED_NONE &&
- ordered != QUEUE_ORDERED_DRAIN &&
- ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
- ordered != QUEUE_ORDERED_DRAIN_FUA &&
- ordered != QUEUE_ORDERED_TAG &&
- ordered != QUEUE_ORDERED_TAG_FLUSH &&
- ordered != QUEUE_ORDERED_TAG_FUA) {
- printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
- return -EINVAL;
- }
-
- q->ordered = ordered;
- q->next_ordered = ordered;
-
- return 0;
-}
-EXPORT_SYMBOL(blk_queue_ordered);
-
-/*
- * Cache flushing for ordered writes handling
- */
-unsigned blk_ordered_cur_seq(struct request_queue *q)
-{
- if (!q->ordseq)
- return 0;
- return 1 << ffz(q->ordseq);
-}
-
-unsigned blk_ordered_req_seq(struct request *rq)
-{
- struct request_queue *q = rq->q;
-
- BUG_ON(q->ordseq == 0);
-
- if (rq == &q->pre_flush_rq)
- return QUEUE_ORDSEQ_PREFLUSH;
- if (rq == &q->bar_rq)
- return QUEUE_ORDSEQ_BAR;
- if (rq == &q->post_flush_rq)
- return QUEUE_ORDSEQ_POSTFLUSH;
-
- /*
- * !fs requests don't need to follow barrier ordering. Always
- * put them at the front. This fixes the following deadlock.
- *
- * http://thread.gmane.org/gmane.linux.kernel/537473
- */
- if (rq->cmd_type != REQ_TYPE_FS)
- return QUEUE_ORDSEQ_DRAIN;
-
- if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
- (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
- return QUEUE_ORDSEQ_DRAIN;
- else
- return QUEUE_ORDSEQ_DONE;
-}
-
-bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
-{
- struct request *rq;
-
- if (error && !q->orderr)
- q->orderr = error;
-
- BUG_ON(q->ordseq & seq);
- q->ordseq |= seq;
-
- if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
- return false;
-
- /*
- * Okay, sequence complete.
- */
- q->ordseq = 0;
- rq = q->orig_bar_rq;
- __blk_end_request_all(rq, q->orderr);
- return true;
-}
-
-static void pre_flush_end_io(struct request *rq, int error)
-{
- elv_completed_request(rq->q, rq);
- blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
-}
-
-static void bar_end_io(struct request *rq, int error)
-{
- elv_completed_request(rq->q, rq);
- blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
-}
-
-static void post_flush_end_io(struct request *rq, int error)
-{
- elv_completed_request(rq->q, rq);
- blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
-}
-
-static void queue_flush(struct request_queue *q, unsigned which)
-{
- struct request *rq;
- rq_end_io_fn *end_io;
-
- if (which == QUEUE_ORDERED_DO_PREFLUSH) {
- rq = &q->pre_flush_rq;
- end_io = pre_flush_end_io;
- } else {
- rq = &q->post_flush_rq;
- end_io = post_flush_end_io;
- }
-
- blk_rq_init(q, rq);
- rq->cmd_type = REQ_TYPE_FS;
- rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH;
- rq->rq_disk = q->orig_bar_rq->rq_disk;
- rq->end_io = end_io;
-
- elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
-}
-
-static inline bool start_ordered(struct request_queue *q, struct request **rqp)
-{
- struct request *rq = *rqp;
- unsigned skip = 0;
-
- q->orderr = 0;
- q->ordered = q->next_ordered;
- q->ordseq |= QUEUE_ORDSEQ_STARTED;
-
- /*
- * For an empty barrier, there's no actual BAR request, which
- * in turn makes POSTFLUSH unnecessary. Mask them off.
- */
- if (!blk_rq_sectors(rq)) {
- q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
- QUEUE_ORDERED_DO_POSTFLUSH);
- /*
- * Empty barrier on a write-through device w/ ordered
- * tag has no command to issue and without any command
- * to issue, ordering by tag can't be used. Drain
- * instead.
- */
- if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
- !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
- q->ordered &= ~QUEUE_ORDERED_BY_TAG;
- q->ordered |= QUEUE_ORDERED_BY_DRAIN;
- }
- }
-
- /* stash away the original request */
- blk_dequeue_request(rq);
- q->orig_bar_rq = rq;
- rq = NULL;
-
- /*
- * Queue ordered sequence. As we stack them at the head, we
- * need to queue in reverse order. Note that we rely on that
- * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
- * request gets inbetween ordered sequence.
- */
- if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
- queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
- rq = &q->post_flush_rq;
- } else
- skip |= QUEUE_ORDSEQ_POSTFLUSH;
-
- if (q->ordered & QUEUE_ORDERED_DO_BAR) {
- rq = &q->bar_rq;
-
- /* initialize proxy request and queue it */
- blk_rq_init(q, rq);
- if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
- rq->cmd_flags |= REQ_WRITE;
- if (q->ordered & QUEUE_ORDERED_DO_FUA)
- rq->cmd_flags |= REQ_FUA;
- init_request_from_bio(rq, q->orig_bar_rq->bio);
- rq->end_io = bar_end_io;
-
- elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
- } else
- skip |= QUEUE_ORDSEQ_BAR;
-
- if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
- queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
- rq = &q->pre_flush_rq;
- } else
- skip |= QUEUE_ORDSEQ_PREFLUSH;
-
- if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
- rq = NULL;
- else
- skip |= QUEUE_ORDSEQ_DRAIN;
-
- *rqp = rq;
-
- /*
- * Complete skipped sequences. If whole sequence is complete,
- * return false to tell elevator that this request is gone.
- */
- return !blk_ordered_complete_seq(q, skip, 0);
-}
-
-bool blk_do_ordered(struct request_queue *q, struct request **rqp)
-{
- struct request *rq = *rqp;
- const int is_barrier = rq->cmd_type == REQ_TYPE_FS &&
- (rq->cmd_flags & REQ_HARDBARRIER);
-
- if (!q->ordseq) {
- if (!is_barrier)
- return true;
-
- if (q->next_ordered != QUEUE_ORDERED_NONE)
- return start_ordered(q, rqp);
- else {
- /*
- * Queue ordering not supported. Terminate
- * with prejudice.
- */
- blk_dequeue_request(rq);
- __blk_end_request_all(rq, -EOPNOTSUPP);
- *rqp = NULL;
- return false;
- }
- }
-
- /*
- * Ordered sequence in progress
- */
-
- /* Special requests are not subject to ordering rules. */
- if (rq->cmd_type != REQ_TYPE_FS &&
- rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
- return true;
-
- if (q->ordered & QUEUE_ORDERED_BY_TAG) {
- /* Ordered by tag. Blocking the next barrier is enough. */
- if (is_barrier && rq != &q->bar_rq)
- *rqp = NULL;
- } else {
- /* Ordered by draining. Wait for turn. */
- WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
- if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
- *rqp = NULL;
- }
-
- return true;
-}
-
-static void bio_end_empty_barrier(struct bio *bio, int err)
-{
- if (err) {
- if (err == -EOPNOTSUPP)
- set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
- }
- if (bio->bi_private)
- complete(bio->bi_private);
- bio_put(bio);
-}
-
-/**
- * blkdev_issue_flush - queue a flush
- * @bdev: blockdev to issue flush for
- * @gfp_mask: memory allocation flags (for bio_alloc)
- * @error_sector: error sector
- * @flags: BLKDEV_IFL_* flags to control behaviour
- *
- * Description:
- * Issue a flush for the block device in question. Caller can supply
- * room for storing the error offset in case of a flush error, if they
- * wish to. If WAIT flag is not passed then caller may check only what
- * request was pushed in some internal queue for later handling.
- */
-int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
- sector_t *error_sector, unsigned long flags)
-{
- DECLARE_COMPLETION_ONSTACK(wait);
- struct request_queue *q;
- struct bio *bio;
- int ret = 0;
-
- if (bdev->bd_disk == NULL)
- return -ENXIO;
-
- q = bdev_get_queue(bdev);
- if (!q)
- return -ENXIO;
-
- /*
- * some block devices may not have their queue correctly set up here
- * (e.g. loop device without a backing file) and so issuing a flush
- * here will panic. Ensure there is a request function before issuing
- * the barrier.
- */
- if (!q->make_request_fn)
- return -ENXIO;
-
- bio = bio_alloc(gfp_mask, 0);
- bio->bi_end_io = bio_end_empty_barrier;
- bio->bi_bdev = bdev;
- if (test_bit(BLKDEV_WAIT, &flags))
- bio->bi_private = &wait;
-
- bio_get(bio);
- submit_bio(WRITE_BARRIER, bio);
- if (test_bit(BLKDEV_WAIT, &flags)) {
- wait_for_completion(&wait);
- /*
- * The driver must store the error location in ->bi_sector, if
- * it supports it. For non-stacked drivers, this should be
- * copied from blk_rq_pos(rq).
- */
- if (error_sector)
- *error_sector = bio->bi_sector;
- }
-
- if (bio_flagged(bio, BIO_EOPNOTSUPP))
- ret = -EOPNOTSUPP;
- else if (!bio_flagged(bio, BIO_UPTODATE))
- ret = -EIO;
-
- bio_put(bio);
- return ret;
-}
-EXPORT_SYMBOL(blkdev_issue_flush);
diff --git a/block/blk-core.c b/block/blk-core.c
index 500eb859886e..45141469e89e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -139,7 +139,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
{
struct request_queue *q = rq->q;
- if (&q->bar_rq != rq) {
+ if (&q->flush_rq != rq) {
if (error)
clear_bit(BIO_UPTODATE, &bio->bi_flags);
else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
@@ -163,13 +163,12 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
if (bio->bi_size == 0)
bio_endio(bio, error);
} else {
-
/*
- * Okay, this is the barrier request in progress, just
- * record the error;
+ * Okay, this is the sequenced flush request in
+ * progress, just record the error;
*/
- if (error && !q->orderr)
- q->orderr = error;
+ if (error && !q->flush_err)
+ q->flush_err = error;
}
}
@@ -531,6 +530,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
init_timer(&q->unplug_timer);
setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
INIT_LIST_HEAD(&q->timeout_list);
+ INIT_LIST_HEAD(&q->pending_flushes);
INIT_WORK(&q->unplug_work, blk_unplug_work);
kobject_init(&q->kobj, &blk_queue_ktype);
@@ -1053,22 +1053,6 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
}
EXPORT_SYMBOL(blk_insert_request);
-/*
- * add-request adds a request to the linked list.
- * queue lock is held and interrupts disabled, as we muck with the
- * request queue list.
- */
-static inline void add_request(struct request_queue *q, struct request *req)
-{
- drive_stat_acct(req, 1);
-
- /*
- * elevator indicated where it wants this request to be
- * inserted at elevator_merge time
- */
- __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
-}
-
static void part_round_stats_single(int cpu, struct hd_struct *part,
unsigned long now)
{
@@ -1217,13 +1201,16 @@ static int __make_request(struct request_queue *q, struct bio *bio)
const bool sync = !!(bio->bi_rw & REQ_SYNC);
const bool unplug = !!(bio->bi_rw & REQ_UNPLUG);
const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK;
+ int where = ELEVATOR_INSERT_SORT;
int rw_flags;
- if ((bio->bi_rw & REQ_HARDBARRIER) &&
- (q->next_ordered == QUEUE_ORDERED_NONE)) {
+ /* REQ_HARDBARRIER is no more */
+ if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER,
+ "block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
+
/*
* low level driver can indicate that it wants pages above a
* certain limit bounced to low memory (ie for highmem, or even
@@ -1233,7 +1220,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
spin_lock_irq(q->queue_lock);
- if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q))
+ if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
+ where = ELEVATOR_INSERT_FRONT;
+ goto get_rq;
+ }
+
+ if (elv_queue_empty(q))
goto get_rq;
el_ret = elv_merge(q, &req, bio);
@@ -1330,7 +1322,10 @@ get_rq:
req->cpu = blk_cpu_to_group(smp_processor_id());
if (queue_should_plug(q) && elv_queue_empty(q))
blk_plug_device(q);
- add_request(q, req);
+
+ /* insert the request into the elevator */
+ drive_stat_acct(req, 1);
+ __elv_add_request(q, req, where, 0);
out:
if (unplug || !queue_should_plug(q))
__generic_unplug_device(q);
@@ -1530,6 +1525,19 @@ static inline void __generic_make_request(struct bio *bio)
if (bio_check_eod(bio, nr_sectors))
goto end_io;
+ /*
+ * Filter flush bio's early so that make_request based
+ * drivers without flush support don't have to worry
+ * about them.
+ */
+ if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
+ bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
+ if (!nr_sectors) {
+ err = 0;
+ goto end_io;
+ }
+ }
+
if ((bio->bi_rw & REQ_DISCARD) &&
(!blk_queue_discard(q) ||
((bio->bi_rw & REQ_SECURE) &&
@@ -1794,11 +1802,11 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
static void blk_account_io_done(struct request *req)
{
/*
- * Account IO completion. bar_rq isn't accounted as a normal
- * IO on queueing nor completion. Accounting the containing
- * request is enough.
+ * Account IO completion. flush_rq isn't accounted as a
+ * normal IO on queueing nor completion. Accounting the
+ * containing request is enough.
*/
- if (blk_do_io_stat(req) && req != &req->q->bar_rq) {
+ if (blk_do_io_stat(req) && req != &req->q->flush_rq) {
unsigned long duration = jiffies - req->start_time;
const int rw = rq_data_dir(req);
struct hd_struct *part;
@@ -2523,9 +2531,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
static void __blk_rq_prep_clone(struct request *dst, struct request *src)
{
dst->cpu = src->cpu;
- dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE);
- if (src->cmd_flags & REQ_DISCARD)
- dst->cmd_flags |= REQ_DISCARD;
+ dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
dst->cmd_type = src->cmd_type;
dst->__sector = blk_rq_pos(src);
dst->__data_len = blk_rq_bytes(src);
diff --git a/block/blk-flush.c b/block/blk-flush.c
new file mode 100644
index 000000000000..54b123d6563e
--- /dev/null
+++ b/block/blk-flush.c
@@ -0,0 +1,262 @@
+/*
+ * Functions to sequence FLUSH and FUA writes.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/gfp.h>
+
+#include "blk.h"
+
+/* FLUSH/FUA sequences */
+enum {
+ QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */
+ QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */
+ QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */
+ QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */
+ QUEUE_FSEQ_DONE = (1 << 4),
+};
+
+static struct request *queue_next_fseq(struct request_queue *q);
+
+unsigned blk_flush_cur_seq(struct request_queue *q)
+{
+ if (!q->flush_seq)
+ return 0;
+ return 1 << ffz(q->flush_seq);
+}
+
+static struct request *blk_flush_complete_seq(struct request_queue *q,
+ unsigned seq, int error)
+{
+ struct request *next_rq = NULL;
+
+ if (error && !q->flush_err)
+ q->flush_err = error;
+
+ BUG_ON(q->flush_seq & seq);
+ q->flush_seq |= seq;
+
+ if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) {
+ /* not complete yet, queue the next flush sequence */
+ next_rq = queue_next_fseq(q);
+ } else {
+ /* complete this flush request */
+ __blk_end_request_all(q->orig_flush_rq, q->flush_err);
+ q->orig_flush_rq = NULL;
+ q->flush_seq = 0;
+
+ /* dispatch the next flush if there's one */
+ if (!list_empty(&q->pending_flushes)) {
+ next_rq = list_entry_rq(q->pending_flushes.next);
+ list_move(&next_rq->queuelist, &q->queue_head);
+ }
+ }
+ return next_rq;
+}
+
+static void blk_flush_complete_seq_end_io(struct request_queue *q,
+ unsigned seq, int error)
+{
+ bool was_empty = elv_queue_empty(q);
+ struct request *next_rq;
+
+ next_rq = blk_flush_complete_seq(q, seq, error);
+
+ /*
+ * Moving a request silently to empty queue_head may stall the
+ * queue. Kick the queue in those cases.
+ */
+ if (was_empty && next_rq)
+ __blk_run_queue(q);
+}
+
+static void pre_flush_end_io(struct request *rq, int error)
+{
+ elv_completed_request(rq->q, rq);
+ blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error);
+}
+
+static void flush_data_end_io(struct request *rq, int error)
+{
+ elv_completed_request(rq->q, rq);
+ blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error);
+}
+
+static void post_flush_end_io(struct request *rq, int error)
+{
+ elv_completed_request(rq->q, rq);
+ blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
+}
+
+static void init_flush_request(struct request *rq, struct gendisk *disk)
+{
+ rq->cmd_type = REQ_TYPE_FS;
+ rq->cmd_flags = WRITE_FLUSH;
+ rq->rq_disk = disk;
+}
+
+static struct request *queue_next_fseq(struct request_queue *q)
+{
+ struct request *orig_rq = q->orig_flush_rq;
+ struct request *rq = &q->flush_rq;
+
+ blk_rq_init(q, rq);
+
+ switch (blk_flush_cur_seq(q)) {
+ case QUEUE_FSEQ_PREFLUSH:
+ init_flush_request(rq, orig_rq->rq_disk);
+ rq->end_io = pre_flush_end_io;
+ break;
+ case QUEUE_FSEQ_DATA:
+ init_request_from_bio(rq, orig_rq->bio);
+ /*
+ * orig_rq->rq_disk may be different from
+ * bio->bi_bdev->bd_disk if orig_rq got here through
+ * remapping drivers. Make sure rq->rq_disk points
+ * to the same one as orig_rq.
+ */
+ rq->rq_disk = orig_rq->rq_disk;
+ rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA);
+ rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA);
+ rq->end_io = flush_data_end_io;
+ break;
+ case QUEUE_FSEQ_POSTFLUSH:
+ init_flush_request(rq, orig_rq->rq_disk);
+ rq->end_io = post_flush_end_io;
+ break;
+ default:
+ BUG();
+ }
+
+ elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+ return rq;
+}
+
+struct request *blk_do_flush(struct request_queue *q, struct request *rq)
+{
+ unsigned int fflags = q->flush_flags; /* may change, cache it */
+ bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA;
+ bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH);
+ bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA);
+ unsigned skip = 0;
+
+ /*
+ * Special case. If there's data but flush is not necessary,
+ * the request can be issued directly.
+ *
+ * Flush w/o data should be able to be issued directly too but
+ * currently some drivers assume that rq->bio contains
+ * non-zero data if it isn't NULL and empty FLUSH requests
+ * getting here usually have bio's without data.
+ */
+ if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) {
+ rq->cmd_flags &= ~REQ_FLUSH;
+ if (!has_fua)
+ rq->cmd_flags &= ~REQ_FUA;
+ return rq;
+ }
+
+ /*
+ * Sequenced flushes can't be processed in parallel. If
+ * another one is already in progress, queue for later
+ * processing.
+ */
+ if (q->flush_seq) {
+ list_move_tail(&rq->queuelist, &q->pending_flushes);
+ return NULL;
+ }
+
+ /*
+ * Start a new flush sequence
+ */
+ q->flush_err = 0;
+ q->flush_seq |= QUEUE_FSEQ_STARTED;
+
+ /* adjust FLUSH/FUA of the original request and stash it away */
+ rq->cmd_flags &= ~REQ_FLUSH;
+ if (!has_fua)
+ rq->cmd_flags &= ~REQ_FUA;
+ blk_dequeue_request(rq);
+ q->orig_flush_rq = rq;
+
+ /* skip unneded sequences and return the first one */
+ if (!do_preflush)
+ skip |= QUEUE_FSEQ_PREFLUSH;
+ if (!blk_rq_sectors(rq))
+ skip |= QUEUE_FSEQ_DATA;
+ if (!do_postflush)
+ skip |= QUEUE_FSEQ_POSTFLUSH;
+ return blk_flush_complete_seq(q, skip, 0);
+}
+
+static void bio_end_flush(struct bio *bio, int err)
+{
+ if (err)
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ if (bio->bi_private)
+ complete(bio->bi_private);
+ bio_put(bio);
+}
+
+/**
+ * blkdev_issue_flush - queue a flush
+ * @bdev: blockdev to issue flush for
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ * @error_sector: error sector
+ *
+ * Description:
+ * Issue a flush for the block device in question. Caller can supply
+ * room for storing the error offset in case of a flush error, if they
+ * wish to. If WAIT flag is not passed then caller may check only what
+ * request was pushed in some internal queue for later handling.
+ */
+int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
+ sector_t *error_sector)
+{
+ DECLARE_COMPLETION_ONSTACK(wait);
+ struct request_queue *q;
+ struct bio *bio;
+ int ret = 0;
+
+ if (bdev->bd_disk == NULL)
+ return -ENXIO;
+
+ q = bdev_get_queue(bdev);
+ if (!q)
+ return -ENXIO;
+
+ /*
+ * some block devices may not have their queue correctly set up here
+ * (e.g. loop device without a backing file) and so issuing a flush
+ * here will panic. Ensure there is a request function before issuing
+ * the flush.
+ */
+ if (!q->make_request_fn)
+ return -ENXIO;
+
+ bio = bio_alloc(gfp_mask, 0);
+ bio->bi_end_io = bio_end_flush;
+ bio->bi_bdev = bdev;
+ bio->bi_private = &wait;
+
+ bio_get(bio);
+ submit_bio(WRITE_FLUSH, bio);
+ wait_for_completion(&wait);
+
+ /*
+ * The driver must store the error location in ->bi_sector, if
+ * it supports it. For non-stacked drivers, this should be
+ * copied from blk_rq_pos(rq).
+ */
+ if (error_sector)
+ *error_sector = bio->bi_sector;
+
+ if (!bio_flagged(bio, BIO_UPTODATE))
+ ret = -EIO;
+
+ bio_put(bio);
+ return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_flush);
diff --git a/block/blk-lib.c b/block/blk-lib.c
index c392029a104e..1a320d2406b0 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -39,8 +39,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
{
DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q = bdev_get_queue(bdev);
- int type = flags & BLKDEV_IFL_BARRIER ?
- DISCARD_BARRIER : DISCARD_NOBARRIER;
+ int type = REQ_WRITE | REQ_DISCARD;
unsigned int max_discard_sectors;
struct bio *bio;
int ret = 0;
@@ -62,10 +61,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
max_discard_sectors &= ~(disc_sects - 1);
}
- if (flags & BLKDEV_IFL_SECURE) {
+ if (flags & BLKDEV_DISCARD_SECURE) {
if (!blk_queue_secdiscard(q))
return -EOPNOTSUPP;
- type |= DISCARD_SECURE;
+ type |= REQ_SECURE;
}
while (nr_sects && !ret) {
@@ -78,8 +77,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
bio->bi_sector = sector;
bio->bi_end_io = blkdev_discard_end_io;
bio->bi_bdev = bdev;
- if (flags & BLKDEV_IFL_WAIT)
- bio->bi_private = &wait;
+ bio->bi_private = &wait;
if (nr_sects > max_discard_sectors) {
bio->bi_size = max_discard_sectors << 9;
@@ -93,8 +91,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
bio_get(bio);
submit_bio(type, bio);
- if (flags & BLKDEV_IFL_WAIT)
- wait_for_completion(&wait);
+ wait_for_completion(&wait);
if (bio_flagged(bio, BIO_EOPNOTSUPP))
ret = -EOPNOTSUPP;
@@ -140,7 +137,6 @@ static void bio_batch_end_io(struct bio *bio, int err)
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
- * @flags: BLKDEV_IFL_* flags to control behaviour
*
* Description:
* Generate and issue number of bios with zerofiled pages.
@@ -149,7 +145,7 @@ static void bio_batch_end_io(struct bio *bio, int err)
*/
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+ sector_t nr_sects, gfp_t gfp_mask)
{
int ret;
struct bio *bio;
@@ -162,12 +158,6 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
bb.wait = &wait;
bb.end_io = NULL;
- if (flags & BLKDEV_IFL_BARRIER) {
- /* issue async barrier before the data */
- ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0);
- if (ret)
- return ret;
- }
submit:
ret = 0;
while (nr_sects != 0) {
@@ -181,8 +171,7 @@ submit:
bio->bi_sector = sector;
bio->bi_bdev = bdev;
bio->bi_end_io = bio_batch_end_io;
- if (flags & BLKDEV_IFL_WAIT)
- bio->bi_private = &bb;
+ bio->bi_private = &bb;
while (nr_sects != 0) {
sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
@@ -199,18 +188,10 @@ submit:
issued++;
submit_bio(WRITE, bio);
}
- /*
- * When all data bios are in flight. Send final barrier if requeted.
- */
- if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER)
- ret = blkdev_issue_flush(bdev, gfp_mask, NULL,
- flags & BLKDEV_IFL_WAIT);
-
- if (flags & BLKDEV_IFL_WAIT)
- /* Wait for bios in-flight */
- while ( issued != atomic_read(&bb.done))
- wait_for_completion(&wait);
+ /* Wait for bios in-flight */
+ while (issued != atomic_read(&bb.done))
+ wait_for_completion(&wait);
if (!test_bit(BIO_UPTODATE, &bb.flags))
/* One of bios in the batch was completed with error.*/
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 315b88c8cbbb..701859fb9647 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -792,6 +792,26 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask)
}
EXPORT_SYMBOL(blk_queue_update_dma_alignment);
+/**
+ * blk_queue_flush - configure queue's cache flush capability
+ * @q: the request queue for the device
+ * @flush: 0, REQ_FLUSH or REQ_FLUSH | REQ_FUA
+ *
+ * Tell block layer cache flush capability of @q. If it supports
+ * flushing, REQ_FLUSH should be set. If it supports bypassing
+ * write cache for individual writes, REQ_FUA should be set.
+ */
+void blk_queue_flush(struct request_queue *q, unsigned int flush)
+{
+ WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA));
+
+ if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA)))
+ flush &= ~REQ_FUA;
+
+ q->flush_flags = flush & (REQ_FLUSH | REQ_FUA);
+}
+EXPORT_SYMBOL_GPL(blk_queue_flush);
+
static int __init blk_settings_init(void)
{
blk_max_low_pfn = max_low_pfn - 1;
diff --git a/block/blk.h b/block/blk.h
index f864012ec300..1e675e5ade02 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -51,6 +51,8 @@ static inline void blk_clear_rq_complete(struct request *rq)
*/
#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
+struct request *blk_do_flush(struct request_queue *q, struct request *rq);
+
static inline struct request *__elv_next_request(struct request_queue *q)
{
struct request *rq;
@@ -58,7 +60,11 @@ static inline struct request *__elv_next_request(struct request_queue *q)
while (1) {
while (!list_empty(&q->queue_head)) {
rq = list_entry_rq(q->queue_head.next);
- if (blk_do_ordered(q, &rq))
+ if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) ||
+ rq == &q->flush_rq)
+ return rq;
+ rq = blk_do_flush(q, rq);
+ if (rq)
return rq;
}
diff --git a/block/elevator.c b/block/elevator.c
index 4e11559aa2b0..282e8308f7e2 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -617,8 +617,6 @@ void elv_quiesce_end(struct request_queue *q)
void elv_insert(struct request_queue *q, struct request *rq, int where)
{
- struct list_head *pos;
- unsigned ordseq;
int unplug_it = 1;
trace_block_rq_insert(q, rq);
@@ -626,9 +624,16 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
rq->q = q;
switch (where) {
+ case ELEVATOR_INSERT_REQUEUE:
+ /*
+ * Most requeues happen because of a busy condition,
+ * don't force unplug of the queue for that case.
+ * Clear unplug_it and fall through.
+ */
+ unplug_it = 0;
+
case ELEVATOR_INSERT_FRONT:
rq->cmd_flags |= REQ_SOFTBARRIER;
-
list_add(&rq->queuelist, &q->queue_head);
break;
@@ -668,36 +673,6 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
q->elevator->ops->elevator_add_req_fn(q, rq);
break;
- case ELEVATOR_INSERT_REQUEUE:
- /*
- * If ordered flush isn't in progress, we do front
- * insertion; otherwise, requests should be requeued
- * in ordseq order.
- */
- rq->cmd_flags |= REQ_SOFTBARRIER;
-
- /*
- * Most requeues happen because of a busy condition,
- * don't force unplug of the queue for that case.
- */
- unplug_it = 0;
-
- if (q->ordseq == 0) {
- list_add(&rq->queuelist, &q->queue_head);
- break;
- }
-
- ordseq = blk_ordered_req_seq(rq);
-
- list_for_each(pos, &q->queue_head) {
- struct request *pos_rq = list_entry_rq(pos);
- if (ordseq <= blk_ordered_req_seq(pos_rq))
- break;
- }
-
- list_add_tail(&rq->queuelist, pos);
- break;
-
default:
printk(KERN_ERR "%s: bad insertion point %d\n",
__func__, where);
@@ -716,26 +691,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
void __elv_add_request(struct request_queue *q, struct request *rq, int where,
int plug)
{
- if (q->ordcolor)
- rq->cmd_flags |= REQ_ORDERED_COLOR;
-
if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
- /*
- * toggle ordered color
- */
- if (rq->cmd_flags & REQ_HARDBARRIER)
- q->ordcolor ^= 1;
-
- /*
- * barriers implicitly indicate back insertion
- */
- if (where == ELEVATOR_INSERT_SORT)
- where = ELEVATOR_INSERT_BACK;
-
- /*
- * this request is scheduling boundary, update
- * end_sector
- */
+ /* barriers are scheduling boundary, update end_sector */
if (rq->cmd_type == REQ_TYPE_FS ||
(rq->cmd_flags & REQ_DISCARD)) {
q->end_sector = rq_end_sector(rq);
@@ -855,24 +812,6 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
e->ops->elevator_completed_req_fn)
e->ops->elevator_completed_req_fn(q, rq);
}
-
- /*
- * Check if the queue is waiting for fs requests to be
- * drained for flush sequence.
- */
- if (unlikely(q->ordseq)) {
- struct request *next = NULL;
-
- if (!list_empty(&q->queue_head))
- next = list_entry_rq(q->queue_head.next);
-
- if (!queue_in_flight(q) &&
- blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
- (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
- blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
- __blk_run_queue(q);
- }
- }
}
#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
diff --git a/block/ioctl.c b/block/ioctl.c
index 2c15fe0912c4..d724ceb1d465 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -116,7 +116,7 @@ static int blkdev_reread_part(struct block_device *bdev)
static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
uint64_t len, int secure)
{
- unsigned long flags = BLKDEV_IFL_WAIT;
+ unsigned long flags = 0;
if (start & 511)
return -EINVAL;
@@ -128,7 +128,7 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
if (start + len > (bdev->bd_inode->i_size >> 9))
return -EINVAL;
if (secure)
- flags |= BLKDEV_IFL_SECURE;
+ flags |= BLKDEV_DISCARD_SECURE;
return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags);
}