summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorMartin K. Petersen <martin.petersen@oracle.com>2012-09-18 12:19:27 -0400
committerJens Axboe <axboe@kernel.dk>2012-09-20 14:31:45 +0200
commit4363ac7c13a9a4b763c6e8d9fdbfc2468f3b8ca4 (patch)
tree010b05699eb9544b9cdfe5e1b3affdaea80132e7 /block
parentf31dc1cd490539e2b62a126bc4dc2495b165d772 (diff)
downloadlwn-4363ac7c13a9a4b763c6e8d9fdbfc2468f3b8ca4.tar.gz
lwn-4363ac7c13a9a4b763c6e8d9fdbfc2468f3b8ca4.zip
block: Implement support for WRITE SAME
The WRITE SAME command supported on some SCSI devices allows the same block to be efficiently replicated throughout a block range. Only a single logical block is transferred from the host and the storage device writes the same data to all blocks described by the I/O. This patch implements support for WRITE SAME in the block layer. The blkdev_issue_write_same() function can be used by filesystems and block drivers to replicate a buffer across a block range. This can be used to efficiently initialize software RAID devices, etc. Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> Acked-by: Mike Snitzer <snitzer@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
-rw-r--r--block/blk-core.c14
-rw-r--r--block/blk-lib.c74
-rw-r--r--block/blk-merge.c9
-rw-r--r--block/blk-settings.c16
-rw-r--r--block/blk-sysfs.c13
5 files changed, 124 insertions, 2 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 33eded00c5b1..3b080541098e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1704,6 +1704,11 @@ generic_make_request_checks(struct bio *bio)
goto end_io;
}
+ if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
+ err = -EOPNOTSUPP;
+ goto end_io;
+ }
+
/*
* Various block parts want %current->io_context and lazy ioc
* allocation ends up trading a lot of pain for a small amount of
@@ -1809,8 +1814,6 @@ EXPORT_SYMBOL(generic_make_request);
*/
void submit_bio(int rw, struct bio *bio)
{
- int count = bio_sectors(bio);
-
bio->bi_rw |= rw;
/*
@@ -1818,6 +1821,13 @@ void submit_bio(int rw, struct bio *bio)
* go through the normal accounting stuff before submission.
*/
if (bio_has_data(bio)) {
+ unsigned int count;
+
+ if (unlikely(rw & REQ_WRITE_SAME))
+ count = bdev_logical_block_size(bio->bi_bdev) >> 9;
+ else
+ count = bio_sectors(bio);
+
if (rw & WRITE) {
count_vm_events(PGPGOUT, count);
} else {
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 19cc761cacb2..a062543c58ac 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -130,6 +130,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
EXPORT_SYMBOL(blkdev_issue_discard);
/**
+ * blkdev_issue_write_same - queue a write same operation
+ * @bdev: target blockdev
+ * @sector: start sector
+ * @nr_sects: number of sectors to write
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ * @page: page containing data to write
+ *
+ * Description:
+ * Issue a write same request for the sectors in question.
+ */
+int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask,
+ struct page *page)
+{
+ DECLARE_COMPLETION_ONSTACK(wait);
+ struct request_queue *q = bdev_get_queue(bdev);
+ unsigned int max_write_same_sectors;
+ struct bio_batch bb;
+ struct bio *bio;
+ int ret = 0;
+
+ if (!q)
+ return -ENXIO;
+
+ max_write_same_sectors = q->limits.max_write_same_sectors;
+
+ if (max_write_same_sectors == 0)
+ return -EOPNOTSUPP;
+
+ atomic_set(&bb.done, 1);
+ bb.flags = 1 << BIO_UPTODATE;
+ bb.wait = &wait;
+
+ while (nr_sects) {
+ bio = bio_alloc(gfp_mask, 1);
+ if (!bio) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ bio->bi_sector = sector;
+ bio->bi_end_io = bio_batch_end_io;
+ bio->bi_bdev = bdev;
+ bio->bi_private = &bb;
+ bio->bi_vcnt = 1;
+ bio->bi_io_vec->bv_page = page;
+ bio->bi_io_vec->bv_offset = 0;
+ bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
+
+ if (nr_sects > max_write_same_sectors) {
+ bio->bi_size = max_write_same_sectors << 9;
+ nr_sects -= max_write_same_sectors;
+ sector += max_write_same_sectors;
+ } else {
+ bio->bi_size = nr_sects << 9;
+ nr_sects = 0;
+ }
+
+ atomic_inc(&bb.done);
+ submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
+ }
+
+ /* Wait for bios in-flight */
+ if (!atomic_dec_and_test(&bb.done))
+ wait_for_completion(&wait);
+
+ if (!test_bit(BIO_UPTODATE, &bb.flags))
+ ret = -ENOTSUPP;
+
+ return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_write_same);
+
+/**
* blkdev_issue_zeroout - generate number of zero filed write bios
* @bdev: blockdev to issue
* @sector: start sector
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 642b862608a1..936a110de0b9 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -419,6 +419,10 @@ static int attempt_merge(struct request_queue *q, struct request *req,
|| next->special)
return 0;
+ if (req->cmd_flags & REQ_WRITE_SAME &&
+ !blk_write_same_mergeable(req->bio, next->bio))
+ return 0;
+
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
@@ -518,6 +522,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (bio_integrity(bio) != blk_integrity_rq(rq))
return false;
+ /* must be using the same buffer */
+ if (rq->cmd_flags & REQ_WRITE_SAME &&
+ !blk_write_same_mergeable(rq->bio, bio))
+ return false;
+
return true;
}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 565a6786032f..779bb7646bcd 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+ lim->max_write_same_sectors = 0;
lim->max_discard_sectors = 0;
lim->discard_granularity = 0;
lim->discard_alignment = 0;
@@ -144,6 +145,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_segments = USHRT_MAX;
lim->max_hw_sectors = UINT_MAX;
lim->max_sectors = UINT_MAX;
+ lim->max_write_same_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
@@ -286,6 +288,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
EXPORT_SYMBOL(blk_queue_max_discard_sectors);
/**
+ * blk_queue_max_write_same_sectors - set max sectors for a single write same
+ * @q: the request queue for the device
+ * @max_write_same_sectors: maximum number of sectors to write per command
+ **/
+void blk_queue_max_write_same_sectors(struct request_queue *q,
+ unsigned int max_write_same_sectors)
+{
+ q->limits.max_write_same_sectors = max_write_same_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_write_same_sectors);
+
+/**
* blk_queue_max_segments - set max hw segments for a request for this queue
* @q: the request queue for the device
* @max_segments: max number of segments
@@ -510,6 +524,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
+ t->max_write_same_sectors = min(t->max_write_same_sectors,
+ b->max_write_same_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index ea51d827a0bb..247dbfd42621 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -180,6 +180,13 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
return queue_var_show(queue_discard_zeroes_data(q), page);
}
+static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
+{
+ return sprintf(page, "%llu\n",
+ (unsigned long long)q->limits.max_write_same_sectors << 9);
+}
+
+
static ssize_t
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
{
@@ -385,6 +392,11 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
.show = queue_discard_zeroes_data_show,
};
+static struct queue_sysfs_entry queue_write_same_max_entry = {
+ .attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
+ .show = queue_write_same_max_show,
+};
+
static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_nonrot,
@@ -432,6 +444,7 @@ static struct attribute *default_attrs[] = {
&queue_discard_granularity_entry.attr,
&queue_discard_max_entry.attr,
&queue_discard_zeroes_data_entry.attr,
+ &queue_write_same_max_entry.attr,
&queue_nonrot_entry.attr,
&queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,