summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig12
-rw-r--r--block/Makefile4
-rw-r--r--block/as-iosched.c18
-rw-r--r--block/blk-core.c33
-rw-r--r--block/blk-exec.c6
-rw-r--r--block/blk-integrity.c381
-rw-r--r--block/blk-map.c6
-rw-r--r--block/blk-merge.c3
-rw-r--r--block/blk-settings.c24
-rw-r--r--block/blk.h8
-rw-r--r--block/blktrace.c45
-rw-r--r--block/bsg.c57
-rw-r--r--block/cfq-iosched.c83
-rw-r--r--block/cmd-filter.c334
-rw-r--r--block/elevator.c8
-rw-r--r--block/genhd.c12
-rw-r--r--block/scsi_ioctl.c121
17 files changed, 981 insertions, 174 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 3e97f2bc446f..1ab7c15c8d7a 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -81,6 +81,18 @@ config BLK_DEV_BSG
If unsure, say N.
+config BLK_DEV_INTEGRITY
+ bool "Block layer data integrity support"
+ ---help---
+ Some storage devices allow extra information to be
+ stored/retrieved to help protect the data. The block layer
+ data integrity option provides hooks which can be used by
+ filesystems to ensure better data integrity.
+
+ Say yes here if you have a storage device that provides the
+ T10/SCSI Data Integrity Field or the T13/ATA External Path
+ Protection. If in doubt, say N.
+
endif # BLOCK
config BLOCK_COMPAT
diff --git a/block/Makefile b/block/Makefile
index 5a43c7d79594..208000b0750d 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -4,7 +4,8 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
- blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o
+ blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
+ cmd-filter.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
@@ -14,3 +15,4 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
+obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 743f33a01a07..9735acb5b4f5 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -151,6 +151,7 @@ enum arq_state {
static DEFINE_PER_CPU(unsigned long, ioc_count);
static struct completion *ioc_gone;
+static DEFINE_SPINLOCK(ioc_gone_lock);
static void as_move_to_dispatch(struct as_data *ad, struct request *rq);
static void as_antic_stop(struct as_data *ad);
@@ -164,8 +165,19 @@ static void free_as_io_context(struct as_io_context *aic)
{
kfree(aic);
elv_ioc_count_dec(ioc_count);
- if (ioc_gone && !elv_ioc_count_read(ioc_count))
- complete(ioc_gone);
+ if (ioc_gone) {
+ /*
+ * AS scheduler is exiting, grab exit lock and check
+ * the pending io context count. If it hits zero,
+ * complete ioc_gone and set it back to NULL.
+ */
+ spin_lock(&ioc_gone_lock);
+ if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
+ complete(ioc_gone);
+ ioc_gone = NULL;
+ }
+ spin_unlock(&ioc_gone_lock);
+ }
}
static void as_trim(struct io_context *ioc)
@@ -1493,7 +1505,7 @@ static void __exit as_exit(void)
/* ioc_gone's update must be visible before reading ioc_count */
smp_wmb();
if (elv_ioc_count_read(ioc_count))
- wait_for_completion(ioc_gone);
+ wait_for_completion(&all_gone);
synchronize_rcu();
}
diff --git a/block/blk-core.c b/block/blk-core.c
index 1905aaba49fb..fef79ccb2a11 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -143,6 +143,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
bio->bi_size -= nbytes;
bio->bi_sector += (nbytes >> 9);
+
+ if (bio_integrity(bio))
+ bio_integrity_advance(bio, nbytes);
+
if (bio->bi_size == 0)
bio_endio(bio, error);
} else {
@@ -201,8 +205,7 @@ void blk_plug_device(struct request_queue *q)
if (blk_queue_stopped(q))
return;
- if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
- __set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
+ if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
}
@@ -217,10 +220,9 @@ int blk_remove_plug(struct request_queue *q)
{
WARN_ON(!irqs_disabled());
- if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
+ if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
return 0;
- queue_flag_clear(QUEUE_FLAG_PLUGGED, q);
del_timer(&q->unplug_timer);
return 1;
}
@@ -324,8 +326,7 @@ void blk_start_queue(struct request_queue *q)
* one level of recursion is ok and is much faster than kicking
* the unplug handling
*/
- if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
- queue_flag_set(QUEUE_FLAG_REENTER, q);
+ if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
q->request_fn(q);
queue_flag_clear(QUEUE_FLAG_REENTER, q);
} else {
@@ -390,8 +391,7 @@ void __blk_run_queue(struct request_queue *q)
* handling reinvoke the handler shortly if we already got there.
*/
if (!elv_queue_empty(q)) {
- if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
- queue_flag_set(QUEUE_FLAG_REENTER, q);
+ if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
q->request_fn(q);
queue_flag_clear(QUEUE_FLAG_REENTER, q);
} else {
@@ -1042,15 +1042,9 @@ void blk_put_request(struct request *req)
unsigned long flags;
struct request_queue *q = req->q;
- /*
- * Gee, IDE calls in w/ NULL q. Fix IDE and remove the
- * following if (q) test.
- */
- if (q) {
- spin_lock_irqsave(q->queue_lock, flags);
- __blk_put_request(q, req);
- spin_unlock_irqrestore(q->queue_lock, flags);
- }
+ spin_lock_irqsave(q->queue_lock, flags);
+ __blk_put_request(q, req);
+ spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_put_request);
@@ -1381,6 +1375,9 @@ end_io:
*/
blk_partition_remap(bio);
+ if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
+ goto end_io;
+
if (old_sector != -1)
blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
old_sector);
@@ -2045,7 +2042,7 @@ int __init blk_dev_init(void)
for_each_possible_cpu(i)
INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
- open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
+ open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
register_hotcpu_notifier(&blk_cpu_notifier);
return 0;
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 391dd6224890..9bceff7674f2 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -18,7 +18,7 @@
* @rq: request to complete
* @error: end io status of the request
*/
-void blk_end_sync_rq(struct request *rq, int error)
+static void blk_end_sync_rq(struct request *rq, int error)
{
struct completion *waiting = rq->end_io_data;
@@ -31,7 +31,6 @@ void blk_end_sync_rq(struct request *rq, int error)
*/
complete(waiting);
}
-EXPORT_SYMBOL(blk_end_sync_rq);
/**
* blk_execute_rq_nowait - insert a request into queue for execution
@@ -58,6 +57,9 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
spin_lock_irq(q->queue_lock);
__elv_add_request(q, rq, where, 1);
__generic_unplug_device(q);
+ /* the queue is stopped so it won't be plugged+unplugged */
+ if (blk_pm_resume_request(rq))
+ q->request_fn(q);
spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
new file mode 100644
index 000000000000..3f1a8478cc38
--- /dev/null
+++ b/block/blk-integrity.c
@@ -0,0 +1,381 @@
+/*
+ * blk-integrity.c - Block layer data integrity extensions
+ *
+ * Copyright (C) 2007, 2008 Oracle Corporation
+ * Written by: Martin K. Petersen <martin.petersen@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/mempool.h>
+#include <linux/bio.h>
+#include <linux/scatterlist.h>
+
+#include "blk.h"
+
+static struct kmem_cache *integrity_cachep;
+
+/**
+ * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements
+ * @rq: request with integrity metadata attached
+ *
+ * Description: Returns the number of elements required in a
+ * scatterlist corresponding to the integrity metadata in a request.
+ */
+int blk_rq_count_integrity_sg(struct request *rq)
+{
+ struct bio_vec *iv, *ivprv;
+ struct req_iterator iter;
+ unsigned int segments;
+
+ ivprv = NULL;
+ segments = 0;
+
+ rq_for_each_integrity_segment(iv, rq, iter) {
+
+ if (!ivprv || !BIOVEC_PHYS_MERGEABLE(ivprv, iv))
+ segments++;
+
+ ivprv = iv;
+ }
+
+ return segments;
+}
+EXPORT_SYMBOL(blk_rq_count_integrity_sg);
+
+/**
+ * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
+ * @rq: request with integrity metadata attached
+ * @sglist: target scatterlist
+ *
+ * Description: Map the integrity vectors in request into a
+ * scatterlist. The scatterlist must be big enough to hold all
+ * elements. I.e. sized using blk_rq_count_integrity_sg().
+ */
+int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
+{
+ struct bio_vec *iv, *ivprv;
+ struct req_iterator iter;
+ struct scatterlist *sg;
+ unsigned int segments;
+
+ ivprv = NULL;
+ sg = NULL;
+ segments = 0;
+
+ rq_for_each_integrity_segment(iv, rq, iter) {
+
+ if (ivprv) {
+ if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv))
+ goto new_segment;
+
+ sg->length += iv->bv_len;
+ } else {
+new_segment:
+ if (!sg)
+ sg = sglist;
+ else {
+ sg->page_link &= ~0x02;
+ sg = sg_next(sg);
+ }
+
+ sg_set_page(sg, iv->bv_page, iv->bv_len, iv->bv_offset);
+ segments++;
+ }
+
+ ivprv = iv;
+ }
+
+ if (sg)
+ sg_mark_end(sg);
+
+ return segments;
+}
+EXPORT_SYMBOL(blk_rq_map_integrity_sg);
+
+/**
+ * blk_integrity_compare - Compare integrity profile of two block devices
+ * @b1: Device to compare
+ * @b2: Device to compare
+ *
+ * Description: Meta-devices like DM and MD need to verify that all
+ * sub-devices use the same integrity format before advertising to
+ * upper layers that they can send/receive integrity metadata. This
+ * function can be used to check whether two block devices have
+ * compatible integrity formats.
+ */
+int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2)
+{
+ struct blk_integrity *b1 = bd1->bd_disk->integrity;
+ struct blk_integrity *b2 = bd2->bd_disk->integrity;
+
+ BUG_ON(bd1->bd_disk == NULL);
+ BUG_ON(bd2->bd_disk == NULL);
+
+ if (!b1 || !b2)
+ return 0;
+
+ if (b1->sector_size != b2->sector_size) {
+ printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__,
+ bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+ b1->sector_size, b2->sector_size);
+ return -1;
+ }
+
+ if (b1->tuple_size != b2->tuple_size) {
+ printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__,
+ bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+ b1->tuple_size, b2->tuple_size);
+ return -1;
+ }
+
+ if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
+ printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__,
+ bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+ b1->tag_size, b2->tag_size);
+ return -1;
+ }
+
+ if (strcmp(b1->name, b2->name)) {
+ printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__,
+ bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+ b1->name, b2->name);
+ return -1;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(blk_integrity_compare);
+
+struct integrity_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct blk_integrity *, char *);
+ ssize_t (*store)(struct blk_integrity *, const char *, size_t);
+};
+
+static ssize_t integrity_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *page)
+{
+ struct blk_integrity *bi =
+ container_of(kobj, struct blk_integrity, kobj);
+ struct integrity_sysfs_entry *entry =
+ container_of(attr, struct integrity_sysfs_entry, attr);
+
+ return entry->show(bi, page);
+}
+
+static ssize_t integrity_attr_store(struct kobject *kobj,
+ struct attribute *attr, const char *page,
+ size_t count)
+{
+ struct blk_integrity *bi =
+ container_of(kobj, struct blk_integrity, kobj);
+ struct integrity_sysfs_entry *entry =
+ container_of(attr, struct integrity_sysfs_entry, attr);
+ ssize_t ret = 0;
+
+ if (entry->store)
+ ret = entry->store(bi, page, count);
+
+ return ret;
+}
+
+static ssize_t integrity_format_show(struct blk_integrity *bi, char *page)
+{
+ if (bi != NULL && bi->name != NULL)
+ return sprintf(page, "%s\n", bi->name);
+ else
+ return sprintf(page, "none\n");
+}
+
+static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page)
+{
+ if (bi != NULL)
+ return sprintf(page, "%u\n", bi->tag_size);
+ else
+ return sprintf(page, "0\n");
+}
+
+static ssize_t integrity_read_store(struct blk_integrity *bi,
+ const char *page, size_t count)
+{
+ char *p = (char *) page;
+ unsigned long val = simple_strtoul(p, &p, 10);
+
+ if (val)
+ bi->flags |= INTEGRITY_FLAG_READ;
+ else
+ bi->flags &= ~INTEGRITY_FLAG_READ;
+
+ return count;
+}
+
+static ssize_t integrity_read_show(struct blk_integrity *bi, char *page)
+{
+ return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_READ) != 0);
+}
+
+static ssize_t integrity_write_store(struct blk_integrity *bi,
+ const char *page, size_t count)
+{
+ char *p = (char *) page;
+ unsigned long val = simple_strtoul(p, &p, 10);
+
+ if (val)
+ bi->flags |= INTEGRITY_FLAG_WRITE;
+ else
+ bi->flags &= ~INTEGRITY_FLAG_WRITE;
+
+ return count;
+}
+
+static ssize_t integrity_write_show(struct blk_integrity *bi, char *page)
+{
+ return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_WRITE) != 0);
+}
+
+static struct integrity_sysfs_entry integrity_format_entry = {
+ .attr = { .name = "format", .mode = S_IRUGO },
+ .show = integrity_format_show,
+};
+
+static struct integrity_sysfs_entry integrity_tag_size_entry = {
+ .attr = { .name = "tag_size", .mode = S_IRUGO },
+ .show = integrity_tag_size_show,
+};
+
+static struct integrity_sysfs_entry integrity_read_entry = {
+ .attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
+ .show = integrity_read_show,
+ .store = integrity_read_store,
+};
+
+static struct integrity_sysfs_entry integrity_write_entry = {
+ .attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
+ .show = integrity_write_show,
+ .store = integrity_write_store,
+};
+
+static struct attribute *integrity_attrs[] = {
+ &integrity_format_entry.attr,
+ &integrity_tag_size_entry.attr,
+ &integrity_read_entry.attr,
+ &integrity_write_entry.attr,
+ NULL,
+};
+
+static struct sysfs_ops integrity_ops = {
+ .show = &integrity_attr_show,
+ .store = &integrity_attr_store,
+};
+
+static int __init blk_dev_integrity_init(void)
+{
+ integrity_cachep = kmem_cache_create("blkdev_integrity",
+ sizeof(struct blk_integrity),
+ 0, SLAB_PANIC, NULL);
+ return 0;
+}
+subsys_initcall(blk_dev_integrity_init);
+
+static void blk_integrity_release(struct kobject *kobj)
+{
+ struct blk_integrity *bi =
+ container_of(kobj, struct blk_integrity, kobj);
+
+ kmem_cache_free(integrity_cachep, bi);
+}
+
+static struct kobj_type integrity_ktype = {
+ .default_attrs = integrity_attrs,
+ .sysfs_ops = &integrity_ops,
+ .release = blk_integrity_release,
+};
+
+/**
+ * blk_integrity_register - Register a gendisk as being integrity-capable
+ * @disk: struct gendisk pointer to make integrity-aware
+ * @template: integrity profile
+ *
+ * Description: When a device needs to advertise itself as being able
+ * to send/receive integrity metadata it must use this function to
+ * register the capability with the block layer. The template is a
+ * blk_integrity struct with values appropriate for the underlying
+ * hardware. See Documentation/block/data-integrity.txt.
+ */
+int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
+{
+ struct blk_integrity *bi;
+
+ BUG_ON(disk == NULL);
+ BUG_ON(template == NULL);
+
+ if (disk->integrity == NULL) {
+ bi = kmem_cache_alloc(integrity_cachep,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!bi)
+ return -1;
+
+ if (kobject_init_and_add(&bi->kobj, &integrity_ktype,
+ &disk->dev.kobj, "%s", "integrity")) {
+ kmem_cache_free(integrity_cachep, bi);
+ return -1;
+ }
+
+ kobject_uevent(&bi->kobj, KOBJ_ADD);
+
+ bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE;
+ bi->sector_size = disk->queue->hardsect_size;
+ disk->integrity = bi;
+ } else
+ bi = disk->integrity;
+
+ /* Use the provided profile as template */
+ bi->name = template->name;
+ bi->generate_fn = template->generate_fn;
+ bi->verify_fn = template->verify_fn;
+ bi->tuple_size = template->tuple_size;
+ bi->set_tag_fn = template->set_tag_fn;
+ bi->get_tag_fn = template->get_tag_fn;
+ bi->tag_size = template->tag_size;
+
+ return 0;
+}
+EXPORT_SYMBOL(blk_integrity_register);
+
+/**
+ * blk_integrity_unregister - Remove block integrity profile
+ * @disk: disk whose integrity profile to deallocate
+ *
+ * Description: This function frees all memory used by the block
+ * integrity profile. To be called at device teardown.
+ */
+void blk_integrity_unregister(struct gendisk *disk)
+{
+ struct blk_integrity *bi;
+
+ if (!disk || !disk->integrity)
+ return;
+
+ bi = disk->integrity;
+
+ kobject_uevent(&bi->kobj, KOBJ_REMOVE);
+ kobject_del(&bi->kobj);
+ kobject_put(&disk->dev.kobj);
+ kmem_cache_free(integrity_cachep, bi);
+}
+EXPORT_SYMBOL(blk_integrity_unregister);
diff --git a/block/blk-map.c b/block/blk-map.c
index 0b1af5a3537c..ddd96fb11a7d 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -210,6 +210,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
if (!bio_flagged(bio, BIO_USER_MAPPED))
rq->cmd_flags |= REQ_COPY_USER;
+ blk_queue_bounce(q, &bio);
bio_get(bio);
blk_rq_bio_prep(q, rq, bio);
rq->buffer = rq->data = NULL;
@@ -268,6 +269,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
int reading = rq_data_dir(rq) == READ;
int do_copy = 0;
struct bio *bio;
+ unsigned long stack_mask = ~(THREAD_SIZE - 1);
if (len > (q->max_hw_sectors << 9))
return -EINVAL;
@@ -278,6 +280,10 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
alignment = queue_dma_alignment(q) | q->dma_pad_mask;
do_copy = ((kaddr & alignment) || (len & alignment));
+ if (!((kaddr & stack_mask) ^
+ ((unsigned long)current->stack & stack_mask)))
+ do_copy = 1;
+
if (do_copy)
bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
else
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 651136aae76e..5efc9e7a68b7 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -441,6 +441,9 @@ static int attempt_merge(struct request_queue *q, struct request *req,
|| next->special)
return 0;
+ if (blk_integrity_rq(req) != blk_integrity_rq(next))
+ return 0;
+
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 8dd86418f35d..dfc77012843f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -302,11 +302,10 @@ EXPORT_SYMBOL(blk_queue_stack_limits);
* @q: the request queue for the device
* @mask: pad mask
*
- * Set pad mask. Direct IO requests are padded to the mask specified.
+ * Set dma pad mask.
*
- * Appending pad buffer to a request modifies ->data_len such that it
- * includes the pad buffer. The original requested data length can be
- * obtained using blk_rq_raw_data_len().
+ * Appending pad buffer to a request modifies the last entry of a
+ * scatter list such that it includes the pad buffer.
**/
void blk_queue_dma_pad(struct request_queue *q, unsigned int mask)
{
@@ -315,6 +314,23 @@ void blk_queue_dma_pad(struct request_queue *q, unsigned int mask)
EXPORT_SYMBOL(blk_queue_dma_pad);
/**
+ * blk_queue_update_dma_pad - update pad mask
+ * @q: the request queue for the device
+ * @mask: pad mask
+ *
+ * Update dma pad mask.
+ *
+ * Appending pad buffer to a request modifies the last entry of a
+ * scatter list such that it includes the pad buffer.
+ **/
+void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask)
+{
+ if (mask > q->dma_pad_mask)
+ q->dma_pad_mask = mask;
+}
+EXPORT_SYMBOL(blk_queue_update_dma_pad);
+
+/**
* blk_queue_dma_drain - Set up a drain buffer for excess dma.
* @q: the request queue for the device
* @dma_drain_needed: fn which returns non-zero if drain is necessary
diff --git a/block/blk.h b/block/blk.h
index 59776ab4742a..c79f30e1df52 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -51,4 +51,12 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
return q->nr_congestion_off;
}
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+
+#define rq_for_each_integrity_segment(bvl, _rq, _iter) \
+ __rq_for_each_bio(_iter.bio, _rq) \
+ bip_for_each_vec(bvl, _iter.bio->bi_integrity, _iter.i)
+
+#endif /* BLK_DEV_INTEGRITY */
+
#endif
diff --git a/block/blktrace.c b/block/blktrace.c
index 8d3a27780260..eb9651ccb241 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -244,6 +244,7 @@ err:
static void blk_trace_cleanup(struct blk_trace *bt)
{
relay_close(bt->rchan);
+ debugfs_remove(bt->msg_file);
debugfs_remove(bt->dropped_file);
blk_remove_tree(bt->dir);
free_percpu(bt->sequence);
@@ -291,6 +292,44 @@ static const struct file_operations blk_dropped_fops = {
.read = blk_dropped_read,
};
+static int blk_msg_open(struct inode *inode, struct file *filp)
+{
+ filp->private_data = inode->i_private;
+
+ return 0;
+}
+
+static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ char *msg;
+ struct blk_trace *bt;
+
+ if (count > BLK_TN_MAX_MSG)
+ return -EINVAL;
+
+ msg = kmalloc(count, GFP_KERNEL);
+ if (msg == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(msg, buffer, count)) {
+ kfree(msg);
+ return -EFAULT;
+ }
+
+ bt = filp->private_data;
+ __trace_note_message(bt, "%s", msg);
+ kfree(msg);
+
+ return count;
+}
+
+static const struct file_operations blk_msg_fops = {
+ .owner = THIS_MODULE,
+ .open = blk_msg_open,
+ .write = blk_msg_write,
+};
+
/*
* Keep track of how many times we encountered a full subbuffer, to aid
* the user space app in telling how many lost events there were.
@@ -380,6 +419,10 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (!bt->dropped_file)
goto err;
+ bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
+ if (!bt->msg_file)
+ goto err;
+
bt->rchan = relay_open("trace", dir, buts->buf_size,
buts->buf_nr, &blk_relay_callbacks, bt);
if (!bt->rchan)
@@ -409,6 +452,8 @@ err:
if (dir)
blk_remove_tree(dir);
if (bt) {
+ if (bt->msg_file)
+ debugfs_remove(bt->msg_file);
if (bt->dropped_file)
debugfs_remove(bt->dropped_file);
free_percpu(bt->sequence);
diff --git a/block/bsg.c b/block/bsg.c
index f0b7cd343216..5fb9b0bdbe85 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -19,6 +19,7 @@
#include <linux/uio.h>
#include <linux/idr.h>
#include <linux/bsg.h>
+#include <linux/smp_lock.h>
#include <scsi/scsi.h>
#include <scsi/scsi_ioctl.h>
@@ -44,11 +45,12 @@ struct bsg_device {
char name[BUS_ID_SIZE];
int max_queue;
unsigned long flags;
+ struct blk_scsi_cmd_filter *cmd_filter;
+ mode_t *f_mode;
};
enum {
BSG_F_BLOCK = 1,
- BSG_F_WRITE_PERM = 2,
};
#define BSG_DEFAULT_CMDS 64
@@ -172,7 +174,7 @@ unlock:
}
static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
- struct sg_io_v4 *hdr, int has_write_perm)
+ struct sg_io_v4 *hdr, struct bsg_device *bd)
{
if (hdr->request_len > BLK_MAX_CDB) {
rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
@@ -185,7 +187,8 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
return -EFAULT;
if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
- if (blk_verify_command(rq->cmd, has_write_perm))
+ if (blk_cmd_filter_verify_command(bd->cmd_filter, rq->cmd,
+ bd->f_mode))
return -EPERM;
} else if (!capable(CAP_SYS_RAWIO))
return -EPERM;
@@ -263,8 +266,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
rq = blk_get_request(q, rw, GFP_KERNEL);
if (!rq)
return ERR_PTR(-ENOMEM);
- ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
- &bd->flags));
+ ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd);
if (ret)
goto out;
@@ -566,12 +568,23 @@ static inline void bsg_set_block(struct bsg_device *bd, struct file *file)
set_bit(BSG_F_BLOCK, &bd->flags);
}
-static inline void bsg_set_write_perm(struct bsg_device *bd, struct file *file)
+static void bsg_set_cmd_filter(struct bsg_device *bd,
+ struct file *file)
{
- if (file->f_mode & FMODE_WRITE)
- set_bit(BSG_F_WRITE_PERM, &bd->flags);
- else
- clear_bit(BSG_F_WRITE_PERM, &bd->flags);
+ struct inode *inode;
+ struct gendisk *disk;
+
+ if (!file)
+ return;
+
+ inode = file->f_dentry->d_inode;
+ if (!inode)
+ return;
+
+ disk = inode->i_bdev->bd_disk;
+
+ bd->cmd_filter = &disk->cmd_filter;
+ bd->f_mode = &file->f_mode;
}
/*
@@ -595,6 +608,8 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
dprintk("%s: read %Zd bytes\n", bd->name, count);
bsg_set_block(bd, file);
+ bsg_set_cmd_filter(bd, file);
+
bytes_read = 0;
ret = __bsg_read(buf, count, bd, NULL, &bytes_read);
*ppos = bytes_read;
@@ -668,7 +683,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
dprintk("%s: write %Zd bytes\n", bd->name, count);
bsg_set_block(bd, file);
- bsg_set_write_perm(bd, file);
+ bsg_set_cmd_filter(bd, file);
bytes_written = 0;
ret = __bsg_write(bd, buf, count, &bytes_written);
@@ -709,11 +724,12 @@ static void bsg_kref_release_function(struct kref *kref)
{
struct bsg_class_device *bcd =
container_of(kref, struct bsg_class_device, ref);
+ struct device *parent = bcd->parent;
if (bcd->release)
bcd->release(bcd->parent);
- put_device(bcd->parent);
+ put_device(parent);
}
static int bsg_put_device(struct bsg_device *bd)
@@ -724,8 +740,13 @@ static int bsg_put_device(struct bsg_device *bd)
mutex_lock(&bsg_mutex);
do_free = atomic_dec_and_test(&bd->ref_count);
- if (!do_free)
+ if (!do_free) {
+ mutex_unlock(&bsg_mutex);
goto out;
+ }
+
+ hlist_del(&bd->dev_list);
+ mutex_unlock(&bsg_mutex);
dprintk("%s: tearing down\n", bd->name);
@@ -741,10 +762,8 @@ static int bsg_put_device(struct bsg_device *bd)
*/
ret = bsg_complete_all_commands(bd);
- hlist_del(&bd->dev_list);
kfree(bd);
out:
- mutex_unlock(&bsg_mutex);
kref_put(&q->bsg_dev.ref, bsg_kref_release_function);
if (do_free)
blk_put_queue(q);
@@ -771,7 +790,9 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
}
bd->queue = rq;
+
bsg_set_block(bd, file);
+ bsg_set_cmd_filter(bd, file);
atomic_set(&bd->ref_count, 1);
mutex_lock(&bsg_mutex);
@@ -834,7 +855,11 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file)
static int bsg_open(struct inode *inode, struct file *file)
{
- struct bsg_device *bd = bsg_get_device(inode, file);
+ struct bsg_device *bd;
+
+ lock_kernel();
+ bd = bsg_get_device(inode, file);
+ unlock_kernel();
if (IS_ERR(bd))
return PTR_ERR(bd);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d01b411c72f0..1e2aff812ee2 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -11,6 +11,7 @@
#include <linux/elevator.h>
#include <linux/rbtree.h>
#include <linux/ioprio.h>
+#include <linux/blktrace_api.h>
/*
* tunables
@@ -41,13 +42,14 @@ static int cfq_slice_idle = HZ / 125;
#define RQ_CIC(rq) \
((struct cfq_io_context *) (rq)->elevator_private)
-#define RQ_CFQQ(rq) ((rq)->elevator_private2)
+#define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2)
static struct kmem_cache *cfq_pool;
static struct kmem_cache *cfq_ioc_pool;
static DEFINE_PER_CPU(unsigned long, ioc_count);
static struct completion *ioc_gone;
+static DEFINE_SPINLOCK(ioc_gone_lock);
#define CFQ_PRIO_LISTS IOPRIO_BE_NR
#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
@@ -155,6 +157,7 @@ struct cfq_queue {
unsigned short ioprio, org_ioprio;
unsigned short ioprio_class, org_ioprio_class;
+ pid_t pid;
};
enum cfqq_state_flags {
@@ -198,6 +201,11 @@ CFQ_CFQQ_FNS(slice_new);
CFQ_CFQQ_FNS(sync);
#undef CFQ_CFQQ_FNS
+#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
+ blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args)
+#define cfq_log(cfqd, fmt, args...) \
+ blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
+
static void cfq_dispatch_insert(struct request_queue *, struct request *);
static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
struct io_context *, gfp_t);
@@ -234,8 +242,10 @@ static inline int cfq_bio_sync(struct bio *bio)
*/
static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
{
- if (cfqd->busy_queues)
+ if (cfqd->busy_queues) {
+ cfq_log(cfqd, "schedule dispatch");
kblockd_schedule_work(&cfqd->unplug_work);
+ }
}
static int cfq_queue_empty(struct request_queue *q)
@@ -270,6 +280,7 @@ static inline void
cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
+ cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
}
/*
@@ -539,6 +550,7 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
*/
static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
+ cfq_log_cfqq(cfqd, cfqq, "add_to_rr");
BUG_ON(cfq_cfqq_on_rr(cfqq));
cfq_mark_cfqq_on_rr(cfqq);
cfqd->busy_queues++;
@@ -552,6 +564,7 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
*/
static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
+ cfq_log_cfqq(cfqd, cfqq, "del_from_rr");
BUG_ON(!cfq_cfqq_on_rr(cfqq));
cfq_clear_cfqq_on_rr(cfqq);
@@ -638,6 +651,8 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
struct cfq_data *cfqd = q->elevator->elevator_data;
cfqd->rq_in_driver++;
+ cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
+ cfqd->rq_in_driver);
/*
* If the depth is larger 1, it really could be queueing. But lets
@@ -657,6 +672,8 @@ static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
WARN_ON(!cfqd->rq_in_driver);
cfqd->rq_in_driver--;
+ cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
+ cfqd->rq_in_driver);
}
static void cfq_remove_request(struct request *rq)
@@ -746,6 +763,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
struct cfq_queue *cfqq)
{
if (cfqq) {
+ cfq_log_cfqq(cfqd, cfqq, "set_active");
cfqq->slice_end = 0;
cfq_clear_cfqq_must_alloc_slice(cfqq);
cfq_clear_cfqq_fifo_expire(cfqq);
@@ -763,6 +781,8 @@ static void
__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
int timed_out)
{
+ cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
+
if (cfq_cfqq_wait_request(cfqq))
del_timer(&cfqd->idle_slice_timer);
@@ -772,8 +792,10 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
/*
* store what was left of this slice, if the queue idled/timed out
*/
- if (timed_out && !cfq_cfqq_slice_new(cfqq))
+ if (timed_out && !cfq_cfqq_slice_new(cfqq)) {
cfqq->slice_resid = cfqq->slice_end - jiffies;
+ cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
+ }
cfq_resort_rr_list(cfqd, cfqq);
@@ -866,6 +888,12 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
return;
/*
+ * still requests with the driver, don't idle
+ */
+ if (cfqd->rq_in_driver)
+ return;
+
+ /*
* task has exited, don't wait
*/
cic = cfqd->active_cic;
@@ -892,6 +920,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
+ cfq_log(cfqd, "arm_idle: %lu", sl);
}
/*
@@ -902,6 +931,8 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_queue *cfqq = RQ_CFQQ(rq);
+ cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");
+
cfq_remove_request(rq);
cfqq->dispatched++;
elv_dispatch_sort(q, rq);
@@ -931,8 +962,9 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
rq = rq_entry_fifo(cfqq->fifo.next);
if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo]))
- return NULL;
+ rq = NULL;
+ cfq_log_cfqq(cfqd, cfqq, "fifo=%p", rq);
return rq;
}
@@ -1072,6 +1104,7 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
BUG_ON(cfqd->busy_queues);
+ cfq_log(cfqd, "forced_dispatch=%d\n", dispatched);
return dispatched;
}
@@ -1112,6 +1145,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
}
+ cfq_log(cfqd, "dispatched=%d", dispatched);
return dispatched;
}
@@ -1130,6 +1164,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
if (!atomic_dec_and_test(&cfqq->ref))
return;
+ cfq_log_cfqq(cfqd, cfqq, "put_queue");
BUG_ON(rb_first(&cfqq->sort_list));
BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
BUG_ON(cfq_cfqq_on_rr(cfqq));
@@ -1177,8 +1212,19 @@ static void cfq_cic_free_rcu(struct rcu_head *head)
kmem_cache_free(cfq_ioc_pool, cic);
elv_ioc_count_dec(ioc_count);
- if (ioc_gone && !elv_ioc_count_read(ioc_count))
- complete(ioc_gone);
+ if (ioc_gone) {
+ /*
+ * CFQ scheduler is exiting, grab exit lock and check
+ * the pending io context count. If it hits zero,
+ * complete ioc_gone and set it back to NULL
+ */
+ spin_lock(&ioc_gone_lock);
+ if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
+ complete(ioc_gone);
+ ioc_gone = NULL;
+ }
+ spin_unlock(&ioc_gone_lock);
+ }
}
static void cfq_cic_free(struct cfq_io_context *cic)
@@ -1427,6 +1473,8 @@ retry:
cfq_mark_cfqq_idle_window(cfqq);
cfq_mark_cfqq_sync(cfqq);
}
+ cfqq->pid = current->pid;
+ cfq_log_cfqq(cfqd, cfqq, "alloced");
}
if (new_cfqq)
@@ -1675,7 +1723,7 @@ static void
cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct cfq_io_context *cic)
{
- int enable_idle;
+ int old_idle, enable_idle;
/*
* Don't idle for async or idle io prio class
@@ -1683,7 +1731,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq))
return;
- enable_idle = cfq_cfqq_idle_window(cfqq);
+ enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
(cfqd->hw_tag && CIC_SEEKY(cic)))
@@ -1695,10 +1743,13 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
enable_idle = 1;
}
- if (enable_idle)
- cfq_mark_cfqq_idle_window(cfqq);
- else
- cfq_clear_cfqq_idle_window(cfqq);
+ if (old_idle != enable_idle) {
+ cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle);
+ if (enable_idle)
+ cfq_mark_cfqq_idle_window(cfqq);
+ else
+ cfq_clear_cfqq_idle_window(cfqq);
+ }
}
/*
@@ -1757,6 +1808,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
*/
static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
+ cfq_log_cfqq(cfqd, cfqq, "preempt");
cfq_slice_expired(cfqd, 1);
/*
@@ -1818,6 +1870,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_queue *cfqq = RQ_CFQQ(rq);
+ cfq_log_cfqq(cfqd, cfqq, "insert_request");
cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc);
cfq_add_rq_rb(rq);
@@ -1835,6 +1888,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
unsigned long now;
now = jiffies;
+ cfq_log_cfqq(cfqd, cfqq, "complete");
WARN_ON(!cfqd->rq_in_driver);
WARN_ON(!cfqq->dispatched);
@@ -2004,6 +2058,7 @@ queue_fail:
cfq_schedule_dispatch(cfqd);
spin_unlock_irqrestore(q->queue_lock, flags);
+ cfq_log(cfqd, "set_request fail");
return 1;
}
@@ -2029,6 +2084,8 @@ static void cfq_idle_slice_timer(unsigned long data)
unsigned long flags;
int timed_out = 1;
+ cfq_log(cfqd, "idle timer fired");
+
spin_lock_irqsave(cfqd->queue->queue_lock, flags);
cfqq = cfqd->active_queue;
@@ -2317,7 +2374,7 @@ static void __exit cfq_exit(void)
* pending RCU callbacks
*/
if (elv_ioc_count_read(ioc_count))
- wait_for_completion(ioc_gone);
+ wait_for_completion(&all_gone);
cfq_slab_kill();
}
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
new file mode 100644
index 000000000000..eec4404fd357
--- /dev/null
+++ b/block/cmd-filter.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright 2004 Peter M. Jones <pjones@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public Licens
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
+ *
+ */
+
+#include <linux/list.h>
+#include <linux/genhd.h>
+#include <linux/spinlock.h>
+#include <linux/parser.h>
+#include <linux/capability.h>
+#include <linux/bitops.h>
+
+#include <scsi/scsi.h>
+#include <linux/cdrom.h>
+
+int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
+ unsigned char *cmd, mode_t *f_mode)
+{
+ /* root can do any command. */
+ if (capable(CAP_SYS_RAWIO))
+ return 0;
+
+ /* if there's no filter set, assume we're filtering everything out */
+ if (!filter)
+ return -EPERM;
+
+ /* Anybody who can open the device can do a read-safe command */
+ if (test_bit(cmd[0], filter->read_ok))
+ return 0;
+
+ /* Write-safe commands require a writable open */
+ if (test_bit(cmd[0], filter->write_ok) && (*f_mode & FMODE_WRITE))
+ return 0;
+
+ return -EPERM;
+}
+EXPORT_SYMBOL(blk_cmd_filter_verify_command);
+
+int blk_verify_command(struct file *file, unsigned char *cmd)
+{
+ struct gendisk *disk;
+ struct inode *inode;
+
+ if (!file)
+ return -EINVAL;
+
+ inode = file->f_dentry->d_inode;
+ if (!inode)
+ return -EINVAL;
+
+ disk = inode->i_bdev->bd_disk;
+
+ return blk_cmd_filter_verify_command(&disk->cmd_filter,
+ cmd, &file->f_mode);
+}
+EXPORT_SYMBOL(blk_verify_command);
+
+/* and now, the sysfs stuff */
+static ssize_t rcf_cmds_show(struct blk_scsi_cmd_filter *filter, char *page,
+ int rw)
+{
+ char *npage = page;
+ unsigned long *okbits;
+ int i;
+
+ if (rw == READ)
+ okbits = filter->read_ok;
+ else
+ okbits = filter->write_ok;
+
+ for (i = 0; i < BLK_SCSI_MAX_CMDS; i++) {
+ if (test_bit(i, okbits)) {
+ sprintf(npage, "%02x", i);
+ npage += 2;
+ if (i < BLK_SCSI_MAX_CMDS - 1)
+ sprintf(npage++, " ");
+ }
+ }
+
+ if (npage != page)
+ npage += sprintf(npage, "\n");
+
+ return npage - page;
+}
+
+static ssize_t rcf_readcmds_show(struct blk_scsi_cmd_filter *filter, char *page)
+{
+ return rcf_cmds_show(filter, page, READ);
+}
+
+static ssize_t rcf_writecmds_show(struct blk_scsi_cmd_filter *filter,
+ char *page)
+{
+ return rcf_cmds_show(filter, page, WRITE);
+}
+
+static ssize_t rcf_cmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count, int rw)
+{
+ ssize_t ret = 0;
+ unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
+ int cmd, status, len;
+ substring_t ss;
+
+ memset(&okbits, 0, sizeof(okbits));
+
+ for (len = strlen(page); len > 0; len -= 3) {
+ if (len < 2)
+ break;
+ ss.from = (char *) page + ret;
+ ss.to = (char *) page + ret + 2;
+ ret += 3;
+ status = match_hex(&ss, &cmd);
+ /* either of these cases means invalid input, so do nothing. */
+ if (status || cmd >= BLK_SCSI_MAX_CMDS)
+ return -EINVAL;
+
+ __set_bit(cmd, okbits);
+ }
+
+ if (rw == READ)
+ target_okbits = filter->read_ok;
+ else
+ target_okbits = filter->write_ok;
+
+ memmove(target_okbits, okbits, sizeof(okbits));
+ return count;
+}
+
+static ssize_t rcf_readcmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count)
+{
+ return rcf_cmds_store(filter, page, count, READ);
+}
+
+static ssize_t rcf_writecmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count)
+{
+ return rcf_cmds_store(filter, page, count, WRITE);
+}
+
+struct rcf_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct blk_scsi_cmd_filter *, char *);
+ ssize_t (*store)(struct blk_scsi_cmd_filter *, const char *, size_t);
+};
+
+static struct rcf_sysfs_entry rcf_readcmds_entry = {
+ .attr = { .name = "read_table", .mode = S_IRUGO | S_IWUSR },
+ .show = rcf_readcmds_show,
+ .store = rcf_readcmds_store,
+};
+
+static struct rcf_sysfs_entry rcf_writecmds_entry = {
+ .attr = {.name = "write_table", .mode = S_IRUGO | S_IWUSR },
+ .show = rcf_writecmds_show,
+ .store = rcf_writecmds_store,
+};
+
+static struct attribute *default_attrs[] = {
+ &rcf_readcmds_entry.attr,
+ &rcf_writecmds_entry.attr,
+ NULL,
+};
+
+#define to_rcf(atr) container_of((atr), struct rcf_sysfs_entry, attr)
+
+static ssize_t
+rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+ struct rcf_sysfs_entry *entry = to_rcf(attr);
+ struct blk_scsi_cmd_filter *filter;
+
+ filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
+ if (entry->show)
+ return entry->show(filter, page);
+
+ return 0;
+}
+
+static ssize_t
+rcf_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *page, size_t length)
+{
+ struct rcf_sysfs_entry *entry = to_rcf(attr);
+ struct blk_scsi_cmd_filter *filter;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ if (!entry->store)
+ return -EINVAL;
+
+ filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
+ return entry->store(filter, page, length);
+}
+
+static struct sysfs_ops rcf_sysfs_ops = {
+ .show = rcf_attr_show,
+ .store = rcf_attr_store,
+};
+
+static struct kobj_type rcf_ktype = {
+ .sysfs_ops = &rcf_sysfs_ops,
+ .default_attrs = default_attrs,
+};
+
+#ifndef MAINTENANCE_IN_CMD
+#define MAINTENANCE_IN_CMD 0xa3
+#endif
+
+static void rcf_set_defaults(struct blk_scsi_cmd_filter *filter)
+{
+ /* Basic read-only commands */
+ __set_bit(TEST_UNIT_READY, filter->read_ok);
+ __set_bit(REQUEST_SENSE, filter->read_ok);
+ __set_bit(READ_6, filter->read_ok);
+ __set_bit(READ_10, filter->read_ok);
+ __set_bit(READ_12, filter->read_ok);
+ __set_bit(READ_16, filter->read_ok);
+ __set_bit(READ_BUFFER, filter->read_ok);
+ __set_bit(READ_DEFECT_DATA, filter->read_ok);
+ __set_bit(READ_CAPACITY, filter->read_ok);
+ __set_bit(READ_LONG, filter->read_ok);
+ __set_bit(INQUIRY, filter->read_ok);
+ __set_bit(MODE_SENSE, filter->read_ok);
+ __set_bit(MODE_SENSE_10, filter->read_ok);
+ __set_bit(LOG_SENSE, filter->read_ok);
+ __set_bit(START_STOP, filter->read_ok);
+ __set_bit(GPCMD_VERIFY_10, filter->read_ok);
+ __set_bit(VERIFY_16, filter->read_ok);
+ __set_bit(REPORT_LUNS, filter->read_ok);
+ __set_bit(SERVICE_ACTION_IN, filter->read_ok);
+ __set_bit(RECEIVE_DIAGNOSTIC, filter->read_ok);
+ __set_bit(MAINTENANCE_IN_CMD, filter->read_ok);
+ __set_bit(GPCMD_READ_BUFFER_CAPACITY, filter->read_ok);
+
+ /* Audio CD commands */
+ __set_bit(GPCMD_PLAY_CD, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_10, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_MSF, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_TI, filter->read_ok);
+ __set_bit(GPCMD_PAUSE_RESUME, filter->read_ok);
+
+ /* CD/DVD data reading */
+ __set_bit(GPCMD_READ_CD, filter->read_ok);
+ __set_bit(GPCMD_READ_CD_MSF, filter->read_ok);
+ __set_bit(GPCMD_READ_DISC_INFO, filter->read_ok);
+ __set_bit(GPCMD_READ_CDVD_CAPACITY, filter->read_ok);
+ __set_bit(GPCMD_READ_DVD_STRUCTURE, filter->read_ok);
+ __set_bit(GPCMD_READ_HEADER, filter->read_ok);
+ __set_bit(GPCMD_READ_TRACK_RZONE_INFO, filter->read_ok);
+ __set_bit(GPCMD_READ_SUBCHANNEL, filter->read_ok);
+ __set_bit(GPCMD_READ_TOC_PMA_ATIP, filter->read_ok);
+ __set_bit(GPCMD_REPORT_KEY, filter->read_ok);
+ __set_bit(GPCMD_SCAN, filter->read_ok);
+ __set_bit(GPCMD_GET_CONFIGURATION, filter->read_ok);
+ __set_bit(GPCMD_READ_FORMAT_CAPACITIES, filter->read_ok);
+ __set_bit(GPCMD_GET_EVENT_STATUS_NOTIFICATION, filter->read_ok);
+ __set_bit(GPCMD_GET_PERFORMANCE, filter->read_ok);
+ __set_bit(GPCMD_SEEK, filter->read_ok);
+ __set_bit(GPCMD_STOP_PLAY_SCAN, filter->read_ok);
+
+ /* Basic writing commands */
+ __set_bit(WRITE_6, filter->write_ok);
+ __set_bit(WRITE_10, filter->write_ok);
+ __set_bit(WRITE_VERIFY, filter->write_ok);
+ __set_bit(WRITE_12, filter->write_ok);
+ __set_bit(WRITE_VERIFY_12, filter->write_ok);
+ __set_bit(WRITE_16, filter->write_ok);
+ __set_bit(WRITE_LONG, filter->write_ok);
+ __set_bit(WRITE_LONG_2, filter->write_ok);
+ __set_bit(ERASE, filter->write_ok);
+ __set_bit(GPCMD_MODE_SELECT_10, filter->write_ok);
+ __set_bit(MODE_SELECT, filter->write_ok);
+ __set_bit(LOG_SELECT, filter->write_ok);
+ __set_bit(GPCMD_BLANK, filter->write_ok);
+ __set_bit(GPCMD_CLOSE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_FLUSH_CACHE, filter->write_ok);
+ __set_bit(GPCMD_FORMAT_UNIT, filter->write_ok);
+ __set_bit(GPCMD_REPAIR_RZONE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_RESERVE_RZONE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_SEND_DVD_STRUCTURE, filter->write_ok);
+ __set_bit(GPCMD_SEND_EVENT, filter->write_ok);
+ __set_bit(GPCMD_SEND_KEY, filter->write_ok);
+ __set_bit(GPCMD_SEND_OPC, filter->write_ok);
+ __set_bit(GPCMD_SEND_CUE_SHEET, filter->write_ok);
+ __set_bit(GPCMD_SET_SPEED, filter->write_ok);
+ __set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok);
+ __set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok);
+ __set_bit(GPCMD_SET_STREAMING, filter->write_ok);
+}
+
+int blk_register_filter(struct gendisk *disk)
+{
+ int ret;
+ struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
+ struct kobject *parent = kobject_get(disk->holder_dir->parent);
+
+ if (!parent)
+ return -ENODEV;
+
+ ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent,
+ "%s", "cmd_filter");
+
+ if (ret < 0)
+ return ret;
+
+ rcf_set_defaults(filter);
+ return 0;
+}
+
+void blk_unregister_filter(struct gendisk *disk)
+{
+ struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
+
+ kobject_put(&filter->kobj);
+ kobject_put(disk->holder_dir->parent);
+}
+
diff --git a/block/elevator.c b/block/elevator.c
index 902dd1344d56..ed6f8f32d27e 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -86,6 +86,12 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
return 0;
+ /*
+ * only merge integrity protected bio into ditto rq
+ */
+ if (bio_integrity(bio) != blk_integrity_rq(rq))
+ return 0;
+
if (!elv_iosched_allow_merge(rq, bio))
return 0;
@@ -144,7 +150,7 @@ static struct elevator_type *elevator_get(const char *name)
else
sprintf(elv, "%s-iosched", name);
- request_module(elv);
+ request_module("%s", elv);
spin_lock(&elv_list_lock);
e = elevator_find(name);
}
diff --git a/block/genhd.c b/block/genhd.c
index b922d4801c87..9074f384b097 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -189,6 +189,7 @@ void add_disk(struct gendisk *disk)
disk->minors, NULL, exact_match, exact_lock, disk);
register_disk(disk);
blk_register_queue(disk);
+ blk_register_filter(disk);
bdi = &disk->queue->backing_dev_info;
bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor));
@@ -200,6 +201,7 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
void unlink_gendisk(struct gendisk *disk)
{
+ blk_unregister_filter(disk);
sysfs_remove_link(&disk->dev.kobj, "bdi");
bdi_unregister(&disk->queue->backing_dev_info);
blk_unregister_queue(disk);
@@ -400,6 +402,14 @@ static ssize_t disk_removable_show(struct device *dev,
(disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
}
+static ssize_t disk_ro_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+
+ return sprintf(buf, "%d\n", disk->policy ? 1 : 0);
+}
+
static ssize_t disk_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -472,6 +482,7 @@ static ssize_t disk_fail_store(struct device *dev,
static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
+static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL);
static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL);
@@ -483,6 +494,7 @@ static struct device_attribute dev_attr_fail =
static struct attribute *disk_attrs[] = {
&dev_attr_range.attr,
&dev_attr_removable.attr,
+ &dev_attr_ro.attr,
&dev_attr_size.attr,
&dev_attr_capability.attr,
&dev_attr_stat.attr,
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 78199c08ec92..c5b9bcfc0a6d 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -105,120 +105,12 @@ static int sg_emulated_host(struct request_queue *q, int __user *p)
return put_user(1, p);
}
-#define CMD_READ_SAFE 0x01
-#define CMD_WRITE_SAFE 0x02
-#define CMD_WARNED 0x04
-#define safe_for_read(cmd) [cmd] = CMD_READ_SAFE
-#define safe_for_write(cmd) [cmd] = CMD_WRITE_SAFE
-
-int blk_verify_command(unsigned char *cmd, int has_write_perm)
-{
- static unsigned char cmd_type[256] = {
-
- /* Basic read-only commands */
- safe_for_read(TEST_UNIT_READY),
- safe_for_read(REQUEST_SENSE),
- safe_for_read(READ_6),
- safe_for_read(READ_10),
- safe_for_read(READ_12),
- safe_for_read(READ_16),
- safe_for_read(READ_BUFFER),
- safe_for_read(READ_DEFECT_DATA),
- safe_for_read(READ_LONG),
- safe_for_read(INQUIRY),
- safe_for_read(MODE_SENSE),
- safe_for_read(MODE_SENSE_10),
- safe_for_read(LOG_SENSE),
- safe_for_read(START_STOP),
- safe_for_read(GPCMD_VERIFY_10),
- safe_for_read(VERIFY_16),
-
- /* Audio CD commands */
- safe_for_read(GPCMD_PLAY_CD),
- safe_for_read(GPCMD_PLAY_AUDIO_10),
- safe_for_read(GPCMD_PLAY_AUDIO_MSF),
- safe_for_read(GPCMD_PLAY_AUDIO_TI),
- safe_for_read(GPCMD_PAUSE_RESUME),
-
- /* CD/DVD data reading */
- safe_for_read(GPCMD_READ_BUFFER_CAPACITY),
- safe_for_read(GPCMD_READ_CD),
- safe_for_read(GPCMD_READ_CD_MSF),
- safe_for_read(GPCMD_READ_DISC_INFO),
- safe_for_read(GPCMD_READ_CDVD_CAPACITY),
- safe_for_read(GPCMD_READ_DVD_STRUCTURE),
- safe_for_read(GPCMD_READ_HEADER),
- safe_for_read(GPCMD_READ_TRACK_RZONE_INFO),
- safe_for_read(GPCMD_READ_SUBCHANNEL),
- safe_for_read(GPCMD_READ_TOC_PMA_ATIP),
- safe_for_read(GPCMD_REPORT_KEY),
- safe_for_read(GPCMD_SCAN),
- safe_for_read(GPCMD_GET_CONFIGURATION),
- safe_for_read(GPCMD_READ_FORMAT_CAPACITIES),
- safe_for_read(GPCMD_GET_EVENT_STATUS_NOTIFICATION),
- safe_for_read(GPCMD_GET_PERFORMANCE),
- safe_for_read(GPCMD_SEEK),
- safe_for_read(GPCMD_STOP_PLAY_SCAN),
-
- /* Basic writing commands */
- safe_for_write(WRITE_6),
- safe_for_write(WRITE_10),
- safe_for_write(WRITE_VERIFY),
- safe_for_write(WRITE_12),
- safe_for_write(WRITE_VERIFY_12),
- safe_for_write(WRITE_16),
- safe_for_write(WRITE_LONG),
- safe_for_write(WRITE_LONG_2),
- safe_for_write(ERASE),
- safe_for_write(GPCMD_MODE_SELECT_10),
- safe_for_write(MODE_SELECT),
- safe_for_write(LOG_SELECT),
- safe_for_write(GPCMD_BLANK),
- safe_for_write(GPCMD_CLOSE_TRACK),
- safe_for_write(GPCMD_FLUSH_CACHE),
- safe_for_write(GPCMD_FORMAT_UNIT),
- safe_for_write(GPCMD_REPAIR_RZONE_TRACK),
- safe_for_write(GPCMD_RESERVE_RZONE_TRACK),
- safe_for_write(GPCMD_SEND_DVD_STRUCTURE),
- safe_for_write(GPCMD_SEND_EVENT),
- safe_for_write(GPCMD_SEND_KEY),
- safe_for_write(GPCMD_SEND_OPC),
- safe_for_write(GPCMD_SEND_CUE_SHEET),
- safe_for_write(GPCMD_SET_SPEED),
- safe_for_write(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL),
- safe_for_write(GPCMD_LOAD_UNLOAD),
- safe_for_write(GPCMD_SET_STREAMING),
- };
- unsigned char type = cmd_type[cmd[0]];
-
- /* Anybody who can open the device can do a read-safe command */
- if (type & CMD_READ_SAFE)
- return 0;
-
- /* Write-safe commands just require a writable open.. */
- if ((type & CMD_WRITE_SAFE) && has_write_perm)
- return 0;
-
- /* And root can do any command.. */
- if (capable(CAP_SYS_RAWIO))
- return 0;
-
- if (!type) {
- cmd_type[cmd[0]] = CMD_WARNED;
- printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]);
- }
-
- /* Otherwise fail it with an "Operation not permitted" */
- return -EPERM;
-}
-EXPORT_SYMBOL_GPL(blk_verify_command);
-
static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
- struct sg_io_hdr *hdr, int has_write_perm)
+ struct sg_io_hdr *hdr, struct file *file)
{
if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
return -EFAULT;
- if (blk_verify_command(rq->cmd, has_write_perm))
+ if (blk_verify_command(file, rq->cmd))
return -EPERM;
/*
@@ -287,7 +179,7 @@ static int sg_io(struct file *file, struct request_queue *q,
struct gendisk *bd_disk, struct sg_io_hdr *hdr)
{
unsigned long start_time;
- int writing = 0, ret = 0, has_write_perm = 0;
+ int writing = 0, ret = 0;
struct request *rq;
char sense[SCSI_SENSE_BUFFERSIZE];
struct bio *bio;
@@ -316,10 +208,7 @@ static int sg_io(struct file *file, struct request_queue *q,
if (!rq)
return -ENOMEM;
- if (file)
- has_write_perm = file->f_mode & FMODE_WRITE;
-
- if (blk_fill_sghdr_rq(q, rq, hdr, has_write_perm)) {
+ if (blk_fill_sghdr_rq(q, rq, hdr, file)) {
blk_put_request(rq);
return -EFAULT;
}
@@ -451,7 +340,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q,
if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
goto error;
- err = blk_verify_command(rq->cmd, file->f_mode & FMODE_WRITE);
+ err = blk_verify_command(file, rq->cmd);
if (err)
goto error;