author	Linus Torvalds <torvalds@linux-foundation.org>	2012-10-07 21:04:56 +0900
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-10-07 21:04:56 +0900
commit	dc92b1f9ab1e1665dbbc56911782358e7f9a49f9 (patch)
tree	965ccb4a0f2c24a8b24adce415f6506246d07a90 /drivers
parent	5e090ed7af10729a396a25df43d69a236e789736 (diff)
parent	ca16f580a5db7e60bfafe59a50bb133bd3347491 (diff)
Merge branch 'virtio-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux
Pull virtio changes from Rusty Russell:
 "New workflow: same git trees pulled by linux-next get sent straight to
  Linus.  Git is awkward at shuffling patches compared with quilt or mq,
  but that doesn't happen often once things get into my -next branch."

* 'virtio-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux: (24 commits)
  lguest: fix occasional crash in example launcher.
  virtio-blk: Disable callback in virtblk_done()
  virtio_mmio: Don't attempt to create empty virtqueues
  virtio_mmio: fix off by one error allocating queue
  drivers/virtio/virtio_pci.c: fix error return code
  virtio: don't crash when device is buggy
  virtio: remove CONFIG_VIRTIO_RING
  virtio: add help to CONFIG_VIRTIO option.
  virtio: support reserved vqs
  virtio: introduce an API to set affinity for a virtqueue
  virtio-ring: move queue_index to vring_virtqueue
  virtio_balloon: not EXPERIMENTAL any more.
  virtio-balloon: dependency fix
  virtio-blk: fix NULL checking in virtblk_alloc_req()
  virtio-blk: Add REQ_FLUSH and REQ_FUA support to bio path
  virtio-blk: Add bio-based IO path for virtio-blk
  virtio: console: fix error handling in init() function
  tools: Fix pthread flag for Makefile of trace-agent used by virtio-trace
  tools: Add guest trace agent as a user tool
  virtio/console: Allocate scatterlist according to the current pipe size
  ...
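Of the commits above, "virtio: introduce an API to set affinity for a virtqueue" adds a set_vq_affinity config op (wired up for virtio-pci in the hunks below) plus a virtqueue_set_affinity() wrapper in virtio_config.h; the wrapper itself lives outside drivers/ and so does not appear in this diff. A minimal hedged sketch of how a multiqueue driver might use it (the helper and its loop are illustrative, not part of this series):

/* Sketch only: spread a driver's virtqueues across online CPUs with
 * the new per-vq affinity API.  virtqueue_set_affinity() is added by
 * this series in include/linux/virtio_config.h (not in this diff);
 * transports that provide no set_vq_affinity op just ignore the hint. */
static void spread_vq_affinity(struct virtqueue **vqs, unsigned int nvqs)
{
	unsigned int i;
	int cpu = cpumask_first(cpu_online_mask);

	for (i = 0; i < nvqs; i++) {
		virtqueue_set_affinity(vqs[i], cpu);
		cpu = cpumask_next(cpu, cpu_online_mask);
		if (cpu >= nr_cpu_ids)
			cpu = cpumask_first(cpu_online_mask);
	}
}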
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/block/virtio_blk.c	306
-rw-r--r--	drivers/char/virtio_console.c	198
-rw-r--r--	drivers/lguest/lguest_device.c	5
-rw-r--r--	drivers/remoteproc/remoteproc_virtio.c	5
-rw-r--r--	drivers/rpmsg/Kconfig	1
-rw-r--r--	drivers/s390/kvm/kvm_virtio.c	5
-rw-r--r--	drivers/virtio/Kconfig	17
-rw-r--r--	drivers/virtio/Makefile	3
-rw-r--r--	drivers/virtio/virtio.c	2
-rw-r--r--	drivers/virtio/virtio_mmio.c	29
-rw-r--r--	drivers/virtio/virtio_pci.c	68
-rw-r--r--	drivers/virtio/virtio_ring.c	14
12 files changed, 557 insertions(+), 96 deletions(-)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c0bbeb470754..0bdde8fba397 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -14,6 +14,9 @@
#define PART_BITS 4
+static bool use_bio;
+module_param(use_bio, bool, S_IRUGO);
+
static int major;
static DEFINE_IDA(vd_index_ida);
@@ -23,6 +26,7 @@ struct virtio_blk
{
struct virtio_device *vdev;
struct virtqueue *vq;
+ wait_queue_head_t queue_wait;
/* The disk structure for the kernel. */
struct gendisk *disk;
@@ -51,53 +55,244 @@ struct virtio_blk
struct virtblk_req
{
struct request *req;
+ struct bio *bio;
struct virtio_blk_outhdr out_hdr;
struct virtio_scsi_inhdr in_hdr;
+ struct work_struct work;
+ struct virtio_blk *vblk;
+ int flags;
u8 status;
+ struct scatterlist sg[];
+};
+
+enum {
+ VBLK_IS_FLUSH = 1,
+ VBLK_REQ_FLUSH = 2,
+ VBLK_REQ_DATA = 4,
+ VBLK_REQ_FUA = 8,
};
-static void blk_done(struct virtqueue *vq)
+static inline int virtblk_result(struct virtblk_req *vbr)
+{
+ switch (vbr->status) {
+ case VIRTIO_BLK_S_OK:
+ return 0;
+ case VIRTIO_BLK_S_UNSUPP:
+ return -ENOTTY;
+ default:
+ return -EIO;
+ }
+}
+
+static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
+ gfp_t gfp_mask)
{
- struct virtio_blk *vblk = vq->vdev->priv;
struct virtblk_req *vbr;
- unsigned int len;
- unsigned long flags;
- spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
- while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
- int error;
+ vbr = mempool_alloc(vblk->pool, gfp_mask);
+ if (!vbr)
+ return NULL;
- switch (vbr->status) {
- case VIRTIO_BLK_S_OK:
- error = 0;
- break;
- case VIRTIO_BLK_S_UNSUPP:
- error = -ENOTTY;
- break;
- default:
- error = -EIO;
+ vbr->vblk = vblk;
+ if (use_bio)
+ sg_init_table(vbr->sg, vblk->sg_elems);
+
+ return vbr;
+}
+
+static void virtblk_add_buf_wait(struct virtio_blk *vblk,
+ struct virtblk_req *vbr,
+ unsigned long out,
+ unsigned long in)
+{
+ DEFINE_WAIT(wait);
+
+ for (;;) {
+ prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+
+ spin_lock_irq(vblk->disk->queue->queue_lock);
+ if (virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
+ GFP_ATOMIC) < 0) {
+ spin_unlock_irq(vblk->disk->queue->queue_lock);
+ io_schedule();
+ } else {
+ virtqueue_kick(vblk->vq);
+ spin_unlock_irq(vblk->disk->queue->queue_lock);
break;
}
- switch (vbr->req->cmd_type) {
- case REQ_TYPE_BLOCK_PC:
- vbr->req->resid_len = vbr->in_hdr.residual;
- vbr->req->sense_len = vbr->in_hdr.sense_len;
- vbr->req->errors = vbr->in_hdr.errors;
- break;
- case REQ_TYPE_SPECIAL:
- vbr->req->errors = (error != 0);
- break;
- default:
- break;
+ }
+
+ finish_wait(&vblk->queue_wait, &wait);
+}
+
+static inline void virtblk_add_req(struct virtblk_req *vbr,
+ unsigned int out, unsigned int in)
+{
+ struct virtio_blk *vblk = vbr->vblk;
+
+ spin_lock_irq(vblk->disk->queue->queue_lock);
+ if (unlikely(virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
+ GFP_ATOMIC) < 0)) {
+ spin_unlock_irq(vblk->disk->queue->queue_lock);
+ virtblk_add_buf_wait(vblk, vbr, out, in);
+ return;
+ }
+ virtqueue_kick(vblk->vq);
+ spin_unlock_irq(vblk->disk->queue->queue_lock);
+}
+
+static int virtblk_bio_send_flush(struct virtblk_req *vbr)
+{
+ unsigned int out = 0, in = 0;
+
+ vbr->flags |= VBLK_IS_FLUSH;
+ vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
+ vbr->out_hdr.sector = 0;
+ vbr->out_hdr.ioprio = 0;
+ sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
+ sg_set_buf(&vbr->sg[out + in++], &vbr->status, sizeof(vbr->status));
+
+ virtblk_add_req(vbr, out, in);
+
+ return 0;
+}
+
+static int virtblk_bio_send_data(struct virtblk_req *vbr)
+{
+ struct virtio_blk *vblk = vbr->vblk;
+ unsigned int num, out = 0, in = 0;
+ struct bio *bio = vbr->bio;
+
+ vbr->flags &= ~VBLK_IS_FLUSH;
+ vbr->out_hdr.type = 0;
+ vbr->out_hdr.sector = bio->bi_sector;
+ vbr->out_hdr.ioprio = bio_prio(bio);
+
+ sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
+
+ num = blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg + out);
+
+ sg_set_buf(&vbr->sg[num + out + in++], &vbr->status,
+ sizeof(vbr->status));
+
+ if (num) {
+ if (bio->bi_rw & REQ_WRITE) {
+ vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
+ out += num;
+ } else {
+ vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
+ in += num;
}
+ }
+
+ virtblk_add_req(vbr, out, in);
+
+ return 0;
+}
+
+static void virtblk_bio_send_data_work(struct work_struct *work)
+{
+ struct virtblk_req *vbr;
+
+ vbr = container_of(work, struct virtblk_req, work);
+
+ virtblk_bio_send_data(vbr);
+}
+
+static void virtblk_bio_send_flush_work(struct work_struct *work)
+{
+ struct virtblk_req *vbr;
+
+ vbr = container_of(work, struct virtblk_req, work);
+
+ virtblk_bio_send_flush(vbr);
+}
+
+static inline void virtblk_request_done(struct virtblk_req *vbr)
+{
+ struct virtio_blk *vblk = vbr->vblk;
+ struct request *req = vbr->req;
+ int error = virtblk_result(vbr);
+
+ if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
+ req->resid_len = vbr->in_hdr.residual;
+ req->sense_len = vbr->in_hdr.sense_len;
+ req->errors = vbr->in_hdr.errors;
+ } else if (req->cmd_type == REQ_TYPE_SPECIAL) {
+ req->errors = (error != 0);
+ }
+
+ __blk_end_request_all(req, error);
+ mempool_free(vbr, vblk->pool);
+}
+
+static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
+{
+ struct virtio_blk *vblk = vbr->vblk;
+
+ if (vbr->flags & VBLK_REQ_DATA) {
+ /* Send out the actual write data */
+ INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
+ queue_work(virtblk_wq, &vbr->work);
+ } else {
+ bio_endio(vbr->bio, virtblk_result(vbr));
+ mempool_free(vbr, vblk->pool);
+ }
+}
+
+static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
+{
+ struct virtio_blk *vblk = vbr->vblk;
- __blk_end_request_all(vbr->req, error);
+ if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
+ /* Send out a flush before ending the bio */
+ vbr->flags &= ~VBLK_REQ_DATA;
+ INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
+ queue_work(virtblk_wq, &vbr->work);
+ } else {
+ bio_endio(vbr->bio, virtblk_result(vbr));
mempool_free(vbr, vblk->pool);
}
+}
+
+static inline void virtblk_bio_done(struct virtblk_req *vbr)
+{
+ if (unlikely(vbr->flags & VBLK_IS_FLUSH))
+ virtblk_bio_flush_done(vbr);
+ else
+ virtblk_bio_data_done(vbr);
+}
+
+static void virtblk_done(struct virtqueue *vq)
+{
+ struct virtio_blk *vblk = vq->vdev->priv;
+ bool bio_done = false, req_done = false;
+ struct virtblk_req *vbr;
+ unsigned long flags;
+ unsigned int len;
+
+ spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
+ do {
+ virtqueue_disable_cb(vq);
+ while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
+ if (vbr->bio) {
+ virtblk_bio_done(vbr);
+ bio_done = true;
+ } else {
+ virtblk_request_done(vbr);
+ req_done = true;
+ }
+ }
+ } while (!virtqueue_enable_cb(vq));
/* In case queue is stopped waiting for more buffers. */
- blk_start_queue(vblk->disk->queue);
+ if (req_done)
+ blk_start_queue(vblk->disk->queue);
spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
+
+ if (bio_done)
+ wake_up(&vblk->queue_wait);
}
static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
@@ -106,13 +301,13 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
unsigned long num, out = 0, in = 0;
struct virtblk_req *vbr;
- vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
+ vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
if (!vbr)
/* When another request finishes we'll try again. */
return false;
vbr->req = req;
-
+ vbr->bio = NULL;
if (req->cmd_flags & REQ_FLUSH) {
vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
vbr->out_hdr.sector = 0;
@@ -172,7 +367,8 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
}
}
- if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) {
+ if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr,
+ GFP_ATOMIC) < 0) {
mempool_free(vbr, vblk->pool);
return false;
}
@@ -180,7 +376,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
return true;
}
-static void do_virtblk_request(struct request_queue *q)
+static void virtblk_request(struct request_queue *q)
{
struct virtio_blk *vblk = q->queuedata;
struct request *req;
@@ -203,6 +399,34 @@ static void do_virtblk_request(struct request_queue *q)
virtqueue_kick(vblk->vq);
}
+static void virtblk_make_request(struct request_queue *q, struct bio *bio)
+{
+ struct virtio_blk *vblk = q->queuedata;
+ struct virtblk_req *vbr;
+
+ BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
+
+ vbr = virtblk_alloc_req(vblk, GFP_NOIO);
+ if (!vbr) {
+ bio_endio(bio, -ENOMEM);
+ return;
+ }
+
+ vbr->bio = bio;
+ vbr->flags = 0;
+ if (bio->bi_rw & REQ_FLUSH)
+ vbr->flags |= VBLK_REQ_FLUSH;
+ if (bio->bi_rw & REQ_FUA)
+ vbr->flags |= VBLK_REQ_FUA;
+ if (bio->bi_size)
+ vbr->flags |= VBLK_REQ_DATA;
+
+ if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
+ virtblk_bio_send_flush(vbr);
+ else
+ virtblk_bio_send_data(vbr);
+}
+
/* return id (s/n) string for *disk to *id_str
*/
static int virtblk_get_id(struct gendisk *disk, char *id_str)
@@ -360,7 +584,7 @@ static int init_vq(struct virtio_blk *vblk)
int err = 0;
/* We expect one virtqueue, for output. */
- vblk->vq = virtio_find_single_vq(vblk->vdev, blk_done, "requests");
+ vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
if (IS_ERR(vblk->vq))
err = PTR_ERR(vblk->vq);
@@ -477,6 +701,8 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
struct virtio_blk *vblk;
struct request_queue *q;
int err, index;
+ int pool_size;
+
u64 cap;
u32 v, blk_size, sg_elems, opt_io_size;
u16 min_io_size;
@@ -506,10 +732,12 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
goto out_free_index;
}
+ init_waitqueue_head(&vblk->queue_wait);
vblk->vdev = vdev;
vblk->sg_elems = sg_elems;
sg_init_table(vblk->sg, vblk->sg_elems);
mutex_init(&vblk->config_lock);
+
INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
vblk->config_enable = true;
@@ -517,7 +745,10 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
if (err)
goto out_free_vblk;
- vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
+ pool_size = sizeof(struct virtblk_req);
+ if (use_bio)
+ pool_size += sizeof(struct scatterlist) * sg_elems;
+ vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
if (!vblk->pool) {
err = -ENOMEM;
goto out_free_vq;
@@ -530,12 +761,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
goto out_mempool;
}
- q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL);
+ q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
if (!q) {
err = -ENOMEM;
goto out_put_disk;
}
+ if (use_bio)
+ blk_queue_make_request(q, virtblk_make_request);
q->queuedata = vblk;
virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
@@ -620,7 +853,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
if (!err && opt_io_size)
blk_queue_io_opt(q, blk_size * opt_io_size);
-
add_disk(vblk->disk);
err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
if (err)
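Two details in the virtio_blk.c hunks above are easy to misread. First, use_bio is a plain module parameter (S_IRUGO, so settable only at load time, e.g. "modprobe virtio_blk use_bio=1"); the request path is chosen once in virtblk_probe(). Second, struct virtblk_req now ends in a flexible array member, which is why pool_size grows by sg_elems scatterlist entries when use_bio is set: the bio path keeps a per-request scatterlist instead of sharing vblk->sg under the queue lock. A standalone illustration of that sizing pattern, using stand-in types with hypothetical names (not kernel code):

#include <stdlib.h>

/* Stand-in for struct scatterlist; fields are illustrative. */
struct sg_stub {
	void *page;
	unsigned int offset, length;
};

/* Mirrors struct virtblk_req: header fields, then sg[] inline. */
struct req_stub {
	int flags;
	unsigned char status;
	struct sg_stub sg[];	/* flexible array member */
};

/* One allocation carries the header plus n sg entries, matching
 * pool_size = sizeof(struct virtblk_req)
 *           + sizeof(struct scatterlist) * sg_elems. */
static struct req_stub *req_alloc(unsigned int n)
{
	return malloc(sizeof(struct req_stub) + n * sizeof(struct sg_stub));
}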
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 060a672ebb7b..8ab9c3d4bf13 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -24,6 +24,8 @@
#include <linux/err.h>
#include <linux/freezer.h>
#include <linux/fs.h>
+#include <linux/splice.h>
+#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/poll.h>
@@ -474,26 +476,53 @@ static ssize_t send_control_msg(struct port *port, unsigned int event,
return 0;
}
+struct buffer_token {
+ union {
+ void *buf;
+ struct scatterlist *sg;
+ } u;
+ /* If sgpages == 0 then buf is used, else sg is used */
+ unsigned int sgpages;
+};
+
+static void reclaim_sg_pages(struct scatterlist *sg, unsigned int nrpages)
+{
+ int i;
+ struct page *page;
+
+ for (i = 0; i < nrpages; i++) {
+ page = sg_page(&sg[i]);
+ if (!page)
+ break;
+ put_page(page);
+ }
+ kfree(sg);
+}
+
/* Callers must take the port->outvq_lock */
static void reclaim_consumed_buffers(struct port *port)
{
- void *buf;
+ struct buffer_token *tok;
unsigned int len;
if (!port->portdev) {
/* Device has been unplugged. vqs are already gone. */
return;
}
- while ((buf = virtqueue_get_buf(port->out_vq, &len))) {
- kfree(buf);
+ while ((tok = virtqueue_get_buf(port->out_vq, &len))) {
+ if (tok->sgpages)
+ reclaim_sg_pages(tok->u.sg, tok->sgpages);
+ else
+ kfree(tok->u.buf);
+ kfree(tok);
port->outvq_full = false;
}
}
-static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
- bool nonblock)
+static ssize_t __send_to_port(struct port *port, struct scatterlist *sg,
+ int nents, size_t in_count,
+ struct buffer_token *tok, bool nonblock)
{
- struct scatterlist sg[1];
struct virtqueue *out_vq;
ssize_t ret;
unsigned long flags;
@@ -505,8 +534,7 @@ static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
reclaim_consumed_buffers(port);
- sg_init_one(sg, in_buf, in_count);
- ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf, GFP_ATOMIC);
+ ret = virtqueue_add_buf(out_vq, sg, nents, 0, tok, GFP_ATOMIC);
/* Tell Host to go! */
virtqueue_kick(out_vq);
@@ -544,6 +572,37 @@ done:
return in_count;
}
+static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
+ bool nonblock)
+{
+ struct scatterlist sg[1];
+ struct buffer_token *tok;
+
+ tok = kmalloc(sizeof(*tok), GFP_ATOMIC);
+ if (!tok)
+ return -ENOMEM;
+ tok->sgpages = 0;
+ tok->u.buf = in_buf;
+
+ sg_init_one(sg, in_buf, in_count);
+
+ return __send_to_port(port, sg, 1, in_count, tok, nonblock);
+}
+
+static ssize_t send_pages(struct port *port, struct scatterlist *sg, int nents,
+ size_t in_count, bool nonblock)
+{
+ struct buffer_token *tok;
+
+ tok = kmalloc(sizeof(*tok), GFP_ATOMIC);
+ if (!tok)
+ return -ENOMEM;
+ tok->sgpages = nents;
+ tok->u.sg = sg;
+
+ return __send_to_port(port, sg, nents, in_count, tok, nonblock);
+}
+
/*
* Give out the data that's requested from the buffer that we have
* queued up.
@@ -665,6 +724,26 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf,
return fill_readbuf(port, ubuf, count, true);
}
+static int wait_port_writable(struct port *port, bool nonblock)
+{
+ int ret;
+
+ if (will_write_block(port)) {
+ if (nonblock)
+ return -EAGAIN;
+
+ ret = wait_event_freezable(port->waitqueue,
+ !will_write_block(port));
+ if (ret < 0)
+ return ret;
+ }
+ /* Port got hot-unplugged. */
+ if (!port->guest_connected)
+ return -ENODEV;
+
+ return 0;
+}
+
static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
size_t count, loff_t *offp)
{
@@ -681,18 +760,9 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
nonblock = filp->f_flags & O_NONBLOCK;
- if (will_write_block(port)) {
- if (nonblock)
- return -EAGAIN;
-
- ret = wait_event_freezable(port->waitqueue,
- !will_write_block(port));
- if (ret < 0)
- return ret;
- }
- /* Port got hot-unplugged. */
- if (!port->guest_connected)
- return -ENODEV;
+ ret = wait_port_writable(port, nonblock);
+ if (ret < 0)
+ return ret;
count = min((size_t)(32 * 1024), count);
@@ -725,6 +795,93 @@ out:
return ret;
}
+struct sg_list {
+ unsigned int n;
+ unsigned int size;
+ size_t len;
+ struct scatterlist *sg;
+};
+
+static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+ struct splice_desc *sd)
+{
+ struct sg_list *sgl = sd->u.data;
+ unsigned int offset, len;
+
+ if (sgl->n == sgl->size)
+ return 0;
+
+ /* Try lock this page */
+ if (buf->ops->steal(pipe, buf) == 0) {
+ /* Get reference and unlock page for moving */
+ get_page(buf->page);
+ unlock_page(buf->page);
+
+ len = min(buf->len, sd->len);
+ sg_set_page(&(sgl->sg[sgl->n]), buf->page, len, buf->offset);
+ } else {
+ /* Fall back to copying a page */
+ struct page *page = alloc_page(GFP_KERNEL);
+ char *src = buf->ops->map(pipe, buf, 1);
+ char *dst;
+
+ if (!page)
+ return -ENOMEM;
+ dst = kmap(page);
+
+ offset = sd->pos & ~PAGE_MASK;
+
+ len = sd->len;
+ if (len + offset > PAGE_SIZE)
+ len = PAGE_SIZE - offset;
+
+ memcpy(dst + offset, src + buf->offset, len);
+
+ kunmap(page);
+ buf->ops->unmap(pipe, buf, src);
+
+ sg_set_page(&(sgl->sg[sgl->n]), page, len, offset);
+ }
+ sgl->n++;
+ sgl->len += len;
+
+ return len;
+}
+
+/* Faster zero-copy write by splicing */
+static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe,
+ struct file *filp, loff_t *ppos,
+ size_t len, unsigned int flags)
+{
+ struct port *port = filp->private_data;
+ struct sg_list sgl;
+ ssize_t ret;
+ struct splice_desc sd = {
+ .total_len = len,
+ .flags = flags,
+ .pos = *ppos,
+ .u.data = &sgl,
+ };
+
+ ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK);
+ if (ret < 0)
+ return ret;
+
+ sgl.n = 0;
+ sgl.len = 0;
+ sgl.size = pipe->nrbufs;
+ sgl.sg = kmalloc(sizeof(struct scatterlist) * sgl.size, GFP_KERNEL);
+ if (unlikely(!sgl.sg))
+ return -ENOMEM;
+
+ sg_init_table(sgl.sg, sgl.size);
+ ret = __splice_from_pipe(pipe, &sd, pipe_to_sg);
+ if (likely(ret > 0))
+ ret = send_pages(port, sgl.sg, sgl.n, sgl.len, true);
+
+ return ret;
+}
+
static unsigned int port_fops_poll(struct file *filp, poll_table *wait)
{
struct port *port;
@@ -856,6 +1013,7 @@ static const struct file_operations port_fops = {
.open = port_fops_open,
.read = port_fops_read,
.write = port_fops_write,
+ .splice_write = port_fops_splice_write,
.poll = port_fops_poll,
.release = port_fops_release,
.fasync = port_fops_fasync,
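From userspace, the new splice_write path is exercised with splice(2) from a pipe into the port's character device; pipe_to_sg() above then tries to steal each pipe page for zero-copy transmission and bounce-copies only when the steal fails. A hedged userspace sketch (the helper name and device path are illustrative; virtio-serial ports appear as /dev/vportNpM):

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

/* Splice up to len bytes from a pipe into a virtio-serial port,
 * driving the port_fops_splice_write() path above. */
static ssize_t port_splice_write(int pipe_rd, const char *port_path,
				 size_t len)
{
	int fd = open(port_path, O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = splice(pipe_rd, NULL, fd, NULL, len, 0);
	close(fd);
	return n;
}

For example, port_splice_write(pipefd[0], "/dev/vport0p1", 4096) after filling the pipe.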
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 9e8388efd88e..fc92ccbd71dc 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -263,6 +263,9 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
struct virtqueue *vq;
int err;
+ if (!name)
+ return NULL;
+
/* We must have this many virtqueues. */
if (index >= ldev->desc->num_vq)
return ERR_PTR(-ENOENT);
@@ -296,7 +299,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
* to 'true': the host is just a(nother) SMP CPU, so we only need inter-cpu
* barriers.
*/
- vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, vdev,
+ vq = vring_new_virtqueue(index, lvq->config.num, LGUEST_VRING_ALIGN, vdev,
true, lvq->pages, lg_notify, callback, name);
if (!vq) {
err = -ENOMEM;
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index 3541b4492f64..e7a4780e93db 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -84,6 +84,9 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
if (id >= ARRAY_SIZE(rvdev->vring))
return ERR_PTR(-EINVAL);
+ if (!name)
+ return NULL;
+
ret = rproc_alloc_vring(rvdev, id);
if (ret)
return ERR_PTR(ret);
@@ -103,7 +106,7 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
* Create the new vq, and tell virtio we're not interested in
* the 'weak' smp barriers, since we're talking with a real device.
*/
- vq = vring_new_virtqueue(len, rvring->align, vdev, false, addr,
+ vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, addr,
rproc_virtio_notify, callback, name);
if (!vq) {
dev_err(dev, "vring_new_virtqueue %s failed\n", name);
diff --git a/drivers/rpmsg/Kconfig b/drivers/rpmsg/Kconfig
index 32aead65735a..2bd911f12571 100644
--- a/drivers/rpmsg/Kconfig
+++ b/drivers/rpmsg/Kconfig
@@ -4,7 +4,6 @@ menu "Rpmsg drivers (EXPERIMENTAL)"
config RPMSG
tristate
select VIRTIO
- select VIRTIO_RING
depends on EXPERIMENTAL
endmenu
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 47cccd52aae8..7dabef624da3 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -190,6 +190,9 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
if (index >= kdev->desc->num_vq)
return ERR_PTR(-ENOENT);
+ if (!name)
+ return NULL;
+
config = kvm_vq_config(kdev->desc)+index;
err = vmem_add_mapping(config->address,
@@ -198,7 +201,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
if (err)
goto out;
- vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN,
+ vq = vring_new_virtqueue(index, config->num, KVM_S390_VIRTIO_RING_ALIGN,
vdev, true, (void *) config->address,
kvm_notify, callback, name);
if (!vq) {
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index f38b17a86c35..8d5bddb56cb1 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -1,11 +1,9 @@
-# Virtio always gets selected by whoever wants it.
config VIRTIO
tristate
-
-# Similarly the virtio ring implementation.
-config VIRTIO_RING
- tristate
- depends on VIRTIO
+ ---help---
+ This option is selected by any driver which implements the virtio
+ bus, such as CONFIG_VIRTIO_PCI, CONFIG_VIRTIO_MMIO, CONFIG_LGUEST,
+ CONFIG_RPMSG or CONFIG_S390_GUEST.
menu "Virtio drivers"
@@ -13,7 +11,6 @@ config VIRTIO_PCI
tristate "PCI driver for virtio devices (EXPERIMENTAL)"
depends on PCI && EXPERIMENTAL
select VIRTIO
- select VIRTIO_RING
---help---
This driver provides support for virtio based paravirtual device
drivers over PCI. This requires that your VMM has appropriate PCI
@@ -26,9 +23,8 @@ config VIRTIO_PCI
If unsure, say M.
config VIRTIO_BALLOON
- tristate "Virtio balloon driver (EXPERIMENTAL)"
- select VIRTIO
- select VIRTIO_RING
+ tristate "Virtio balloon driver"
+ depends on VIRTIO
---help---
This driver supports increasing and decreasing the amount
of memory within a KVM guest.
@@ -39,7 +35,6 @@ config VIRTIO_BALLOON
tristate "Platform bus driver for memory mapped virtio devices (EXPERIMENTAL)"
depends on HAS_IOMEM && EXPERIMENTAL
select VIRTIO
- select VIRTIO_RING
---help---
This driver provides support for memory mapped virtio
platform devices.
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 5a4c63cfd380..9076635697bb 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,5 +1,4 @@
-obj-$(CONFIG_VIRTIO) += virtio.o
-obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
+obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index c3b3f7f0d9d1..1e8659ca27ef 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -159,7 +159,7 @@ static int virtio_dev_remove(struct device *_d)
drv->remove(dev);
/* Driver should have reset device. */
- BUG_ON(dev->config->get_status(dev));
+ WARN_ON_ONCE(dev->config->get_status(dev));
/* Acknowledge the device's existence again. */
add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 453db0c403d8..6b1b7e184939 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -131,9 +131,6 @@ struct virtio_mmio_vq_info {
/* the number of entries in the queue */
unsigned int num;
- /* the index of the queue */
- int queue_index;
-
/* the virtual address of the ring queue */
void *queue;
@@ -225,11 +222,10 @@ static void vm_reset(struct virtio_device *vdev)
static void vm_notify(struct virtqueue *vq)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
- struct virtio_mmio_vq_info *info = vq->priv;
/* We write the queue's selector into the notification register to
* signal the other end */
- writel(info->queue_index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY);
+ writel(virtqueue_get_queue_index(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY);
}
/* Notify all virtqueues on an interrupt. */
@@ -270,6 +266,7 @@ static void vm_del_vq(struct virtqueue *vq)
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
struct virtio_mmio_vq_info *info = vq->priv;
unsigned long flags, size;
+ unsigned int index = virtqueue_get_queue_index(vq);
spin_lock_irqsave(&vm_dev->lock, flags);
list_del(&info->node);
@@ -278,7 +275,7 @@ static void vm_del_vq(struct virtqueue *vq)
vring_del_virtqueue(vq);
/* Select and deactivate the queue */
- writel(info->queue_index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
+ writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
size = PAGE_ALIGN(vring_size(info->num, VIRTIO_MMIO_VRING_ALIGN));
@@ -309,6 +306,9 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
unsigned long flags, size;
int err;
+ if (!name)
+ return NULL;
+
/* Select the queue we're interested in */
writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
@@ -324,7 +324,6 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
err = -ENOMEM;
goto error_kmalloc;
}
- info->queue_index = index;
/* Allocate pages for the queue - start with a queue as big as
* possible (limited by maximum size allowed by device), drop down
@@ -332,11 +331,21 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
* and two rings (which makes it "alignment_size * 2")
*/
info->num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX);
+
+ /* If the device reports a 0 entry queue, we won't be able to
+ * use it to perform I/O, and vring_new_virtqueue() can't create
+ * empty queues anyway, so don't bother to set up the device.
+ */
+ if (info->num == 0) {
+ err = -ENOENT;
+ goto error_alloc_pages;
+ }
+
while (1) {
size = PAGE_ALIGN(vring_size(info->num,
VIRTIO_MMIO_VRING_ALIGN));
- /* Already smallest possible allocation? */
- if (size <= VIRTIO_MMIO_VRING_ALIGN * 2) {
+ /* Did the last iter shrink the queue below minimum size? */
+ if (size < VIRTIO_MMIO_VRING_ALIGN * 2) {
err = -ENOMEM;
goto error_alloc_pages;
}
@@ -356,7 +365,7 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
/* Create the vring */
- vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN, vdev,
+ vq = vring_new_virtqueue(index, info->num, VIRTIO_MMIO_VRING_ALIGN, vdev,
true, info->queue, vm_notify, callback, name);
if (!vq) {
err = -ENOMEM;
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 2e03d416b9af..c33aea36598a 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -48,6 +48,7 @@ struct virtio_pci_device
int msix_enabled;
int intx_enabled;
struct msix_entry *msix_entries;
+ cpumask_var_t *msix_affinity_masks;
/* Name strings for interrupts. This size should be enough,
* and I'm too lazy to allocate each name separately. */
char (*msix_names)[256];
@@ -79,9 +80,6 @@ struct virtio_pci_vq_info
/* the number of entries in the queue */
int num;
- /* the index of the queue */
- int queue_index;
-
/* the virtual address of the ring queue */
void *queue;
@@ -202,11 +200,11 @@ static void vp_reset(struct virtio_device *vdev)
static void vp_notify(struct virtqueue *vq)
{
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
- struct virtio_pci_vq_info *info = vq->priv;
/* we write the queue's selector into the notification register to
* signal the other end */
- iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
+ iowrite16(virtqueue_get_queue_index(vq),
+ vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
}
/* Handle a configuration change: Tell driver if it wants to know. */
@@ -279,6 +277,10 @@ static void vp_free_vectors(struct virtio_device *vdev)
for (i = 0; i < vp_dev->msix_used_vectors; ++i)
free_irq(vp_dev->msix_entries[i].vector, vp_dev);
+ for (i = 0; i < vp_dev->msix_vectors; i++)
+ if (vp_dev->msix_affinity_masks[i])
+ free_cpumask_var(vp_dev->msix_affinity_masks[i]);
+
if (vp_dev->msix_enabled) {
/* Disable the vector used for configuration */
iowrite16(VIRTIO_MSI_NO_VECTOR,
@@ -296,6 +298,8 @@ static void vp_free_vectors(struct virtio_device *vdev)
vp_dev->msix_names = NULL;
kfree(vp_dev->msix_entries);
vp_dev->msix_entries = NULL;
+ kfree(vp_dev->msix_affinity_masks);
+ vp_dev->msix_affinity_masks = NULL;
}
static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
@@ -314,6 +318,15 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
GFP_KERNEL);
if (!vp_dev->msix_names)
goto error;
+ vp_dev->msix_affinity_masks
+ = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
+ GFP_KERNEL);
+ if (!vp_dev->msix_affinity_masks)
+ goto error;
+ for (i = 0; i < nvectors; ++i)
+ if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
+ GFP_KERNEL))
+ goto error;
for (i = 0; i < nvectors; ++i)
vp_dev->msix_entries[i].entry = i;
@@ -402,7 +415,6 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
if (!info)
return ERR_PTR(-ENOMEM);
- info->queue_index = index;
info->num = num;
info->msix_vector = msix_vec;
@@ -418,7 +430,7 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
/* create the vring */
- vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, vdev,
+ vq = vring_new_virtqueue(index, info->num, VIRTIO_PCI_VRING_ALIGN, vdev,
true, info->queue, vp_notify, callback, name);
if (!vq) {
err = -ENOMEM;
@@ -467,7 +479,8 @@ static void vp_del_vq(struct virtqueue *vq)
list_del(&info->node);
spin_unlock_irqrestore(&vp_dev->lock, flags);
- iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+ iowrite16(virtqueue_get_queue_index(vq),
+ vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
if (vp_dev->msix_enabled) {
iowrite16(VIRTIO_MSI_NO_VECTOR,
@@ -542,7 +555,10 @@ static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
vp_dev->per_vq_vectors = per_vq_vectors;
allocated_vectors = vp_dev->msix_used_vectors;
for (i = 0; i < nvqs; ++i) {
- if (!callbacks[i] || !vp_dev->msix_enabled)
+ if (!names[i]) {
+ vqs[i] = NULL;
+ continue;
+ } else if (!callbacks[i] || !vp_dev->msix_enabled)
msix_vec = VIRTIO_MSI_NO_VECTOR;
else if (vp_dev->per_vq_vectors)
msix_vec = allocated_vectors++;
@@ -609,6 +625,35 @@ static const char *vp_bus_name(struct virtio_device *vdev)
return pci_name(vp_dev->pci_dev);
}
+/* Setup the affinity for a virtqueue:
+ * - force the affinity for per vq vector
+ * - OR over all affinities for shared MSI
+ * - ignore the affinity request if we're using INTX
+ */
+static int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
+{
+ struct virtio_device *vdev = vq->vdev;
+ struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+ struct virtio_pci_vq_info *info = vq->priv;
+ struct cpumask *mask;
+ unsigned int irq;
+
+ if (!vq->callback)
+ return -EINVAL;
+
+ if (vp_dev->msix_enabled) {
+ mask = vp_dev->msix_affinity_masks[info->msix_vector];
+ irq = vp_dev->msix_entries[info->msix_vector].vector;
+ if (cpu == -1)
+ irq_set_affinity_hint(irq, NULL);
+ else {
+ cpumask_set_cpu(cpu, mask);
+ irq_set_affinity_hint(irq, mask);
+ }
+ }
+ return 0;
+}
+
static struct virtio_config_ops virtio_pci_config_ops = {
.get = vp_get,
.set = vp_set,
@@ -620,6 +665,7 @@ static struct virtio_config_ops virtio_pci_config_ops = {
.get_features = vp_get_features,
.finalize_features = vp_finalize_features,
.bus_name = vp_bus_name,
+ .set_vq_affinity = vp_set_vq_affinity,
};
static void virtio_pci_release_dev(struct device *_d)
@@ -673,8 +719,10 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,
goto out_enable_device;
vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
- if (vp_dev->ioaddr == NULL)
+ if (vp_dev->ioaddr == NULL) {
+ err = -ENOMEM;
goto out_req_regions;
+ }
pci_set_drvdata(pci_dev, vp_dev);
pci_set_master(pci_dev);
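One subtlety in vp_set_vq_affinity() above: with per-vq vectors each mask holds a single CPU, but when several virtqueues share an MSI-X vector their CPUs accumulate in the same msix_affinity_masks[] entry, which is the "OR over all affinities" case from the comment. A fragment spelling that out (the CPU numbers are illustrative, not from this diff):

/* Two vqs sharing MSI-X vector v: their CPUs accumulate in one mask,
 * so the irq affinity hint ends up covering both. */
mask = vp_dev->msix_affinity_masks[v];
cpumask_set_cpu(2, mask);		/* vq A asks for CPU 2 */
irq_set_affinity_hint(irq, mask);	/* hint = {2}          */
cpumask_set_cpu(5, mask);		/* vq B asks for CPU 5 */
irq_set_affinity_hint(irq, mask);	/* hint = {2, 5}       */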
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5aa43c3392a2..e639584b2dbd 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -106,6 +106,9 @@ struct vring_virtqueue
/* How to notify other side. FIXME: commonalize hcalls! */
void (*notify)(struct virtqueue *vq);
+ /* Index of the queue */
+ int queue_index;
+
#ifdef DEBUG
/* They're supposed to lock for us. */
unsigned int in_use;
@@ -171,6 +174,13 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
return head;
}
+int virtqueue_get_queue_index(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ return vq->queue_index;
+}
+EXPORT_SYMBOL_GPL(virtqueue_get_queue_index);
+
/**
* virtqueue_add_buf - expose buffer to other end
* @vq: the struct virtqueue we're talking about.
@@ -616,7 +626,8 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
}
EXPORT_SYMBOL_GPL(vring_interrupt);
-struct virtqueue *vring_new_virtqueue(unsigned int num,
+struct virtqueue *vring_new_virtqueue(unsigned int index,
+ unsigned int num,
unsigned int vring_align,
struct virtio_device *vdev,
bool weak_barriers,
@@ -647,6 +658,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
vq->broken = false;
vq->last_used_idx = 0;
vq->num_added = 0;
+ vq->queue_index = index;
list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG
vq->in_use = false;
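With queue_index stored in vring_virtqueue and read back through virtqueue_get_queue_index(), every transport now passes the index as the new first argument of vring_new_virtqueue(), as the lguest, remoteproc, s390, mmio and PCI hunks above all show. The call shape after this series, as a fragment with argument names taken from those callers:

vq = vring_new_virtqueue(index,		/* new: queue index       */
			 num,		/* ring entries           */
			 vring_align,	/* ring alignment         */
			 vdev,
			 weak_barriers,	/* true for VM transports */
			 pages,		/* ring memory            */
			 notify, callback, name);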