diff options
Diffstat (limited to 'drivers/vhost')
-rw-r--r-- | drivers/vhost/net.c | 92 | ||||
-rw-r--r-- | drivers/vhost/scsi.c | 136 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 56 |
3 files changed, 158 insertions, 126 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 969a85960e9f..831eb4fd197d 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -276,12 +276,12 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, * of used idx. Once lower device DMA done contiguously, we will signal KVM * guest used idx. */ -static int vhost_zerocopy_signal_used(struct vhost_net *net, - struct vhost_virtqueue *vq) +static void vhost_zerocopy_signal_used(struct vhost_net *net, + struct vhost_virtqueue *vq) { struct vhost_net_virtqueue *nvq = container_of(vq, struct vhost_net_virtqueue, vq); - int i; + int i, add; int j = 0; for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) { @@ -289,15 +289,17 @@ static int vhost_zerocopy_signal_used(struct vhost_net *net, vhost_net_tx_err(net); if (VHOST_DMA_IS_DONE(vq->heads[i].len)) { vq->heads[i].len = VHOST_DMA_CLEAR_LEN; - vhost_add_used_and_signal(vq->dev, vq, - vq->heads[i].id, 0); ++j; } else break; } - if (j) - nvq->done_idx = i; - return j; + while (j) { + add = min(UIO_MAXIOV - nvq->done_idx, j); + vhost_add_used_and_signal_n(vq->dev, vq, + &vq->heads[nvq->done_idx], add); + nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV; + j -= add; + } } static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) @@ -306,6 +308,11 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) struct vhost_virtqueue *vq = ubufs->vq; int cnt = atomic_read(&ubufs->kref.refcount); + /* set len to mark this desc buffers done DMA */ + vq->heads[ubuf->desc].len = success ? + VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; + vhost_net_ubuf_put(ubufs); + /* * Trigger polling thread if guest stopped submitting new buffers: * in this case, the refcount after decrement will eventually reach 1 @@ -316,10 +323,6 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) */ if (cnt <= 2 || !(cnt % 16)) vhost_poll_queue(&vq->poll); - /* set len to mark this desc buffers done DMA */ - vq->heads[ubuf->desc].len = success ? - VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; - vhost_net_ubuf_put(ubufs); } /* Expects to be always run from workqueue - which acts as @@ -360,6 +363,13 @@ static void handle_tx(struct vhost_net *net) if (zcopy) vhost_zerocopy_signal_used(net, vq); + /* If more outstanding DMAs, queue the work. + * Handle upend_idx wrap around + */ + if (unlikely((nvq->upend_idx + vq->num - VHOST_MAX_PEND) + % UIO_MAXIOV == nvq->done_idx)) + break; + head = vhost_get_vq_desc(&net->dev, vq, vq->iov, ARRAY_SIZE(vq->iov), &out, &in, @@ -369,17 +379,6 @@ static void handle_tx(struct vhost_net *net) break; /* Nothing new? Wait for eventfd to tell us they refilled. */ if (head == vq->num) { - int num_pends; - - /* If more outstanding DMAs, queue the work. - * Handle upend_idx wrap around - */ - num_pends = likely(nvq->upend_idx >= nvq->done_idx) ? - (nvq->upend_idx - nvq->done_idx) : - (nvq->upend_idx + UIO_MAXIOV - - nvq->done_idx); - if (unlikely(num_pends > VHOST_MAX_PEND)) - break; if (unlikely(vhost_enable_notify(&net->dev, vq))) { vhost_disable_notify(&net->dev, vq); continue; @@ -402,43 +401,36 @@ static void handle_tx(struct vhost_net *net) iov_length(nvq->hdr, s), hdr_size); break; } - zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN || - nvq->upend_idx != nvq->done_idx); + + zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN + && (nvq->upend_idx + 1) % UIO_MAXIOV != + nvq->done_idx + && vhost_net_tx_select_zcopy(net); /* use msg_control to pass vhost zerocopy ubuf info to skb */ if (zcopy_used) { + struct ubuf_info *ubuf; + ubuf = nvq->ubuf_info + nvq->upend_idx; + vq->heads[nvq->upend_idx].id = head; - if (!vhost_net_tx_select_zcopy(net) || - len < VHOST_GOODCOPY_LEN) { - /* copy don't need to wait for DMA done */ - vq->heads[nvq->upend_idx].len = - VHOST_DMA_DONE_LEN; - msg.msg_control = NULL; - msg.msg_controllen = 0; - ubufs = NULL; - } else { - struct ubuf_info *ubuf; - ubuf = nvq->ubuf_info + nvq->upend_idx; - - vq->heads[nvq->upend_idx].len = - VHOST_DMA_IN_PROGRESS; - ubuf->callback = vhost_zerocopy_callback; - ubuf->ctx = nvq->ubufs; - ubuf->desc = nvq->upend_idx; - msg.msg_control = ubuf; - msg.msg_controllen = sizeof(ubuf); - ubufs = nvq->ubufs; - kref_get(&ubufs->kref); - } + vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS; + ubuf->callback = vhost_zerocopy_callback; + ubuf->ctx = nvq->ubufs; + ubuf->desc = nvq->upend_idx; + msg.msg_control = ubuf; + msg.msg_controllen = sizeof(ubuf); + ubufs = nvq->ubufs; + kref_get(&ubufs->kref); nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV; - } else + } else { msg.msg_control = NULL; + ubufs = NULL; + } /* TODO: Check specific error and bomb out unless ENOBUFS? */ err = sock->ops->sendmsg(NULL, sock, &msg, len); if (unlikely(err < 0)) { if (zcopy_used) { - if (ubufs) - vhost_net_ubuf_put(ubufs); + vhost_net_ubuf_put(ubufs); nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) % UIO_MAXIOV; } diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 0c27c7df1b09..4b79a1f2f901 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1,12 +1,12 @@ /******************************************************************************* * Vhost kernel TCM fabric driver for virtio SCSI initiators * - * (C) Copyright 2010-2012 RisingTide Systems LLC. + * (C) Copyright 2010-2013 Datera, Inc. * (C) Copyright 2010-2012 IBM Corp. * * Licensed to the Linux Foundation under the General Public License (GPL) version 2. * - * Authors: Nicholas A. Bellinger <nab@risingtidesystems.com> + * Authors: Nicholas A. Bellinger <nab@daterainc.com> * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> * * This program is free software; you can redistribute it and/or modify @@ -48,12 +48,16 @@ #include <linux/virtio_scsi.h> #include <linux/llist.h> #include <linux/bitmap.h> +#include <linux/percpu_ida.h> #include "vhost.h" #define TCM_VHOST_VERSION "v0.1" #define TCM_VHOST_NAMELEN 256 #define TCM_VHOST_MAX_CDB_SIZE 32 +#define TCM_VHOST_DEFAULT_TAGS 256 +#define TCM_VHOST_PREALLOC_SGLS 2048 +#define TCM_VHOST_PREALLOC_PAGES 2048 struct vhost_scsi_inflight { /* Wait for the flush operation to finish */ @@ -79,6 +83,7 @@ struct tcm_vhost_cmd { u32 tvc_lun; /* Pointer to the SGL formatted memory from virtio-scsi */ struct scatterlist *tvc_sgl; + struct page **tvc_upages; /* Pointer to response */ struct virtio_scsi_cmd_resp __user *tvc_resp; /* Pointer to vhost_scsi for our device */ @@ -450,17 +455,16 @@ static void tcm_vhost_release_cmd(struct se_cmd *se_cmd) { struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd, struct tcm_vhost_cmd, tvc_se_cmd); + struct se_session *se_sess = se_cmd->se_sess; if (tv_cmd->tvc_sgl_count) { u32 i; for (i = 0; i < tv_cmd->tvc_sgl_count; i++) put_page(sg_page(&tv_cmd->tvc_sgl[i])); - - kfree(tv_cmd->tvc_sgl); } tcm_vhost_put_inflight(tv_cmd->inflight); - kfree(tv_cmd); + percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag); } static int tcm_vhost_shutdown_session(struct se_session *se_sess) @@ -704,7 +708,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) } static struct tcm_vhost_cmd * -vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq, +vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct tcm_vhost_tpg *tpg, struct virtio_scsi_cmd_req *v_req, u32 exp_data_len, @@ -712,18 +716,27 @@ vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq, { struct tcm_vhost_cmd *cmd; struct tcm_vhost_nexus *tv_nexus; + struct se_session *se_sess; + struct scatterlist *sg; + struct page **pages; + int tag; tv_nexus = tpg->tpg_nexus; if (!tv_nexus) { pr_err("Unable to locate active struct tcm_vhost_nexus\n"); return ERR_PTR(-EIO); } + se_sess = tv_nexus->tvn_se_sess; - cmd = kzalloc(sizeof(struct tcm_vhost_cmd), GFP_ATOMIC); - if (!cmd) { - pr_err("Unable to allocate struct tcm_vhost_cmd\n"); - return ERR_PTR(-ENOMEM); - } + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_KERNEL); + cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[tag]; + sg = cmd->tvc_sgl; + pages = cmd->tvc_upages; + memset(cmd, 0, sizeof(struct tcm_vhost_cmd)); + + cmd->tvc_sgl = sg; + cmd->tvc_upages = pages; + cmd->tvc_se_cmd.map_tag = tag; cmd->tvc_tag = v_req->tag; cmd->tvc_task_attr = v_req->task_attr; cmd->tvc_exp_data_len = exp_data_len; @@ -740,7 +753,8 @@ vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq, * Returns the number of scatterlist entries used or -errno on error. */ static int -vhost_scsi_map_to_sgl(struct scatterlist *sgl, +vhost_scsi_map_to_sgl(struct tcm_vhost_cmd *tv_cmd, + struct scatterlist *sgl, unsigned int sgl_count, struct iovec *iov, int write) @@ -752,13 +766,25 @@ vhost_scsi_map_to_sgl(struct scatterlist *sgl, struct page **pages; int ret, i; + if (sgl_count > TCM_VHOST_PREALLOC_SGLS) { + pr_err("vhost_scsi_map_to_sgl() psgl_count: %u greater than" + " preallocated TCM_VHOST_PREALLOC_SGLS: %u\n", + sgl_count, TCM_VHOST_PREALLOC_SGLS); + return -ENOBUFS; + } + pages_nr = iov_num_pages(iov); if (pages_nr > sgl_count) return -ENOBUFS; - pages = kmalloc(pages_nr * sizeof(struct page *), GFP_KERNEL); - if (!pages) - return -ENOMEM; + if (pages_nr > TCM_VHOST_PREALLOC_PAGES) { + pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than" + " preallocated TCM_VHOST_PREALLOC_PAGES: %u\n", + pages_nr, TCM_VHOST_PREALLOC_PAGES); + return -ENOBUFS; + } + + pages = tv_cmd->tvc_upages; ret = get_user_pages_fast((unsigned long)ptr, pages_nr, write, pages); /* No pages were pinned */ @@ -783,7 +809,6 @@ vhost_scsi_map_to_sgl(struct scatterlist *sgl, } out: - kfree(pages); return ret; } @@ -807,24 +832,20 @@ vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd, /* TODO overflow checking */ - sg = kmalloc(sizeof(cmd->tvc_sgl[0]) * sgl_count, GFP_ATOMIC); - if (!sg) - return -ENOMEM; - pr_debug("%s sg %p sgl_count %u is_err %d\n", __func__, - sg, sgl_count, !sg); + sg = cmd->tvc_sgl; + pr_debug("%s sg %p sgl_count %u\n", __func__, sg, sgl_count); sg_init_table(sg, sgl_count); - cmd->tvc_sgl = sg; cmd->tvc_sgl_count = sgl_count; pr_debug("Mapping %u iovecs for %u pages\n", niov, sgl_count); for (i = 0; i < niov; i++) { - ret = vhost_scsi_map_to_sgl(sg, sgl_count, &iov[i], write); + ret = vhost_scsi_map_to_sgl(cmd, sg, sgl_count, &iov[i], + write); if (ret < 0) { for (i = 0; i < cmd->tvc_sgl_count; i++) put_page(sg_page(&cmd->tvc_sgl[i])); - kfree(cmd->tvc_sgl); - cmd->tvc_sgl = NULL; + cmd->tvc_sgl_count = 0; return ret; } @@ -989,10 +1010,10 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) for (i = 0; i < data_num; i++) exp_data_len += vq->iov[data_first + i].iov_len; - cmd = vhost_scsi_allocate_cmd(vq, tpg, &v_req, - exp_data_len, data_direction); + cmd = vhost_scsi_get_tag(vq, tpg, &v_req, + exp_data_len, data_direction); if (IS_ERR(cmd)) { - vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n", + vq_err(vq, "vhost_scsi_get_tag failed %ld\n", PTR_ERR(cmd)); goto err_cmd; } @@ -1654,11 +1675,31 @@ static void tcm_vhost_drop_nodeacl(struct se_node_acl *se_acl) kfree(nacl); } +static void tcm_vhost_free_cmd_map_res(struct tcm_vhost_nexus *nexus, + struct se_session *se_sess) +{ + struct tcm_vhost_cmd *tv_cmd; + unsigned int i; + + if (!se_sess->sess_cmd_map) + return; + + for (i = 0; i < TCM_VHOST_DEFAULT_TAGS; i++) { + tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i]; + + kfree(tv_cmd->tvc_sgl); + kfree(tv_cmd->tvc_upages); + } +} + static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, const char *name) { struct se_portal_group *se_tpg; + struct se_session *se_sess; struct tcm_vhost_nexus *tv_nexus; + struct tcm_vhost_cmd *tv_cmd; + unsigned int i; mutex_lock(&tpg->tv_tpg_mutex); if (tpg->tpg_nexus) { @@ -1675,14 +1716,37 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, return -ENOMEM; } /* - * Initialize the struct se_session pointer + * Initialize the struct se_session pointer and setup tagpool + * for struct tcm_vhost_cmd descriptors */ - tv_nexus->tvn_se_sess = transport_init_session(); + tv_nexus->tvn_se_sess = transport_init_session_tags( + TCM_VHOST_DEFAULT_TAGS, + sizeof(struct tcm_vhost_cmd)); if (IS_ERR(tv_nexus->tvn_se_sess)) { mutex_unlock(&tpg->tv_tpg_mutex); kfree(tv_nexus); return -ENOMEM; } + se_sess = tv_nexus->tvn_se_sess; + for (i = 0; i < TCM_VHOST_DEFAULT_TAGS; i++) { + tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i]; + + tv_cmd->tvc_sgl = kzalloc(sizeof(struct scatterlist) * + TCM_VHOST_PREALLOC_SGLS, GFP_KERNEL); + if (!tv_cmd->tvc_sgl) { + mutex_unlock(&tpg->tv_tpg_mutex); + pr_err("Unable to allocate tv_cmd->tvc_sgl\n"); + goto out; + } + + tv_cmd->tvc_upages = kzalloc(sizeof(struct page *) * + TCM_VHOST_PREALLOC_PAGES, GFP_KERNEL); + if (!tv_cmd->tvc_upages) { + mutex_unlock(&tpg->tv_tpg_mutex); + pr_err("Unable to allocate tv_cmd->tvc_upages\n"); + goto out; + } + } /* * Since we are running in 'demo mode' this call with generate a * struct se_node_acl for the tcm_vhost struct se_portal_group with @@ -1694,9 +1758,7 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, mutex_unlock(&tpg->tv_tpg_mutex); pr_debug("core_tpg_check_initiator_node_acl() failed" " for %s\n", name); - transport_free_session(tv_nexus->tvn_se_sess); - kfree(tv_nexus); - return -ENOMEM; + goto out; } /* * Now register the TCM vhost virtual I_T Nexus as active with the @@ -1708,6 +1770,12 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, mutex_unlock(&tpg->tv_tpg_mutex); return 0; + +out: + tcm_vhost_free_cmd_map_res(tv_nexus, se_sess); + transport_free_session(se_sess); + kfree(tv_nexus); + return -ENOMEM; } static int tcm_vhost_drop_nexus(struct tcm_vhost_tpg *tpg) @@ -1747,6 +1815,8 @@ static int tcm_vhost_drop_nexus(struct tcm_vhost_tpg *tpg) pr_debug("TCM_vhost_ConfigFS: Removing I_T Nexus to emulated" " %s Initiator Port: %s\n", tcm_vhost_dump_proto_id(tpg->tport), tv_nexus->tvn_se_sess->se_node_acl->initiatorname); + + tcm_vhost_free_cmd_map_res(tv_nexus, se_sess); /* * Release the SCSI I_T Nexus to the emulated vhost Target Port */ diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index e58cf0001cee..9a9502a4aa50 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -13,7 +13,7 @@ #include <linux/eventfd.h> #include <linux/vhost.h> -#include <linux/socket.h> /* memcpy_fromiovec */ +#include <linux/uio.h> #include <linux/mm.h> #include <linux/mmu_context.h> #include <linux/miscdevice.h> @@ -1332,48 +1332,9 @@ EXPORT_SYMBOL_GPL(vhost_discard_vq_desc); * want to notify the guest, using eventfd. */ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) { - struct vring_used_elem __user *used; + struct vring_used_elem heads = { head, len }; - /* The virtqueue contains a ring of used buffers. Get a pointer to the - * next entry in that used ring. */ - used = &vq->used->ring[vq->last_used_idx % vq->num]; - if (__put_user(head, &used->id)) { - vq_err(vq, "Failed to write used id"); - return -EFAULT; - } - if (__put_user(len, &used->len)) { - vq_err(vq, "Failed to write used len"); - return -EFAULT; - } - /* Make sure buffer is written before we update index. */ - smp_wmb(); - if (__put_user(vq->last_used_idx + 1, &vq->used->idx)) { - vq_err(vq, "Failed to increment used idx"); - return -EFAULT; - } - if (unlikely(vq->log_used)) { - /* Make sure data is seen before log. */ - smp_wmb(); - /* Log used ring entry write. */ - log_write(vq->log_base, - vq->log_addr + - ((void __user *)used - (void __user *)vq->used), - sizeof *used); - /* Log used index update. */ - log_write(vq->log_base, - vq->log_addr + offsetof(struct vring_used, idx), - sizeof vq->used->idx); - if (vq->log_ctx) - eventfd_signal(vq->log_ctx, 1); - } - vq->last_used_idx++; - /* If the driver never bothers to signal in a very long while, - * used index might wrap around. If that happens, invalidate - * signalled_used index we stored. TODO: make sure driver - * signals at least once in 2^16 and remove this. */ - if (unlikely(vq->last_used_idx == vq->signalled_used)) - vq->signalled_used_valid = false; - return 0; + return vhost_add_used_n(vq, &heads, 1); } EXPORT_SYMBOL_GPL(vhost_add_used); @@ -1387,7 +1348,16 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, start = vq->last_used_idx % vq->num; used = vq->used->ring + start; - if (__copy_to_user(used, heads, count * sizeof *used)) { + if (count == 1) { + if (__put_user(heads[0].id, &used->id)) { + vq_err(vq, "Failed to write used id"); + return -EFAULT; + } + if (__put_user(heads[0].len, &used->len)) { + vq_err(vq, "Failed to write used len"); + return -EFAULT; + } + } else if (__copy_to_user(used, heads, count * sizeof *used)) { vq_err(vq, "Failed to write used"); return -EFAULT; } |