summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Wang <jasowang@redhat.com>2018-07-20 08:15:21 +0800
committerDavid S. Miller <davem@davemloft.net>2018-07-22 09:43:31 -0700
commit4afb52c2af44ac761e829d4cd511a20b577959fa (patch)
tree7cc7288ea2250650a0ded03559ac9711c1abeed7
parentd0d869718754da534719be32f2c28b1210c3955d (diff)
downloadlwn-4afb52c2af44ac761e829d4cd511a20b577959fa.tar.gz
lwn-4afb52c2af44ac761e829d4cd511a20b577959fa.zip
vhost_net: batch update used ring for datacopy TX
Like commit e2b3b35eb989 ("vhost_net: batch used ring update in rx"), this patches implements batch used ring update for datacopy TX (zerocopy has already done some kind of batching). Testpmd transmission from guest to host (XDP_DROP on tap) shows 25.8% improvement (from ~3.1Mpps to ~3.9Mpps) on Broadwell i7-5600U CPU @ 2.60GHz machine. Netperf TCP tests does not show obvious differences. Signed-off-by: Jason Wang <jasowang@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/vhost/net.c40
1 files changed, 25 insertions, 15 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 2fd2f0e3d0f4..367d8023b54d 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -428,16 +428,31 @@ static int vhost_net_enable_vq(struct vhost_net *n,
return vhost_poll_start(poll, sock->file);
}
+static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
+{
+ struct vhost_virtqueue *vq = &nvq->vq;
+ struct vhost_dev *dev = vq->dev;
+
+ if (!nvq->done_idx)
+ return;
+
+ vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
+ nvq->done_idx = 0;
+}
+
static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
- struct vhost_virtqueue *vq,
+ struct vhost_net_virtqueue *nvq,
unsigned int *out_num, unsigned int *in_num,
bool *busyloop_intr)
{
+ struct vhost_virtqueue *vq = &nvq->vq;
unsigned long uninitialized_var(endtime);
int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
out_num, in_num, NULL, NULL);
if (r == vq->num && vq->busyloop_timeout) {
+ if (!vhost_sock_zcopy(vq->private_data))
+ vhost_net_signal_used(nvq);
preempt_disable();
endtime = busy_clock() + vq->busyloop_timeout;
while (vhost_can_busy_poll(endtime)) {
@@ -493,7 +508,8 @@ static int get_tx_bufs(struct vhost_net *net,
struct vhost_virtqueue *vq = &nvq->vq;
int ret;
- ret = vhost_net_tx_get_vq_desc(net, vq, out, in, busyloop_intr);
+ ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, busyloop_intr);
+
if (ret < 0 || ret == vq->num)
return ret;
@@ -557,6 +573,9 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
break;
}
+ vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
+ vq->heads[nvq->done_idx].len = 0;
+
total_len += len;
if (tx_can_batch(vq, total_len))
msg.msg_flags |= MSG_MORE;
@@ -573,12 +592,15 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
if (err != len)
pr_debug("Truncated TX packet: len %d != %zd\n",
err, len);
- vhost_add_used_and_signal(&net->dev, vq, head, 0);
+ if (++nvq->done_idx >= VHOST_NET_BATCH)
+ vhost_net_signal_used(nvq);
if (vhost_exceeds_weight(++sent_pkts, total_len)) {
vhost_poll_queue(&vq->poll);
break;
}
}
+
+ vhost_net_signal_used(nvq);
}
static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
@@ -741,18 +763,6 @@ static int sk_has_rx_data(struct sock *sk)
return skb_queue_empty(&sk->sk_receive_queue);
}
-static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
-{
- struct vhost_virtqueue *vq = &nvq->vq;
- struct vhost_dev *dev = vq->dev;
-
- if (!nvq->done_idx)
- return;
-
- vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
- nvq->done_idx = 0;
-}
-
static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
bool *busyloop_intr)
{