diff options
author | Paul Durrant <Paul.Durrant@citrix.com> | 2016-05-13 09:37:27 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-05-16 13:35:56 -0400 |
commit | 40d8abdee806d496a60ee607a6d01b1cd7fabaf0 (patch) | |
tree | 7a4bd1027e08d3a2a37a678ab7f3c58cc14b0be5 /drivers/net/xen-netback | |
parent | 4e15ee2cb46fed730fe6f0195a86d44e5aeef129 (diff) | |
download | lwn-40d8abdee806d496a60ee607a6d01b1cd7fabaf0.tar.gz lwn-40d8abdee806d496a60ee607a6d01b1cd7fabaf0.zip |
xen-netback: add control protocol implementation
My recent patch to include/xen/interface/io/netif.h defines a new shared
ring (in addition to the rx and tx rings) for passing control messages
from a VM frontend driver to a backend driver.
A previous patch added the necessary boilerplate for mapping the control
ring from the frontend, should it be created. This patch adds
implementations for each of the defined protocol messages.
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/xen-netback')
-rw-r--r-- | drivers/net/xen-netback/Makefile | 2 | ||||
-rw-r--r-- | drivers/net/xen-netback/common.h | 46 | ||||
-rw-r--r-- | drivers/net/xen-netback/hash.c | 384 | ||||
-rw-r--r-- | drivers/net/xen-netback/interface.c | 24 | ||||
-rw-r--r-- | drivers/net/xen-netback/netback.c | 49 |
5 files changed, 502 insertions, 3 deletions
diff --git a/drivers/net/xen-netback/Makefile b/drivers/net/xen-netback/Makefile index e346e8125ef5..11e02be9db1a 100644 --- a/drivers/net/xen-netback/Makefile +++ b/drivers/net/xen-netback/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o -xen-netback-y := netback.o xenbus.o interface.o +xen-netback-y := netback.o xenbus.o interface.o hash.o diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 093a12abf71f..84d6cbdd11b2 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -220,6 +220,35 @@ struct xenvif_mcast_addr { #define XEN_NETBK_MCAST_MAX 64 +#define XEN_NETBK_MAX_HASH_KEY_SIZE 40 +#define XEN_NETBK_MAX_HASH_MAPPING_SIZE 128 +#define XEN_NETBK_HASH_TAG_SIZE 40 + +struct xenvif_hash_cache_entry { + struct list_head link; + struct rcu_head rcu; + u8 tag[XEN_NETBK_HASH_TAG_SIZE]; + unsigned int len; + u32 val; + int seq; +}; + +struct xenvif_hash_cache { + spinlock_t lock; + struct list_head list; + unsigned int count; + atomic_t seq; +}; + +struct xenvif_hash { + unsigned int alg; + u32 flags; + u8 key[XEN_NETBK_MAX_HASH_KEY_SIZE]; + u32 mapping[XEN_NETBK_MAX_HASH_MAPPING_SIZE]; + unsigned int size; + struct xenvif_hash_cache cache; +}; + struct xenvif { /* Unique identifier for this interface. */ domid_t domid; @@ -251,6 +280,8 @@ struct xenvif { unsigned int num_queues; /* active queues, resource allocated */ unsigned int stalled_queues; + struct xenvif_hash hash; + struct xenbus_watch credit_watch; struct xenbus_watch mcast_ctrl_watch; @@ -353,6 +384,7 @@ extern bool separate_tx_rx_irq; extern unsigned int rx_drain_timeout_msecs; extern unsigned int rx_stall_timeout_msecs; extern unsigned int xenvif_max_queues; +extern unsigned int xenvif_hash_cache_size; #ifdef CONFIG_DEBUG_FS extern struct dentry *xen_netback_dbg_root; @@ -366,4 +398,18 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue); bool xenvif_mcast_match(struct xenvif *vif, const u8 *addr); void xenvif_mcast_addr_list_free(struct xenvif *vif); +/* Hash */ +void xenvif_init_hash(struct xenvif *vif); +void xenvif_deinit_hash(struct xenvif *vif); + +u32 xenvif_set_hash_alg(struct xenvif *vif, u32 alg); +u32 xenvif_get_hash_flags(struct xenvif *vif, u32 *flags); +u32 xenvif_set_hash_flags(struct xenvif *vif, u32 flags); +u32 xenvif_set_hash_key(struct xenvif *vif, u32 gref, u32 len); +u32 xenvif_set_hash_mapping_size(struct xenvif *vif, u32 size); +u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, + u32 off); + +void xenvif_set_skb_hash(struct xenvif *vif, struct sk_buff *skb); + #endif /* __XEN_NETBACK__COMMON_H__ */ diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c new file mode 100644 index 000000000000..392e3929ae84 --- /dev/null +++ b/drivers/net/xen-netback/hash.c @@ -0,0 +1,384 @@ +/* + * Copyright (c) 2016 Citrix Systems Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Softare Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define XEN_NETIF_DEFINE_TOEPLITZ + +#include "common.h" +#include <linux/vmalloc.h> +#include <linux/rculist.h> + +static void xenvif_del_hash(struct rcu_head *rcu) +{ + struct xenvif_hash_cache_entry *entry; + + entry = container_of(rcu, struct xenvif_hash_cache_entry, rcu); + + kfree(entry); +} + +static void xenvif_add_hash(struct xenvif *vif, const u8 *tag, + unsigned int len, u32 val) +{ + struct xenvif_hash_cache_entry *new, *entry, *oldest; + unsigned long flags; + bool found; + + new = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!new) + return; + + memcpy(new->tag, tag, len); + new->len = len; + new->val = val; + + spin_lock_irqsave(&vif->hash.cache.lock, flags); + + found = false; + oldest = NULL; + list_for_each_entry_rcu(entry, &vif->hash.cache.list, link) { + /* Make sure we don't add duplicate entries */ + if (entry->len == len && + memcmp(entry->tag, tag, len) == 0) + found = true; + if (!oldest || entry->seq < oldest->seq) + oldest = entry; + } + + if (!found) { + new->seq = atomic_inc_return(&vif->hash.cache.seq); + list_add_rcu(&new->link, &vif->hash.cache.list); + + if (++vif->hash.cache.count > xenvif_hash_cache_size) { + list_del_rcu(&oldest->link); + vif->hash.cache.count--; + call_rcu(&oldest->rcu, xenvif_del_hash); + } + } + + spin_unlock_irqrestore(&vif->hash.cache.lock, flags); + + if (found) + kfree(new); +} + +static u32 xenvif_new_hash(struct xenvif *vif, const u8 *data, + unsigned int len) +{ + u32 val; + + val = xen_netif_toeplitz_hash(vif->hash.key, + sizeof(vif->hash.key), + data, len); + + if (xenvif_hash_cache_size != 0) + xenvif_add_hash(vif, data, len, val); + + return val; +} + +static void xenvif_flush_hash(struct xenvif *vif) +{ + struct xenvif_hash_cache_entry *entry; + unsigned long flags; + + if (xenvif_hash_cache_size == 0) + return; + + spin_lock_irqsave(&vif->hash.cache.lock, flags); + + list_for_each_entry_rcu(entry, &vif->hash.cache.list, link) { + list_del_rcu(&entry->link); + vif->hash.cache.count--; + call_rcu(&entry->rcu, xenvif_del_hash); + } + + spin_unlock_irqrestore(&vif->hash.cache.lock, flags); +} + +static u32 xenvif_find_hash(struct xenvif *vif, const u8 *data, + unsigned int len) +{ + struct xenvif_hash_cache_entry *entry; + u32 val; + bool found; + + if (len >= XEN_NETBK_HASH_TAG_SIZE) + return 0; + + if (xenvif_hash_cache_size == 0) + return xenvif_new_hash(vif, data, len); + + rcu_read_lock(); + + found = false; + + list_for_each_entry_rcu(entry, &vif->hash.cache.list, link) { + if (entry->len == len && + memcmp(entry->tag, data, len) == 0) { + val = entry->val; + entry->seq = atomic_inc_return(&vif->hash.cache.seq); + found = true; + break; + } + } + + rcu_read_unlock(); + + if (!found) + val = xenvif_new_hash(vif, data, len); + + return val; +} + +void xenvif_set_skb_hash(struct xenvif *vif, struct sk_buff *skb) +{ + struct flow_keys flow; + u32 hash = 0; + enum pkt_hash_types type = PKT_HASH_TYPE_NONE; + u32 flags = vif->hash.flags; + bool has_tcp_hdr; + + /* Quick rejection test: If the network protocol doesn't + * correspond to any enabled hash type then there's no point + * in parsing the packet header. + */ + switch (skb->protocol) { + case htons(ETH_P_IP): + if (flags & (XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP | + XEN_NETIF_CTRL_HASH_TYPE_IPV4)) + break; + + goto done; + + case htons(ETH_P_IPV6): + if (flags & (XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP | + XEN_NETIF_CTRL_HASH_TYPE_IPV6)) + break; + + goto done; + + default: + goto done; + } + + memset(&flow, 0, sizeof(flow)); + if (!skb_flow_dissect_flow_keys(skb, &flow, 0)) + goto done; + + has_tcp_hdr = (flow.basic.ip_proto == IPPROTO_TCP) && + !(flow.control.flags & FLOW_DIS_IS_FRAGMENT); + + switch (skb->protocol) { + case htons(ETH_P_IP): + if (has_tcp_hdr && + (flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP)) { + u8 data[12]; + + memcpy(&data[0], &flow.addrs.v4addrs.src, 4); + memcpy(&data[4], &flow.addrs.v4addrs.dst, 4); + memcpy(&data[8], &flow.ports.src, 2); + memcpy(&data[10], &flow.ports.dst, 2); + + hash = xenvif_find_hash(vif, data, sizeof(data)); + type = PKT_HASH_TYPE_L4; + } else if (flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4) { + u8 data[8]; + + memcpy(&data[0], &flow.addrs.v4addrs.src, 4); + memcpy(&data[4], &flow.addrs.v4addrs.dst, 4); + + hash = xenvif_find_hash(vif, data, sizeof(data)); + type = PKT_HASH_TYPE_L3; + } + + break; + + case htons(ETH_P_IPV6): + if (has_tcp_hdr && + (flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP)) { + u8 data[36]; + + memcpy(&data[0], &flow.addrs.v6addrs.src, 16); + memcpy(&data[16], &flow.addrs.v6addrs.dst, 16); + memcpy(&data[32], &flow.ports.src, 2); + memcpy(&data[34], &flow.ports.dst, 2); + + hash = xenvif_find_hash(vif, data, sizeof(data)); + type = PKT_HASH_TYPE_L4; + } else if (flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6) { + u8 data[32]; + + memcpy(&data[0], &flow.addrs.v6addrs.src, 16); + memcpy(&data[16], &flow.addrs.v6addrs.dst, 16); + + hash = xenvif_find_hash(vif, data, sizeof(data)); + type = PKT_HASH_TYPE_L3; + } + + break; + } + +done: + if (type == PKT_HASH_TYPE_NONE) + skb_clear_hash(skb); + else + __skb_set_sw_hash(skb, hash, type == PKT_HASH_TYPE_L4); +} + +u32 xenvif_set_hash_alg(struct xenvif *vif, u32 alg) +{ + switch (alg) { + case XEN_NETIF_CTRL_HASH_ALGORITHM_NONE: + case XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ: + break; + + default: + return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + } + + vif->hash.alg = alg; + + return XEN_NETIF_CTRL_STATUS_SUCCESS; +} + +u32 xenvif_get_hash_flags(struct xenvif *vif, u32 *flags) +{ + if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) + return XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED; + + *flags = XEN_NETIF_CTRL_HASH_TYPE_IPV4 | + XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP | + XEN_NETIF_CTRL_HASH_TYPE_IPV6 | + XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP; + + return XEN_NETIF_CTRL_STATUS_SUCCESS; +} + +u32 xenvif_set_hash_flags(struct xenvif *vif, u32 flags) +{ + if (flags & ~(XEN_NETIF_CTRL_HASH_TYPE_IPV4 | + XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP | + XEN_NETIF_CTRL_HASH_TYPE_IPV6 | + XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP)) + return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + + if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) + return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + + vif->hash.flags = flags; + + return XEN_NETIF_CTRL_STATUS_SUCCESS; +} + +u32 xenvif_set_hash_key(struct xenvif *vif, u32 gref, u32 len) +{ + u8 *key = vif->hash.key; + struct gnttab_copy copy_op = { + .source.u.ref = gref, + .source.domid = vif->domid, + .dest.u.gmfn = virt_to_gfn(key), + .dest.domid = DOMID_SELF, + .dest.offset = xen_offset_in_page(key), + .len = len, + .flags = GNTCOPY_source_gref + }; + + if (len > XEN_NETBK_MAX_HASH_KEY_SIZE) + return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + + if (len != 0) { + gnttab_batch_copy(©_op, 1); + + if (copy_op.status != GNTST_okay) + return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + } + + /* Clear any remaining key octets */ + if (len < XEN_NETBK_MAX_HASH_KEY_SIZE) + memset(key + len, 0, XEN_NETBK_MAX_HASH_KEY_SIZE - len); + + xenvif_flush_hash(vif); + + return XEN_NETIF_CTRL_STATUS_SUCCESS; +} + +u32 xenvif_set_hash_mapping_size(struct xenvif *vif, u32 size) +{ + if (size > XEN_NETBK_MAX_HASH_MAPPING_SIZE) + return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + + vif->hash.size = size; + memset(vif->hash.mapping, 0, sizeof(u32) * size); + + return XEN_NETIF_CTRL_STATUS_SUCCESS; +} + +u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, + u32 off) +{ + u32 *mapping = &vif->hash.mapping[off]; + struct gnttab_copy copy_op = { + .source.u.ref = gref, + .source.domid = vif->domid, + .dest.u.gmfn = virt_to_gfn(mapping), + .dest.domid = DOMID_SELF, + .dest.offset = xen_offset_in_page(mapping), + .len = len * sizeof(u32), + .flags = GNTCOPY_source_gref + }; + + if ((off + len > vif->hash.size) || copy_op.len > XEN_PAGE_SIZE) + return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + + while (len-- != 0) + if (mapping[off++] >= vif->num_queues) + return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + + if (len != 0) { + gnttab_batch_copy(©_op, 1); + + if (copy_op.status != GNTST_okay) + return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + } + + return XEN_NETIF_CTRL_STATUS_SUCCESS; +} + +void xenvif_init_hash(struct xenvif *vif) +{ + if (xenvif_hash_cache_size == 0) + return; + + spin_lock_init(&vif->hash.cache.lock); + INIT_LIST_HEAD(&vif->hash.cache.list); +} + +void xenvif_deinit_hash(struct xenvif *vif) +{ + xenvif_flush_hash(vif); +} diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 78a10d2af101..5a39cdbc217c 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -151,6 +151,24 @@ void xenvif_wake_queue(struct xenvif_queue *queue) netif_tx_wake_queue(netdev_get_tx_queue(dev, id)); } +static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, + select_queue_fallback_t fallback) +{ + struct xenvif *vif = netdev_priv(dev); + unsigned int size = vif->hash.size; + + if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) + return fallback(dev, skb) % dev->real_num_tx_queues; + + xenvif_set_skb_hash(vif, skb); + + if (size == 0) + return skb_get_hash_raw(skb) % dev->real_num_tx_queues; + + return vif->hash.mapping[skb_get_hash_raw(skb) % size]; +} + static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); @@ -395,6 +413,7 @@ static const struct ethtool_ops xenvif_ethtool_ops = { }; static const struct net_device_ops xenvif_netdev_ops = { + .ndo_select_queue = xenvif_select_queue, .ndo_start_xmit = xenvif_start_xmit, .ndo_get_stats = xenvif_get_stats, .ndo_open = xenvif_open, @@ -563,6 +582,8 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, vif->ctrl_irq = err; + xenvif_init_hash(vif); + task = kthread_create(xenvif_ctrl_kthread, (void *)vif, "%s-control", dev->name); if (IS_ERR(task)) { @@ -579,6 +600,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, return 0; err_deinit: + xenvif_deinit_hash(vif); unbind_from_irqhandler(vif->ctrl_irq, vif); vif->ctrl_irq = 0; @@ -749,6 +771,8 @@ void xenvif_disconnect_ctrl(struct xenvif *vif) vif->ctrl_task = NULL; } + xenvif_deinit_hash(vif); + if (vif->ctrl_irq) { unbind_from_irqhandler(vif->ctrl_irq, vif); vif->ctrl_irq = 0; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index ff22b6daa077..1916ab332d60 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -89,6 +89,11 @@ module_param(fatal_skb_slots, uint, 0444); */ #define XEN_NETBACK_TX_COPY_LEN 128 +/* This is the maximum number of flows in the hash cache. */ +#define XENVIF_HASH_CACHE_SIZE_DEFAULT 64 +unsigned int xenvif_hash_cache_size = XENVIF_HASH_CACHE_SIZE_DEFAULT; +module_param_named(hash_cache_size, xenvif_hash_cache_size, uint, 0644); +MODULE_PARM_DESC(hash_cache_size, "Number of flows in the hash cache"); static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, u8 status); @@ -2192,8 +2197,48 @@ static void push_ctrl_response(struct xenvif *vif) static void process_ctrl_request(struct xenvif *vif, const struct xen_netif_ctrl_request *req) { - make_ctrl_response(vif, req, XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED, - 0); + u32 status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED; + u32 data = 0; + + switch (req->type) { + case XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM: + status = xenvif_set_hash_alg(vif, req->data[0]); + break; + + case XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS: + status = xenvif_get_hash_flags(vif, &data); + break; + + case XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS: + status = xenvif_set_hash_flags(vif, req->data[0]); + break; + + case XEN_NETIF_CTRL_TYPE_SET_HASH_KEY: + status = xenvif_set_hash_key(vif, req->data[0], + req->data[1]); + break; + + case XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE: + status = XEN_NETIF_CTRL_STATUS_SUCCESS; + data = XEN_NETBK_MAX_HASH_MAPPING_SIZE; + break; + + case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE: + status = xenvif_set_hash_mapping_size(vif, + req->data[0]); + break; + + case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING: + status = xenvif_set_hash_mapping(vif, req->data[0], + req->data[1], + req->data[2]); + break; + + default: + break; + } + + make_ctrl_response(vif, req, status, data); push_ctrl_response(vif); } |