summaryrefslogtreecommitdiff
path: root/net/core/dev.c
diff options
context:
space:
mode:
authorTom Herbert <therbert@google.com>2010-11-21 13:17:27 +0000
committerDavid S. Miller <davem@davemloft.net>2010-11-24 11:44:20 -0800
commit1d24eb4815d1e0e8b451ecc546645f8ef1176d4f (patch)
tree0172e72b9452dc46c4e1043817005979cec022a7 /net/core/dev.c
parent3853b5841c01a3f492fe137afaad9c209e5162c6 (diff)
downloadlwn-1d24eb4815d1e0e8b451ecc546645f8ef1176d4f.tar.gz
lwn-1d24eb4815d1e0e8b451ecc546645f8ef1176d4f.zip
xps: Transmit Packet Steering
This patch implements transmit packet steering (XPS) for multiqueue devices. XPS selects a transmit queue during packet transmission based on configuration. This is done by mapping the CPU transmitting the packet to a queue. This is the transmit side analogue to RPS-- where RPS is selecting a CPU based on receive queue, XPS selects a queue based on the CPU (previously there was an XPS patch from Eric Dumazet, but that might more appropriately be called transmit completion steering). Each transmit queue can be associated with a number of CPUs which will use the queue to send packets. This is configured as a CPU mask on a per queue basis in: /sys/class/net/eth<n>/queues/tx-<n>/xps_cpus The mappings are stored per device in an inverted data structure that maps CPUs to queues. In the netdevice structure this is an array of num_possible_cpu structures where each structure holds and array of queue_indexes for queues which that CPU can use. The benefits of XPS are improved locality in the per queue data structures. Also, transmit completions are more likely to be done nearer to the sending thread, so this should promote locality back to the socket on free (e.g. UDP). The benefits of XPS are dependent on cache hierarchy, application load, and other factors. XPS would nominally be configured so that a queue would only be shared by CPUs which are sharing a cache, the degenerative configuration woud be that each CPU has it's own queue. Below are some benchmark results which show the potential benfit of this patch. The netperf test has 500 instances of netperf TCP_RR test with 1 byte req. and resp. bnx2x on 16 core AMD XPS (16 queues, 1 TX queue per CPU) 1234K at 100% CPU No XPS (16 queues) 996K at 100% CPU Signed-off-by: Tom Herbert <therbert@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c53
1 files changed, 50 insertions, 3 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 7b17674a29ec..c852f0038a08 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1557,12 +1557,16 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
*/
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
+ int rc;
+
if (txq < 1 || txq > dev->num_tx_queues)
return -EINVAL;
if (dev->reg_state == NETREG_REGISTERED) {
ASSERT_RTNL();
+ rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
+ txq);
if (txq < dev->real_num_tx_queues)
qdisc_reset_all_tx_gt(dev, txq);
}
@@ -2142,6 +2146,44 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
return queue_index;
}
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_RPS
+ struct xps_dev_maps *dev_maps;
+ struct xps_map *map;
+ int queue_index = -1;
+
+ rcu_read_lock();
+ dev_maps = rcu_dereference(dev->xps_maps);
+ if (dev_maps) {
+ map = rcu_dereference(
+ dev_maps->cpu_map[raw_smp_processor_id()]);
+ if (map) {
+ if (map->len == 1)
+ queue_index = map->queues[0];
+ else {
+ u32 hash;
+ if (skb->sk && skb->sk->sk_hash)
+ hash = skb->sk->sk_hash;
+ else
+ hash = (__force u16) skb->protocol ^
+ skb->rxhash;
+ hash = jhash_1word(hash, hashrnd);
+ queue_index = map->queues[
+ ((u64)hash * map->len) >> 32];
+ }
+ if (unlikely(queue_index >= dev->real_num_tx_queues))
+ queue_index = -1;
+ }
+ }
+ rcu_read_unlock();
+
+ return queue_index;
+#else
+ return -1;
+#endif
+}
+
static struct netdev_queue *dev_pick_tx(struct net_device *dev,
struct sk_buff *skb)
{
@@ -2161,7 +2203,9 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
queue_index >= dev->real_num_tx_queues) {
int old_index = queue_index;
- queue_index = skb_tx_hash(dev, skb);
+ queue_index = get_xps_queue(dev, skb);
+ if (queue_index < 0)
+ queue_index = skb_tx_hash(dev, skb);
if (queue_index != old_index && sk) {
struct dst_entry *dst =
@@ -5066,6 +5110,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
{
unsigned int count = dev->num_tx_queues;
struct netdev_queue *tx;
+ int i;
BUG_ON(count < 1);
@@ -5076,6 +5121,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
return -ENOMEM;
}
dev->_tx = tx;
+
+ for (i = 0; i < count; i++)
+ tx[i].dev = dev;
+
return 0;
}
@@ -5083,8 +5132,6 @@ static void netdev_init_one_queue(struct net_device *dev,
struct netdev_queue *queue,
void *_unused)
{
- queue->dev = dev;
-
/* Initialize queue lock */
spin_lock_init(&queue->_xmit_lock);
netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);