summary | refs | log | tree | commit | diff
path: root/net/core/dev.c
diff options
context:
space:
mode:
author Daniel Borkmann <daniel@iogearbox.net> 2015-05-09 22:51:32 +0200
committer David S. Miller <davem@davemloft.net> 2015-05-11 11:10:35 -0400
commit d2788d34885d4ce5ba17a8996fd95d28942e574e (patch)
tree 8d57eceb0329c9f00fd1b00cc85ae497658219a6 /net/core/dev.c
parent c9e99fd078ef7fdcd9ee4f5a4cfdbece319587af (diff)
download lwn-d2788d34885d4ce5ba17a8996fd95d28942e574e.tar.gz
lwn-d2788d34885d4ce5ba17a8996fd95d28942e574e.zip
net: sched: further simplify handle_ing
Ingress qdisc has no other purpose than calling into tc_classify() that executes attached classifier(s) and action(s). It has a 1:1 relationship to dev->ingress_queue. After having commit 087c1a601ad7 ("net: sched: run ingress qdisc without locks") removed the central ingress lock, one major contention point is gone.

The extra indirection layers, however, are not necessary for calling into ingress qdisc. pktgen calling locally into netif_receive_skb() with a dummy u32, single CPU result on a Supermicro X10SLM-F, Xeon E3-1240: before ~21.1 Mpps, after patch ~22.9 Mpps.

We can redirect the private classifier list to the netdev directly, without changing any classifier API bits (!) and execute on that from handle_ing() side. The __QDISC_STATE_DEACTIVATE test can be removed, ingress qdisc doesn't have a queue and thus dev_deactivate_queue() is also not applicable, ingress_cl_list provides similar behaviour. In other words, ingress qdisc acts like TCQ_F_BUILTIN qdisc.

One next possible step is the removal of the dev's ingress (dummy) netdev_queue, and to only have the list member in the netdevice itself.

Note, the filter chain is RCU protected and individual filter elements are being kfree'd by sched subsystem after RCU grace period. RCU read lock is being held by __netif_receive_skb_core().

Joint work with Alexei Starovoitov.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- net/core/dev.c 30
1 files changed, 18 insertions, 12 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 8a757464bfa2..e5f77c40bbd1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3525,31 +3525,37 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
struct packet_type **pt_prev,
int *ret, struct net_device *orig_dev)
{
- struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
- struct Qdisc *q;
+ struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
+ struct tcf_result cl_res;
/* If there's at least one ingress present somewhere (so
* we get here via enabled static key), remaining devices
* that are not configured with an ingress qdisc will bail
- * out w/o the rcu_dereference().
+ * out here.
*/
- if (!rxq || (q = rcu_dereference(rxq->qdisc)) == &noop_qdisc)
+ if (!cl)
return skb;
-
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
}
+ qdisc_bstats_update_cpu(cl->q, skb);
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
- if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
- switch (qdisc_enqueue_root(skb, q)) {
- case TC_ACT_SHOT:
- case TC_ACT_STOLEN:
- kfree_skb(skb);
- return NULL;
- }
+ switch (tc_classify(skb, cl, &cl_res)) {
+ case TC_ACT_OK:
+ case TC_ACT_RECLASSIFY:
+ skb->tc_index = TC_H_MIN(cl_res.classid);
+ break;
+ case TC_ACT_SHOT:
+ qdisc_qstats_drop_cpu(cl->q);
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ kfree_skb(skb);
+ return NULL;
+ default:
+ break;
}
return skb;