author    David S. Miller <davem@davemloft.net>  2017-01-08 20:59:03 -0500
committer David S. Miller <davem@davemloft.net>  2017-01-08 20:59:03 -0500
commit    4289e60cb056ccae4311197d8a4a798aca0f8e55 (patch)
tree      2a40cf0584931cbc96185814f8d922fd4f363f00
parent    29b84f20e2c59317db133d5dab96bbf500714227 (diff)
parent    bc31c905e946b5c55df5d2938335e78ffb3157ca (diff)
Merge branch 'tc-skb-diet'
Willem de Bruijn says:

====================
convert tc_verd to integer bitfields

The skb tc_verd field takes up two bytes but uses far fewer bits. Convert
the remaining use cases to bitfields that fit in existing holes (depending
on config options) and potentially save the two bytes in struct sk_buff.

This patchset is based on an earlier set by Florian Westphal and its
discussion (http://www.spinics.net/lists/netdev/msg329181.html).

Patches 1 and 2 are low hanging fruit: removing the last traces of data
that are no longer stored in tc_verd.

Patches 3 and 4 convert tc_verd to individual bitfields (5 bits).

Patch 5 reduces TC_AT to a single bitfield, as AT_STACK is not valid here
(unlike in the case of TC_FROM).

Patch 6 changes TC_FROM to two bitfields with clearly defined purpose.

It may be possible to reduce storage further after this initial round.
If tc_skip_classify is set only by IFB, testing skb_iif may suffice.
The L2 header pushing/popping logic can perhaps be shared with AF_PACKET,
which currently does not use pkt_type for the same purpose.

Changes:
  RFC -> v1
    - (patch 3): remove no longer needed label in tcf_action_exec
    - (patch 5): set tc_at_ingress at the same points as existing
                 SET_TC_AT calls

Tested ingress mirred + netem + ifb:

    ip link set dev ifb0 up
    tc qdisc add dev eth0 ingress
    tc filter add dev eth0 parent ffff: \
        u32 match ip dport 8000 0xffff \
        action mirred egress redirect dev ifb0
    tc qdisc add dev ifb0 root netem delay 1000ms
    nc -u -l 8000 &
    ssh $otherhost nc -u $host 8000

Tested egress mirred:

    ip link add veth1 type veth peer name veth2
    ip link set dev veth1 up
    ip link set dev veth2 up
    tcpdump -n -i veth2 udp and dst port 8000 &
    tc qdisc add dev eth0 root handle 1: prio
    tc filter add dev eth0 parent 1:0 \
        u32 match ip dport 8000 0xffff \
        action mirred egress redirect dev veth1
    tc qdisc add dev veth1 root netem delay 1000ms
    nc -u $otherhost 8000

Tested ingress mirred:

    ip link add veth1 type veth peer name veth2
    ip link add veth3 type veth peer name veth4
    ip netns add ns0
    ip netns add ns1
    for i in 1 2 3 4; do \
        NS=ns$((${i}%2)); \
        ip link set dev veth${i} netns ${NS}; \
        ip netns exec ${NS} \
            ip addr add dev veth${i} 192.168.1.${i}/24; \
        ip netns exec ${NS} \
            ip link set dev veth${i} up; \
    done

    ip netns exec ns0 tc qdisc add dev veth2 ingress
    ip netns exec ns0 \
        tc filter add dev veth2 parent ffff: \
            u32 match ip dport 8000 0xffff \
            action mirred ingress redirect dev veth4
    ip netns exec ns0 \
        tcpdump -n -i veth4 udp and dst port 8000 &
    ip netns exec ns1 \
        nc -u 192.168.1.2 8000
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
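For readers who have not worked with the old encoding, the mapping from the
tc_verd macros to the new bitfields can be summed up in a few lines. The
snippet below is a stand-alone, user-space model of that mapping (the struct
and the main() driver are invented for illustration; only the field names
come from the patch):

#include <stdio.h>

/* Hypothetical model: four one-bit flags replace the 16-bit tc_verd word.
 * Field names follow the patch; the struct itself is not struct sk_buff. */
struct model_skb {
	unsigned tc_skip_classify:1; /* was: skb->tc_verd & TC_NCLS */
	unsigned tc_at_ingress:1;    /* was: G_TC_AT(skb->tc_verd) & AT_INGRESS */
	unsigned tc_redirected:1;    /* was: G_TC_FROM(skb->tc_verd) != AT_STACK */
	unsigned tc_from_ingress:1;  /* was: G_TC_FROM(skb->tc_verd) & AT_INGRESS */
};

int main(void)
{
	struct model_skb skb = { 0 };

	/* what act_mirred now records on an ingress redirect */
	skb.tc_at_ingress = 1;
	skb.tc_redirected = 1;
	skb.tc_from_ingress = skb.tc_at_ingress;

	printf("redirected=%u from_ingress=%u\n",
	       skb.tc_redirected, skb.tc_from_ingress);
	return 0;
}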
-rw-r--r--  drivers/net/ifb.c                     | 16
-rw-r--r--  drivers/staging/octeon/ethernet-tx.c  |  5
-rw-r--r--  include/linux/skbuff.h                | 15
-rw-r--r--  include/net/sch_generic.h             | 20
-rw-r--r--  include/uapi/linux/pkt_cls.h          | 55
-rw-r--r--  net/core/dev.c                        | 22
-rw-r--r--  net/core/pktgen.c                     |  4
-rw-r--r--  net/core/skbuff.c                     |  3
-rw-r--r--  net/sched/act_api.c                   | 11
-rw-r--r--  net/sched/act_ife.c                   |  7
-rw-r--r--  net/sched/act_mirred.c                | 21
-rw-r--r--  net/sched/sch_api.c                   |  3
-rw-r--r--  net/sched/sch_netem.c                 |  2
13 files changed, 66 insertions(+), 118 deletions(-)
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 082534e187fc..312fce7302d3 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -78,10 +78,8 @@ static void ifb_ri_tasklet(unsigned long _txp)
}
while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
- u32 from = G_TC_FROM(skb->tc_verd);
-
- skb->tc_verd = 0;
- skb->tc_verd = SET_TC_NCLS(skb->tc_verd);
+ skb->tc_redirected = 0;
+ skb->tc_skip_classify = 1;
u64_stats_update_begin(&txp->tsync);
txp->tx_packets++;
@@ -101,13 +99,12 @@ static void ifb_ri_tasklet(unsigned long _txp)
rcu_read_unlock();
skb->skb_iif = txp->dev->ifindex;
- if (from & AT_EGRESS) {
+ if (!skb->tc_from_ingress) {
dev_queue_xmit(skb);
- } else if (from & AT_INGRESS) {
+ } else {
skb_pull(skb, skb->mac_len);
netif_receive_skb(skb);
- } else
- BUG();
+ }
}
if (__netif_tx_trylock(txq)) {
@@ -239,7 +236,6 @@ static void ifb_setup(struct net_device *dev)
static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ifb_dev_private *dp = netdev_priv(dev);
- u32 from = G_TC_FROM(skb->tc_verd);
struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb);
u64_stats_update_begin(&txp->rsync);
@@ -247,7 +243,7 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
txp->rx_bytes += skb->len;
u64_stats_update_end(&txp->rsync);
- if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->skb_iif) {
+ if (!skb->tc_redirected || !skb->skb_iif) {
dev_kfree_skb(skb);
dev->stats.rx_dropped++;
return NETDEV_TX_OK;
diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c
index 6b4c20872323..0b8053205091 100644
--- a/drivers/staging/octeon/ethernet-tx.c
+++ b/drivers/staging/octeon/ethernet-tx.c
@@ -23,6 +23,7 @@
#endif /* CONFIG_XFRM */
#include <linux/atomic.h>
+#include <net/sch_generic.h>
#include <asm/octeon/octeon.h>
@@ -369,9 +370,7 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
#ifdef CONFIG_NET_SCHED
skb->tc_index = 0;
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_verd = 0;
-#endif /* CONFIG_NET_CLS_ACT */
+ skb_reset_tc(skb);
#endif /* CONFIG_NET_SCHED */
#endif /* REUSE_SKBUFFS_WITHOUT_FREE */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b53c0cfd417e..3149a88de548 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -589,6 +589,10 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
* @pkt_type: Packet class
* @fclone: skbuff clone status
* @ipvs_property: skbuff is owned by ipvs
+ * @tc_skip_classify: do not classify packet. set by IFB device
+ * @tc_at_ingress: used within tc_classify to distinguish in/egress
+ * @tc_redirected: packet was redirected by a tc action
+ * @tc_from_ingress: if tc_redirected, tc_at_ingress at time of redirect
* @peeked: this packet has been seen already, so stats have been
* done for it, don't do them again
* @nf_trace: netfilter packet trace flag
@@ -598,7 +602,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
* @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
* @skb_iif: ifindex of device we arrived on
* @tc_index: Traffic control index
- * @tc_verd: traffic control verdict
* @hash: the packet hash
* @queue_mapping: Queue mapping for multiqueue devices
* @xmit_more: More SKBs are pending for this queue
@@ -749,13 +752,15 @@ struct sk_buff {
#ifdef CONFIG_NET_SWITCHDEV
__u8 offload_fwd_mark:1;
#endif
- /* 2, 4 or 5 bit hole */
+#ifdef CONFIG_NET_CLS_ACT
+ __u8 tc_skip_classify:1;
+ __u8 tc_at_ingress:1;
+ __u8 tc_redirected:1;
+ __u8 tc_from_ingress:1;
+#endif
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
-#ifdef CONFIG_NET_CLS_ACT
- __u16 tc_verd; /* traffic control verdict */
-#endif
#endif
union {
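The layout change is what makes the eventual two-byte saving possible: the
four new flags fit in spare bits next to the existing flag fields, while
tc_verd needed its own 16-bit slot. A rough, user-space illustration of that
effect (the structs below are deliberately simplified stand-ins, not the real
sk_buff layout):

#include <stdio.h>

/* Simplified "before": a flag byte plus a dedicated 16-bit tc_verd. */
struct before {
	unsigned char  flags;     /* stand-in for the existing flag bits */
	unsigned short tc_index;
	unsigned short tc_verd;
};

/* Simplified "after": the four tc bits share spare bits in the flag byte. */
struct after {
	unsigned char  flags:4;
	unsigned char  tc_skip_classify:1;
	unsigned char  tc_at_ingress:1;
	unsigned char  tc_redirected:1;
	unsigned char  tc_from_ingress:1;
	unsigned short tc_index;
};

int main(void)
{
	/* prints 6 and 4 on common ABIs: the tc_verd bytes are gone */
	printf("before=%zu after=%zu\n",
	       sizeof(struct before), sizeof(struct after));
	return 0;
}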
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 498f81b229a4..e2f426f6d62f 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -409,15 +409,33 @@ bool tcf_destroy(struct tcf_proto *tp, bool force);
void tcf_destroy_chain(struct tcf_proto __rcu **fl);
int skb_do_redirect(struct sk_buff *);
+static inline void skb_reset_tc(struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ skb->tc_redirected = 0;
+#endif
+}
+
static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ACT
- return G_TC_AT(skb->tc_verd) & AT_INGRESS;
+ return skb->tc_at_ingress;
#else
return false;
#endif
}
+static inline bool skb_skip_tc_classify(struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ if (skb->tc_skip_classify) {
+ skb->tc_skip_classify = 0;
+ return true;
+ }
+#endif
+ return false;
+}
+
/* Reset all TX qdiscs greater then index of a device. */
static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
{
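The new helpers keep the CONFIG_NET_CLS_ACT ifdefs out of the call sites, and
skb_skip_tc_classify() deliberately tests and clears the flag in one step,
matching the old TC_NCLS handling in the receive path and in tcf_action_exec().
A minimal user-space sketch of that consume-once behaviour (struct and function
names are illustrative only):

#include <assert.h>
#include <stdbool.h>

struct model_skb {
	unsigned tc_skip_classify:1;
};

/* Mirrors skb_skip_tc_classify(): true exactly once, then self-clearing. */
static bool model_skip_tc_classify(struct model_skb *skb)
{
	if (skb->tc_skip_classify) {
		skb->tc_skip_classify = 0;
		return true;
	}
	return false;
}

int main(void)
{
	struct model_skb skb = { .tc_skip_classify = 1 }; /* as set by IFB */

	assert(model_skip_tc_classify(&skb));  /* first pass skips classification */
	assert(!model_skip_tc_classify(&skb)); /* later passes classify normally */
	return 0;
}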
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index cb4bcdc58543..a081efbd61a2 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -4,61 +4,6 @@
#include <linux/types.h>
#include <linux/pkt_sched.h>
-#ifdef __KERNEL__
-/* I think i could have done better macros ; for now this is stolen from
- * some arch/mips code - jhs
-*/
-#define _TC_MAKE32(x) ((x))
-
-#define _TC_MAKEMASK1(n) (_TC_MAKE32(1) << _TC_MAKE32(n))
-#define _TC_MAKEMASK(v,n) (_TC_MAKE32((_TC_MAKE32(1)<<(v))-1) << _TC_MAKE32(n))
-#define _TC_MAKEVALUE(v,n) (_TC_MAKE32(v) << _TC_MAKE32(n))
-#define _TC_GETVALUE(v,n,m) ((_TC_MAKE32(v) & _TC_MAKE32(m)) >> _TC_MAKE32(n))
-
-/* verdict bit breakdown
- *
-bit 0: when set -> this packet has been munged already
-
-bit 1: when set -> It is ok to munge this packet
-
-bit 2,3,4,5: Reclassify counter - sort of reverse TTL - if exceeded
-assume loop
-
-bit 6,7: Where this packet was last seen
-0: Above the transmit example at the socket level
-1: on the Ingress
-2: on the Egress
-
-bit 8: when set --> Request not to classify on ingress.
-
-bits 9,10,11: redirect counter - redirect TTL. Loop avoidance
-
- *
- * */
-
-#define S_TC_FROM _TC_MAKE32(6)
-#define M_TC_FROM _TC_MAKEMASK(2,S_TC_FROM)
-#define G_TC_FROM(x) _TC_GETVALUE(x,S_TC_FROM,M_TC_FROM)
-#define V_TC_FROM(x) _TC_MAKEVALUE(x,S_TC_FROM)
-#define SET_TC_FROM(v,n) ((V_TC_FROM(n)) | (v & ~M_TC_FROM))
-#define AT_STACK 0x0
-#define AT_INGRESS 0x1
-#define AT_EGRESS 0x2
-
-#define TC_NCLS _TC_MAKEMASK1(8)
-#define SET_TC_NCLS(v) ( TC_NCLS | (v & ~TC_NCLS))
-#define CLR_TC_NCLS(v) ( v & ~TC_NCLS)
-
-#define S_TC_AT _TC_MAKE32(12)
-#define M_TC_AT _TC_MAKEMASK(2,S_TC_AT)
-#define G_TC_AT(x) _TC_GETVALUE(x,S_TC_AT,M_TC_AT)
-#define V_TC_AT(x) _TC_MAKEVALUE(x,S_TC_AT)
-#define SET_TC_AT(v,n) ((V_TC_AT(n)) | (v & ~M_TC_AT))
-
-#define MAX_REC_LOOP 4
-#define MAX_RED_LOOP 4
-#endif
-
/* Action attributes */
enum {
TCA_ACT_UNSPEC,
diff --git a/net/core/dev.c b/net/core/dev.c
index 56818f7eab2b..c143f1391117 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3153,9 +3153,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
if (!cl)
return skb;
- /* skb->tc_verd and qdisc_skb_cb(skb)->pkt_len were already set
- * earlier by the caller.
- */
+ /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
qdisc_bstats_cpu_update(cl->q, skb);
switch (tc_classify(skb, cl, &cl_res, false)) {
@@ -3320,7 +3318,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
qdisc_pkt_len_init(skb);
#ifdef CONFIG_NET_CLS_ACT
- skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
+ skb->tc_at_ingress = 0;
# ifdef CONFIG_NET_EGRESS
if (static_key_false(&egress_needed)) {
skb = sch_handle_egress(skb, &rc, dev);
@@ -3920,7 +3918,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
}
qdisc_skb_cb(skb)->pkt_len = skb->len;
- skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
+ skb->tc_at_ingress = 1;
qdisc_bstats_cpu_update(cl->q, skb);
switch (tc_classify(skb, cl, &cl_res, false)) {
@@ -4093,12 +4091,8 @@ another_round:
goto out;
}
-#ifdef CONFIG_NET_CLS_ACT
- if (skb->tc_verd & TC_NCLS) {
- skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
- goto ncls;
- }
-#endif
+ if (skb_skip_tc_classify(skb))
+ goto skip_classify;
if (pfmemalloc)
goto skip_taps;
@@ -4126,10 +4120,8 @@ skip_taps:
goto out;
}
#endif
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_verd = 0;
-ncls:
-#endif
+ skb_reset_tc(skb);
+skip_classify:
if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
goto drop;
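With these hunks, the at-ingress state is written at exactly two places:
__dev_queue_xmit() clears it on the transmit path and sch_handle_ingress()
sets it on the receive path; every other user only reads it via
skb_at_tc_ingress(). A toy model of that single-bit invariant (function and
struct names are invented for the example):

#include <assert.h>
#include <stdbool.h>

struct model_skb {
	unsigned tc_at_ingress:1;
};

/* Stands in for __dev_queue_xmit(): clear the bit before egress classification. */
static void model_queue_xmit(struct model_skb *skb)
{
	skb->tc_at_ingress = 0;
}

/* Stands in for sch_handle_ingress(): set the bit before tc_classify(). */
static void model_handle_ingress(struct model_skb *skb)
{
	skb->tc_at_ingress = 1;
}

static bool model_at_tc_ingress(const struct model_skb *skb)
{
	return skb->tc_at_ingress; /* what skb_at_tc_ingress() reports */
}

int main(void)
{
	struct model_skb skb = { 0 };

	model_handle_ingress(&skb);
	assert(model_at_tc_ingress(&skb));

	model_queue_xmit(&skb);
	assert(!model_at_tc_ingress(&skb));
	return 0;
}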
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8e69ce472236..96947f5d41e4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3439,9 +3439,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
/* skb was 'freed' by stack, so clean few
* bits and reuse it
*/
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_verd = 0; /* reset reclass/redir ttl */
-#endif
+ skb_reset_tc(skb);
} while (--burst > 0);
goto out; /* Skips xmit_mode M_START_XMIT */
} else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5a03730fbc1a..adec4bf807d8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -878,9 +878,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#endif
#ifdef CONFIG_NET_SCHED
CHECK_SKB_FIELD(tc_index);
-#ifdef CONFIG_NET_CLS_ACT
- CHECK_SKB_FIELD(tc_verd);
-#endif
#endif
}
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 2095c83ce773..f04715a57300 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -426,11 +426,9 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
{
int ret = -1, i;
- if (skb->tc_verd & TC_NCLS) {
- skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
- ret = TC_ACT_OK;
- goto exec_done;
- }
+ if (skb_skip_tc_classify(skb))
+ return TC_ACT_OK;
+
for (i = 0; i < nr_actions; i++) {
const struct tc_action *a = actions[i];
@@ -439,9 +437,8 @@ repeat:
if (ret == TC_ACT_REPEAT)
goto repeat; /* we need a ttl - JHS */
if (ret != TC_ACT_PIPE)
- goto exec_done;
+ break;
}
-exec_done:
return ret;
}
EXPORT_SYMBOL(tcf_action_exec);
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 80b848d3f096..921fb20eaa7c 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -736,12 +736,11 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
u16 metalen = ife_get_sz(skb, ife);
int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN;
unsigned int skboff = skb->dev->hard_header_len;
- u32 at = G_TC_AT(skb->tc_verd);
int new_len = skb->len + hdrm;
bool exceed_mtu = false;
int err;
- if (at & AT_EGRESS) {
+ if (!skb_at_tc_ingress(skb)) {
if (new_len > skb->dev->mtu)
exceed_mtu = true;
}
@@ -773,7 +772,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
return TC_ACT_SHOT;
}
- if (!(at & AT_EGRESS))
+ if (skb_at_tc_ingress(skb))
skb_push(skb, skb->dev->hard_header_len);
iethh = (struct ethhdr *)skb->data;
@@ -816,7 +815,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
ether_addr_copy(oethh->h_dest, iethh->h_dest);
oethh->h_proto = htons(ife->eth_type);
- if (!(at & AT_EGRESS))
+ if (skb_at_tc_ingress(skb))
skb_pull(skb, skb->dev->hard_header_len);
spin_unlock(&ife->tcf_lock);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 2d9fa6e0a1b4..84682f02b611 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -39,15 +39,15 @@ static bool tcf_mirred_is_act_redirect(int action)
return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR;
}
-static u32 tcf_mirred_act_direction(int action)
+static bool tcf_mirred_act_wants_ingress(int action)
{
switch (action) {
case TCA_EGRESS_REDIR:
case TCA_EGRESS_MIRROR:
- return AT_EGRESS;
+ return false;
case TCA_INGRESS_REDIR:
case TCA_INGRESS_MIRROR:
- return AT_INGRESS;
+ return true;
default:
BUG();
}
@@ -170,7 +170,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
int retval, err = 0;
int m_eaction;
int mac_len;
- u32 at;
tcf_lastuse_update(&m->tcf_tm);
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
@@ -191,7 +190,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
goto out;
}
- at = G_TC_AT(skb->tc_verd);
skb2 = skb_clone(skb, GFP_ATOMIC);
if (!skb2)
goto out;
@@ -200,8 +198,9 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
* and devices expect a mac header on xmit, then mac push/pull is
* needed.
*/
- if (at != tcf_mirred_act_direction(m_eaction) && m_mac_header_xmit) {
- if (at & AT_EGRESS) {
+ if (skb_at_tc_ingress(skb) != tcf_mirred_act_wants_ingress(m_eaction) &&
+ m_mac_header_xmit) {
+ if (!skb_at_tc_ingress(skb)) {
/* caught at egress, act ingress: pull mac */
mac_len = skb_network_header(skb) - skb_mac_header(skb);
skb_pull_rcsum(skb2, mac_len);
@@ -212,12 +211,14 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
}
/* mirror is always swallowed */
- if (tcf_mirred_is_act_redirect(m_eaction))
- skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
+ if (tcf_mirred_is_act_redirect(m_eaction)) {
+ skb2->tc_redirected = 1;
+ skb2->tc_from_ingress = skb2->tc_at_ingress;
+ }
skb2->skb_iif = skb->dev->ifindex;
skb2->dev = dev;
- if (tcf_mirred_act_direction(m_eaction) & AT_EGRESS)
+ if (!tcf_mirred_act_wants_ingress(m_eaction))
err = dev_queue_xmit(skb2);
else
err = netif_receive_skb(skb2);
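TC_FROM thus becomes two bits with separate jobs: tc_redirected records that a
mirred redirect happened at all, and tc_from_ingress records which direction
the packet was travelling at that moment; IFB and netem then test the pair
instead of decoding G_TC_FROM(). A compact user-space model of that
producer/consumer handshake (all names below are illustrative):

#include <assert.h>
#include <stdbool.h>

struct model_skb {
	unsigned tc_at_ingress:1;
	unsigned tc_redirected:1;
	unsigned tc_from_ingress:1;
};

/* Producer side, mirrors tcf_mirred() on a redirect action. */
static void model_mirred_redirect(struct model_skb *skb2)
{
	skb2->tc_redirected = 1;
	skb2->tc_from_ingress = skb2->tc_at_ingress;
}

/* Consumer side, mirrors the netem check: only pretend the delay came from
 * the network for packets that were redirected from ingress. */
static bool model_netem_clear_tstamp(const struct model_skb *skb)
{
	return skb->tc_redirected && skb->tc_from_ingress;
}

int main(void)
{
	struct model_skb ingress = { .tc_at_ingress = 1 };
	struct model_skb egress = { .tc_at_ingress = 0 };

	model_mirred_redirect(&ingress);
	assert(model_netem_clear_tstamp(&ingress));

	model_mirred_redirect(&egress);
	assert(!model_netem_clear_tstamp(&egress));
	return 0;
}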
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index d7b93429f0cc..ef53ede11590 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1861,6 +1861,7 @@ int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
{
__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
+ const int max_reclassify_loop = 4;
const struct tcf_proto *old_tp = tp;
int limit = 0;
@@ -1885,7 +1886,7 @@ reclassify:
return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
- if (unlikely(limit++ >= MAX_REC_LOOP)) {
+ if (unlikely(limit++ >= max_reclassify_loop)) {
net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
tp->q->ops->id, tp->prio & 0xffff,
ntohs(tp->protocol));
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index bcfadfdea8e0..c8bb62a1e744 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -626,7 +626,7 @@ deliver:
* If it's at ingress let's pretend the delay is
* from the network (tstamp will be updated).
*/
- if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
+ if (skb->tc_redirected && skb->tc_from_ingress)
skb->tstamp = 0;
#endif