From 0a14842f5a3c0e88a1e59fac5c3025db39721f74 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Apr 2011 09:27:32 +0000 Subject: net: filter: Just In Time compiler for x86-64 In order to speedup packet filtering, here is an implementation of a JIT compiler for x86_64 It is disabled by default, and must be enabled by the admin. echo 1 >/proc/sys/net/core/bpf_jit_enable It uses module_alloc() and module_free() to get memory in the 2GB text kernel range since we call helpers functions from the generated code. EAX : BPF A accumulator EBX : BPF X accumulator RDI : pointer to skb (first argument given to JIT function) RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) r9d : skb->len - skb->data_len (headlen) r8 : skb->data To get a trace of generated code, use : echo 2 >/proc/sys/net/core/bpf_jit_enable Example of generated code : # tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24 flen=18 proglen=147 pass=3 image=ffffffffa00b5000 JIT code: ffffffffa00b5000: 55 48 89 e5 48 83 ec 60 48 89 5d f8 44 8b 4f 60 JIT code: ffffffffa00b5010: 44 2b 4f 64 4c 8b 87 b8 00 00 00 be 0c 00 00 00 JIT code: ffffffffa00b5020: e8 24 7b f7 e0 3d 00 08 00 00 75 28 be 1a 00 00 JIT code: ffffffffa00b5030: 00 e8 fe 7a f7 e0 24 00 3d 00 14 a8 c0 74 49 be JIT code: ffffffffa00b5040: 1e 00 00 00 e8 eb 7a f7 e0 24 00 3d 00 14 a8 c0 JIT code: ffffffffa00b5050: 74 36 eb 3b 3d 06 08 00 00 74 07 3d 35 80 00 00 JIT code: ffffffffa00b5060: 75 2d be 1c 00 00 00 e8 c8 7a f7 e0 24 00 3d 00 JIT code: ffffffffa00b5070: 14 a8 c0 74 13 be 26 00 00 00 e8 b5 7a f7 e0 24 JIT code: ffffffffa00b5080: 00 3d 00 14 a8 c0 75 07 b8 ff ff 00 00 eb 02 31 JIT code: ffffffffa00b5090: c0 c9 c3 BPF program is 144 bytes long, so native program is almost same size ;) (000) ldh [12] (001) jeq #0x800 jt 2 jf 8 (002) ld [26] (003) and #0xffffff00 (004) jeq #0xc0a81400 jt 16 jf 5 (005) ld [30] (006) and #0xffffff00 (007) jeq #0xc0a81400 jt 16 jf 17 (008) jeq #0x806 jt 10 jf 9 (009) jeq #0x8035 jt 10 jf 17 (010) ld [28] (011) and #0xffffff00 (012) jeq #0xc0a81400 jt 16 jf 13 (013) ld [38] (014) and #0xffffff00 (015) jeq #0xc0a81400 jt 16 jf 17 (016) ret #65535 (017) ret #0 Signed-off-by: Eric Dumazet Cc: Arnaldo Carvalho de Melo Cc: Ben Hutchings Cc: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d0ae90af0b40..79aafbbf430a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -391,8 +391,8 @@ struct sk_buff { __u32 rxhash; + __u16 queue_mapping; kmemcheck_bitfield_begin(flags2); - __u16 queue_mapping:16; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif -- cgit v1.2.3 From 34ea646c9f8c18fd2e4332ff3b2b509f878c56f1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 22 May 2011 18:55:10 +0200 Subject: net: add missing prefetch.h include Fixes build errors on s390 and probably other archs as well: In file included from net/ipv4/ip_forward.c:32:0: include/net/udp.h: In function 'udp_csum_outgoing': include/net/udp.h:141:2: error: implicit declaration of function 'prefetch' Signed-off-by: Heiko Carstens Signed-off-by: Linus Torvalds --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 79aafbbf430a..827681540d6f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -28,6 +28,7 @@ #include #include #include +#include #include /* Don't change this without changing skb_csum_unnecessary! */ -- cgit v1.2.3 From 0fcbe742eaac14bd5032b369c09e9d94be9058ad Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 22 May 2011 20:35:29 -0400 Subject: net: Remove prefetches from SKB list handlers. Noticed by Linus. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 827681540d6f..09901fdd73ae 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1783,7 +1783,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ - prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ + (skb != (struct sk_buff *)(queue)); \ skb = skb->next) #define skb_queue_walk_safe(queue, skb, tmp) \ @@ -1792,7 +1792,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) skb = tmp, tmp = skb->next) #define skb_queue_walk_from(queue, skb) \ - for (; prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ + for (; (skb != (struct sk_buff *)(queue)); \ skb = skb->next) #define skb_queue_walk_from_safe(queue, skb, tmp) \ @@ -1802,7 +1802,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) #define skb_queue_reverse_walk(queue, skb) \ for (skb = (queue)->prev; \ - prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ + (skb != (struct sk_buff *)(queue)); \ skb = skb->prev) #define skb_queue_reverse_walk_safe(queue, skb, tmp) \ -- cgit v1.2.3 From 67f11f4deda0818640decb19a28c537dbe5d429e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 22 May 2011 20:54:11 -0400 Subject: net: Remove linux/prefetch.h include from linux/skbuff.h No longer needed. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 09901fdd73ae..8cac356b77b2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -28,7 +28,6 @@ #include #include #include -#include #include /* Don't change this without changing skb_csum_unnecessary! */ -- cgit v1.2.3 From c4264f27e83968ddfe3f0cfe7a33adfb320e1e42 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sat, 21 May 2011 19:46:09 +0000 Subject: net: skb_trim explicitely check the linearity instead of data_len The purpose of the check on data_len is to check linearity, so use the inline helper for this. No overhead and more explicit. Signed-off-by: Emmanuel Grumbach Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8cac356b77b2..aeaad97e6815 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1442,7 +1442,7 @@ extern int ___pskb_trim(struct sk_buff *skb, unsigned int len); static inline void __skb_trim(struct sk_buff *skb, unsigned int len) { - if (unlikely(skb->data_len)) { + if (unlikely(skb_is_nonlinear(skb))) { WARN_ON(1); return; } -- cgit v1.2.3 From a1e4891fd48d298870b704c6eb48cba0da5ed6b1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 22 May 2011 16:51:43 -0700 Subject: Remove prefetch() from and "netlabel_addrlist.h" Commit e66eed651fd1 ("list: remove prefetching from regular list iterators") removed the include of prefetch.h from list.h. The skbuff list traversal still had them. Quoth David Miller: "Please just remove the prefetches. Those are modelled after list.h as I intend to eventually convert SKB list handling to "struct list_head" but we're not there yet. Therefore if we kill prefetches from list.h we should kill it from these things in skbuff.h too." Requested-by: David Miller Signed-off-by: Linus Torvalds --- include/linux/skbuff.h | 7 +++---- net/netlabel/netlabel_addrlist.h | 8 ++++---- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 827681540d6f..16c9c091555d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -28,7 +28,6 @@ #include #include #include -#include #include /* Don't change this without changing skb_csum_unnecessary! */ @@ -1783,7 +1782,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ - prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ + skb != (struct sk_buff *)(queue); \ skb = skb->next) #define skb_queue_walk_safe(queue, skb, tmp) \ @@ -1792,7 +1791,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) skb = tmp, tmp = skb->next) #define skb_queue_walk_from(queue, skb) \ - for (; prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ + for (; skb != (struct sk_buff *)(queue); \ skb = skb->next) #define skb_queue_walk_from_safe(queue, skb, tmp) \ @@ -1802,7 +1801,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) #define skb_queue_reverse_walk(queue, skb) \ for (skb = (queue)->prev; \ - prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ + skb != (struct sk_buff *)(queue); \ skb = skb->prev) #define skb_queue_reverse_walk_safe(queue, skb, tmp) \ diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h index 1c1c093cf279..2b9644e19de0 100644 --- a/net/netlabel/netlabel_addrlist.h +++ b/net/netlabel/netlabel_addrlist.h @@ -96,12 +96,12 @@ static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s, #define netlbl_af4list_foreach(iter, head) \ for (iter = __af4list_valid((head)->next, head); \ - prefetch(iter->list.next), &iter->list != (head); \ + &iter->list != (head); \ iter = __af4list_valid(iter->list.next, head)) #define netlbl_af4list_foreach_rcu(iter, head) \ for (iter = __af4list_valid_rcu((head)->next, head); \ - prefetch(iter->list.next), &iter->list != (head); \ + &iter->list != (head); \ iter = __af4list_valid_rcu(iter->list.next, head)) #define netlbl_af4list_foreach_safe(iter, tmp, head) \ @@ -163,12 +163,12 @@ static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s, #define netlbl_af6list_foreach(iter, head) \ for (iter = __af6list_valid((head)->next, head); \ - prefetch(iter->list.next), &iter->list != (head); \ + &iter->list != (head); \ iter = __af6list_valid(iter->list.next, head)) #define netlbl_af6list_foreach_rcu(iter, head) \ for (iter = __af6list_valid_rcu((head)->next, head); \ - prefetch(iter->list.next), &iter->list != (head); \ + &iter->list != (head); \ iter = __af6list_valid_rcu(iter->list.next, head)) #define netlbl_af6list_foreach_safe(iter, tmp, head) \ -- cgit v1.2.3 From 0b5c9db1b11d3175bb42b80663a9f072f801edf5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 10 Jun 2011 06:56:58 +0000 Subject: vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check Testing of VLAN_FLAG_REORDER_HDR does not belong in vlan_untag but rather in vlan_do_receive. Otherwise the vlan header will not be properly put on the packet in the case of vlan header accelleration. As we remove the check from vlan_check_reorder_header rename it vlan_reorder_header to keep the naming clean. Fix up the skb->pkt_type early so we don't look at the packet after adding the vlan tag, which guarantees we don't goof and look at the wrong field. Use a simple if statement instead of a complicated switch statement to decided that we need to increment rx_stats for a multicast packet. Hopefully at somepoint we will just declare the case where VLAN_FLAG_REORDER_HDR is cleared as unsupported and remove the code. Until then this keeps it working correctly. Signed-off-by: Eric W. Biederman Signed-off-by: Jiri Pirko Acked-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 25 ++++++++++++++++++--- include/linux/skbuff.h | 5 +++++ net/8021q/vlan_core.c | 60 +++++++++++++++++++++++++++---------------------- net/core/dev.c | 2 +- 4 files changed, 61 insertions(+), 31 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index dc01681fbb42..affa27380b72 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -225,7 +225,7 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb, } /** - * __vlan_put_tag - regular VLAN tag inserting + * vlan_insert_tag - regular VLAN tag inserting * @skb: skbuff to tag * @vlan_tci: VLAN TCI to insert * @@ -234,8 +234,10 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb, * * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. + * + * Does not change skb->protocol so this function can be used during receive. */ -static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) +static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci) { struct vlan_ethhdr *veth; @@ -255,8 +257,25 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) /* now, the TCI */ veth->h_vlan_TCI = htons(vlan_tci); - skb->protocol = htons(ETH_P_8021Q); + return skb; +} +/** + * __vlan_put_tag - regular VLAN tag inserting + * @skb: skbuff to tag + * @vlan_tci: VLAN TCI to insert + * + * Inserts the VLAN tag into @skb as part of the payload + * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. + * + * Following the skb_unshare() example, in case of error, the calling function + * doesn't have to worry about freeing the original skb. + */ +static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) +{ + skb = vlan_insert_tag(skb, vlan_tci); + if (skb) + skb->protocol = htons(ETH_P_8021Q); return skb; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e8b78ce14474..c0a4f3ab0cc0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1256,6 +1256,11 @@ static inline void skb_reserve(struct sk_buff *skb, int len) skb->tail += len; } +static inline void skb_reset_mac_len(struct sk_buff *skb) +{ + skb->mac_len = skb->network_header - skb->mac_header; +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_transport_header(const struct sk_buff *skb) { diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 41495dc2a4c9..fcc684678af6 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -23,6 +23,31 @@ bool vlan_do_receive(struct sk_buff **skbp) return false; skb->dev = vlan_dev; + if (skb->pkt_type == PACKET_OTHERHOST) { + /* Our lower layer thinks this is not local, let's make sure. + * This allows the VLAN to have a different MAC than the + * underlying device, and still route correctly. */ + if (!compare_ether_addr(eth_hdr(skb)->h_dest, + vlan_dev->dev_addr)) + skb->pkt_type = PACKET_HOST; + } + + if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) { + unsigned int offset = skb->data - skb_mac_header(skb); + + /* + * vlan_insert_tag expect skb->data pointing to mac header. + * So change skb->data before calling it and change back to + * original position later + */ + skb_push(skb, offset); + skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci); + if (!skb) + return false; + skb_pull(skb, offset + VLAN_HLEN); + skb_reset_mac_len(skb); + } + skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci); skb->vlan_tci = 0; @@ -31,22 +56,8 @@ bool vlan_do_receive(struct sk_buff **skbp) u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; rx_stats->rx_bytes += skb->len; - - switch (skb->pkt_type) { - case PACKET_BROADCAST: - break; - case PACKET_MULTICAST: + if (skb->pkt_type == PACKET_MULTICAST) rx_stats->rx_multicast++; - break; - case PACKET_OTHERHOST: - /* Our lower layer thinks this is not local, let's make sure. - * This allows the VLAN to have a different MAC than the - * underlying device, and still route correctly. */ - if (!compare_ether_addr(eth_hdr(skb)->h_dest, - vlan_dev->dev_addr)) - skb->pkt_type = PACKET_HOST; - break; - } u64_stats_update_end(&rx_stats->syncp); return true; @@ -89,18 +100,13 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, } EXPORT_SYMBOL(vlan_gro_frags); -static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) +static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) { - if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { - if (skb_cow(skb, skb_headroom(skb)) < 0) - skb = NULL; - if (skb) { - /* Lifted from Gleb's VLAN code... */ - memmove(skb->data - ETH_HLEN, - skb->data - VLAN_ETH_HLEN, 12); - skb->mac_header += VLAN_HLEN; - } - } + if (skb_cow(skb, skb_headroom(skb)) < 0) + return NULL; + memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + skb->mac_header += VLAN_HLEN; + skb_reset_mac_len(skb); return skb; } @@ -161,7 +167,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); - skb = vlan_check_reorder_header(skb); + skb = vlan_reorder_header(skb); if (unlikely(!skb)) goto err_free; diff --git a/net/core/dev.c b/net/core/dev.c index a54c9f87ddbb..9c58c1ec41a9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3114,7 +3114,7 @@ static int __netif_receive_skb(struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_transport_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; + skb_reset_mac_len(skb); pt_prev = NULL; -- cgit v1.2.3