diff options
author | David S. Miller <davem@davemloft.net> | 2015-03-09 15:58:21 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-03-09 15:58:21 -0400 |
commit | 5428aef81157768f1052b116e0cc8abf88ff3e36 (patch) | |
tree | a4fe8e39c5986b59ae50d3a6fefe46c309e2444e /net/bridge | |
parent | 26c459a8072f2bb0680081205376e1371c114b12 (diff) | |
parent | e5de75bf88858f5b3ab11e2504b86ec059f03102 (diff) | |
download | lwn-5428aef81157768f1052b116e0cc8abf88ff3e36.tar.gz lwn-5428aef81157768f1052b116e0cc8abf88ff3e36.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:
====================
Netfilter updates for net-next
The following patchset contains Netfilter updates for your net-next
tree. Basically, improvements for the packet rejection infrastructure,
deprecation of CLUSTERIP, cleanups for nf_tables and some untangling for
br_netfilter. More specifically they are:
1) Send packet to reset flow if checksum is valid, from Florian Westphal.
2) Fix nf_tables reject bridge from the input chain, also from Florian.
3) Deprecate the CLUSTERIP target, the cluster match supersedes it in
functionality and it's known to have problems.
4) A couple of cleanups for nf_tables rule tracing infrastructure, from
Patrick McHardy.
5) Another cleanup to place transaction declarations at the bottom of
nf_tables.h, also from Patrick.
6) Consolidate Kconfig dependencies wrt. NF_TABLES.
7) Limit table names to 32 bytes in nf_tables.
8) mac header copying in bridge netfilter is already required when
calling ip_fragment(), from Florian Westphal.
9) move nf_bridge_update_protocol() to br_netfilter.c, also from
Florian.
10) Small refactor in br_netfilter in the transmission path, again from
Florian.
11) Move br_nf_pre_routing_finish_bridge_slow() to br_netfilter.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/bridge')
-rw-r--r-- | net/bridge/br_device.c | 5 | ||||
-rw-r--r-- | net/bridge/br_forward.c | 4 | ||||
-rw-r--r-- | net/bridge/br_netfilter.c | 78 | ||||
-rw-r--r-- | net/bridge/br_private.h | 5 | ||||
-rw-r--r-- | net/bridge/netfilter/nft_reject_bridge.c | 84 |
5 files changed, 147 insertions, 29 deletions
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ffd379db5938..294cbcc49263 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -36,13 +36,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) u16 vid = 0; rcu_read_lock(); -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { - br_nf_pre_routing_finish_bridge_slow(skb); + if (br_nf_prerouting_finish_bridge(skb)) { rcu_read_unlock(); return NETDEV_TX_OK; } -#endif u64_stats_update_begin(&brstats->syncp); brstats->tx_packets++; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 1238fabff874..3304a5442331 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -37,9 +37,7 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct sk_buff *skb) { - /* ip_fragment doesn't copy the MAC header */ - if (nf_bridge_maybe_copy_header(skb) || - !is_skb_forwardable(skb->dev, skb)) { + if (!is_skb_forwardable(skb->dev, skb)) { kfree_skb(skb); } else { skb_push(skb, ETH_HLEN); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 0ee453fad3de..a8361c7cdf81 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -239,6 +239,14 @@ drop: return -1; } +static void nf_bridge_update_protocol(struct sk_buff *skb) +{ + if (skb->nf_bridge->mask & BRNF_8021Q) + skb->protocol = htons(ETH_P_8021Q); + else if (skb->nf_bridge->mask & BRNF_PPPoE) + skb->protocol = htons(ETH_P_PPP_SES); +} + /* PF_BRIDGE/PRE_ROUTING *********************************************/ /* Undo the changes made for ip6tables PREROUTING and continue the * bridge PRE_ROUTING hook. */ @@ -764,23 +772,53 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, } #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +static bool nf_bridge_copy_header(struct sk_buff *skb) +{ + int err; + unsigned int header_size; + + nf_bridge_update_protocol(skb); + header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); + err = skb_cow_head(skb, header_size); + if (err) + return false; + + skb_copy_to_linear_data_offset(skb, -header_size, + skb->nf_bridge->data, header_size); + __skb_push(skb, nf_bridge_encap_header_len(skb)); + return true; +} + +static int br_nf_push_frag_xmit(struct sk_buff *skb) +{ + if (!nf_bridge_copy_header(skb)) { + kfree_skb(skb); + return 0; + } + + return br_dev_queue_push_xmit(skb); +} + static int br_nf_dev_queue_xmit(struct sk_buff *skb) { int ret; int frag_max_size; + unsigned int mtu_reserved; + if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) + return br_dev_queue_push_xmit(skb); + + mtu_reserved = nf_bridge_mtu_reduction(skb); /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ - if (skb->protocol == htons(ETH_P_IP) && - skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && - !skb_is_gso(skb)) { + if (skb->len + mtu_reserved > skb->dev->mtu) { frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; if (br_parse_ip_options(skb)) /* Drop invalid packet */ return NF_DROP; IPCB(skb)->frag_max_size = frag_max_size; - ret = ip_fragment(skb, br_dev_queue_push_xmit); + ret = ip_fragment(skb, br_nf_push_frag_xmit); } else ret = br_dev_queue_push_xmit(skb); @@ -854,6 +892,38 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, return NF_ACCEPT; } +/* This is called when br_netfilter has called into iptables/netfilter, + * and DNAT has taken place on a bridge-forwarded packet. + * + * neigh->output has created a new MAC header, with local br0 MAC + * as saddr. + * + * This restores the original MAC saddr of the bridged packet + * before invoking bridge forward logic to transmit the packet. + */ +static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = skb->nf_bridge; + + skb_pull(skb, ETH_HLEN); + nf_bridge->mask &= ~BRNF_BRIDGED_DNAT; + + skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), + skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); + skb->dev = nf_bridge->physindev; + br_handle_frame_finish(skb); +} + +int br_nf_prerouting_finish_bridge(struct sk_buff *skb) +{ + if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { + br_nf_pre_routing_finish_bridge_slow(skb); + return 1; + } + return 0; +} +EXPORT_SYMBOL_GPL(br_nf_prerouting_finish_bridge); + void br_netfilter_enable(void) { } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c32e279c62f8..f0a0438dbd6d 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -765,10 +765,15 @@ static inline int br_vlan_enabled(struct net_bridge *br) /* br_netfilter.c */ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) +int br_nf_prerouting_finish_bridge(struct sk_buff *skb); int br_nf_core_init(void); void br_nf_core_fini(void); void br_netfilter_rtable_init(struct net_bridge *); #else +static inline int br_nf_prerouting_finish_bridge(struct sk_buff *skb) +{ + return 0; +} static inline int br_nf_core_init(void) { return 0; } static inline void br_nf_core_fini(void) {} #define br_netfilter_rtable_init(x) diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 3244aead0926..5c6c96585acd 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -21,6 +21,7 @@ #include <net/ip.h> #include <net/ip6_checksum.h> #include <linux/netfilter_bridge.h> +#include <linux/netfilter_ipv6.h> #include "../br_private.h" static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, @@ -36,7 +37,12 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, skb_pull(nskb, ETH_HLEN); } -static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook) +/* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT) + * or the bridge port (NF_BRIDGE PREROUTING). + */ +static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, + const struct net_device *dev, + int hook) { struct sk_buff *nskb; struct iphdr *niph; @@ -65,11 +71,12 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook) nft_reject_br_push_etherhdr(oldskb, nskb); - br_deliver(br_port_get_rcu(oldskb->dev), nskb); + br_deliver(br_port_get_rcu(dev), nskb); } -static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, - u8 code) +static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code) { struct sk_buff *nskb; struct iphdr *niph; @@ -77,8 +84,9 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, unsigned int len; void *payload; __wsum csum; + u8 proto; - if (!nft_bridge_iphdr_validate(oldskb)) + if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb)) return; /* IP header checks: fragment. */ @@ -91,7 +99,17 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, if (!pskb_may_pull(oldskb, len)) return; - if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), 0)) + if (pskb_trim_rcsum(oldskb, htons(ip_hdr(oldskb)->tot_len))) + return; + + if (ip_hdr(oldskb)->protocol == IPPROTO_TCP || + ip_hdr(oldskb)->protocol == IPPROTO_UDP) + proto = ip_hdr(oldskb)->protocol; + else + proto = 0; + + if (!skb_csum_unnecessary(oldskb) && + nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto)) return; nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) + @@ -120,11 +138,13 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, nft_reject_br_push_etherhdr(oldskb, nskb); - br_deliver(br_port_get_rcu(oldskb->dev), nskb); + br_deliver(br_port_get_rcu(dev), nskb); } static void nft_reject_br_send_v6_tcp_reset(struct net *net, - struct sk_buff *oldskb, int hook) + struct sk_buff *oldskb, + const struct net_device *dev, + int hook) { struct sk_buff *nskb; const struct tcphdr *oth; @@ -152,12 +172,37 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net, nft_reject_br_push_etherhdr(oldskb, nskb); - br_deliver(br_port_get_rcu(oldskb->dev), nskb); + br_deliver(br_port_get_rcu(dev), nskb); +} + +static bool reject6_br_csum_ok(struct sk_buff *skb, int hook) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int thoff; + __be16 fo; + u8 proto = ip6h->nexthdr; + + if (skb->csum_bad) + return false; + + if (skb_csum_unnecessary(skb)) + return true; + + if (ip6h->payload_len && + pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) + return false; + + thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); + if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) + return false; + + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; } static void nft_reject_br_send_v6_unreach(struct net *net, - struct sk_buff *oldskb, int hook, - u8 code) + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code) { struct sk_buff *nskb; struct ipv6hdr *nip6h; @@ -176,6 +221,9 @@ static void nft_reject_br_send_v6_unreach(struct net *net, if (!pskb_may_pull(oldskb, len)) return; + if (!reject6_br_csum_ok(oldskb, hook)) + return; + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmp6hdr) + LL_MAX_HEADER + len, GFP_ATOMIC); if (!nskb) @@ -205,7 +253,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net, nft_reject_br_push_etherhdr(oldskb, nskb); - br_deliver(br_port_get_rcu(oldskb->dev), nskb); + br_deliver(br_port_get_rcu(dev), nskb); } static void nft_reject_bridge_eval(const struct nft_expr *expr, @@ -224,16 +272,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IP): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v4_unreach(pkt->skb, + nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, pkt->ops->hooknum, priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v4_tcp_reset(pkt->skb, + nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in, pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v4_unreach(pkt->skb, + nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, pkt->ops->hooknum, nft_reject_icmp_code(priv->icmp_code)); break; @@ -242,16 +290,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IPV6): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v6_unreach(net, pkt->skb, + nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, pkt->ops->hooknum, priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v6_tcp_reset(net, pkt->skb, + nft_reject_br_send_v6_tcp_reset(net, pkt->skb, pkt->in, pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v6_unreach(net, pkt->skb, + nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, pkt->ops->hooknum, nft_reject_icmpv6_code(priv->icmp_code)); break; |