diff options
author | Aya Levin <ayal@nvidia.com> | 2021-01-07 15:50:18 +0200 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2021-01-09 14:06:32 -0800 |
commit | b210de4f8c97d57de051e805686248ec4c6cfc52 (patch) | |
tree | ba7e41b91053d811d14426cc279b27ddbda866db /net | |
parent | a2bc221b972db91e4be1970e776e98f16aa87904 (diff) | |
download | lwn-b210de4f8c97d57de051e805686248ec4c6cfc52.tar.gz lwn-b210de4f8c97d57de051e805686248ec4c6cfc52.zip |
net: ipv6: Validate GSO SKB before finish IPv6 processing
There are cases where GSO segment's length exceeds the egress MTU:
- Forwarding of a TCP GRO skb, when DF flag is not set.
- Forwarding of an skb that arrived on a virtualisation interface
(virtio-net/vhost/tap) with TSO/GSO size set by other network
stack.
- Local GSO skb transmitted on an NETIF_F_TSO tunnel stacked over an
interface with a smaller MTU.
- Arriving GRO skb (or GSO skb in a virtualised environment) that is
bridged to a NETIF_F_TSO tunnel stacked over an interface with an
insufficient MTU.
If so:
- Consume the SKB and its segments.
- Issue an ICMP packet with 'Packet Too Big' message containing the
MTU, allowing the source host to reduce its Path MTU appropriately.
Note: These cases are handled in the same manner in IPv4 output finish.
This patch aligns the IPv6 behavior with that of IPv4.
Fixes: 9e50849054a4 ("netfilter: ipv6: move POSTROUTING invocation before fragmentation")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://lore.kernel.org/r/1610027418-30438-1-git-send-email-ayal@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv6/ip6_output.c | 41 |
1 files changed, 40 insertions, 1 deletions
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 749ad72386b2..077d43af8226 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -125,8 +125,43 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
 	return -EINVAL;
 }
 
+static int
+ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
+				    struct sk_buff *skb, unsigned int mtu)
+{
+	struct sk_buff *segs, *nskb;
+	netdev_features_t features;
+	int ret = 0;
+
+	/* Please see corresponding comment in ip_finish_output_gso
+	 * describing the cases where GSO segment length exceeds the
+	 * egress MTU.
+	 */
+	features = netif_skb_features(skb);
+	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+	if (IS_ERR_OR_NULL(segs)) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+
+	consume_skb(skb);
+
+	skb_list_walk_safe(segs, segs, nskb) {
+		int err;
+
+		skb_mark_not_on_list(segs);
+		err = ip6_fragment(net, sk, segs, ip6_finish_output2);
+		if (err && ret == 0)
+			ret = err;
+	}
+
+	return ret;
+}
+
 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
+	unsigned int mtu;
+
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 	/* Policy lookup after SNAT yielded a new policy */
 	if (skb_dst(skb)->xfrm) {
@@ -135,7 +170,11 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff
 	}
 #endif
 
-	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
+	mtu = ip6_skb_dst_mtu(skb);
+	if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
+		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
+
+	if ((skb->len > mtu && !skb_is_gso(skb)) ||
 	    dst_allfrag(skb_dst(skb)) ||
 	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
 		return ip6_fragment(net, sk, skb, ip6_finish_output2);