diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-11-01 16:33:20 -1000 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-11-01 16:33:20 -1000 |
commit | ff269e2cd5adce4ae14f883fc9c8803bc43ee1e9 (patch) | |
tree | 7d1a3570156874fc7359bbc70fa23f61ac175058 /net | |
parent | 05bf73aa27ba89474763cea7b9cd2626eda61e01 (diff) | |
parent | f2fbb908112311423b09cd0d2b4978f174b99585 (diff) | |
download | lwn-ff269e2cd5adce4ae14f883fc9c8803bc43ee1e9.tar.gz lwn-ff269e2cd5adce4ae14f883fc9c8803bc43ee1e9.zip |
Merge tag 'net-next-6.7-followup' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull more networking updates from Jakub Kicinski:
- Support GRO decapsulation for IPsec ESP in UDP
- Add a handful of MODULE_DESCRIPTION()s
- Drop a questionable alignment check in TCP AO to avoid a
  build issue after changes in the crypto tree
* tag 'net-next-6.7-followup' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next:
net: tcp: remove call to obsolete crypto_ahash_alignmask()
net: fill in MODULE_DESCRIPTION()s under drivers/net/
net: fill in MODULE_DESCRIPTION()s under net/802*
net: fill in MODULE_DESCRIPTION()s under net/core
net: fill in MODULE_DESCRIPTION()s in kuba@'s modules
xfrm: policy: fix layer 4 flowi decoding
xfrm Fix use after free in __xfrm6_udp_encap_rcv.
xfrm: policy: replace session decode with flow dissector
xfrm: move mark and oif flowi decode into common code
xfrm: pass struct net to xfrm_decode_session wrappers
xfrm: Support GRO for IPv6 ESP in UDP encapsulation
xfrm: Support GRO for IPv4 ESP in UDP encapsulation
xfrm: Use the XFRM_GRO to indicate a GRO call on input
xfrm: Annotate struct xfrm_sec_ctx with __counted_by
xfrm: Remove unused function declarations
Diffstat (limited to 'net')
-rw-r--r-- | net/802/fddi.c | 1 | ||||
-rw-r--r-- | net/802/garp.c | 1 | ||||
-rw-r--r-- | net/802/mrp.c | 1 | ||||
-rw-r--r-- | net/802/p8022.c | 1 | ||||
-rw-r--r-- | net/802/psnap.c | 1 | ||||
-rw-r--r-- | net/802/stp.c | 1 | ||||
-rw-r--r-- | net/8021q/vlan.c | 1 | ||||
-rw-r--r-- | net/core/dev_addr_lists_test.c | 1 | ||||
-rw-r--r-- | net/core/selftests.c | 1 | ||||
-rw-r--r-- | net/ipv4/esp4_offload.c | 6 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_vti.c | 4 | ||||
-rw-r--r-- | net/ipv4/netfilter.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_ao.c | 6 | ||||
-rw-r--r-- | net/ipv4/udp.c | 16 | ||||
-rw-r--r-- | net/ipv4/xfrm4_input.c | 95 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 1 | ||||
-rw-r--r-- | net/ipv6/esp6_offload.c | 10 | ||||
-rw-r--r-- | net/ipv6/icmp.c | 2 | ||||
-rw-r--r-- | net/ipv6/ip6_vti.c | 4 | ||||
-rw-r--r-- | net/ipv6/netfilter.c | 2 | ||||
-rw-r--r-- | net/ipv6/xfrm6_input.c | 103 | ||||
-rw-r--r-- | net/netfilter/nf_nat_proto.c | 2 | ||||
-rw-r--r-- | net/xfrm/xfrm_input.c | 6 | ||||
-rw-r--r-- | net/xfrm/xfrm_interface_core.c | 4 | ||||
-rw-r--r-- | net/xfrm/xfrm_policy.c | 299 |
26 files changed, 336 insertions, 237 deletions
diff --git a/net/802/fddi.c b/net/802/fddi.c index 7533ce26ba5f..888379ae35ec 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -175,4 +175,5 @@ struct net_device *alloc_fddidev(int sizeof_priv) } EXPORT_SYMBOL(alloc_fddidev); +MODULE_DESCRIPTION("Core routines for FDDI network devices"); MODULE_LICENSE("GPL"); diff --git a/net/802/garp.c b/net/802/garp.c index ab24b21fbb49..6a743d004301 100644 --- a/net/802/garp.c +++ b/net/802/garp.c @@ -21,6 +21,7 @@ static unsigned int garp_join_time __read_mostly = 200; module_param(garp_join_time, uint, 0644); MODULE_PARM_DESC(garp_join_time, "Join time in ms (default 200ms)"); +MODULE_DESCRIPTION("IEEE 802.1D Generic Attribute Registration Protocol (GARP)"); MODULE_LICENSE("GPL"); static const struct garp_state_trans { diff --git a/net/802/mrp.c b/net/802/mrp.c index eafc21ecc287..3154d7409493 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -26,6 +26,7 @@ static unsigned int mrp_periodic_time __read_mostly = 1000; module_param(mrp_periodic_time, uint, 0644); MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)"); +MODULE_DESCRIPTION("IEEE 802.1Q Multiple Registration Protocol (MRP)"); MODULE_LICENSE("GPL"); static const u8 diff --git a/net/802/p8022.c b/net/802/p8022.c index 79c23173116c..78c25168d7c9 100644 --- a/net/802/p8022.c +++ b/net/802/p8022.c @@ -60,4 +60,5 @@ void unregister_8022_client(struct datalink_proto *proto) EXPORT_SYMBOL(register_8022_client); EXPORT_SYMBOL(unregister_8022_client); +MODULE_DESCRIPTION("Support for 802.2 demultiplexing off Ethernet"); MODULE_LICENSE("GPL"); diff --git a/net/802/psnap.c b/net/802/psnap.c index 1406bfdbda13..fca9d454905f 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -160,4 +160,5 @@ void unregister_snap_client(struct datalink_proto *proto) kfree(proto); } +MODULE_DESCRIPTION("SNAP data link layer. 
Derived from 802.2"); MODULE_LICENSE("GPL"); diff --git a/net/802/stp.c b/net/802/stp.c index d550d9f88f60..03c9f75e92c9 100644 --- a/net/802/stp.c +++ b/net/802/stp.c @@ -98,4 +98,5 @@ void stp_proto_unregister(const struct stp_proto *proto) } EXPORT_SYMBOL_GPL(stp_proto_unregister); +MODULE_DESCRIPTION("SAP demux for IEEE 802.1D Spanning Tree Protocol (STP)"); MODULE_LICENSE("GPL"); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index e40aa3e3641c..e45187b88220 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -738,5 +738,6 @@ static void __exit vlan_cleanup_module(void) module_init(vlan_proto_init); module_exit(vlan_cleanup_module); +MODULE_DESCRIPTION("802.1Q/802.1ad VLAN Protocol"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); diff --git a/net/core/dev_addr_lists_test.c b/net/core/dev_addr_lists_test.c index 90e7e3811ae7..4dbd0dc6aea2 100644 --- a/net/core/dev_addr_lists_test.c +++ b/net/core/dev_addr_lists_test.c @@ -233,4 +233,5 @@ static struct kunit_suite dev_addr_test_suite = { }; kunit_test_suite(dev_addr_test_suite); +MODULE_DESCRIPTION("KUnit tests for struct netdev_hw_addr_list"); MODULE_LICENSE("GPL"); diff --git a/net/core/selftests.c b/net/core/selftests.c index 94fe3146a959..8f801e6e3b91 100644 --- a/net/core/selftests.c +++ b/net/core/selftests.c @@ -405,5 +405,6 @@ void net_selftest_get_strings(u8 *data) } EXPORT_SYMBOL_GPL(net_selftest_get_strings); +MODULE_DESCRIPTION("Common library for generic PHY ethtool selftests"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Oleksij Rempel <o.rempel@pengutronix.de>"); diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 10e96ed6c9e3..b3271957ad9a 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -33,6 +33,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, int offset = skb_gro_offset(skb); struct xfrm_offload *xo; struct xfrm_state *x; + int encap_type = 0; __be32 seq; __be32 spi; @@ -70,6 +71,9 @@ static struct sk_buff 
*esp4_gro_receive(struct list_head *head, xo->flags |= XFRM_GRO; + if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP) + encap_type = UDP_ENCAP_ESPINUDP; + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; XFRM_SPI_SKB_CB(skb)->family = AF_INET; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); @@ -77,7 +81,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, /* We don't need to handle errors from xfrm_input, it does all * the error handling and frees the resources on error. */ - xfrm_input(skb, IPPROTO_ESP, spi, -2); + xfrm_input(skb, IPPROTO_ESP, spi, encap_type); return ERR_PTR(-EINPROGRESS); out_reset: diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index b8607763d113..e63a3bf99617 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -517,7 +517,7 @@ static struct rtable *icmp_route_lookup(struct net *net, } else return rt; - err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4_dec), AF_INET); + err = xfrm_decode_session_reverse(net, skb_in, flowi4_to_flowi(&fl4_dec), AF_INET); if (err) goto relookup_failed; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index d1e7d0ceb7ed..9ab9b3ebe0cd 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -288,11 +288,11 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) switch (skb->protocol) { case htons(ETH_P_IP): memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - xfrm_decode_session(skb, &fl, AF_INET); + xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET); break; case htons(ETH_P_IPV6): memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - xfrm_decode_session(skb, &fl, AF_INET6); + xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6); break; default: goto tx_err; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index bd135165482a..591a2737808e 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -62,7 +62,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un #ifdef CONFIG_XFRM if (!(IPCB(skb)->flags & 
IPSKB_XFRM_TRANSFORMED) && - xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { + xfrm_decode_session(net, skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { struct dst_entry *dst = skb_dst(skb); skb_dst_set(skb, NULL); dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0); diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 6a845e906a1d..ef5472ed6158 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -1533,10 +1533,6 @@ static struct tcp_ao_key *tcp_ao_key_alloc(struct sock *sk, goto err_free_pool; tfm = crypto_ahash_reqtfm(hp.req); - if (crypto_ahash_alignmask(tfm) > TCP_AO_KEY_ALIGN) { - err = -EOPNOTSUPP; - goto err_pool_end; - } digest_size = crypto_ahash_digestsize(tfm); tcp_sigpool_end(&hp); @@ -1551,8 +1547,6 @@ static struct tcp_ao_key *tcp_ao_key_alloc(struct sock *sk, key->digest_size = digest_size; return key; -err_pool_end: - tcp_sigpool_end(&hp); err_free_pool: tcp_sigpool_release(pool_id); return ERR_PTR(err); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1734fd6a1ce0..89e5a806b82e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2630,6 +2630,19 @@ void udp_destroy_sock(struct sock *sk) } } +static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family, + struct sock *sk) +{ +#ifdef CONFIG_XFRM + if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) { + if (family == AF_INET) + WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv); + else if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) + WRITE_ONCE(udp_sk(sk)->gro_receive, ipv6_stub->xfrm6_gro_udp_encap_rcv); + } +#endif +} + /* * Socket option code for UDP */ @@ -2679,6 +2692,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, case 0: #ifdef CONFIG_XFRM case UDP_ENCAP_ESPINUDP: + set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk); + fallthrough; case UDP_ENCAP_ESPINUDP_NON_IKE: #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) @@ -2721,6 +2736,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, 
int optname, udp_tunnel_encap_enable(sk); udp_assign_bit(GRO_ENABLED, sk, valbool); udp_assign_bit(ACCEPT_L4, sk, valbool); + set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, sk); break; /* diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 183f6dc37242..c54676998eb6 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -17,6 +17,8 @@ #include <linux/netfilter_ipv4.h> #include <net/ip.h> #include <net/xfrm.h> +#include <net/protocol.h> +#include <net/gro.h> static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) return 0; } -/* If it's a keepalive packet, then just eat it. - * If it's an encapsulated packet, then pass it to the - * IPsec xfrm input. - * Returns 0 if skb passed to xfrm or was dropped. - * Returns >0 if skb should be passed to UDP. - * Returns <0 if skb should be resubmitted (-ret is protocol) - */ -int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull) { struct udp_sock *up = udp_sk(sk); struct udphdr *uh; @@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) case UDP_ENCAP_ESPINUDP: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { - goto drop; + return -EINVAL; } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { /* ESP Packet without Non-ESP header */ len = sizeof(struct udphdr); @@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) case UDP_ENCAP_ESPINUDP_NON_IKE: /* Check if this is a keepalive packet. If so, eat it. 
*/ if (len == 1 && udpdata[0] == 0xff) { - goto drop; + return -EINVAL; } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && udpdata32[0] == 0 && udpdata32[1] == 0) { @@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) * protocol to ESP, and then call into the transform receiver. */ if (skb_unclone(skb, GFP_ATOMIC)) - goto drop; + return -EINVAL; /* Now we can update and verify the packet length... */ iph = ip_hdr(skb); @@ -147,25 +142,89 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) iph->tot_len = htons(ntohs(iph->tot_len) - len); if (skb->len < iphlen + len) { /* packet is too small!?! */ - goto drop; + return -EINVAL; } /* pull the data buffer up to the ESP header and set the * transport header to point to ESP. Keep UDP on the stack * for later. */ - __skb_pull(skb, len); - skb_reset_transport_header(skb); + if (pull) { + __skb_pull(skb, len); + skb_reset_transport_header(skb); + } else { + skb_set_transport_header(skb, len); + } /* process ESP */ - return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); - -drop: - kfree_skb(skb); return 0; } + +/* If it's a keepalive packet, then just eat it. + * If it's an encapsulated packet, then pass it to the + * IPsec xfrm input. + * Returns 0 if skb passed to xfrm or was dropped. + * Returns >0 if skb should be passed to UDP. 
+ * Returns <0 if skb should be resubmitted (-ret is protocol) + */ +int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +{ + int ret; + + ret = __xfrm4_udp_encap_rcv(sk, skb, true); + if (!ret) + return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, + udp_sk(sk)->encap_type); + + if (ret < 0) { + kfree_skb(skb); + return 0; + } + + return ret; +} EXPORT_SYMBOL(xfrm4_udp_encap_rcv); +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + struct sk_buff *skb) +{ + int offset = skb_gro_offset(skb); + const struct net_offload *ops; + struct sk_buff *pp = NULL; + int ret; + + offset = offset - sizeof(struct udphdr); + + if (!pskb_pull(skb, offset)) + return NULL; + + rcu_read_lock(); + ops = rcu_dereference(inet_offloads[IPPROTO_ESP]); + if (!ops || !ops->callbacks.gro_receive) + goto out; + + ret = __xfrm4_udp_encap_rcv(sk, skb, false); + if (ret) + goto out; + + skb_push(skb, offset); + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; + + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); + rcu_read_unlock(); + + return pp; + +out: + rcu_read_unlock(); + skb_push(skb, offset); + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 1; + + return NULL; +} +EXPORT_SYMBOL(xfrm4_gro_udp_encap_rcv); + int xfrm4_rcv(struct sk_buff *skb) { return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c35d302a3da9..13a1833a4df5 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1050,6 +1050,7 @@ static const struct ipv6_stub ipv6_stub_impl = { #if IS_ENABLED(CONFIG_XFRM) .xfrm6_local_rxpmtu = xfrm6_local_rxpmtu, .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv, + .xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv, .xfrm6_rcv_encap = xfrm6_rcv_encap, #endif .nd_tbl = &nd_tbl, diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index a189e08370a5..527b7caddbc6 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -34,7 +34,9 @@ static __u16 
esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen) int off = sizeof(struct ipv6hdr); struct ipv6_opt_hdr *exthdr; - if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP)) + /* ESP or ESPINUDP */ + if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP || + ipv6_hdr->nexthdr == NEXTHDR_UDP)) return offsetof(struct ipv6hdr, nexthdr); while (off < nhlen) { @@ -54,10 +56,14 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, int offset = skb_gro_offset(skb); struct xfrm_offload *xo; struct xfrm_state *x; + int encap_type = 0; __be32 seq; __be32 spi; int nhoff; + if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP) + encap_type = UDP_ENCAP_ESPINUDP; + if (!pskb_pull(skb, offset)) return NULL; @@ -104,7 +110,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, /* We don't need to handle errors from xfrm_input, it does all * the error handling and frees the resources on error. */ - xfrm_input(skb, IPPROTO_ESP, spi, -2); + xfrm_input(skb, IPPROTO_ESP, spi, encap_type); return ERR_PTR(-EINPROGRESS); out_reset: diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 8fb4a791881a..f62427097126 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -385,7 +385,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, return dst; } - err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6); + err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6); if (err) goto relookup_failed; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 73c85d4e0e9c..e550240c85e1 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -569,11 +569,11 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_err; memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - xfrm_decode_session(skb, &fl, AF_INET6); + xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6); break; case htons(ETH_P_IP): memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - xfrm_decode_session(skb, &fl, AF_INET); + xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET); break; 
default: goto tx_err; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 857713d7a38a..53d255838e6a 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -61,7 +61,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) { + xfrm_decode_session(net, skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) { skb_dst_set(skb, NULL); dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); if (IS_ERR(dst)) diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 4156387248e4..6e36e5047fba 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -16,6 +16,8 @@ #include <linux/netfilter_ipv6.h> #include <net/ipv6.h> #include <net/xfrm.h> +#include <net/protocol.h> +#include <net/gro.h> int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t) @@ -67,14 +69,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) return 0; } -/* If it's a keepalive packet, then just eat it. - * If it's an encapsulated packet, then pass it to the - * IPsec xfrm input. - * Returns 0 if skb passed to xfrm or was dropped. - * Returns >0 if skb should be passed to UDP. 
- * Returns <0 if skb should be resubmitted (-ret is protocol) - */ -int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull) { struct udp_sock *up = udp_sk(sk); struct udphdr *uh; @@ -85,9 +80,6 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) __be32 *udpdata32; u16 encap_type; - if (skb->protocol == htons(ETH_P_IP)) - return xfrm4_udp_encap_rcv(sk, skb); - encap_type = READ_ONCE(up->encap_type); /* if this is not encapsulated socket, then just return now */ if (!encap_type) @@ -109,7 +101,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) case UDP_ENCAP_ESPINUDP: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { - goto drop; + return -EINVAL; } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { /* ESP Packet without Non-ESP header */ len = sizeof(struct udphdr); @@ -120,7 +112,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) case UDP_ENCAP_ESPINUDP_NON_IKE: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { - goto drop; + return -EINVAL; } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && udpdata32[0] == 0 && udpdata32[1] == 0) { @@ -138,31 +130,100 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) * protocol to ESP, and then call into the transform receiver. */ if (skb_unclone(skb, GFP_ATOMIC)) - goto drop; + return -EINVAL; /* Now we can update and verify the packet length... */ ip6h = ipv6_hdr(skb); ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len); if (skb->len < ip6hlen + len) { /* packet is too small!?! */ - goto drop; + return -EINVAL; } /* pull the data buffer up to the ESP header and set the * transport header to point to ESP. Keep UDP on the stack * for later. 
*/ - __skb_pull(skb, len); - skb_reset_transport_header(skb); + if (pull) { + __skb_pull(skb, len); + skb_reset_transport_header(skb); + } else { + skb_set_transport_header(skb, len); + } /* process ESP */ - return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); - -drop: - kfree_skb(skb); return 0; } +/* If it's a keepalive packet, then just eat it. + * If it's an encapsulated packet, then pass it to the + * IPsec xfrm input. + * Returns 0 if skb passed to xfrm or was dropped. + * Returns >0 if skb should be passed to UDP. + * Returns <0 if skb should be resubmitted (-ret is protocol) + */ +int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +{ + int ret; + + if (skb->protocol == htons(ETH_P_IP)) + return xfrm4_udp_encap_rcv(sk, skb); + + ret = __xfrm6_udp_encap_rcv(sk, skb, true); + if (!ret) + return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, + udp_sk(sk)->encap_type); + + if (ret < 0) { + kfree_skb(skb); + return 0; + } + + return ret; +} + +struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + struct sk_buff *skb) +{ + int offset = skb_gro_offset(skb); + const struct net_offload *ops; + struct sk_buff *pp = NULL; + int ret; + + if (skb->protocol == htons(ETH_P_IP)) + return xfrm4_gro_udp_encap_rcv(sk, head, skb); + + offset = offset - sizeof(struct udphdr); + + if (!pskb_pull(skb, offset)) + return NULL; + + rcu_read_lock(); + ops = rcu_dereference(inet6_offloads[IPPROTO_ESP]); + if (!ops || !ops->callbacks.gro_receive) + goto out; + + ret = __xfrm6_udp_encap_rcv(sk, skb, false); + if (ret) + goto out; + + skb_push(skb, offset); + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; + + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); + rcu_read_unlock(); + + return pp; + +out: + rcu_read_unlock(); + skb_push(skb, offset); + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 1; + + return NULL; +} + int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t) { return xfrm6_rcv_spi(skb, 
skb_network_header(skb)[IP6CB(skb)->nhoff], diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 6d969468c779..dc450cc81222 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -668,7 +668,7 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int struct flowi fl; int err; - err = xfrm_decode_session(skb, &fl, family); + err = xfrm_decode_session(net, skb, &fl, family); if (err < 0) return err; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index d5ee96789d4b..bd4ce21d76d7 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -462,7 +462,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) struct xfrm_offload *xo = xfrm_offload(skb); struct sec_path *sp; - if (encap_type < 0) { + if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) { x = xfrm_input_state(skb); if (unlikely(x->km.state != XFRM_STATE_VALID)) { @@ -485,9 +485,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) seq = XFRM_SKB_CB(skb)->seq.input.low; goto resume; } - - /* encap_type < -1 indicates a GRO call. 
*/ - encap_type = 0; + /* GRO call */ seq = XFRM_SPI_SKB_CB(skb)->seq; if (xo && (xo->flags & CRYPTO_DONE)) { diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index e21cc71095bb..21d50d75c260 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -536,7 +536,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) switch (skb->protocol) { case htons(ETH_P_IPV6): memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - xfrm_decode_session(skb, &fl, AF_INET6); + xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6); if (!dst) { fl.u.ip6.flowi6_oif = dev->ifindex; fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; @@ -551,7 +551,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) break; case htons(ETH_P_IP): memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - xfrm_decode_session(skb, &fl, AF_INET); + xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET); if (!dst) { struct rtable *rt; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 5cdd3bca3637..c13dc3ef7910 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -149,6 +149,21 @@ struct xfrm_pol_inexact_candidates { struct hlist_head *res[XFRM_POL_CAND_MAX]; }; +struct xfrm_flow_keys { + struct flow_dissector_key_basic basic; + struct flow_dissector_key_control control; + union { + struct flow_dissector_key_ipv4_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; + } addrs; + struct flow_dissector_key_ip ip; + struct flow_dissector_key_icmp icmp; + struct flow_dissector_key_ports ports; + struct flow_dissector_key_keyid gre; +}; + +static struct flow_dissector xfrm_session_dissector __ro_after_init; + static DEFINE_SPINLOCK(xfrm_if_cb_lock); static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly; @@ -2858,7 +2873,7 @@ static void xfrm_policy_queue_process(struct timer_list *t) /* Fixup the mark to support VTI. 
*/ skb_mark = skb->mark; skb->mark = pol->mark.v; - xfrm_decode_session(skb, &fl, dst->ops->family); + xfrm_decode_session(net, skb, &fl, dst->ops->family); skb->mark = skb_mark; spin_unlock(&pq->hold_queue.lock); @@ -2894,7 +2909,7 @@ static void xfrm_policy_queue_process(struct timer_list *t) /* Fixup the mark to support VTI. */ skb_mark = skb->mark; skb->mark = pol->mark.v; - xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); + xfrm_decode_session(net, skb, &fl, skb_dst(skb)->ops->family); skb->mark = skb_mark; dst_hold(xfrm_dst_path(skb_dst(skb))); @@ -3372,209 +3387,106 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star } static void -decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) +decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse) { - const struct iphdr *iph = ip_hdr(skb); - int ihl = iph->ihl; - u8 *xprth = skb_network_header(skb) + ihl * 4; struct flowi4 *fl4 = &fl->u.ip4; - int oif = 0; - - if (skb_dst(skb) && skb_dst(skb)->dev) - oif = skb_dst(skb)->dev->ifindex; memset(fl4, 0, sizeof(struct flowi4)); - fl4->flowi4_mark = skb->mark; - fl4->flowi4_oif = reverse ? skb->skb_iif : oif; - - fl4->flowi4_proto = iph->protocol; - fl4->daddr = reverse ? iph->saddr : iph->daddr; - fl4->saddr = reverse ? 
iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK; - - if (!ip_is_fragment(iph)) { - switch (iph->protocol) { - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - case IPPROTO_TCP: - case IPPROTO_SCTP: - case IPPROTO_DCCP: - if (xprth + 4 < skb->data || - pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ports; - - xprth = skb_network_header(skb) + ihl * 4; - ports = (__be16 *)xprth; - - fl4->fl4_sport = ports[!!reverse]; - fl4->fl4_dport = ports[!reverse]; - } - break; - case IPPROTO_ICMP: - if (xprth + 2 < skb->data || - pskb_may_pull(skb, xprth + 2 - skb->data)) { - u8 *icmp; - xprth = skb_network_header(skb) + ihl * 4; - icmp = xprth; + if (reverse) { + fl4->saddr = flkeys->addrs.ipv4.dst; + fl4->daddr = flkeys->addrs.ipv4.src; + fl4->fl4_sport = flkeys->ports.dst; + fl4->fl4_dport = flkeys->ports.src; + } else { + fl4->saddr = flkeys->addrs.ipv4.src; + fl4->daddr = flkeys->addrs.ipv4.dst; + fl4->fl4_sport = flkeys->ports.src; + fl4->fl4_dport = flkeys->ports.dst; + } - fl4->fl4_icmp_type = icmp[0]; - fl4->fl4_icmp_code = icmp[1]; - } - break; - case IPPROTO_GRE: - if (xprth + 12 < skb->data || - pskb_may_pull(skb, xprth + 12 - skb->data)) { - __be16 *greflags; - __be32 *gre_hdr; - - xprth = skb_network_header(skb) + ihl * 4; - greflags = (__be16 *)xprth; - gre_hdr = (__be32 *)xprth; - - if (greflags[0] & GRE_KEY) { - if (greflags[0] & GRE_CSUM) - gre_hdr++; - fl4->fl4_gre_key = gre_hdr[1]; - } - } - break; - default: - break; - } + switch (flkeys->basic.ip_proto) { + case IPPROTO_GRE: + fl4->fl4_gre_key = flkeys->gre.keyid; + break; + case IPPROTO_ICMP: + fl4->fl4_icmp_type = flkeys->icmp.type; + fl4->fl4_icmp_code = flkeys->icmp.code; + break; } + + fl4->flowi4_proto = flkeys->basic.ip_proto; + fl4->flowi4_tos = flkeys->ip.tos; } #if IS_ENABLED(CONFIG_IPV6) static void -decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse) +decode_session6(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse) { struct flowi6 
*fl6 = &fl->u.ip6; - int onlyproto = 0; - const struct ipv6hdr *hdr = ipv6_hdr(skb); - u32 offset = sizeof(*hdr); - struct ipv6_opt_hdr *exthdr; - const unsigned char *nh = skb_network_header(skb); - u16 nhoff = IP6CB(skb)->nhoff; - int oif = 0; - u8 nexthdr; - - if (!nhoff) - nhoff = offsetof(struct ipv6hdr, nexthdr); - - nexthdr = nh[nhoff]; - - if (skb_dst(skb) && skb_dst(skb)->dev) - oif = skb_dst(skb)->dev->ifindex; memset(fl6, 0, sizeof(struct flowi6)); - fl6->flowi6_mark = skb->mark; - fl6->flowi6_oif = reverse ? skb->skb_iif : oif; - - fl6->daddr = reverse ? hdr->saddr : hdr->daddr; - fl6->saddr = reverse ? hdr->daddr : hdr->saddr; - - while (nh + offset + sizeof(*exthdr) < skb->data || - pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) { - nh = skb_network_header(skb); - exthdr = (struct ipv6_opt_hdr *)(nh + offset); - - switch (nexthdr) { - case NEXTHDR_FRAGMENT: - onlyproto = 1; - fallthrough; - case NEXTHDR_ROUTING: - case NEXTHDR_HOP: - case NEXTHDR_DEST: - offset += ipv6_optlen(exthdr); - nexthdr = exthdr->nexthdr; - break; - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - case IPPROTO_TCP: - case IPPROTO_SCTP: - case IPPROTO_DCCP: - if (!onlyproto && (nh + offset + 4 < skb->data || - pskb_may_pull(skb, nh + offset + 4 - skb->data))) { - __be16 *ports; - - nh = skb_network_header(skb); - ports = (__be16 *)(nh + offset); - fl6->fl6_sport = ports[!!reverse]; - fl6->fl6_dport = ports[!reverse]; - } - fl6->flowi6_proto = nexthdr; - return; - case IPPROTO_ICMPV6: - if (!onlyproto && (nh + offset + 2 < skb->data || - pskb_may_pull(skb, nh + offset + 2 - skb->data))) { - u8 *icmp; - - nh = skb_network_header(skb); - icmp = (u8 *)(nh + offset); - fl6->fl6_icmp_type = icmp[0]; - fl6->fl6_icmp_code = icmp[1]; - } - fl6->flowi6_proto = nexthdr; - return; - case IPPROTO_GRE: - if (!onlyproto && - (nh + offset + 12 < skb->data || - pskb_may_pull(skb, nh + offset + 12 - skb->data))) { - struct gre_base_hdr *gre_hdr; - __be32 *gre_key; - - nh = 
skb_network_header(skb); - gre_hdr = (struct gre_base_hdr *)(nh + offset); - gre_key = (__be32 *)(gre_hdr + 1); - - if (gre_hdr->flags & GRE_KEY) { - if (gre_hdr->flags & GRE_CSUM) - gre_key++; - fl6->fl6_gre_key = *gre_key; - } - } - fl6->flowi6_proto = nexthdr; - return; -#if IS_ENABLED(CONFIG_IPV6_MIP6) - case IPPROTO_MH: - offset += ipv6_optlen(exthdr); - if (!onlyproto && (nh + offset + 3 < skb->data || - pskb_may_pull(skb, nh + offset + 3 - skb->data))) { - struct ip6_mh *mh; - - nh = skb_network_header(skb); - mh = (struct ip6_mh *)(nh + offset); - fl6->fl6_mh_type = mh->ip6mh_type; - } - fl6->flowi6_proto = nexthdr; - return; -#endif - default: - fl6->flowi6_proto = nexthdr; - return; - } + if (reverse) { + fl6->saddr = flkeys->addrs.ipv6.dst; + fl6->daddr = flkeys->addrs.ipv6.src; + fl6->fl6_sport = flkeys->ports.dst; + fl6->fl6_dport = flkeys->ports.src; + } else { + fl6->saddr = flkeys->addrs.ipv6.src; + fl6->daddr = flkeys->addrs.ipv6.dst; + fl6->fl6_sport = flkeys->ports.src; + fl6->fl6_dport = flkeys->ports.dst; + } + + switch (flkeys->basic.ip_proto) { + case IPPROTO_GRE: + fl6->fl6_gre_key = flkeys->gre.keyid; + break; + case IPPROTO_ICMPV6: + fl6->fl6_icmp_type = flkeys->icmp.type; + fl6->fl6_icmp_code = flkeys->icmp.code; + break; } + + fl6->flowi6_proto = flkeys->basic.ip_proto; } #endif -int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, +int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family, int reverse) { + struct xfrm_flow_keys flkeys; + + memset(&flkeys, 0, sizeof(flkeys)); + __skb_flow_dissect(net, skb, &xfrm_session_dissector, &flkeys, + NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_ENCAP); + switch (family) { case AF_INET: - decode_session4(skb, fl, reverse); + decode_session4(&flkeys, fl, reverse); break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - decode_session6(skb, fl, reverse); + decode_session6(&flkeys, fl, reverse); break; #endif default: return -EAFNOSUPPORT; } + 
fl->flowi_mark = skb->mark; + if (reverse) { + fl->flowi_oif = skb->skb_iif; + } else { + int oif = 0; + + if (skb_dst(skb) && skb_dst(skb)->dev) + oif = skb_dst(skb)->dev->ifindex; + + fl->flowi_oif = oif; + } + return security_xfrm_decode_session(skb, &fl->flowi_secid); } EXPORT_SYMBOL(__xfrm_decode_session); @@ -3623,7 +3535,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, reverse = dir & ~XFRM_POLICY_MASK; dir &= XFRM_POLICY_MASK; - if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { + if (__xfrm_decode_session(net, skb, &fl, family, reverse) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); return 0; } @@ -3779,7 +3691,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) struct dst_entry *dst; int res = 1; - if (xfrm_decode_session(skb, &fl, family) < 0) { + if (xfrm_decode_session(net, skb, &fl, family) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); return 0; } @@ -4258,8 +4170,47 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { .exit = xfrm_net_exit, }; +static const struct flow_dissector_key xfrm_flow_dissector_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_CONTROL, + .offset = offsetof(struct xfrm_flow_keys, control), + }, + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct xfrm_flow_keys, basic), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, + .offset = offsetof(struct xfrm_flow_keys, addrs.ipv4), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, + .offset = offsetof(struct xfrm_flow_keys, addrs.ipv6), + }, + { + .key_id = FLOW_DISSECTOR_KEY_PORTS, + .offset = offsetof(struct xfrm_flow_keys, ports), + }, + { + .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID, + .offset = offsetof(struct xfrm_flow_keys, gre), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IP, + .offset = offsetof(struct xfrm_flow_keys, ip), + }, + { + .key_id = FLOW_DISSECTOR_KEY_ICMP, + .offset = offsetof(struct xfrm_flow_keys, icmp), + }, +}; + void __init xfrm_init(void) { + 
skb_flow_dissector_init(&xfrm_session_dissector, + xfrm_flow_dissector_keys, + ARRAY_SIZE(xfrm_flow_dissector_keys)); + register_pernet_subsys(&xfrm_net_ops); xfrm_dev_init(); xfrm_input_init(); |