summaryrefslogtreecommitdiff
path: root/include/net
diff options
context:
space:
mode:
authorDavid Ahern <dsa@cumulusnetworks.com>2016-10-16 20:02:52 -0700
committerDavid S. Miller <davem@davemloft.net>2016-10-17 10:17:05 -0400
commita04a480d4392ea6efd117be2de564117b2a009c0 (patch)
treea8a8b400fb1a06174a1b9a9838a3b72343fd8b84 /include/net
parentfb5c6cfaec126d9a96b9dd471d4711bf4c737a6f (diff)
downloadlwn-a04a480d4392ea6efd117be2de564117b2a009c0.tar.gz
lwn-a04a480d4392ea6efd117be2de564117b2a009c0.zip
net: Require exact match for TCP socket lookups if dif is l3mdev
Currently, socket lookups for l3mdev (vrf) use cases can match a socket that is bound to a port but not a device (ie., a global socket). If the sysctl tcp_l3mdev_accept is not set this leads to ack packets going out based on the main table even though the packet came in from an L3 domain. The end result is that the connection does not establish creating confusion for users since the service is running and a socket shows in ss output. Fix by requiring an exact dif to sk_bound_dev_if match if the skb came through an interface enslaved to an l3mdev device and the tcp_l3mdev_accept is not set. skb's through an l3mdev interface are marked by setting a flag in inet{6}_skb_parm. The IPv6 variant is already set; this patch adds the flag for IPv4. Using an skb flag avoids a device lookup on the dif. The flag is set in the VRF driver using the IP{6}CB macros. For IPv4, the inet_skb_parm struct is moved in the cb per commit 971f10eca186, so the match function in the TCP stack needs to use TCP_SKB_CB. For IPv6, the move is done after the socket lookup, so IP6CB is used. The flags field in inet_skb_parm struct needs to be increased to add another flag. There is currently a 1-byte hole following the flags, so it can be expanded to u16 without increasing the size of the struct. Fixes: 193125dbd8eb ("net: Introduce VRF device driver") Signed-off-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net')
-rw-r--r--include/net/ip.h8
-rw-r--r--include/net/tcp.h13
2 files changed, 19 insertions, 2 deletions
diff --git a/include/net/ip.h b/include/net/ip.h
index bc43c0fcae12..c9d07988911e 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -38,7 +38,7 @@ struct sock;
struct inet_skb_parm {
int iif;
struct ip_options opt; /* Compiled IP options */
- unsigned char flags;
+ u16 flags;
#define IPSKB_FORWARDED BIT(0)
#define IPSKB_XFRM_TUNNEL_SIZE BIT(1)
@@ -48,10 +48,16 @@ struct inet_skb_parm {
#define IPSKB_DOREDIRECT BIT(5)
#define IPSKB_FRAG_PMTU BIT(6)
#define IPSKB_FRAG_SEGS BIT(7)
+#define IPSKB_L3SLAVE BIT(8)
u16 frag_max_size;
};
+static inline bool ipv4_l3mdev_skb(u16 flags)
+{
+ return !!(flags & IPSKB_L3SLAVE);
+}
+
static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
{
return ip_hdr(skb)->ihl * 4;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f83b7f220a65..5b82d4d94834 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -794,12 +794,23 @@ struct tcp_skb_cb {
*/
static inline int tcp_v6_iif(const struct sk_buff *skb)
{
- bool l3_slave = skb_l3mdev_slave(TCP_SKB_CB(skb)->header.h6.flags);
+ bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif;
}
#endif
+/* TCP_SKB_CB reference means this can not be used from early demux */
+static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
+ ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
+ return true;
+#endif
+ return false;
+}
+
/* Due to TSO, an SKB can be composed of multiple actual
* packets. To keep these tracked properly, we use this.
*/