diff options
author | David S. Miller <davem@davemloft.net> | 2015-01-15 01:12:13 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-01-15 01:12:13 -0500 |
commit | 2e62fa699ffb3f5b3a6778c720818e95690e9faa (patch) | |
tree | d5442581624a3ff7f3b4d91d193ba5928cc98328 | |
parent | 3f3558bb512e0762d5a4573a4aaf038d7a616e92 (diff) | |
parent | 1dd144cf5b4b47e12438c2c6883925ce1a9b499f (diff) | |
download | lwn-2e62fa699ffb3f5b3a6778c720818e95690e9faa.tar.gz lwn-2e62fa699ffb3f5b3a6778c720818e95690e9faa.zip |
Merge branch 'vxlan_group_policy_extension'
Thomas Graf says:
====================
VXLAN Group Policy Extension
Implements supports for the Group Policy VXLAN extension [0] to provide
a lightweight and simple security label mechanism across network peers
based on VXLAN. The security context and associated metadata is mapped
to/from skb->mark. This allows further mapping to a SELinux context
using SECMARK, to implement ACLs directly with nftables, iptables, OVS,
tc, etc.
The extension is disabled by default and should be run on a distinct
port in mixed Linux VXLAN VTEP environments. Liberal VXLAN VTEPs
which ignore unknown reserved bits will be able to receive VXLAN-GBP
frames.
Simple usage example:
10.1.1.1:
# ip link add vxlan0 type vxlan id 10 remote 10.1.1.2 gbp
# iptables -I OUTPUT -m owner --uid-owner 101 -j MARK --set-mark 0x200
10.1.1.2:
# ip link add vxlan0 type vxlan id 10 remote 10.1.1.1 gbp
# iptables -I INPUT -m mark --mark 0x200 -j DROP
iproute2 [1] and OVS [2] support will be provided in separate patches.
[0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
[1] https://github.com/tgraf/iproute2/tree/vxlan-gbp
[2] https://github.com/tgraf/ovs/tree/vxlan-gbp
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/vxlan.c | 113 | ||||
-rw-r--r-- | include/net/ip_tunnels.h | 5 | ||||
-rw-r--r-- | include/net/vxlan.h | 82 | ||||
-rw-r--r-- | include/uapi/linux/if_link.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/openvswitch.h | 11 | ||||
-rw-r--r-- | net/openvswitch/flow.c | 2 | ||||
-rw-r--r-- | net/openvswitch/flow.h | 14 | ||||
-rw-r--r-- | net/openvswitch/flow_netlink.c | 286 | ||||
-rw-r--r-- | net/openvswitch/vport-geneve.c | 15 | ||||
-rw-r--r-- | net/openvswitch/vport-vxlan.c | 91 | ||||
-rw-r--r-- | net/openvswitch/vport-vxlan.h | 11 |
11 files changed, 491 insertions, 140 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 99df0d76157c..6b6b45622a0a 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -263,15 +263,19 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb) return list_first_entry(&fdb->remotes, struct vxlan_rdst, list); } -/* Find VXLAN socket based on network namespace, address family and UDP port */ -static struct vxlan_sock *vxlan_find_sock(struct net *net, - sa_family_t family, __be16 port) +/* Find VXLAN socket based on network namespace, address family and UDP port + * and enabled unshareable flags. + */ +static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family, + __be16 port, u32 flags) { struct vxlan_sock *vs; + u32 match_flags = flags & VXLAN_F_UNSHAREABLE; hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) { if (inet_sk(vs->sock->sk)->inet_sport == port && - inet_sk(vs->sock->sk)->sk.sk_family == family) + inet_sk(vs->sock->sk)->sk.sk_family == family && + (vs->flags & VXLAN_F_UNSHAREABLE) == match_flags) return vs; } return NULL; @@ -291,11 +295,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) /* Look up VNI in a per net namespace table */ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, - sa_family_t family, __be16 port) + sa_family_t family, __be16 port, + u32 flags) { struct vxlan_sock *vs; - vs = vxlan_find_sock(net, family, port); + vs = vxlan_find_sock(net, family, port, flags); if (!vs) return NULL; @@ -620,7 +625,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, continue; vh2 = (struct vxlanhdr *)(p->data + off_vx); - if (vh->vx_vni != vh2->vx_vni) { + if (vh->vx_flags != vh2->vx_flags || + vh->vx_vni != vh2->vx_vni) { NAPI_GRO_CB(p)->same_flow = 0; continue; } @@ -1183,6 +1189,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) struct vxlan_sock *vs; struct vxlanhdr *vxh; u32 flags, vni; + struct vxlan_metadata md = {0}; /* Need Vxlan and inner Ethernet header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) @@ -1216,6 +1223,24 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) vni &= VXLAN_VID_MASK; } + /* For backwards compatibility, only allow reserved fields to be + * used by VXLAN extensions if explicitly requested. + */ + if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) { + struct vxlanhdr_gbp *gbp; + + gbp = (struct vxlanhdr_gbp *)vxh; + md.gbp = ntohs(gbp->policy_id); + + if (gbp->dont_learn) + md.gbp |= VXLAN_GBP_DONT_LEARN; + + if (gbp->policy_applied) + md.gbp |= VXLAN_GBP_POLICY_APPLIED; + + flags &= ~VXLAN_GBP_USED_BITS; + } + if (flags || (vni & ~VXLAN_VID_MASK)) { /* If there are any unprocessed flags remaining treat * this as a malformed packet. This behavior diverges from @@ -1229,7 +1254,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto bad_flags; } - vs->rcv(vs, skb, vxh->vx_vni); + md.vni = vxh->vx_vni; + vs->rcv(vs, skb, &md); return 0; drop: @@ -1246,8 +1272,8 @@ error: return 1; } -static void vxlan_rcv(struct vxlan_sock *vs, - struct sk_buff *skb, __be32 vx_vni) +static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, + struct vxlan_metadata *md) { struct iphdr *oip = NULL; struct ipv6hdr *oip6 = NULL; @@ -1258,7 +1284,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, int err = 0; union vxlan_addr *remote_ip; - vni = ntohl(vx_vni) >> 8; + vni = ntohl(md->vni) >> 8; /* Is this VNI defined? */ vxlan = vxlan_vs_find_vni(vs, vni); if (!vxlan) @@ -1292,6 +1318,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, goto drop; skb_reset_network_header(skb); + skb->mark = md->gbp; if (oip6) err = IP6_ECN_decapsulate(oip6, skb); @@ -1641,13 +1668,30 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } +static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs, + struct vxlan_metadata *md) +{ + struct vxlanhdr_gbp *gbp; + + gbp = (struct vxlanhdr_gbp *)vxh; + vxh->vx_flags |= htonl(VXLAN_HF_GBP); + + if (md->gbp & VXLAN_GBP_DONT_LEARN) + gbp->dont_learn = 1; + + if (md->gbp & VXLAN_GBP_POLICY_APPLIED) + gbp->policy_applied = 1; + + gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); +} + #if IS_ENABLED(CONFIG_IPV6) static int vxlan6_xmit_skb(struct vxlan_sock *vs, struct dst_entry *dst, struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, - __be16 src_port, __be16 dst_port, __be32 vni, - bool xnet) + __be16 src_port, __be16 dst_port, + struct vxlan_metadata *md, bool xnet) { struct vxlanhdr *vxh; int min_headroom; @@ -1696,7 +1740,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_HF_VNI); - vxh->vx_vni = vni; + vxh->vx_vni = md->vni; if (type & SKB_GSO_TUNNEL_REMCSUM) { u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> @@ -1714,6 +1758,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, } } + if (vs->flags & VXLAN_F_GBP) + vxlan_build_gbp_hdr(vxh, vs, md); + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio, @@ -1728,7 +1775,8 @@ err: int vxlan_xmit_skb(struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni, bool xnet) + __be16 src_port, __be16 dst_port, + struct vxlan_metadata *md, bool xnet) { struct vxlanhdr *vxh; int min_headroom; @@ -1771,7 +1819,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_HF_VNI); - vxh->vx_vni = vni; + vxh->vx_vni = md->vni; if (type & SKB_GSO_TUNNEL_REMCSUM) { u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> @@ -1789,6 +1837,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, } } + if (vs->flags & VXLAN_F_GBP) + vxlan_build_gbp_hdr(vxh, vs, md); + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos, @@ -1849,6 +1900,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, const struct iphdr *old_iph; struct flowi4 fl4; union vxlan_addr *dst; + struct vxlan_metadata md; __be16 src_port = 0, dst_port; u32 vni; __be16 df = 0; @@ -1910,7 +1962,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ip_rt_put(rt); dst_vxlan = vxlan_find_vni(vxlan->net, vni, - dst->sa.sa_family, dst_port); + dst->sa.sa_family, dst_port, + vxlan->flags); if (!dst_vxlan) goto tx_error; vxlan_encap_bypass(skb, vxlan, dst_vxlan); @@ -1919,11 +1972,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); + md.vni = htonl(vni << 8); + md.gbp = skb->mark; err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb, fl4.saddr, dst->sin.sin_addr.s_addr, - tos, ttl, df, src_port, dst_port, - htonl(vni << 8), + tos, ttl, df, src_port, dst_port, &md, !net_eq(vxlan->net, dev_net(vxlan->dev))); if (err < 0) { /* skb is already freed. */ @@ -1968,7 +2022,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_release(ndst); dst_vxlan = vxlan_find_vni(vxlan->net, vni, - dst->sa.sa_family, dst_port); + dst->sa.sa_family, dst_port, + vxlan->flags); if (!dst_vxlan) goto tx_error; vxlan_encap_bypass(skb, vxlan, dst_vxlan); @@ -1976,10 +2031,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } ttl = ttl ? : ip6_dst_hoplimit(ndst); + md.vni = htonl(vni << 8); + md.gbp = skb->mark; err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb, dev, &fl6.saddr, &fl6.daddr, 0, ttl, - src_port, dst_port, htonl(vni << 8), + src_port, dst_port, &md, !net_eq(vxlan->net, dev_net(vxlan->dev))); #endif } @@ -2136,7 +2193,7 @@ static int vxlan_init(struct net_device *dev) spin_lock(&vn->sock_lock); vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET, - vxlan->dst_port); + vxlan->dst_port, vxlan->flags); if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) { /* If we have a socket with same port already, reuse it */ vxlan_vs_add_dev(vs, vxlan); @@ -2382,6 +2439,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 }, [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 }, + [IFLA_VXLAN_GBP] = { .type = NLA_FLAG, }, }; static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -2542,7 +2600,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, return vs; spin_lock(&vn->sock_lock); - vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); + vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, flags); if (vs && ((vs->rcv != rcv) || !atomic_add_unless(&vs->refcnt, 1, 0))) vs = ERR_PTR(-EBUSY); @@ -2706,8 +2764,11 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX])) vxlan->flags |= VXLAN_F_REMCSUM_RX; + if (data[IFLA_VXLAN_GBP]) + vxlan->flags |= VXLAN_F_GBP; + if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET, - vxlan->dst_port)) { + vxlan->dst_port, vxlan->flags)) { pr_info("duplicate VNI %u\n", vni); return -EEXIST; } @@ -2851,6 +2912,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports)) goto nla_put_failure; + if (vxlan->flags & VXLAN_F_GBP && + nla_put_flag(skb, IFLA_VXLAN_GBP)) + goto nla_put_failure; + return 0; nla_put_failure: diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 25a59eb388a6..ce4db3cc5647 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -97,7 +97,10 @@ struct ip_tunnel { #define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) #define TUNNEL_OAM __cpu_to_be16(0x0200) #define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400) -#define TUNNEL_OPTIONS_PRESENT __cpu_to_be16(0x0800) +#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800) +#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) + +#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT) struct tnl_ptk_info { __be16 flags; diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0a7443b49133..7be8c342fc95 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -11,15 +11,76 @@ #define VNI_HASH_BITS 10 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) -/* VXLAN protocol header */ +/* + * VXLAN Group Based Policy Extension: + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |1|-|-|-|1|-|-|-|R|D|R|R|A|R|R|R| Group Policy ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VXLAN Network Identifier (VNI) | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * D = Don't Learn bit. When set, this bit indicates that the egress + * VTEP MUST NOT learn the source address of the encapsulated frame. + * + * A = Indicates that the group policy has already been applied to + * this packet. Policies MUST NOT be applied by devices when the + * A bit is set. + * + * [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy + */ +struct vxlanhdr_gbp { + __u8 vx_flags; +#ifdef __LITTLE_ENDIAN_BITFIELD + __u8 reserved_flags1:3, + policy_applied:1, + reserved_flags2:2, + dont_learn:1, + reserved_flags3:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 reserved_flags1:1, + dont_learn:1, + reserved_flags2:2, + policy_applied:1, + reserved_flags3:3; +#else +#error "Please fix <asm/byteorder.h>" +#endif + __be16 policy_id; + __be32 vx_vni; +}; + +#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | 0xFFFFFF) + +/* skb->mark mapping + * + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +#define VXLAN_GBP_DONT_LEARN (BIT(6) << 16) +#define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) +#define VXLAN_GBP_ID_MASK (0xFFFF) + +/* VXLAN protocol header: + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |G|R|R|R|I|R|R|C| Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VXLAN Network Identifier (VNI) | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * G = 1 Group Policy (VXLAN-GBP) + * I = 1 VXLAN Network Identifier (VNI) present + * C = 1 Remote checksum offload (RCO) + */ struct vxlanhdr { __be32 vx_flags; __be32 vx_vni; }; /* VXLAN header flags. */ -#define VXLAN_HF_VNI 0x08000000 -#define VXLAN_HF_RCO 0x00200000 +#define VXLAN_HF_RCO BIT(24) +#define VXLAN_HF_VNI BIT(27) +#define VXLAN_HF_GBP BIT(31) /* Remote checksum offload header option */ #define VXLAN_RCO_MASK 0x7f /* Last byte of vni field */ @@ -32,8 +93,14 @@ struct vxlanhdr { #define VXLAN_VID_MASK (VXLAN_N_VID - 1) #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) +struct vxlan_metadata { + __be32 vni; + u32 gbp; +}; + struct vxlan_sock; -typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key); +typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, + struct vxlan_metadata *md); /* per UDP socket information */ struct vxlan_sock { @@ -60,6 +127,10 @@ struct vxlan_sock { #define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100 #define VXLAN_F_REMCSUM_TX 0x200 #define VXLAN_F_REMCSUM_RX 0x400 +#define VXLAN_F_GBP 0x800 + +/* These flags must match in order for a socket to be shareable */ +#define VXLAN_F_UNSHAREABLE VXLAN_F_GBP struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, @@ -70,7 +141,8 @@ void vxlan_sock_release(struct vxlan_sock *vs); int vxlan_xmit_skb(struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni, bool xnet); + __be16 src_port, __be16 dst_port, struct vxlan_metadata *md, + bool xnet); static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, netdev_features_t features) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b2723f65846f..2a8380edbb7e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -372,6 +372,7 @@ enum { IFLA_VXLAN_UDP_ZERO_CSUM6_RX, IFLA_VXLAN_REMCSUM_TX, IFLA_VXLAN_REMCSUM_RX, + IFLA_VXLAN_GBP, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index f714e8633352..cd8d933963c2 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -252,11 +252,21 @@ enum ovs_vport_attr { #define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) +enum { + OVS_VXLAN_EXT_UNSPEC, + OVS_VXLAN_EXT_GBP, /* Flag or __u32 */ + __OVS_VXLAN_EXT_MAX, +}; + +#define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1) + + /* OVS_VPORT_ATTR_OPTIONS attributes for tunnels. */ enum { OVS_TUNNEL_ATTR_UNSPEC, OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */ + OVS_TUNNEL_ATTR_EXTENSION, __OVS_TUNNEL_ATTR_MAX }; @@ -328,6 +338,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */ OVS_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. */ OVS_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. */ + OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS, /* Nested OVS_VXLAN_EXT_* */ __OVS_TUNNEL_KEY_ATTR_MAX }; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index df334fe43d7f..e2c348b8baca 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -691,7 +691,7 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) * 8)) - 1 > sizeof(key->tun_opts)); - memcpy(GENEVE_OPTS(key, tun_info->options_len), + memcpy(TUN_METADATA_OPTS(key, tun_info->options_len), tun_info->options, tun_info->options_len); key->tun_opts_len = tun_info->options_len; } else { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a8b30f334388..d3d0a406562d 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -53,7 +53,7 @@ struct ovs_key_ipv4_tunnel { struct ovs_tunnel_info { struct ovs_key_ipv4_tunnel tunnel; - const struct geneve_opt *options; + const void *options; u8 options_len; }; @@ -61,10 +61,10 @@ struct ovs_tunnel_info { * maximum size. This allows us to get the benefits of variable length * matching for small options. */ -#define GENEVE_OPTS(flow_key, opt_len) \ - ((struct geneve_opt *)((flow_key)->tun_opts + \ - FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \ - opt_len)) +#define TUN_METADATA_OFFSET(opt_len) \ + (FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len) +#define TUN_METADATA_OPTS(flow_key, opt_len) \ + ((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len))) static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be32 saddr, __be32 daddr, @@ -73,7 +73,7 @@ static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be16 tp_dst, __be64 tun_id, __be16 tun_flags, - const struct geneve_opt *opts, + const void *opts, u8 opts_len) { tun_info->tunnel.tun_id = tun_id; @@ -105,7 +105,7 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be16 tp_dst, __be64 tun_id, __be16 tun_flags, - const struct geneve_opt *opts, + const void *opts, u8 opts_len) { __ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr, diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d1eecf707613..d210d1be3470 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -49,6 +49,14 @@ #include <net/mpls.h> #include "flow_netlink.h" +#include "vport-vxlan.h" + +struct ovs_len_tbl { + int len; + const struct ovs_len_tbl *next; +}; + +#define OVS_ATTR_NESTED -1 static void update_range(struct sw_flow_match *match, size_t offset, size_t size, bool is_mask) @@ -261,6 +269,9 @@ size_t ovs_tun_key_attr_size(void) + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with + * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. + */ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ } @@ -289,29 +300,45 @@ size_t ovs_key_attr_size(void) + nla_total_size(28); /* OVS_KEY_ATTR_ND */ } +static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { + [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) }, + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) }, + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) }, + [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 }, + [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 }, + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) }, + [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, + [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED }, + [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED }, +}; + /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ -static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { - [OVS_KEY_ATTR_ENCAP] = -1, - [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), - [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), - [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), - [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), - [OVS_KEY_ATTR_VLAN] = sizeof(__be16), - [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), - [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), - [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), - [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), - [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16), - [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), - [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), - [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), - [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), - [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), - [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), - [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32), - [OVS_KEY_ATTR_DP_HASH] = sizeof(u32), - [OVS_KEY_ATTR_TUNNEL] = -1, - [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls), +static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { + [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED }, + [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) }, + [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) }, + [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) }, + [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) }, + [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) }, + [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) }, + [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) }, + [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) }, + [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) }, + [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) }, + [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, + .next = ovs_tunnel_key_lens, }, + [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, }; static bool is_all_zero(const u8 *fp, size_t size) @@ -352,8 +379,8 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, return -EINVAL; } - expected_len = ovs_key_lens[type]; - if (nla_len(nla) != expected_len && expected_len != -1) { + expected_len = ovs_key_lens[type].len; + if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) { OVS_NLERR(log, "Key %d has unexpected len %d expected %d", type, nla_len(nla), expected_len); return -EINVAL; @@ -432,13 +459,47 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a, SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); } - opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0, - nla_len(a)); + opt_key_offset = TUN_METADATA_OFFSET(nla_len(a)); SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), nla_len(a), is_mask); return 0; } +static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = { + [OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 }, +}; + +static int vxlan_tun_opt_from_nlattr(const struct nlattr *a, + struct sw_flow_match *match, bool is_mask, + bool log) +{ + struct nlattr *tb[OVS_VXLAN_EXT_MAX+1]; + unsigned long opt_key_offset; + struct ovs_vxlan_opts opts; + int err; + + BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); + + err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy); + if (err < 0) + return err; + + memset(&opts, 0, sizeof(opts)); + + if (tb[OVS_VXLAN_EXT_GBP]) + opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]); + + if (!is_mask) + SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false); + else + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); + + opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), + is_mask); + return 0; +} + static int ipv4_tun_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) @@ -447,35 +508,22 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, int rem; bool ttl = false; __be16 tun_flags = 0; + int opts_type = 0; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); int err; - static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { - [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), - [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_TOS] = 1, - [OVS_TUNNEL_KEY_ATTR_TTL] = 1, - [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, - [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, - [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16), - [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16), - [OVS_TUNNEL_KEY_ATTR_OAM] = 0, - [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1, - }; - if (type > OVS_TUNNEL_KEY_ATTR_MAX) { OVS_NLERR(log, "Tunnel attr %d out of range max %d", type, OVS_TUNNEL_KEY_ATTR_MAX); return -EINVAL; } - if (ovs_tunnel_key_lens[type] != nla_len(a) && - ovs_tunnel_key_lens[type] != -1) { + if (ovs_tunnel_key_lens[type].len != nla_len(a) && + ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) { OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", - type, nla_len(a), ovs_tunnel_key_lens[type]); + type, nla_len(a), ovs_tunnel_key_lens[type].len); return -EINVAL; } @@ -520,11 +568,30 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, tun_flags |= TUNNEL_OAM; break; case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + err = genev_tun_opt_from_nlattr(a, match, is_mask, log); if (err) return err; - tun_flags |= TUNNEL_OPTIONS_PRESENT; + tun_flags |= TUNNEL_GENEVE_OPT; + opts_type = type; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + + err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log); + if (err) + return err; + + tun_flags |= TUNNEL_VXLAN_OPT; + opts_type = type; break; default: OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", @@ -553,13 +620,29 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, } } + return opts_type; +} + +static int vxlan_opt_to_nlattr(struct sk_buff *skb, + const void *tun_opts, int swkey_tun_opts_len) +{ + const struct ovs_vxlan_opts *opts = tun_opts; + struct nlattr *nla; + + nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); + if (!nla) + return -EMSGSIZE; + + if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0) + return -EMSGSIZE; + + nla_nest_end(skb, nla); return 0; } static int __ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output, - const struct geneve_opt *tun_opts, - int swkey_tun_opts_len) + const void *tun_opts, int swkey_tun_opts_len) { if (output->tun_flags & TUNNEL_KEY && nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) @@ -590,18 +673,22 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; - if (tun_opts && - nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, - swkey_tun_opts_len, tun_opts)) - return -EMSGSIZE; + if (tun_opts) { + if (output->tun_flags & TUNNEL_GENEVE_OPT && + nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, + swkey_tun_opts_len, tun_opts)) + return -EMSGSIZE; + else if (output->tun_flags & TUNNEL_VXLAN_OPT && + vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) + return -EMSGSIZE; + } return 0; } static int ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output, - const struct geneve_opt *tun_opts, - int swkey_tun_opts_len) + const void *tun_opts, int swkey_tun_opts_len) { struct nlattr *nla; int err; @@ -675,7 +762,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, } if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, - is_mask, log)) + is_mask, log) < 0) return -EINVAL; *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); } @@ -915,18 +1002,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, return 0; } -static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) +static void nlattr_set(struct nlattr *attr, u8 val, + const struct ovs_len_tbl *tbl) { struct nlattr *nla; int rem; /* The nlattr stream should already have been validated */ nla_for_each_nested(nla, attr, rem) { - /* We assume that ovs_key_lens[type] == -1 means that type is a - * nested attribute - */ - if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1) - nlattr_set(nla, val, false); + if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED) + nlattr_set(nla, val, tbl[nla_type(nla)].next); else memset(nla_data(nla), val, nla_len(nla)); } @@ -934,7 +1019,7 @@ static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) static void mask_set_nlattr(struct nlattr *attr, u8 val) { - nlattr_set(attr, val, true); + nlattr_set(attr, val, ovs_key_lens); } /** @@ -1148,10 +1233,10 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, goto nla_put_failure; if ((swkey->tun_key.ipv4_dst || is_mask)) { - const struct geneve_opt *opts = NULL; + const void *opts = NULL; if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) - opts = GENEVE_OPTS(output, swkey->tun_opts_len); + opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, swkey->tun_opts_len)) @@ -1540,6 +1625,34 @@ void ovs_match_init(struct sw_flow_match *match, } } +static int validate_geneve_opts(struct sw_flow_key *key) +{ + struct geneve_opt *option; + int opts_len = key->tun_opts_len; + bool crit_opt = false; + + option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len); + while (opts_len > 0) { + int len; + + if (opts_len < sizeof(*option)) + return -EINVAL; + + len = sizeof(*option) + option->length * 4; + if (len > opts_len) + return -EINVAL; + + crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); + + option = (struct geneve_opt *)((u8 *)option + len); + opts_len -= len; + }; + + key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; + + return 0; +} + static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_actions **sfa, bool log) { @@ -1547,36 +1660,23 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_key key; struct ovs_tunnel_info *tun_info; struct nlattr *a; - int err, start; + int err, start, opts_type; ovs_match_init(&match, &key, NULL); - err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); - if (err) - return err; + opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); + if (opts_type < 0) + return opts_type; if (key.tun_opts_len) { - struct geneve_opt *option = GENEVE_OPTS(&key, - key.tun_opts_len); - int opts_len = key.tun_opts_len; - bool crit_opt = false; - - while (opts_len > 0) { - int len; - - if (opts_len < sizeof(*option)) - return -EINVAL; - - len = sizeof(*option) + option->length * 4; - if (len > opts_len) - return -EINVAL; - - crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); - - option = (struct geneve_opt *)((u8 *)option + len); - opts_len -= len; - }; - - key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; + switch (opts_type) { + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + err = validate_geneve_opts(&key); + if (err < 0) + return err; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + break; + } }; start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); @@ -1597,9 +1697,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, * everything else will go away after flow setup. We can append * it to tun_info and then point there. */ - memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len), - key.tun_opts_len); - tun_info->options = (struct geneve_opt *)(tun_info + 1); + memcpy((tun_info + 1), + TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len); + tun_info->options = (tun_info + 1); } else { tun_info->options = NULL; } @@ -1622,8 +1722,8 @@ static int validate_set(const struct nlattr *a, return -EINVAL; if (key_type > OVS_KEY_ATTR_MAX || - (ovs_key_lens[key_type] != nla_len(ovs_key) && - ovs_key_lens[key_type] != -1)) + (ovs_key_lens[key_type].len != nla_len(ovs_key) && + ovs_key_lens[key_type].len != OVS_ATTR_NESTED)) return -EINVAL; switch (key_type) { diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 88a010c98c05..7ca3d454ff3b 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -88,7 +88,7 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) opts_len = geneveh->opt_len * 4; - flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT | + flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | (geneveh->oam ? TUNNEL_OAM : 0) | (geneveh->critical ? TUNNEL_CRIT_OPT : 0); @@ -178,7 +178,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) __be16 sport; struct rtable *rt; struct flowi4 fl; - u8 vni[3]; + u8 vni[3], opts_len, *opts; __be16 df; int err; @@ -200,11 +200,18 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) tunnel_id_to_vni(tun_key->tun_id, vni); skb->ignore_df = 1; + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) { + opts = (u8 *)tun_info->options; + opts_len = tun_info->options_len; + } else { + opts = NULL; + opts_len = 0; + } + err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, sport, dport, - tun_key->tun_flags, vni, - tun_info->options_len, (u8 *)tun_info->options, + tun_key->tun_flags, vni, opts_len, opts, false); if (err < 0) ip_rt_put(rt); diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 1435a053a870..8a2d54cba9ba 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -40,6 +40,7 @@ #include "datapath.h" #include "vport.h" +#include "vport-vxlan.h" /** * struct vxlan_port - Keeps track of open UDP ports @@ -49,6 +50,7 @@ struct vxlan_port { struct vxlan_sock *vs; char name[IFNAMSIZ]; + u32 exts; /* VXLAN_F_* in <net/vxlan.h> */ }; static struct vport_ops ovs_vxlan_vport_ops; @@ -59,19 +61,30 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport) } /* Called with rcu_read_lock and BH disabled. */ -static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) +static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, + struct vxlan_metadata *md) { struct ovs_tunnel_info tun_info; + struct vxlan_port *vxlan_port; struct vport *vport = vs->data; struct iphdr *iph; + struct ovs_vxlan_opts opts = { + .gbp = md->gbp, + }; __be64 key; + __be16 flags; + + flags = TUNNEL_KEY; + vxlan_port = vxlan_vport(vport); + if (vxlan_port->exts & VXLAN_F_GBP) + flags |= TUNNEL_VXLAN_OPT; /* Save outer tunnel values */ iph = ip_hdr(skb); - key = cpu_to_be64(ntohl(vx_vni) >> 8); + key = cpu_to_be64(ntohl(md->vni) >> 8); ovs_flow_tun_info_init(&tun_info, iph, udp_hdr(skb)->source, udp_hdr(skb)->dest, - key, TUNNEL_KEY, NULL, 0); + key, flags, &opts, sizeof(opts)); ovs_vport_receive(vport, skb, &tun_info); } @@ -83,6 +96,21 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port))) return -EMSGSIZE; + + if (vxlan_port->exts) { + struct nlattr *exts; + + exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION); + if (!exts) + return -EMSGSIZE; + + if (vxlan_port->exts & VXLAN_F_GBP && + nla_put_flag(skb, OVS_VXLAN_EXT_GBP)) + return -EMSGSIZE; + + nla_nest_end(skb, exts); + } + return 0; } @@ -95,6 +123,31 @@ static void vxlan_tnl_destroy(struct vport *vport) ovs_vport_deferred_free(vport); } +static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = { + [OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, }, +}; + +static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr) +{ + struct nlattr *exts[OVS_VXLAN_EXT_MAX+1]; + struct vxlan_port *vxlan_port; + int err; + + if (nla_len(attr) < sizeof(struct nlattr)) + return -EINVAL; + + err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy); + if (err < 0) + return err; + + vxlan_port = vxlan_vport(vport); + + if (exts[OVS_VXLAN_EXT_GBP]) + vxlan_port->exts |= VXLAN_F_GBP; + + return 0; +} + static struct vport *vxlan_tnl_create(const struct vport_parms *parms) { struct net *net = ovs_dp_get_net(parms->dp); @@ -127,7 +180,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) vxlan_port = vxlan_vport(vport); strncpy(vxlan_port->name, parms->name, IFNAMSIZ); - vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0); + a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION); + if (a) { + err = vxlan_configure_exts(vport, a); + if (err) { + ovs_vport_free(vport); + goto error; + } + } + + vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, + vxlan_port->exts); if (IS_ERR(vs)) { ovs_vport_free(vport); return (void *)vs; @@ -140,12 +203,28 @@ error: return ERR_PTR(err); } +static int vxlan_ext_gbp(struct sk_buff *skb) +{ + const struct ovs_tunnel_info *tun_info; + const struct ovs_vxlan_opts *opts; + + tun_info = OVS_CB(skb)->egress_tun_info; + opts = tun_info->options; + + if (tun_info->tunnel.tun_flags & TUNNEL_VXLAN_OPT && + tun_info->options_len >= sizeof(*opts)) + return opts->gbp; + else + return 0; +} + static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); struct vxlan_port *vxlan_port = vxlan_vport(vport); __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; const struct ovs_key_ipv4_tunnel *tun_key; + struct vxlan_metadata md = {0}; struct rtable *rt; struct flowi4 fl; __be16 src_port; @@ -170,12 +249,14 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) skb->ignore_df = 1; src_port = udp_flow_src_port(net, skb, 0, 0, true); + md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); + md.gbp = vxlan_ext_gbp(skb); err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, src_port, dst_port, - htonl(be64_to_cpu(tun_key->tun_id) << 8), + &md, false); if (err < 0) ip_rt_put(rt); diff --git a/net/openvswitch/vport-vxlan.h b/net/openvswitch/vport-vxlan.h new file mode 100644 index 000000000000..4b08233e73d5 --- /dev/null +++ b/net/openvswitch/vport-vxlan.h @@ -0,0 +1,11 @@ +#ifndef VPORT_VXLAN_H +#define VPORT_VXLAN_H 1 + +#include <linux/kernel.h> +#include <linux/types.h> + +struct ovs_vxlan_opts { + __u32 gbp; +}; + +#endif |