From 971427f353f3c42c8dcef62e7124440df68eb809 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Mon, 15 Sep 2014 19:37:25 -0700 Subject: openvswitch: Add recirc and hash action. Recirc action allows a packet to reenter openvswitch processing. currently openvswitch lookup flow for packet received and execute set of actions on that packet, with help of recirc action we can process/modify the packet and recirculate it back in openvswitch for another pass. OVS hash action calculates 5-tupple hash and set hash in flow-key hash. This can be used along with recirculation for distributing packets among different ports for bond devices. For example: OVS bonding can use following actions: Match on: bond flow; Action: hash, recirc(id) Match on: recirc-id == id and hash lower bits == a; Action: output port_bond_a Signed-off-by: Andy Zhou Acked-by: Jesse Gross Signed-off-by: Pravin B Shelar --- include/uapi/linux/openvswitch.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/uapi/linux/openvswitch.h') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index a794d1dd7b40..f7fc507d82ab 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -289,6 +289,9 @@ enum ovs_key_attr { OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */ OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */ OVS_KEY_ATTR_TCP_FLAGS, /* be16 TCP flags. */ + OVS_KEY_ATTR_DP_HASH, /* u32 hash value. Value 0 indicates the hash + is not computed by the datapath. */ + OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */ #ifdef __KERNEL__ OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */ @@ -493,6 +496,27 @@ struct ovs_action_push_vlan { __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */ }; +/* Data path hash algorithm for computing Datapath hash. + * + * The algorithm type only specifies the fields in a flow + * will be used as part of the hash. Each datapath is free + * to use its own hash algorithm. The hash value will be + * opaque to the user space daemon. + */ +enum ovs_hash_alg { + OVS_HASH_ALG_L4, +}; + +/* + * struct ovs_action_hash - %OVS_ACTION_ATTR_HASH action argument. + * @hash_alg: Algorithm used to compute hash prior to recirculation. + * @hash_basis: basis used for computing hash. + */ +struct ovs_action_hash { + uint32_t hash_alg; /* One of ovs_hash_alg. */ + uint32_t hash_basis; +}; + /** * enum ovs_action_attr - Action types. * @@ -521,6 +545,8 @@ enum ovs_action_attr { OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */ OVS_ACTION_ATTR_POP_VLAN, /* No argument. */ OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ + OVS_ACTION_ATTR_RECIRC, /* u32 recirc_id. */ + OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */ __OVS_ACTION_ATTR_MAX }; -- cgit v1.2.3 From 67fa034194bf82a3d5ca841759d921297daa63ca Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 3 Oct 2014 15:35:30 -0700 Subject: openvswitch: Add support for matching on OAM packets. Some tunnel formats have mechanisms for indicating that packets are OAM frames that should be handled specially (either as high priority or not forwarded beyond an endpoint). This provides support for allowing those types of packets to be matched. Signed-off-by: Jesse Gross Signed-off-by: Andy Zhou Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 1 + net/openvswitch/datapath.c | 1 + net/openvswitch/flow_netlink.c | 17 ++++++++++++----- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux/openvswitch.h') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index f7fc507d82ab..7c06106f5af5 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -309,6 +309,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_TTL, /* u8 Tunnel IP TTL. */ OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ + OVS_TUNNEL_KEY_ATTR_OAM, /* No argument. OAM frame. */ __OVS_TUNNEL_KEY_ATTR_MAX }; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 9e3a2fae6a8f..f6bd93d5f435 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -369,6 +369,7 @@ static size_t key_attr_size(void) + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index f4c8daa73965..22c855fa0bc2 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -346,6 +346,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, [OVS_TUNNEL_KEY_ATTR_TTL] = 1, [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, + [OVS_TUNNEL_KEY_ATTR_OAM] = 0, }; if (type > OVS_TUNNEL_KEY_ATTR_MAX) { @@ -390,6 +391,9 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, case OVS_TUNNEL_KEY_ATTR_CSUM: tun_flags |= TUNNEL_CSUM; break; + case OVS_TUNNEL_KEY_ATTR_OAM: + tun_flags |= TUNNEL_OAM; + break; default: return -EINVAL; } @@ -431,21 +435,24 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) return -EMSGSIZE; if (output->ipv4_src && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) + nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) return -EMSGSIZE; if (output->ipv4_dst && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) + nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) return -EMSGSIZE; if (output->ipv4_tos && - nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) + nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) return -EMSGSIZE; if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl)) return -EMSGSIZE; if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && - nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) return -EMSGSIZE; if ((output->tun_flags & TUNNEL_CSUM) && - nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) + return -EMSGSIZE; + if ((output->tun_flags & TUNNEL_OAM) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; nla_nest_end(skb, nla); -- cgit v1.2.3 From f0b128c1e2cc33ad104daf0f51a51e34f7763c5f Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 3 Oct 2014 15:35:31 -0700 Subject: openvswitch: Wrap struct ovs_key_ipv4_tunnel in a new structure. Currently, the flow information that is matched for tunnels and the tunnel data passed around with packets is the same. However, as additional information is added this is not necessarily desirable, as in the case of pointers. This adds a new structure for tunnel metadata which currently contains only the existing struct. This change is purely internal to the kernel since the current OVS_KEY_ATTR_IPV4_TUNNEL is simply a compressed version of OVS_KEY_ATTR_TUNNEL that is translated at flow setup. Signed-off-by: Jesse Gross Signed-off-by: Andy Zhou Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 2 +- net/openvswitch/actions.c | 5 +++-- net/openvswitch/datapath.h | 2 +- net/openvswitch/flow.c | 6 +++--- net/openvswitch/flow.h | 30 +++++++++++++++++------------- net/openvswitch/flow_netlink.c | 38 +++++++++++++++++++++++++++++++------- net/openvswitch/vport-gre.c | 16 +++++++++------- net/openvswitch/vport-vxlan.c | 10 +++++----- net/openvswitch/vport.c | 6 +++--- net/openvswitch/vport.h | 2 +- 10 files changed, 74 insertions(+), 43 deletions(-) (limited to 'include/uapi/linux/openvswitch.h') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 7c06106f5af5..6753032832e2 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -294,7 +294,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */ #ifdef __KERNEL__ - OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */ + OVS_KEY_ATTR_TUNNEL_INFO, /* struct ovs_tunnel_info */ #endif __OVS_KEY_ATTR_MAX }; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 6932a42e41a2..006886dbee36 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -590,8 +590,8 @@ static int execute_set_action(struct sk_buff *skb, skb->mark = nla_get_u32(nested_attr); break; - case OVS_KEY_ATTR_IPV4_TUNNEL: - OVS_CB(skb)->egress_tun_key = nla_data(nested_attr); + case OVS_KEY_ATTR_TUNNEL_INFO: + OVS_CB(skb)->egress_tun_info = nla_data(nested_attr); break; case OVS_KEY_ATTR_ETHERNET: @@ -778,6 +778,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts); this_cpu_inc(exec_actions_level); + OVS_CB(skb)->egress_tun_info = NULL; err = do_execute_actions(dp, skb, key, acts->actions, acts->actions_len); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index ac3f3df96961..974135439c5c 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -102,8 +102,8 @@ struct datapath { */ struct ovs_skb_cb { struct sw_flow *flow; + struct ovs_tunnel_info *egress_tun_info; struct vport *input_vport; - struct ovs_key_ipv4_tunnel *egress_tun_key; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 913bdc1a83c0..2924cb340868 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -642,12 +642,12 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) return key_extract(skb, key); } -int ovs_flow_key_extract(struct ovs_key_ipv4_tunnel *tun_key, +int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key) { /* Extract metadata from packet. */ - if (tun_key) - memcpy(&key->tun_key, tun_key, sizeof(key->tun_key)); + if (tun_info) + memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key)); else memset(&key->tun_key, 0, sizeof(key->tun_key)); diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 0f5db4ec565d..fe5a71b81c1f 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -49,20 +49,24 @@ struct ovs_key_ipv4_tunnel { u8 ipv4_ttl; } __packed __aligned(4); /* Minimize padding. */ -static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, - const struct iphdr *iph, __be64 tun_id, - __be16 tun_flags) +struct ovs_tunnel_info { + struct ovs_key_ipv4_tunnel tunnel; +}; + +static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, + const struct iphdr *iph, + __be64 tun_id, __be16 tun_flags) { - tun_key->tun_id = tun_id; - tun_key->ipv4_src = iph->saddr; - tun_key->ipv4_dst = iph->daddr; - tun_key->ipv4_tos = iph->tos; - tun_key->ipv4_ttl = iph->ttl; - tun_key->tun_flags = tun_flags; + tun_info->tunnel.tun_id = tun_id; + tun_info->tunnel.ipv4_src = iph->saddr; + tun_info->tunnel.ipv4_dst = iph->daddr; + tun_info->tunnel.ipv4_tos = iph->tos; + tun_info->tunnel.ipv4_ttl = iph->ttl; + tun_info->tunnel.tun_flags = tun_flags; /* clear struct padding. */ - memset((unsigned char *) tun_key + OVS_TUNNEL_KEY_SIZE, 0, - sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE); + memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0, + sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE); } struct sw_flow_key { @@ -190,8 +194,8 @@ void ovs_flow_stats_clear(struct sw_flow *); u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key); -int ovs_flow_key_extract(struct ovs_key_ipv4_tunnel *tun_key, - struct sk_buff *skb, struct sw_flow_key *key); +int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info, struct sk_buff *skb, + struct sw_flow_key *key); /* Extract key from packet coming from userspace. */ int ovs_flow_key_extract_userspace(const struct nlattr *attr, struct sk_buff *skb, diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 22c855fa0bc2..5d6194d9dadc 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1148,13 +1148,14 @@ out: return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); } -static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len) +static struct nlattr *__add_action(struct sw_flow_actions **sfa, + int attrtype, void *data, int len) { struct nlattr *a; a = reserve_sfa_size(sfa, nla_attr_size(len)); if (IS_ERR(a)) - return PTR_ERR(a); + return a; a->nla_type = attrtype; a->nla_len = nla_attr_size(len); @@ -1163,6 +1164,18 @@ static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, in memcpy(nla_data(a), data, len); memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); + return a; +} + +static int add_action(struct sw_flow_actions **sfa, int attrtype, + void *data, int len) +{ + struct nlattr *a; + + a = __add_action(sfa, attrtype, data, len); + if (IS_ERR(a)) + return PTR_ERR(a); + return 0; } @@ -1268,6 +1281,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, { struct sw_flow_match match; struct sw_flow_key key; + struct ovs_tunnel_info *tun_info; + struct nlattr *a; int err, start; ovs_match_init(&match, &key, NULL); @@ -1279,8 +1294,14 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, if (start < 0) return start; - err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key, - sizeof(match.key->tun_key)); + a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, + sizeof(*tun_info)); + if (IS_ERR(a)) + return PTR_ERR(a); + + tun_info = nla_data(a); + tun_info->tunnel = key.tun_key; + add_nested_action_end(*sfa, start); return err; @@ -1563,17 +1584,20 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) int err; switch (key_type) { - case OVS_KEY_ATTR_IPV4_TUNNEL: + case OVS_KEY_ATTR_TUNNEL_INFO: { + struct ovs_tunnel_info *tun_info = nla_data(ovs_key); + start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); if (!start) return -EMSGSIZE; - err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key), - nla_data(ovs_key)); + err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel, + nla_data(ovs_key)); if (err) return err; nla_nest_end(skb, start); break; + } default: if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) return -EMSGSIZE; diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 309cca6e816f..fe768bd600eb 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -63,8 +63,10 @@ static __be16 filter_tnl_flags(__be16 flags) static struct sk_buff *__build_header(struct sk_buff *skb, int tunnel_hlen) { - const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->egress_tun_key; struct tnl_ptk_info tpi; + const struct ovs_key_ipv4_tunnel *tun_key; + + tun_key = &OVS_CB(skb)->egress_tun_info->tunnel; skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM)); if (IS_ERR(skb)) @@ -92,7 +94,7 @@ static __be64 key_to_tunnel_id(__be32 key, __be32 seq) static int gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) { - struct ovs_key_ipv4_tunnel tun_key; + struct ovs_tunnel_info tun_info; struct ovs_net *ovs_net; struct vport *vport; __be64 key; @@ -103,10 +105,10 @@ static int gre_rcv(struct sk_buff *skb, return PACKET_REJECT; key = key_to_tunnel_id(tpi->key, tpi->seq); - ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, - filter_tnl_flags(tpi->flags)); + ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, + filter_tnl_flags(tpi->flags)); - ovs_vport_receive(vport, skb, &tun_key); + ovs_vport_receive(vport, skb, &tun_info); return PACKET_RCVD; } @@ -137,12 +139,12 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) __be16 df; int err; - if (unlikely(!OVS_CB(skb)->egress_tun_key)) { + if (unlikely(!OVS_CB(skb)->egress_tun_info)) { err = -EINVAL; goto error; } - tun_key = OVS_CB(skb)->egress_tun_key; + tun_key = &OVS_CB(skb)->egress_tun_info->tunnel; /* Route lookup */ memset(&fl, 0, sizeof(fl)); fl.daddr = tun_key->ipv4_dst; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index f19539bb8adc..5fbff2c1ee49 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -58,7 +58,7 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport) /* Called with rcu_read_lock and BH disabled. */ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) { - struct ovs_key_ipv4_tunnel tun_key; + struct ovs_tunnel_info tun_info; struct vport *vport = vs->data; struct iphdr *iph; __be64 key; @@ -66,9 +66,9 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) /* Save outer tunnel values */ iph = ip_hdr(skb); key = cpu_to_be64(ntohl(vx_vni) >> 8); - ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY); + ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY); - ovs_vport_receive(vport, skb, &tun_key); + ovs_vport_receive(vport, skb, &tun_info); } static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) @@ -147,12 +147,12 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) __be16 df; int err; - if (unlikely(!OVS_CB(skb)->egress_tun_key)) { + if (unlikely(!OVS_CB(skb)->egress_tun_info)) { err = -EINVAL; goto error; } - tun_key = OVS_CB(skb)->egress_tun_key; + tun_key = &OVS_CB(skb)->egress_tun_info->tunnel; /* Route lookup */ memset(&fl, 0, sizeof(fl)); fl.daddr = tun_key->ipv4_dst; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 5df8377fcfb1..3e50ee8a218c 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -432,7 +432,7 @@ u32 ovs_vport_find_upcall_portid(const struct vport *p, struct sk_buff *skb) * skb->data should point to the Ethernet header. */ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, - struct ovs_key_ipv4_tunnel *tun_key) + struct ovs_tunnel_info *tun_info) { struct pcpu_sw_netstats *stats; struct sw_flow_key key; @@ -445,9 +445,9 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, u64_stats_update_end(&stats->syncp); OVS_CB(skb)->input_vport = vport; - OVS_CB(skb)->egress_tun_key = NULL; + OVS_CB(skb)->egress_tun_info = NULL; /* Extract flow from 'skb' into 'key'. */ - error = ovs_flow_key_extract(tun_key, skb, &key); + error = ovs_flow_key_extract(tun_info, skb, &key); if (unlikely(error)) { kfree_skb(skb); return; diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 0efd62fef1e8..e28964aba021 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -207,7 +207,7 @@ static inline struct vport *vport_from_priv(void *priv) } void ovs_vport_receive(struct vport *, struct sk_buff *, - struct ovs_key_ipv4_tunnel *); + struct ovs_tunnel_info *); /* List of statically compiled vport implementations. Don't forget to also * add yours to the list at the top of vport.c. */ -- cgit v1.2.3 From f5796684069e0c71c65bce6a6d4766114aec1396 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 3 Oct 2014 15:35:33 -0700 Subject: openvswitch: Add support for Geneve tunneling. The Openvswitch implementation is completely agnostic to the options that are in use and can handle newly defined options without further work. It does this by simply matching on a byte array of options and allowing userspace to setup flows on this array. Signed-off-by: Jesse Gross Singed-off-by: Ansis Atteka Signed-off-by: Andy Zhou Acked-by: Thomas Graf Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 21 ++-- include/uapi/linux/openvswitch.h | 2 + net/openvswitch/Kconfig | 11 ++ net/openvswitch/Makefile | 4 + net/openvswitch/datapath.c | 5 +- net/openvswitch/flow.c | 20 +++- net/openvswitch/flow.h | 20 +++- net/openvswitch/flow_netlink.c | 176 ++++++++++++++++++++++++----- net/openvswitch/vport-geneve.c | 236 +++++++++++++++++++++++++++++++++++++++ net/openvswitch/vport-gre.c | 2 +- net/openvswitch/vport-vxlan.c | 2 +- net/openvswitch/vport.c | 3 + net/openvswitch/vport.h | 1 + 13 files changed, 461 insertions(+), 42 deletions(-) create mode 100644 net/openvswitch/vport-geneve.c (limited to 'include/uapi/linux/openvswitch.h') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index a9ce1556d773..5bc6edeb7143 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -86,17 +86,18 @@ struct ip_tunnel { struct gro_cells gro_cells; }; -#define TUNNEL_CSUM __cpu_to_be16(0x01) -#define TUNNEL_ROUTING __cpu_to_be16(0x02) -#define TUNNEL_KEY __cpu_to_be16(0x04) -#define TUNNEL_SEQ __cpu_to_be16(0x08) -#define TUNNEL_STRICT __cpu_to_be16(0x10) -#define TUNNEL_REC __cpu_to_be16(0x20) -#define TUNNEL_VERSION __cpu_to_be16(0x40) -#define TUNNEL_NO_KEY __cpu_to_be16(0x80) +#define TUNNEL_CSUM __cpu_to_be16(0x01) +#define TUNNEL_ROUTING __cpu_to_be16(0x02) +#define TUNNEL_KEY __cpu_to_be16(0x04) +#define TUNNEL_SEQ __cpu_to_be16(0x08) +#define TUNNEL_STRICT __cpu_to_be16(0x10) +#define TUNNEL_REC __cpu_to_be16(0x20) +#define TUNNEL_VERSION __cpu_to_be16(0x40) +#define TUNNEL_NO_KEY __cpu_to_be16(0x80) #define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) -#define TUNNEL_OAM __cpu_to_be16(0x0200) -#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400) +#define TUNNEL_OAM __cpu_to_be16(0x0200) +#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400) +#define TUNNEL_OPTIONS_PRESENT __cpu_to_be16(0x0800) struct tnl_ptk_info { __be16 flags; diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 6753032832e2..435eabc5ffaa 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -192,6 +192,7 @@ enum ovs_vport_type { OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ OVS_VPORT_TYPE_GRE, /* GRE tunnel. */ OVS_VPORT_TYPE_VXLAN, /* VXLAN tunnel. */ + OVS_VPORT_TYPE_GENEVE, /* Geneve tunnel. */ __OVS_VPORT_TYPE_MAX }; @@ -310,6 +311,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ OVS_TUNNEL_KEY_ATTR_OAM, /* No argument. OAM frame. */ + OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */ __OVS_TUNNEL_KEY_ATTR_MAX }; diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 6ecf491ad509..ba3bb8203b99 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -54,3 +54,14 @@ config OPENVSWITCH_VXLAN Say N to exclude this support and reduce the binary size. If unsure, say Y. + +config OPENVSWITCH_GENEVE + bool "Open vSwitch Geneve tunneling support" + depends on INET + depends on OPENVSWITCH + depends on GENEVE && !(OPENVSWITCH=y && GENEVE=m) + default y + ---help--- + If you say Y here, then the Open vSwitch will be able create geneve vport. + + Say N to exclude this support and reduce the binary size. diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile index 3591cb5dae91..9a33a273c375 100644 --- a/net/openvswitch/Makefile +++ b/net/openvswitch/Makefile @@ -15,6 +15,10 @@ openvswitch-y := \ vport-internal_dev.o \ vport-netdev.o +ifneq ($(CONFIG_OPENVSWITCH_GENEVE),) +openvswitch-y += vport-geneve.o +endif + ifneq ($(CONFIG_OPENVSWITCH_VXLAN),) openvswitch-y += vport-vxlan.o endif diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 010125c48244..2e31d9e7f4dc 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -370,6 +370,7 @@ static size_t key_attr_size(void) + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ @@ -556,10 +557,12 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts); - rcu_assign_pointer(flow->sf_acts, acts); if (err) goto err_flow_free; + rcu_assign_pointer(flow->sf_acts, acts); + + OVS_CB(packet)->egress_tun_info = NULL; OVS_CB(packet)->flow = flow; packet->priority = flow->key.phy.priority; packet->mark = flow->key.phy.skb_mark; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 2924cb340868..62db02ba36bc 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -448,6 +448,9 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) int error; struct ethhdr *eth; + /* Flags are always used as part of stats */ + key->tp.flags = 0; + skb_reset_mac_header(skb); /* Link layer. We are guaranteed to have at least the 14 byte Ethernet @@ -646,10 +649,23 @@ int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key) { /* Extract metadata from packet. */ - if (tun_info) + if (tun_info) { memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key)); - else + + if (tun_info->options) { + BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) * + 8)) - 1 + > sizeof(key->tun_opts)); + memcpy(GENEVE_OPTS(key, tun_info->options_len), + tun_info->options, tun_info->options_len); + key->tun_opts_len = tun_info->options_len; + } else { + key->tun_opts_len = 0; + } + } else { + key->tun_opts_len = 0; memset(&key->tun_key, 0, sizeof(key->tun_key)); + } key->phy.priority = skb->priority; key->phy.in_port = OVS_CB(skb)->input_vport->port_no; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index fe5a71b81c1f..71813318c8c7 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -51,11 +51,24 @@ struct ovs_key_ipv4_tunnel { struct ovs_tunnel_info { struct ovs_key_ipv4_tunnel tunnel; + struct geneve_opt *options; + u8 options_len; }; +/* Store options at the end of the array if they are less than the + * maximum size. This allows us to get the benefits of variable length + * matching for small options. + */ +#define GENEVE_OPTS(flow_key, opt_len) \ + ((struct geneve_opt *)((flow_key)->tun_opts + \ + FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \ + opt_len)) + static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, const struct iphdr *iph, - __be64 tun_id, __be16 tun_flags) + __be64 tun_id, __be16 tun_flags, + struct geneve_opt *opts, + u8 opts_len) { tun_info->tunnel.tun_id = tun_id; tun_info->tunnel.ipv4_src = iph->saddr; @@ -67,9 +80,14 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, /* clear struct padding. */ memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0, sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE); + + tun_info->options = opts; + tun_info->options_len = opts_len; } struct sw_flow_key { + u8 tun_opts[255]; + u8 tun_opts_len; struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ struct { u32 priority; /* Packet QoS priority. */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 5d6194d9dadc..368f23307911 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -88,18 +89,20 @@ static void update_range__(struct sw_flow_match *match, } \ } while (0) -#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ - do { \ - update_range__(match, offsetof(struct sw_flow_key, field), \ - len, is_mask); \ - if (is_mask) { \ - if ((match)->mask) \ - memcpy(&(match)->mask->key.field, value_p, len);\ - } else { \ - memcpy(&(match)->key->field, value_p, len); \ - } \ +#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \ + do { \ + update_range__(match, offset, len, is_mask); \ + if (is_mask) \ + memcpy((u8 *)&(match)->mask->key + offset, value_p, \ + len); \ + else \ + memcpy((u8 *)(match)->key + offset, value_p, len); \ } while (0) +#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ + SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \ + value_p, len, is_mask) + static u16 range_n_bytes(const struct sw_flow_key_range *range) { return range->end - range->start; @@ -335,6 +338,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, int rem; bool ttl = false; __be16 tun_flags = 0; + unsigned long opt_key_offset; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); @@ -347,6 +351,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, [OVS_TUNNEL_KEY_ATTR_OAM] = 0, + [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1, }; if (type > OVS_TUNNEL_KEY_ATTR_MAX) { @@ -355,7 +360,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, return -EINVAL; } - if (ovs_tunnel_key_lens[type] != nla_len(a)) { + if (ovs_tunnel_key_lens[type] != nla_len(a) && + ovs_tunnel_key_lens[type] != -1) { OVS_NLERR("IPv4 tunnel attribute type has unexpected " " length (type=%d, length=%d, expected=%d).\n", type, nla_len(a), ovs_tunnel_key_lens[type]); @@ -394,7 +400,60 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, case OVS_TUNNEL_KEY_ATTR_OAM: tun_flags |= TUNNEL_OAM; break; + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + tun_flags |= TUNNEL_OPTIONS_PRESENT; + if (nla_len(a) > sizeof(match->key->tun_opts)) { + OVS_NLERR("Geneve option length exceeds maximum size (len %d, max %zu).\n", + nla_len(a), + sizeof(match->key->tun_opts)); + return -EINVAL; + } + + if (nla_len(a) % 4 != 0) { + OVS_NLERR("Geneve option length is not a multiple of 4 (len %d).\n", + nla_len(a)); + return -EINVAL; + } + + /* We need to record the length of the options passed + * down, otherwise packets with the same format but + * additional options will be silently matched. + */ + if (!is_mask) { + SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a), + false); + } else { + /* This is somewhat unusual because it looks at + * both the key and mask while parsing the + * attributes (and by extension assumes the key + * is parsed first). Normally, we would verify + * that each is the correct length and that the + * attributes line up in the validate function. + * However, that is difficult because this is + * variable length and we won't have the + * information later. + */ + if (match->key->tun_opts_len != nla_len(a)) { + OVS_NLERR("Geneve option key length (%d) is different from mask length (%d).", + match->key->tun_opts_len, + nla_len(a)); + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, + true); + } + + opt_key_offset = (unsigned long)GENEVE_OPTS( + (struct sw_flow_key *)0, + nla_len(a)); + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, + nla_data(a), nla_len(a), + is_mask); + break; default: + OVS_NLERR("Unknown IPv4 tunnel attribute (%d).\n", + type); return -EINVAL; } } @@ -421,16 +480,11 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, return 0; } -static int ipv4_tun_to_nlattr(struct sk_buff *skb, - const struct ovs_key_ipv4_tunnel *tun_key, - const struct ovs_key_ipv4_tunnel *output) +static int __ipv4_tun_to_nlattr(struct sk_buff *skb, + const struct ovs_key_ipv4_tunnel *output, + const struct geneve_opt *tun_opts, + int swkey_tun_opts_len) { - struct nlattr *nla; - - nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); - if (!nla) - return -EMSGSIZE; - if (output->tun_flags & TUNNEL_KEY && nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) return -EMSGSIZE; @@ -454,12 +508,35 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; + if (tun_opts && + nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, + swkey_tun_opts_len, tun_opts)) + return -EMSGSIZE; - nla_nest_end(skb, nla); return 0; } +static int ipv4_tun_to_nlattr(struct sk_buff *skb, + const struct ovs_key_ipv4_tunnel *output, + const struct geneve_opt *tun_opts, + int swkey_tun_opts_len) +{ + struct nlattr *nla; + int err; + + nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); + if (!nla) + return -EMSGSIZE; + + err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len); + if (err) + return err; + + nla_nest_end(skb, nla); + return 0; +} + static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, const struct nlattr **a, bool is_mask) { @@ -905,9 +982,16 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) goto nla_put_failure; - if ((swkey->tun_key.ipv4_dst || is_mask) && - ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key)) - goto nla_put_failure; + if ((swkey->tun_key.ipv4_dst || is_mask)) { + const struct geneve_opt *opts = NULL; + + if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) + opts = GENEVE_OPTS(output, swkey->tun_opts_len); + + if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, + swkey->tun_opts_len)) + goto nla_put_failure; + } if (swkey->phy.in_port == DP_MAX_PORTS) { if (is_mask && (output->phy.in_port == 0xffff)) @@ -1290,17 +1374,55 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, if (err) return err; + if (key.tun_opts_len) { + struct geneve_opt *option = GENEVE_OPTS(&key, + key.tun_opts_len); + int opts_len = key.tun_opts_len; + bool crit_opt = false; + + while (opts_len > 0) { + int len; + + if (opts_len < sizeof(*option)) + return -EINVAL; + + len = sizeof(*option) + option->length * 4; + if (len > opts_len) + return -EINVAL; + + crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); + + option = (struct geneve_opt *)((u8 *)option + len); + opts_len -= len; + }; + + key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; + }; + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); if (start < 0) return start; a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, - sizeof(*tun_info)); + sizeof(*tun_info) + key.tun_opts_len); if (IS_ERR(a)) return PTR_ERR(a); tun_info = nla_data(a); tun_info->tunnel = key.tun_key; + tun_info->options_len = key.tun_opts_len; + + if (tun_info->options_len) { + /* We need to store the options in the action itself since + * everything else will go away after flow setup. We can append + * it to tun_info and then point there. + */ + memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len), + key.tun_opts_len); + tun_info->options = (struct geneve_opt *)(tun_info + 1); + } else { + tun_info->options = NULL; + } add_nested_action_end(*sfa, start); @@ -1592,7 +1714,9 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) return -EMSGSIZE; err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel, - nla_data(ovs_key)); + tun_info->options_len ? + tun_info->options : NULL, + tun_info->options_len); if (err) return err; nla_nest_end(skb, start); diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c new file mode 100644 index 000000000000..5572d482f285 --- /dev/null +++ b/net/openvswitch/vport-geneve.c @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2014 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "datapath.h" +#include "vport.h" + +/** + * struct geneve_port - Keeps track of open UDP ports + * @sock: The socket created for this port number. + * @name: vport name. + */ +struct geneve_port { + struct geneve_sock *gs; + char name[IFNAMSIZ]; +}; + +static LIST_HEAD(geneve_ports); + +static inline struct geneve_port *geneve_vport(const struct vport *vport) +{ + return vport_priv(vport); +} + +static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) +{ + return (struct genevehdr *)(udp_hdr(skb) + 1); +} + +/* Convert 64 bit tunnel ID to 24 bit VNI. */ +static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) +{ +#ifdef __BIG_ENDIAN + vni[0] = (__force __u8)(tun_id >> 16); + vni[1] = (__force __u8)(tun_id >> 8); + vni[2] = (__force __u8)tun_id; +#else + vni[0] = (__force __u8)((__force u64)tun_id >> 40); + vni[1] = (__force __u8)((__force u64)tun_id >> 48); + vni[2] = (__force __u8)((__force u64)tun_id >> 56); +#endif +} + +/* Convert 24 bit VNI to 64 bit tunnel ID. */ +static __be64 vni_to_tunnel_id(__u8 *vni) +{ +#ifdef __BIG_ENDIAN + return (vni[0] << 16) | (vni[1] << 8) | vni[2]; +#else + return (__force __be64)(((__force u64)vni[0] << 40) | + ((__force u64)vni[1] << 48) | + ((__force u64)vni[2] << 56)); +#endif +} + +static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) +{ + struct vport *vport = gs->rcv_data; + struct genevehdr *geneveh = geneve_hdr(skb); + int opts_len; + struct ovs_tunnel_info tun_info; + __be64 key; + __be16 flags; + + opts_len = geneveh->opt_len * 4; + + flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT | + (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | + (geneveh->oam ? TUNNEL_OAM : 0) | + (geneveh->critical ? TUNNEL_CRIT_OPT : 0); + + key = vni_to_tunnel_id(geneveh->vni); + + ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, flags, + geneveh->options, opts_len); + + ovs_vport_receive(vport, skb, &tun_info); +} + +static int geneve_get_options(const struct vport *vport, + struct sk_buff *skb) +{ + struct geneve_port *geneve_port = geneve_vport(vport); + __be16 sport; + + sport = ntohs(inet_sk(geneve_port->gs->sock->sk)->inet_sport); + if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, sport)) + return -EMSGSIZE; + return 0; +} + +static void geneve_tnl_destroy(struct vport *vport) +{ + struct geneve_port *geneve_port = geneve_vport(vport); + + geneve_sock_release(geneve_port->gs); + + ovs_vport_deferred_free(vport); +} + +static struct vport *geneve_tnl_create(const struct vport_parms *parms) +{ + struct net *net = ovs_dp_get_net(parms->dp); + struct nlattr *options = parms->options; + struct geneve_port *geneve_port; + struct geneve_sock *gs; + struct vport *vport; + struct nlattr *a; + int err; + u16 dst_port; + + if (!options) { + err = -EINVAL; + goto error; + } + + a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT); + if (a && nla_len(a) == sizeof(u16)) { + dst_port = nla_get_u16(a); + } else { + /* Require destination port from userspace. */ + err = -EINVAL; + goto error; + } + + vport = ovs_vport_alloc(sizeof(struct geneve_port), + &ovs_geneve_vport_ops, parms); + if (IS_ERR(vport)) + return vport; + + geneve_port = geneve_vport(vport); + strncpy(geneve_port->name, parms->name, IFNAMSIZ); + + gs = geneve_sock_add(net, htons(dst_port), geneve_rcv, vport, true, 0); + if (IS_ERR(gs)) { + ovs_vport_free(vport); + return (void *)gs; + } + geneve_port->gs = gs; + + return vport; +error: + return ERR_PTR(err); +} + +static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) +{ + struct ovs_key_ipv4_tunnel *tun_key; + struct ovs_tunnel_info *tun_info; + struct net *net = ovs_dp_get_net(vport->dp); + struct geneve_port *geneve_port = geneve_vport(vport); + __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport; + __be16 sport; + struct rtable *rt; + struct flowi4 fl; + u8 vni[3]; + __be16 df; + int err; + + tun_info = OVS_CB(skb)->egress_tun_info; + if (unlikely(!tun_info)) { + err = -EINVAL; + goto error; + } + + tun_key = &tun_info->tunnel; + + /* Route lookup */ + memset(&fl, 0, sizeof(fl)); + fl.daddr = tun_key->ipv4_dst; + fl.saddr = tun_key->ipv4_src; + fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); + fl.flowi4_mark = skb->mark; + fl.flowi4_proto = IPPROTO_UDP; + + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto error; + } + + df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); + tunnel_id_to_vni(tun_key->tun_id, vni); + skb->ignore_df = 1; + + err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr, + tun_key->ipv4_dst, tun_key->ipv4_tos, + tun_key->ipv4_ttl, df, sport, dport, + tun_key->tun_flags, vni, + tun_info->options_len, (u8 *)tun_info->options, + false); + if (err < 0) + ip_rt_put(rt); +error: + return err; +} + +static const char *geneve_get_name(const struct vport *vport) +{ + struct geneve_port *geneve_port = geneve_vport(vport); + + return geneve_port->name; +} + +const struct vport_ops ovs_geneve_vport_ops = { + .type = OVS_VPORT_TYPE_GENEVE, + .create = geneve_tnl_create, + .destroy = geneve_tnl_destroy, + .get_name = geneve_get_name, + .get_options = geneve_get_options, + .send = geneve_tnl_send, +}; diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index fe768bd600eb..108b82da2fd9 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -106,7 +106,7 @@ static int gre_rcv(struct sk_buff *skb, key = key_to_tunnel_id(tpi->key, tpi->seq); ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, - filter_tnl_flags(tpi->flags)); + filter_tnl_flags(tpi->flags), NULL, 0); ovs_vport_receive(vport, skb, &tun_info); return PACKET_RCVD; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 5fbff2c1ee49..2735e01dca73 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -66,7 +66,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) /* Save outer tunnel values */ iph = ip_hdr(skb); key = cpu_to_be64(ntohl(vx_vni) >> 8); - ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY); + ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0); ovs_vport_receive(vport, skb, &tun_info); } diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 3e50ee8a218c..53001b020ca7 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -48,6 +48,9 @@ static const struct vport_ops *vport_ops_list[] = { #ifdef CONFIG_OPENVSWITCH_VXLAN &ovs_vxlan_vport_ops, #endif +#ifdef CONFIG_OPENVSWITCH_GENEVE + &ovs_geneve_vport_ops, +#endif }; /* Protected by RCU read lock for reading, ovs_mutex for writing. */ diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index e28964aba021..8942125de3a6 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -215,6 +215,7 @@ extern const struct vport_ops ovs_netdev_vport_ops; extern const struct vport_ops ovs_internal_vport_ops; extern const struct vport_ops ovs_gre_vport_ops; extern const struct vport_ops ovs_vxlan_vport_ops; +extern const struct vport_ops ovs_geneve_vport_ops; static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len) -- cgit v1.2.3 From 25cd9ba0abc0749e5cb78e6493c6f6b3311ec6c5 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 6 Oct 2014 05:05:13 -0700 Subject: openvswitch: Add basic MPLS support to kernel Allow datapath to recognize and extract MPLS labels into flow keys and execute actions which push, pop, and set labels on packets. Based heavily on work by Leo Alterman, Ravi K, Isaku Yamahata and Joe Stringer. Cc: Ravi K Cc: Leo Alterman Cc: Isaku Yamahata Cc: Joe Stringer Signed-off-by: Simon Horman Signed-off-by: Jesse Gross Signed-off-by: Pravin B Shelar --- include/net/mpls.h | 39 +++++++++++ include/uapi/linux/openvswitch.h | 32 +++++++++ net/core/dev.c | 3 +- net/openvswitch/Kconfig | 1 + net/openvswitch/actions.c | 106 ++++++++++++++++++++++++++++- net/openvswitch/datapath.c | 6 +- net/openvswitch/flow.c | 30 +++++++++ net/openvswitch/flow.h | 17 +++-- net/openvswitch/flow_netlink.c | 139 ++++++++++++++++++++++++++++++++++----- net/openvswitch/flow_netlink.h | 2 +- 10 files changed, 345 insertions(+), 30 deletions(-) create mode 100644 include/net/mpls.h (limited to 'include/uapi/linux/openvswitch.h') diff --git a/include/net/mpls.h b/include/net/mpls.h new file mode 100644 index 000000000000..5b3b5addfb08 --- /dev/null +++ b/include/net/mpls.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2014 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _NET_MPLS_H +#define _NET_MPLS_H 1 + +#include +#include + +#define MPLS_HLEN 4 + +static inline bool eth_p_mpls(__be16 eth_type) +{ + return eth_type == htons(ETH_P_MPLS_UC) || + eth_type == htons(ETH_P_MPLS_MC); +} + +/* + * For non-MPLS skbs this will correspond to the network header. + * For MPLS skbs it will be before the network_header as the MPLS + * label stack lies between the end of the mac header and the network + * header. That is, for MPLS skbs the end of the mac header + * is the top of the MPLS label stack. + */ +static inline unsigned char *skb_mpls_header(struct sk_buff *skb) +{ + return skb_mac_header(skb) + skb->mac_len; +} +#endif diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 435eabc5ffaa..631056b66f80 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -293,6 +293,9 @@ enum ovs_key_attr { OVS_KEY_ATTR_DP_HASH, /* u32 hash value. Value 0 indicates the hash is not computed by the datapath. */ OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */ + OVS_KEY_ATTR_MPLS, /* array of struct ovs_key_mpls. + * The implementation may restrict + * the accepted length of the array. */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ovs_tunnel_info */ @@ -340,6 +343,10 @@ struct ovs_key_ethernet { __u8 eth_dst[ETH_ALEN]; }; +struct ovs_key_mpls { + __be32 mpls_lse; +}; + struct ovs_key_ipv4 { __be32 ipv4_src; __be32 ipv4_dst; @@ -483,6 +490,19 @@ enum ovs_userspace_attr { #define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) +/** + * struct ovs_action_push_mpls - %OVS_ACTION_ATTR_PUSH_MPLS action argument. + * @mpls_lse: MPLS label stack entry to push. + * @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame. + * + * The only values @mpls_ethertype should ever be given are %ETH_P_MPLS_UC and + * %ETH_P_MPLS_MC, indicating MPLS unicast or multicast. Other are rejected. + */ +struct ovs_action_push_mpls { + __be32 mpls_lse; + __be16 mpls_ethertype; /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */ +}; + /** * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument. * @vlan_tpid: Tag protocol identifier (TPID) to push. @@ -534,6 +554,15 @@ struct ovs_action_hash { * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in * the nested %OVS_SAMPLE_ATTR_* attributes. + * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the + * top of the packets MPLS label stack. Set the ethertype of the + * encapsulating frame to either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to + * indicate the new packet contents. + * @OVS_ACTION_ATTR_POP_MPLS: Pop an MPLS label stack entry off of the + * packet's MPLS label stack. Set the encapsulating frame's ethertype to + * indicate the new packet contents. This could potentially still be + * %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there + * is no MPLS label stack, as determined by ethertype, no action is taken. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -550,6 +579,9 @@ enum ovs_action_attr { OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ OVS_ACTION_ATTR_RECIRC, /* u32 recirc_id. */ OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */ + OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */ + OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */ + __OVS_ACTION_ATTR_MAX }; diff --git a/net/core/dev.c b/net/core/dev.c index 40be481268de..70bb609c283d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -118,6 +118,7 @@ #include #include #include +#include #include #include #include @@ -2530,7 +2531,7 @@ static netdev_features_t net_mpls_features(struct sk_buff *skb, netdev_features_t features, __be16 type) { - if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC)) + if (eth_p_mpls(type)) features &= skb->dev->mpls_features; return features; diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 2a9673e39ca1..454ce12efbbf 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -30,6 +30,7 @@ config OPENVSWITCH config OPENVSWITCH_GRE tristate "Open vSwitch GRE tunneling support" + select NET_MPLS_GSO depends on INET depends on OPENVSWITCH depends on NET_IPGRE_DEMUX diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 922c133b1933..930b1b6e4cef 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -28,10 +28,12 @@ #include #include #include + #include #include #include #include +#include #include #include "datapath.h" @@ -118,6 +120,92 @@ static int make_writable(struct sk_buff *skb, int write_len) return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } +static int push_mpls(struct sk_buff *skb, + const struct ovs_action_push_mpls *mpls) +{ + __be32 *new_mpls_lse; + struct ethhdr *hdr; + + /* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */ + if (skb->encapsulation) + return -ENOTSUPP; + + if (skb_cow_head(skb, MPLS_HLEN) < 0) + return -ENOMEM; + + skb_push(skb, MPLS_HLEN); + memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), + skb->mac_len); + skb_reset_mac_header(skb); + + new_mpls_lse = (__be32 *)skb_mpls_header(skb); + *new_mpls_lse = mpls->mpls_lse; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse, + MPLS_HLEN, 0)); + + hdr = eth_hdr(skb); + hdr->h_proto = mpls->mpls_ethertype; + + skb_set_inner_protocol(skb, skb->protocol); + skb->protocol = mpls->mpls_ethertype; + + return 0; +} + +static int pop_mpls(struct sk_buff *skb, const __be16 ethertype) +{ + struct ethhdr *hdr; + int err; + + err = make_writable(skb, skb->mac_len + MPLS_HLEN); + if (unlikely(err)) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_sub(skb->csum, + csum_partial(skb_mpls_header(skb), + MPLS_HLEN, 0)); + + memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), + skb->mac_len); + + __skb_pull(skb, MPLS_HLEN); + skb_reset_mac_header(skb); + + /* skb_mpls_header() is used to locate the ethertype + * field correctly in the presence of VLAN tags. + */ + hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN); + hdr->h_proto = ethertype; + if (eth_p_mpls(skb->protocol)) + skb->protocol = ethertype; + return 0; +} + +static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse) +{ + __be32 *stack; + int err; + + err = make_writable(skb, skb->mac_len + MPLS_HLEN); + if (unlikely(err)) + return err; + + stack = (__be32 *)skb_mpls_header(skb); + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __be32 diff[] = { ~(*stack), *mpls_lse }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), + ~skb->csum); + } + + *stack = *mpls_lse; + + return 0; +} + /* remove VLAN header from packet and update csum accordingly. */ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) { @@ -140,10 +228,12 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) vlan_set_encap_proto(skb, vhdr); skb->mac_header += VLAN_HLEN; + if (skb_network_offset(skb) < ETH_HLEN) skb_set_network_header(skb, ETH_HLEN); - skb_reset_mac_len(skb); + /* Update mac_len for subsequent MPLS actions */ + skb_reset_mac_len(skb); return 0; } @@ -186,6 +276,8 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag)) return -ENOMEM; + /* Update mac_len for subsequent MPLS actions */ + skb->mac_len += VLAN_HLEN; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(skb->csum, csum_partial(skb->data @@ -612,6 +704,10 @@ static int execute_set_action(struct sk_buff *skb, case OVS_KEY_ATTR_SCTP: err = set_sctp(skb, nla_data(nested_attr)); break; + + case OVS_KEY_ATTR_MPLS: + err = set_mpls(skb, nla_data(nested_attr)); + break; } return err; @@ -690,6 +786,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, execute_hash(skb, key, a); break; + case OVS_ACTION_ATTR_PUSH_MPLS: + err = push_mpls(skb, nla_data(a)); + break; + + case OVS_ACTION_ATTR_POP_MPLS: + err = pop_mpls(skb, nla_get_be16(a)); + break; + case OVS_ACTION_ATTR_PUSH_VLAN: err = push_vlan(skb, nla_data(a)); if (unlikely(err)) /* skb already freed. */ diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f18302f32049..688cb9bc0ef1 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -560,7 +560,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], - &flow->key, 0, &acts); + &flow->key, &acts); if (err) goto err_flow_free; @@ -846,7 +846,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_kfree_flow; error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, - 0, &acts); + &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); goto err_kfree_acts; @@ -953,7 +953,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, return acts; ovs_flow_mask_key(&masked_key, key, mask); - error = ovs_nla_copy_actions(a, &masked_key, 0, &acts); + error = ovs_nla_copy_actions(a, &masked_key, &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); kfree(acts); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 2b78789ea7c5..90a21010fc8f 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,7 @@ #include #include #include +#include #include #include "datapath.h" @@ -480,6 +482,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) return -ENOMEM; skb_reset_network_header(skb); + skb_reset_mac_len(skb); __skb_push(skb, skb->data - skb_mac_header(skb)); /* Network layer. */ @@ -584,6 +587,33 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) memset(&key->ip, 0, sizeof(key->ip)); memset(&key->ipv4, 0, sizeof(key->ipv4)); } + } else if (eth_p_mpls(key->eth.type)) { + size_t stack_len = MPLS_HLEN; + + /* In the presence of an MPLS label stack the end of the L2 + * header and the beginning of the L3 header differ. + * + * Advance network_header to the beginning of the L3 + * header. mac_len corresponds to the end of the L2 header. + */ + while (1) { + __be32 lse; + + error = check_header(skb, skb->mac_len + stack_len); + if (unlikely(error)) + return 0; + + memcpy(&lse, skb_network_header(skb), MPLS_HLEN); + + if (stack_len == MPLS_HLEN) + memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); + + skb_set_network_header(skb, skb->mac_len + stack_len); + if (lse & htonl(MPLS_LS_S_MASK)) + break; + + stack_len += MPLS_HLEN; + } } else if (key->eth.type == htons(ETH_P_IPV6)) { int nh_len; /* IPv6 Header + Extensions */ diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 71813318c8c7..4962bee81a11 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -102,12 +102,17 @@ struct sw_flow_key { __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ __be16 type; /* Ethernet frame type. */ } eth; - struct { - u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ - u8 tos; /* IP ToS. */ - u8 ttl; /* IP TTL/hop limit. */ - u8 frag; /* One of OVS_FRAG_TYPE_*. */ - } ip; + union { + struct { + __be32 top_lse; /* top label stack entry */ + } mpls; + struct { + u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ + u8 tos; /* IP ToS. */ + u8 ttl; /* IP TTL/hop limit. */ + u8 frag; /* One of OVS_FRAG_TYPE_*. */ + } ip; + }; struct { __be16 src; /* TCP/UDP/SCTP source port. */ __be16 dst; /* TCP/UDP/SCTP destination port. */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 939bcb32100f..569309c49cc0 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "flow_netlink.h" @@ -134,7 +135,8 @@ static bool match_validate(const struct sw_flow_match *match, | (1 << OVS_KEY_ATTR_ICMP) | (1 << OVS_KEY_ATTR_ICMPV6) | (1 << OVS_KEY_ATTR_ARP) - | (1 << OVS_KEY_ATTR_ND)); + | (1 << OVS_KEY_ATTR_ND) + | (1 << OVS_KEY_ATTR_MPLS)); /* Always allowed mask fields. */ mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) @@ -149,6 +151,12 @@ static bool match_validate(const struct sw_flow_match *match, mask_allowed |= 1 << OVS_KEY_ATTR_ARP; } + if (eth_p_mpls(match->key->eth.type)) { + key_expected |= 1 << OVS_KEY_ATTR_MPLS; + if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + mask_allowed |= 1 << OVS_KEY_ATTR_MPLS; + } + if (match->key->eth.type == htons(ETH_P_IP)) { key_expected |= 1 << OVS_KEY_ATTR_IPV4; if (match->mask && (match->mask->key.eth.type == htons(0xffff))) @@ -266,6 +274,7 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32), [OVS_KEY_ATTR_DP_HASH] = sizeof(u32), [OVS_KEY_ATTR_TUNNEL] = -1, + [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls), }; static bool is_all_zero(const u8 *fp, size_t size) @@ -735,6 +744,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, attrs &= ~(1 << OVS_KEY_ATTR_ARP); } + if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { + const struct ovs_key_mpls *mpls_key; + + mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); + SW_FLOW_KEY_PUT(match, mpls.top_lse, + mpls_key->mpls_lse, is_mask); + + attrs &= ~(1 << OVS_KEY_ATTR_MPLS); + } + if (attrs & (1 << OVS_KEY_ATTR_TCP)) { const struct ovs_key_tcp *tcp_key; @@ -1140,6 +1159,14 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, arp_key->arp_op = htons(output->ip.proto); ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); + } else if (eth_p_mpls(swkey->eth.type)) { + struct ovs_key_mpls *mpls_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); + if (!nla) + goto nla_put_failure; + mpls_key = nla_data(nla); + mpls_key->mpls_lse = output->mpls.top_lse; } if ((swkey->eth.type == htons(ETH_P_IP) || @@ -1336,9 +1363,15 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, a->nla_len = sfa->actions_len - st_offset; } +static int ovs_nla_copy_actions__(const struct nlattr *attr, + const struct sw_flow_key *key, + int depth, struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci); + static int validate_and_copy_sample(const struct nlattr *attr, const struct sw_flow_key *key, int depth, - struct sw_flow_actions **sfa) + struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; @@ -1375,7 +1408,8 @@ static int validate_and_copy_sample(const struct nlattr *attr, if (st_acts < 0) return st_acts; - err = ovs_nla_copy_actions(actions, key, depth + 1, sfa); + err = ovs_nla_copy_actions__(actions, key, depth + 1, sfa, + eth_type, vlan_tci); if (err) return err; @@ -1385,10 +1419,10 @@ static int validate_and_copy_sample(const struct nlattr *attr, return 0; } -static int validate_tp_port(const struct sw_flow_key *flow_key) +static int validate_tp_port(const struct sw_flow_key *flow_key, + __be16 eth_type) { - if ((flow_key->eth.type == htons(ETH_P_IP) || - flow_key->eth.type == htons(ETH_P_IPV6)) && + if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) && (flow_key->tp.src || flow_key->tp.dst)) return 0; @@ -1483,7 +1517,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, struct sw_flow_actions **sfa, - bool *set_tun) + bool *set_tun, __be16 eth_type) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); @@ -1508,6 +1542,9 @@ static int validate_set(const struct nlattr *a, break; case OVS_KEY_ATTR_TUNNEL: + if (eth_p_mpls(eth_type)) + return -EINVAL; + *set_tun = true; err = validate_and_copy_set_tun(a, sfa); if (err) @@ -1515,7 +1552,7 @@ static int validate_set(const struct nlattr *a, break; case OVS_KEY_ATTR_IPV4: - if (flow_key->eth.type != htons(ETH_P_IP)) + if (eth_type != htons(ETH_P_IP)) return -EINVAL; if (!flow_key->ip.proto) @@ -1531,7 +1568,7 @@ static int validate_set(const struct nlattr *a, break; case OVS_KEY_ATTR_IPV6: - if (flow_key->eth.type != htons(ETH_P_IPV6)) + if (eth_type != htons(ETH_P_IPV6)) return -EINVAL; if (!flow_key->ip.proto) @@ -1553,19 +1590,24 @@ static int validate_set(const struct nlattr *a, if (flow_key->ip.proto != IPPROTO_TCP) return -EINVAL; - return validate_tp_port(flow_key); + return validate_tp_port(flow_key, eth_type); case OVS_KEY_ATTR_UDP: if (flow_key->ip.proto != IPPROTO_UDP) return -EINVAL; - return validate_tp_port(flow_key); + return validate_tp_port(flow_key, eth_type); + + case OVS_KEY_ATTR_MPLS: + if (!eth_p_mpls(eth_type)) + return -EINVAL; + break; case OVS_KEY_ATTR_SCTP: if (flow_key->ip.proto != IPPROTO_SCTP) return -EINVAL; - return validate_tp_port(flow_key); + return validate_tp_port(flow_key, eth_type); default: return -EINVAL; @@ -1609,12 +1651,13 @@ static int copy_action(const struct nlattr *from, return 0; } -int ovs_nla_copy_actions(const struct nlattr *attr, - const struct sw_flow_key *key, - int depth, - struct sw_flow_actions **sfa) +static int ovs_nla_copy_actions__(const struct nlattr *attr, + const struct sw_flow_key *key, + int depth, struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci) { const struct nlattr *a; + bool out_tnl_port = false; int rem, err; if (depth >= SAMPLE_ACTION_DEPTH) @@ -1626,6 +1669,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr, [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), [OVS_ACTION_ATTR_RECIRC] = sizeof(u32), [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, + [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls), + [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16), [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), [OVS_ACTION_ATTR_POP_VLAN] = 0, [OVS_ACTION_ATTR_SET] = (u32)-1, @@ -1655,6 +1700,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_OUTPUT: if (nla_get_u32(a) >= DP_MAX_PORTS) return -EINVAL; + out_tnl_port = false; + break; case OVS_ACTION_ATTR_HASH: { @@ -1671,6 +1718,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr, } case OVS_ACTION_ATTR_POP_VLAN: + vlan_tci = htons(0); break; case OVS_ACTION_ATTR_PUSH_VLAN: @@ -1679,19 +1727,66 @@ int ovs_nla_copy_actions(const struct nlattr *attr, return -EINVAL; if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) return -EINVAL; + vlan_tci = vlan->vlan_tci; break; case OVS_ACTION_ATTR_RECIRC: break; + case OVS_ACTION_ATTR_PUSH_MPLS: { + const struct ovs_action_push_mpls *mpls = nla_data(a); + + /* Networking stack do not allow simultaneous Tunnel + * and MPLS GSO. + */ + if (out_tnl_port) + return -EINVAL; + + if (!eth_p_mpls(mpls->mpls_ethertype)) + return -EINVAL; + /* Prohibit push MPLS other than to a white list + * for packets that have a known tag order. + */ + if (vlan_tci & htons(VLAN_TAG_PRESENT) || + (eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6) && + eth_type != htons(ETH_P_ARP) && + eth_type != htons(ETH_P_RARP) && + !eth_p_mpls(eth_type))) + return -EINVAL; + eth_type = mpls->mpls_ethertype; + break; + } + + case OVS_ACTION_ATTR_POP_MPLS: + if (vlan_tci & htons(VLAN_TAG_PRESENT) || + !eth_p_mpls(eth_type)) + return -EINVAL; + + /* Disallow subsequent L2.5+ set and mpls_pop actions + * as there is no check here to ensure that the new + * eth_type is valid and thus set actions could + * write off the end of the packet or otherwise + * corrupt it. + * + * Support for these actions is planned using packet + * recirculation. + */ + eth_type = htons(0); + break; + case OVS_ACTION_ATTR_SET: - err = validate_set(a, key, sfa, &skip_copy); + err = validate_set(a, key, sfa, + &out_tnl_port, eth_type); if (err) return err; + + skip_copy = out_tnl_port; break; case OVS_ACTION_ATTR_SAMPLE: - err = validate_and_copy_sample(a, key, depth, sfa); + err = validate_and_copy_sample(a, key, depth, sfa, + eth_type, vlan_tci); if (err) return err; skip_copy = true; @@ -1713,6 +1808,14 @@ int ovs_nla_copy_actions(const struct nlattr *attr, return 0; } +int ovs_nla_copy_actions(const struct nlattr *attr, + const struct sw_flow_key *key, + struct sw_flow_actions **sfa) +{ + return ovs_nla_copy_actions__(attr, key, 0, sfa, key->eth.type, + key->eth.tci); +} + static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) { const struct nlattr *a; diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 206e45add888..6355b1d01329 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -49,7 +49,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, const struct nlattr *); int ovs_nla_copy_actions(const struct nlattr *attr, - const struct sw_flow_key *key, int depth, + const struct sw_flow_key *key, struct sw_flow_actions **sfa); int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb); -- cgit v1.2.3 From 1a4e96a0e989200f7180264e27b22e9a85f3fcc8 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Tue, 30 Sep 2014 10:52:32 -0700 Subject: openvswitch: Fix the type of struct ovs_key_nd nd_target field. Should be the same as other IPv6 address fields. Current master produces sparse warnings without this change. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- include/uapi/linux/openvswitch.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux/openvswitch.h') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 631056b66f80..26c36c4cf7e2 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -400,9 +400,9 @@ struct ovs_key_arp { }; struct ovs_key_nd { - __u32 nd_target[4]; - __u8 nd_sll[ETH_ALEN]; - __u8 nd_tll[ETH_ALEN]; + __be32 nd_target[4]; + __u8 nd_sll[ETH_ALEN]; + __u8 nd_tll[ETH_ALEN]; }; /** -- cgit v1.2.3 From 8f0aad6f35f7e8b3118b7b8a65e8e76b135cc4cb Mon Sep 17 00:00:00 2001 From: Wenyu Zhang Date: Thu, 6 Nov 2014 06:51:24 -0800 Subject: openvswitch: Extend packet attribute for egress tunnel info OVS vswitch has extended IPFIX exporter to export tunnel headers to improve network visibility. To export this information userspace needs to know egress tunnel for given packet. By extending packet attributes datapath can export egress tunnel info for given packet. So that userspace can ask for egress tunnel info in userspace action. This information is used to build IPFIX data for given flow. Signed-off-by: Wenyu Zhang Acked-by: Romain Lenglet Acked-by: Ben Pfaff Signed-off-by: Pravin B Shelar --- include/uapi/linux/openvswitch.h | 13 +++++++++ net/openvswitch/actions.c | 19 ++++++++++++ net/openvswitch/datapath.c | 21 +++++++++++--- net/openvswitch/datapath.h | 2 ++ net/openvswitch/flow.h | 62 +++++++++++++++++++++++++++++++--------- net/openvswitch/flow_netlink.c | 54 +++++++++++++++++++++++++++------- net/openvswitch/flow_netlink.h | 3 ++ net/openvswitch/vport-geneve.c | 21 +++++++++++++- net/openvswitch/vport-gre.c | 12 +++++++- net/openvswitch/vport-vxlan.c | 24 +++++++++++++++- net/openvswitch/vport.c | 61 +++++++++++++++++++++++++++++++++++++++ net/openvswitch/vport.h | 14 +++++++++ 12 files changed, 275 insertions(+), 31 deletions(-) (limited to 'include/uapi/linux/openvswitch.h') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 26c36c4cf7e2..cf8185661e52 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -157,6 +157,11 @@ enum ovs_packet_cmd { * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content * specified there. + * @OVS_PACKET_ATTR_EGRESS_TUN_KEY: Present for an %OVS_PACKET_CMD_ACTION + * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an + * %OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute, which is sent only if the + * output port is actually a tunnel port. Contains the output tunnel key + * extracted from the packet as nested %OVS_TUNNEL_KEY_ATTR_* attributes. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_PACKET_* commands. @@ -167,6 +172,8 @@ enum ovs_packet_attr { OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */ OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */ + OVS_PACKET_ATTR_EGRESS_TUN_KEY, /* Nested OVS_TUNNEL_KEY_ATTR_* + attributes. */ __OVS_PACKET_ATTR_MAX }; @@ -315,6 +322,8 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ OVS_TUNNEL_KEY_ATTR_OAM, /* No argument. OAM frame. */ OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */ + OVS_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. */ + OVS_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. */ __OVS_TUNNEL_KEY_ATTR_MAX }; @@ -480,11 +489,15 @@ enum ovs_sample_attr { * message should be sent. Required. * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA. + * @OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: If present, u32 output port to get + * tunnel info. */ enum ovs_userspace_attr { OVS_USERSPACE_ATTR_UNSPEC, OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */ OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */ + OVS_USERSPACE_ATTR_EGRESS_TUN_PORT, /* Optional, u32 output port + * to get tunnel info. */ __OVS_USERSPACE_ATTR_MAX }; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index f7e589159e4a..ceb618cf1292 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -564,6 +564,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port) static int output_userspace(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *attr) { + struct ovs_tunnel_info info; struct dp_upcall_info upcall; const struct nlattr *a; int rem; @@ -572,6 +573,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, upcall.key = key; upcall.userdata = NULL; upcall.portid = 0; + upcall.egress_tun_info = NULL; for (a = nla_data(attr), rem = nla_len(attr); rem > 0; a = nla_next(a, &rem)) { @@ -583,7 +585,24 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, case OVS_USERSPACE_ATTR_PID: upcall.portid = nla_get_u32(a); break; + + case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: { + /* Get out tunnel info. */ + struct vport *vport; + + vport = ovs_vport_rcu(dp, nla_get_u32(a)); + if (vport) { + int err; + + err = ovs_vport_get_egress_tun_info(vport, skb, + &info); + if (!err) + upcall.egress_tun_info = &info; + } + break; } + + } /* End of switch. */ } return ovs_dp_upcall(dp, skb, &upcall); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6cfb44f3a7f0..c2ac340e19fb 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -274,6 +274,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) upcall.key = key; upcall.userdata = NULL; upcall.portid = ovs_vport_find_upcall_portid(p, skb); + upcall.egress_tun_info = NULL; error = ovs_dp_upcall(dp, skb, &upcall); if (unlikely(error)) kfree_skb(skb); @@ -375,7 +376,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, return err; } -static size_t upcall_msg_size(const struct nlattr *userdata, +static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, unsigned int hdrlen) { size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) @@ -383,8 +384,12 @@ static size_t upcall_msg_size(const struct nlattr *userdata, + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */ /* OVS_PACKET_ATTR_USERDATA */ - if (userdata) - size += NLA_ALIGN(userdata->nla_len); + if (upcall_info->userdata) + size += NLA_ALIGN(upcall_info->userdata->nla_len); + + /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */ + if (upcall_info->egress_tun_info) + size += nla_total_size(ovs_tun_key_attr_size()); return size; } @@ -440,7 +445,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, else hlen = skb->len; - len = upcall_msg_size(upcall_info->userdata, hlen); + len = upcall_msg_size(upcall_info, hlen); user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC); if (!user_skb) { err = -ENOMEM; @@ -461,6 +466,14 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, nla_len(upcall_info->userdata), nla_data(upcall_info->userdata)); + if (upcall_info->egress_tun_info) { + nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); + err = ovs_nla_put_egress_tunnel_key(user_skb, + upcall_info->egress_tun_info); + BUG_ON(err); + nla_nest_end(user_skb, nla); + } + /* Only reserve room for attribute header, packet data is added * in skb_zerocopy() */ if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 1c56a80d6677..2bc577bf9b31 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -114,12 +114,14 @@ struct ovs_skb_cb { * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no * packet is sent and the packet is accounted in the datapath's @n_lost * counter. + * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY. */ struct dp_upcall_info { u8 cmd; const struct sw_flow_key *key; const struct nlattr *userdata; u32 portid; + const struct ovs_tunnel_info *egress_tun_info; }; /** diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 4962bee81a11..543b358ee57f 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -37,8 +37,8 @@ struct sk_buff; /* Used to memset ovs_key_ipv4_tunnel padding. */ #define OVS_TUNNEL_KEY_SIZE \ - (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + \ - FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl)) + (offsetof(struct ovs_key_ipv4_tunnel, tp_dst) + \ + FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, tp_dst)) struct ovs_key_ipv4_tunnel { __be64 tun_id; @@ -47,6 +47,8 @@ struct ovs_key_ipv4_tunnel { __be16 tun_flags; u8 ipv4_tos; u8 ipv4_ttl; + __be16 tp_src; + __be16 tp_dst; } __packed __aligned(4); /* Minimize padding. */ struct ovs_tunnel_info { @@ -64,27 +66,59 @@ struct ovs_tunnel_info { FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \ opt_len)) -static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, - const struct iphdr *iph, - __be64 tun_id, __be16 tun_flags, - struct geneve_opt *opts, - u8 opts_len) +static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, + __be32 saddr, __be32 daddr, + u8 tos, u8 ttl, + __be16 tp_src, + __be16 tp_dst, + __be64 tun_id, + __be16 tun_flags, + struct geneve_opt *opts, + u8 opts_len) { tun_info->tunnel.tun_id = tun_id; - tun_info->tunnel.ipv4_src = iph->saddr; - tun_info->tunnel.ipv4_dst = iph->daddr; - tun_info->tunnel.ipv4_tos = iph->tos; - tun_info->tunnel.ipv4_ttl = iph->ttl; + tun_info->tunnel.ipv4_src = saddr; + tun_info->tunnel.ipv4_dst = daddr; + tun_info->tunnel.ipv4_tos = tos; + tun_info->tunnel.ipv4_ttl = ttl; tun_info->tunnel.tun_flags = tun_flags; - /* clear struct padding. */ - memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0, - sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE); + /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of + * the upper tunnel are used. + * E.g: GRE over IPSEC, the tp_src and tp_port are zero. + */ + tun_info->tunnel.tp_src = tp_src; + tun_info->tunnel.tp_dst = tp_dst; + + /* Clear struct padding. */ + if (sizeof(tun_info->tunnel) != OVS_TUNNEL_KEY_SIZE) + memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, + 0, sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE); tun_info->options = opts; tun_info->options_len = opts_len; } +static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, + const struct iphdr *iph, + __be16 tp_src, + __be16 tp_dst, + __be64 tun_id, + __be16 tun_flags, + struct geneve_opt *opts, + u8 opts_len) +{ + __ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr, + iph->tos, iph->ttl, + tp_src, tp_dst, + tun_id, tun_flags, + opts, opts_len); +} + +#define OVS_SW_FLOW_KEY_METADATA_SIZE \ + (offsetof(struct sw_flow_key, recirc_id) + \ + FIELD_SIZEOF(struct sw_flow_key, recirc_id)) + struct sw_flow_key { u8 tun_opts[255]; u8 tun_opts_len; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ed3109761827..98a3e96b7d93 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -245,6 +245,24 @@ static bool match_validate(const struct sw_flow_match *match, return true; } +size_t ovs_tun_key_attr_size(void) +{ + /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider + * updating this function. + */ + return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ + + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ + + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ + + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ +} + size_t ovs_key_attr_size(void) { /* Whenever adding new OVS_KEY_ FIELDS, we should consider @@ -254,15 +272,7 @@ size_t ovs_key_attr_size(void) return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ - + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ - + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ - + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ - + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ - + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ - + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ - + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ - + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ - + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + + ovs_tun_key_attr_size() + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ @@ -393,6 +403,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, [OVS_TUNNEL_KEY_ATTR_TTL] = 1, [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, + [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16), + [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16), [OVS_TUNNEL_KEY_ATTR_OAM] = 0, [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1, }; @@ -440,6 +452,14 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, case OVS_TUNNEL_KEY_ATTR_CSUM: tun_flags |= TUNNEL_CSUM; break; + case OVS_TUNNEL_KEY_ATTR_TP_SRC: + SW_FLOW_KEY_PUT(match, tun_key.tp_src, + nla_get_be16(a), is_mask); + break; + case OVS_TUNNEL_KEY_ATTR_TP_DST: + SW_FLOW_KEY_PUT(match, tun_key.tp_dst, + nla_get_be16(a), is_mask); + break; case OVS_TUNNEL_KEY_ATTR_OAM: tun_flags |= TUNNEL_OAM; break; @@ -548,6 +568,12 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_CSUM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) return -EMSGSIZE; + if (output->tp_src && + nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src)) + return -EMSGSIZE; + if (output->tp_dst && + nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst)) + return -EMSGSIZE; if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; @@ -559,7 +585,6 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, return 0; } - static int ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output, const struct geneve_opt *tun_opts, @@ -580,6 +605,14 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, return 0; } +int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, + const struct ovs_tunnel_info *egress_tun_info) +{ + return __ipv4_tun_to_nlattr(skb, &egress_tun_info->tunnel, + egress_tun_info->options, + egress_tun_info->options_len); +} + static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, const struct nlattr **a, bool is_mask) { @@ -1653,6 +1686,7 @@ static int validate_userspace(const struct nlattr *attr) static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, + [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 }, }; struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; int error; diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index eb0b177300ad..90bbe3785504 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -37,6 +37,7 @@ #include "flow.h" +size_t ovs_tun_key_attr_size(void); size_t ovs_key_attr_size(void); void ovs_match_init(struct sw_flow_match *match, @@ -49,6 +50,8 @@ int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *); int ovs_nla_get_match(struct sw_flow_match *match, const struct nlattr *, const struct nlattr *); +int ovs_nla_put_egress_tunnel_key(struct sk_buff *, + const struct ovs_tunnel_info *); int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 70c9765011f4..e31f19c922e2 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -97,7 +97,9 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) key = vni_to_tunnel_id(geneveh->vni); - ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, flags, + ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), + udp_hdr(skb)->source, udp_hdr(skb)->dest, + key, flags, geneveh->options, opts_len); ovs_vport_receive(vport, skb, &tun_info); @@ -228,6 +230,22 @@ static const char *geneve_get_name(const struct vport *vport) return geneve_port->name; } +static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ovs_tunnel_info *egress_tun_info) +{ + struct geneve_port *geneve_port = geneve_vport(vport); + struct net *net = ovs_dp_get_net(vport->dp); + __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport; + __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); + + /* Get tp_src and tp_dst, refert to geneve_build_header(). + */ + return ovs_tunnel_get_egress_info(egress_tun_info, + ovs_dp_get_net(vport->dp), + OVS_CB(skb)->egress_tun_info, + IPPROTO_UDP, skb->mark, sport, dport); +} + static struct vport_ops ovs_geneve_vport_ops = { .type = OVS_VPORT_TYPE_GENEVE, .create = geneve_tnl_create, @@ -236,6 +254,7 @@ static struct vport_ops ovs_geneve_vport_ops = { .get_options = geneve_get_options, .send = geneve_tnl_send, .owner = THIS_MODULE, + .get_egress_tun_info = geneve_get_egress_tun_info, }; static int __init ovs_geneve_tnl_init(void) diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 00270b608844..8e61a5c6ae7c 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -108,7 +108,7 @@ static int gre_rcv(struct sk_buff *skb, return PACKET_REJECT; key = key_to_tunnel_id(tpi->key, tpi->seq); - ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, + ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), 0, 0, key, filter_tnl_flags(tpi->flags), NULL, 0); ovs_vport_receive(vport, skb, &tun_info); @@ -284,12 +284,22 @@ static void gre_tnl_destroy(struct vport *vport) gre_exit(); } +static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ovs_tunnel_info *egress_tun_info) +{ + return ovs_tunnel_get_egress_info(egress_tun_info, + ovs_dp_get_net(vport->dp), + OVS_CB(skb)->egress_tun_info, + IPPROTO_GRE, skb->mark, 0, 0); +} + static struct vport_ops ovs_gre_vport_ops = { .type = OVS_VPORT_TYPE_GRE, .create = gre_create, .destroy = gre_tnl_destroy, .get_name = gre_get_name, .send = gre_tnl_send, + .get_egress_tun_info = gre_get_egress_tun_info, .owner = THIS_MODULE, }; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 965e7500c5a6..38f95a52241b 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -69,7 +69,9 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) /* Save outer tunnel values */ iph = ip_hdr(skb); key = cpu_to_be64(ntohl(vx_vni) >> 8); - ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0); + ovs_flow_tun_info_init(&tun_info, iph, + udp_hdr(skb)->source, udp_hdr(skb)->dest, + key, TUNNEL_KEY, NULL, 0); ovs_vport_receive(vport, skb, &tun_info); } @@ -189,6 +191,25 @@ error: return err; } +static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ovs_tunnel_info *egress_tun_info) +{ + struct net *net = ovs_dp_get_net(vport->dp); + struct vxlan_port *vxlan_port = vxlan_vport(vport); + __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; + __be16 src_port; + int port_min; + int port_max; + + inet_get_local_port_range(net, &port_min, &port_max); + src_port = udp_flow_src_port(net, skb, 0, 0, true); + + return ovs_tunnel_get_egress_info(egress_tun_info, net, + OVS_CB(skb)->egress_tun_info, + IPPROTO_UDP, skb->mark, + src_port, dst_port); +} + static const char *vxlan_get_name(const struct vport *vport) { struct vxlan_port *vxlan_port = vxlan_vport(vport); @@ -202,6 +223,7 @@ static struct vport_ops ovs_vxlan_vport_ops = { .get_name = vxlan_get_name, .get_options = vxlan_get_options, .send = vxlan_tnl_send, + .get_egress_tun_info = vxlan_get_egress_tun_info, .owner = THIS_MODULE, }; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 4b5dd18953a6..630e81984b65 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -573,3 +573,64 @@ void ovs_vport_deferred_free(struct vport *vport) call_rcu(&vport->rcu, free_vport_rcu); } EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); + +int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info, + struct net *net, + const struct ovs_tunnel_info *tun_info, + u8 ipproto, + u32 skb_mark, + __be16 tp_src, + __be16 tp_dst) +{ + const struct ovs_key_ipv4_tunnel *tun_key; + struct rtable *rt; + struct flowi4 fl; + + if (unlikely(!tun_info)) + return -EINVAL; + + tun_key = &tun_info->tunnel; + + /* Route lookup to get srouce IP address. + * The process may need to be changed if the corresponding process + * in vports ops changed. + */ + memset(&fl, 0, sizeof(fl)); + fl.daddr = tun_key->ipv4_dst; + fl.saddr = tun_key->ipv4_src; + fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); + fl.flowi4_mark = skb_mark; + fl.flowi4_proto = IPPROTO_GRE; + + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + ip_rt_put(rt); + + /* Generate egress_tun_info based on tun_info, + * saddr, tp_src and tp_dst + */ + __ovs_flow_tun_info_init(egress_tun_info, + fl.saddr, tun_key->ipv4_dst, + tun_key->ipv4_tos, + tun_key->ipv4_ttl, + tp_src, tp_dst, + tun_key->tun_id, + tun_key->tun_flags, + tun_info->options, + tun_info->options_len); + + return 0; +} +EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info); + +int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ovs_tunnel_info *info) +{ + /* get_egress_tun_info() is only implemented on tunnel ports. */ + if (unlikely(!vport->ops->get_egress_tun_info)) + return -EINVAL; + + return vport->ops->get_egress_tun_info(vport, skb, info); +} diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index e41c3facf799..0635d1d761e9 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -58,6 +58,16 @@ u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *); int ovs_vport_send(struct vport *, struct sk_buff *); +int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info, + struct net *net, + const struct ovs_tunnel_info *tun_info, + u8 ipproto, + u32 skb_mark, + __be16 tp_src, + __be16 tp_dst); +int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ovs_tunnel_info *info); + /* The following definitions are for implementers of vport devices: */ struct vport_err_stats { @@ -146,6 +156,8 @@ struct vport_parms { * @get_name: Get the device's name. * @send: Send a packet on the device. Returns the length of the packet sent, * zero for dropped packets or negative for error. + * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for + * a packet. */ struct vport_ops { enum ovs_vport_type type; @@ -161,6 +173,8 @@ struct vport_ops { const char *(*get_name)(const struct vport *); int (*send)(struct vport *, struct sk_buff *); + int (*get_egress_tun_info)(struct vport *, struct sk_buff *, + struct ovs_tunnel_info *); struct module *owner; struct list_head list; -- cgit v1.2.3 From 05da5898a96c05e32aa9850c9cd89eef29471b13 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 6 Nov 2014 07:03:05 -0800 Subject: openvswitch: Add support for OVS_FLOW_ATTR_PROBE. This new flag is useful for suppressing error logging while probing for datapath features using flow commands. For backwards compatibility reasons the commands are executed normally, but error logging is suppressed. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- include/uapi/linux/openvswitch.h | 2 + net/openvswitch/datapath.c | 49 ++++--- net/openvswitch/datapath.h | 6 +- net/openvswitch/flow.c | 4 +- net/openvswitch/flow.h | 2 +- net/openvswitch/flow_netlink.c | 303 +++++++++++++++++++++------------------ net/openvswitch/flow_netlink.h | 10 +- 7 files changed, 208 insertions(+), 168 deletions(-) (limited to 'include/uapi/linux/openvswitch.h') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index cf8185661e52..3a6dcaa359b7 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -457,6 +457,8 @@ enum ovs_flow_attr { OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */ OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */ OVS_FLOW_ATTR_MASK, /* Sequence of OVS_KEY_ATTR_* attributes. */ + OVS_FLOW_ATTR_PROBE, /* Flow operation is a feature probe, error + * logging should be suppressed. */ __OVS_FLOW_ATTR_MAX }; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 65561ebb489e..ab141d49bb9d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -526,6 +526,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct vport *input_vport; int len; int err; + bool log = !a[OVS_FLOW_ATTR_PROBE]; err = -EINVAL; if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || @@ -559,12 +560,12 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) goto err_kfree_skb; err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet, - &flow->key); + &flow->key, log); if (err) goto err_flow_free; err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], - &flow->key, &acts); + &flow->key, &acts, log); if (err) goto err_flow_free; @@ -855,15 +856,16 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) struct sw_flow_actions *acts; struct sw_flow_match match; int error; + bool log = !a[OVS_FLOW_ATTR_PROBE]; /* Must have key and actions. */ error = -EINVAL; if (!a[OVS_FLOW_ATTR_KEY]) { - OVS_NLERR("Flow key attribute not present in new flow.\n"); + OVS_NLERR(log, "Flow key attr not present in new flow."); goto error; } if (!a[OVS_FLOW_ATTR_ACTIONS]) { - OVS_NLERR("Flow actions attribute not present in new flow.\n"); + OVS_NLERR(log, "Flow actions attr not present in new flow."); goto error; } @@ -878,8 +880,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) /* Extract key. */ ovs_match_init(&match, &new_flow->unmasked_key, &mask); - error = ovs_nla_get_match(&match, - a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], + a[OVS_FLOW_ATTR_MASK], log); if (error) goto err_kfree_flow; @@ -887,9 +889,9 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) /* Validate actions. */ error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, - &acts); + &acts, log); if (error) { - OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); + OVS_NLERR(log, "Flow actions may not be safe on all matching packets."); goto err_kfree_flow; } @@ -942,6 +944,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* The unmasked key has to be the same for flow updates. */ if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { + /* Look for any overlapping flow. */ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (!flow) { error = -ENOENT; @@ -984,16 +987,18 @@ error: /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, const struct sw_flow_key *key, - const struct sw_flow_mask *mask) + const struct sw_flow_mask *mask, + bool log) { struct sw_flow_actions *acts; struct sw_flow_key masked_key; int error; ovs_flow_mask_key(&masked_key, key, mask); - error = ovs_nla_copy_actions(a, &masked_key, &acts); + error = ovs_nla_copy_actions(a, &masked_key, &acts, log); if (error) { - OVS_NLERR("Actions may not be safe on all matching packets.\n"); + OVS_NLERR(log, + "Actions may not be safe on all matching packets"); return ERR_PTR(error); } @@ -1012,23 +1017,25 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) struct sw_flow_actions *old_acts = NULL, *acts = NULL; struct sw_flow_match match; int error; + bool log = !a[OVS_FLOW_ATTR_PROBE]; /* Extract key. */ error = -EINVAL; if (!a[OVS_FLOW_ATTR_KEY]) { - OVS_NLERR("Flow key attribute not present in set flow.\n"); + OVS_NLERR(log, "Flow key attribute not present in set flow."); goto error; } ovs_match_init(&match, &key, &mask); - error = ovs_nla_get_match(&match, - a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], + a[OVS_FLOW_ATTR_MASK], log); if (error) goto error; /* Validate actions. */ if (a[OVS_FLOW_ATTR_ACTIONS]) { - acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask); + acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask, + log); if (IS_ERR(acts)) { error = PTR_ERR(acts); goto error; @@ -1109,14 +1116,16 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_match match; int err; + bool log = !a[OVS_FLOW_ATTR_PROBE]; if (!a[OVS_FLOW_ATTR_KEY]) { - OVS_NLERR("Flow get message rejected, Key attribute missing.\n"); + OVS_NLERR(log, + "Flow get message rejected, Key attribute missing."); return -EINVAL; } ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log); if (err) return err; @@ -1157,10 +1166,12 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_match match; int err; + bool log = !a[OVS_FLOW_ATTR_PROBE]; if (likely(a[OVS_FLOW_ATTR_KEY])) { ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, + log); if (unlikely(err)) return err; } @@ -1250,8 +1261,10 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, + [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, + [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG }, }; static const struct genl_ops dp_flow_genl_ops[] = { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 8389c1d68e57..3ece94563079 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -199,9 +199,9 @@ void ovs_dp_notify_wq(struct work_struct *work); int action_fifos_init(void); void action_fifos_exit(void); -#define OVS_NLERR(fmt, ...) \ +#define OVS_NLERR(logging_allowed, fmt, ...) \ do { \ - if (net_ratelimit()) \ - pr_info("netlink: " fmt, ##__VA_ARGS__); \ + if (logging_allowed && net_ratelimit()) \ + pr_info("netlink: " fmt "\n", ##__VA_ARGS__); \ } while (0) #endif /* datapath.h */ diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 25e9abcd51f1..70bef2ab7f2b 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -712,12 +712,12 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, int ovs_flow_key_extract_userspace(const struct nlattr *attr, struct sk_buff *skb, - struct sw_flow_key *key) + struct sw_flow_key *key, bool log) { int err; /* Extract metadata from netlink attributes. */ - err = ovs_nla_get_flow_metadata(attr, key); + err = ovs_nla_get_flow_metadata(attr, key, log); if (err) return err; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 9e0a787c8627..a8b30f334388 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -257,6 +257,6 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, /* Extract key from packet coming from userspace. */ int ovs_flow_key_extract_userspace(const struct nlattr *attr, struct sk_buff *skb, - struct sw_flow_key *key); + struct sw_flow_key *key, bool log); #endif /* flow.h */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 98a3e96b7d93..c0d066def228 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -112,7 +112,7 @@ static void update_range(struct sw_flow_match *match, } while (0) static bool match_validate(const struct sw_flow_match *match, - u64 key_attrs, u64 mask_attrs) + u64 key_attrs, u64 mask_attrs, bool log) { u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; u64 mask_allowed = key_attrs; /* At most allow all key attributes */ @@ -230,15 +230,17 @@ static bool match_validate(const struct sw_flow_match *match, if ((key_attrs & key_expected) != key_expected) { /* Key attributes check failed. */ - OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", - (unsigned long long)key_attrs, (unsigned long long)key_expected); + OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", + (unsigned long long)key_attrs, + (unsigned long long)key_expected); return false; } if ((mask_attrs & mask_allowed) != mask_attrs) { /* Mask attributes check failed. */ - OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", - (unsigned long long)mask_attrs, (unsigned long long)mask_allowed); + OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)", + (unsigned long long)mask_attrs, + (unsigned long long)mask_allowed); return false; } @@ -328,7 +330,7 @@ static bool is_all_zero(const u8 *fp, size_t size) static int __parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], - u64 *attrsp, bool nz) + u64 *attrsp, bool log, bool nz) { const struct nlattr *nla; u64 attrs; @@ -340,21 +342,20 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, int expected_len; if (type > OVS_KEY_ATTR_MAX) { - OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n", + OVS_NLERR(log, "Key type %d is out of range max %d", type, OVS_KEY_ATTR_MAX); return -EINVAL; } if (attrs & (1 << type)) { - OVS_NLERR("Duplicate key attribute (type %d).\n", type); + OVS_NLERR(log, "Duplicate key (type %d).", type); return -EINVAL; } expected_len = ovs_key_lens[type]; if (nla_len(nla) != expected_len && expected_len != -1) { - OVS_NLERR("Key attribute has unexpected length (type=%d" - ", length=%d, expected=%d).\n", type, - nla_len(nla), expected_len); + OVS_NLERR(log, "Key %d has unexpected len %d expected %d", + type, nla_len(nla), expected_len); return -EINVAL; } @@ -364,7 +365,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, } } if (rem) { - OVS_NLERR("Message has %d unknown bytes.\n", rem); + OVS_NLERR(log, "Message has %d unknown bytes.", rem); return -EINVAL; } @@ -373,28 +374,84 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, } static int parse_flow_mask_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], u64 *attrsp) + const struct nlattr *a[], u64 *attrsp, + bool log) { - return __parse_flow_nlattrs(attr, a, attrsp, true); + return __parse_flow_nlattrs(attr, a, attrsp, log, true); } static int parse_flow_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], u64 *attrsp) + const struct nlattr *a[], u64 *attrsp, + bool log) { - return __parse_flow_nlattrs(attr, a, attrsp, false); + return __parse_flow_nlattrs(attr, a, attrsp, log, false); +} + +static int genev_tun_opt_from_nlattr(const struct nlattr *a, + struct sw_flow_match *match, bool is_mask, + bool log) +{ + unsigned long opt_key_offset; + + if (nla_len(a) > sizeof(match->key->tun_opts)) { + OVS_NLERR(log, "Geneve option length err (len %d, max %zu).", + nla_len(a), sizeof(match->key->tun_opts)); + return -EINVAL; + } + + if (nla_len(a) % 4 != 0) { + OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.", + nla_len(a)); + return -EINVAL; + } + + /* We need to record the length of the options passed + * down, otherwise packets with the same format but + * additional options will be silently matched. + */ + if (!is_mask) { + SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a), + false); + } else { + /* This is somewhat unusual because it looks at + * both the key and mask while parsing the + * attributes (and by extension assumes the key + * is parsed first). Normally, we would verify + * that each is the correct length and that the + * attributes line up in the validate function. + * However, that is difficult because this is + * variable length and we won't have the + * information later. + */ + if (match->key->tun_opts_len != nla_len(a)) { + OVS_NLERR(log, "Geneve option len %d != mask len %d", + match->key->tun_opts_len, nla_len(a)); + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); + } + + opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0, + nla_len(a)); + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), + nla_len(a), is_mask); + return 0; } static int ipv4_tun_from_nlattr(const struct nlattr *attr, - struct sw_flow_match *match, bool is_mask) + struct sw_flow_match *match, bool is_mask, + bool log) { struct nlattr *a; int rem; bool ttl = false; __be16 tun_flags = 0; - unsigned long opt_key_offset; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); + int err; + static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), @@ -410,15 +467,14 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, }; if (type > OVS_TUNNEL_KEY_ATTR_MAX) { - OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n", - type, OVS_TUNNEL_KEY_ATTR_MAX); + OVS_NLERR(log, "Tunnel attr %d out of range max %d", + type, OVS_TUNNEL_KEY_ATTR_MAX); return -EINVAL; } if (ovs_tunnel_key_lens[type] != nla_len(a) && ovs_tunnel_key_lens[type] != -1) { - OVS_NLERR("IPv4 tunnel attribute type has unexpected " - " length (type=%d, length=%d, expected=%d).\n", + OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", type, nla_len(a), ovs_tunnel_key_lens[type]); return -EINVAL; } @@ -464,58 +520,14 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, tun_flags |= TUNNEL_OAM; break; case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: - tun_flags |= TUNNEL_OPTIONS_PRESENT; - if (nla_len(a) > sizeof(match->key->tun_opts)) { - OVS_NLERR("Geneve option length exceeds maximum size (len %d, max %zu).\n", - nla_len(a), - sizeof(match->key->tun_opts)); - return -EINVAL; - } - - if (nla_len(a) % 4 != 0) { - OVS_NLERR("Geneve option length is not a multiple of 4 (len %d).\n", - nla_len(a)); - return -EINVAL; - } - - /* We need to record the length of the options passed - * down, otherwise packets with the same format but - * additional options will be silently matched. - */ - if (!is_mask) { - SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a), - false); - } else { - /* This is somewhat unusual because it looks at - * both the key and mask while parsing the - * attributes (and by extension assumes the key - * is parsed first). Normally, we would verify - * that each is the correct length and that the - * attributes line up in the validate function. - * However, that is difficult because this is - * variable length and we won't have the - * information later. - */ - if (match->key->tun_opts_len != nla_len(a)) { - OVS_NLERR("Geneve option key length (%d) is different from mask length (%d).", - match->key->tun_opts_len, - nla_len(a)); - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, - true); - } + err = genev_tun_opt_from_nlattr(a, match, is_mask, log); + if (err) + return err; - opt_key_offset = (unsigned long)GENEVE_OPTS( - (struct sw_flow_key *)0, - nla_len(a)); - SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, - nla_data(a), nla_len(a), - is_mask); + tun_flags |= TUNNEL_OPTIONS_PRESENT; break; default: - OVS_NLERR("Unknown IPv4 tunnel attribute (%d).\n", + OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", type); return -EINVAL; } @@ -524,18 +536,19 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); if (rem > 0) { - OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem); + OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.", + rem); return -EINVAL; } if (!is_mask) { if (!match->key->tun_key.ipv4_dst) { - OVS_NLERR("IPv4 tunnel destination address is zero.\n"); + OVS_NLERR(log, "IPv4 tunnel dst address is zero"); return -EINVAL; } if (!ttl) { - OVS_NLERR("IPv4 tunnel TTL not specified.\n"); + OVS_NLERR(log, "IPv4 tunnel TTL not specified."); return -EINVAL; } } @@ -614,7 +627,8 @@ int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, } static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, - const struct nlattr **a, bool is_mask) + const struct nlattr **a, bool is_mask, + bool log) { if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); @@ -642,7 +656,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, if (is_mask) { in_port = 0xffffffff; /* Always exact match in_port. */ } else if (in_port >= DP_MAX_PORTS) { - OVS_NLERR("Port (%d) exceeds maximum allowable (%d).\n", + OVS_NLERR(log, "Port %d exceeds max allowable %d", in_port, DP_MAX_PORTS); return -EINVAL; } @@ -661,7 +675,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, } if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, - is_mask)) + is_mask, log)) return -EINVAL; *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); } @@ -669,11 +683,12 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, } static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, - const struct nlattr **a, bool is_mask) + const struct nlattr **a, bool is_mask, + bool log) { int err; - err = metadata_from_nlattrs(match, &attrs, a, is_mask); + err = metadata_from_nlattrs(match, &attrs, a, is_mask, log); if (err) return err; @@ -694,9 +709,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); if (!(tci & htons(VLAN_TAG_PRESENT))) { if (is_mask) - OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n"); + OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit."); else - OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n"); + OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set."); return -EINVAL; } @@ -713,8 +728,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, /* Always exact match EtherType. */ eth_type = htons(0xffff); } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { - OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n", - ntohs(eth_type), ETH_P_802_3_MIN); + OVS_NLERR(log, "EtherType %x is less than min %x", + ntohs(eth_type), ETH_P_802_3_MIN); return -EINVAL; } @@ -729,8 +744,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { - OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n", - ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); + OVS_NLERR(log, "IPv4 frag type %d is out of range max %d", + ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); return -EINVAL; } SW_FLOW_KEY_PUT(match, ip.proto, @@ -753,8 +768,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { - OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n", - ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); + OVS_NLERR(log, "IPv6 frag type %d is out of range max %d", + ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); return -EINVAL; } SW_FLOW_KEY_PUT(match, ipv6.label, @@ -784,7 +799,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); if (!is_mask && (arp_key->arp_op & htons(0xff00))) { - OVS_NLERR("Unknown ARP opcode (opcode=%d).\n", + OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).", arp_key->arp_op); return -EINVAL; } @@ -885,7 +900,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, } if (attrs != 0) { - OVS_NLERR("Unknown key attributes (%llx).\n", + OVS_NLERR(log, "Unknown key attributes %llx", (unsigned long long)attrs); return -EINVAL; } @@ -926,10 +941,14 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val) * of this flow. * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink * attribute specifies the mask field of the wildcarded flow. + * @log: Boolean to allow kernel error logging. Normally true, but when + * probing for feature compatibility this should be passed in as false to + * suppress unnecessary error logging. */ int ovs_nla_get_match(struct sw_flow_match *match, const struct nlattr *nla_key, - const struct nlattr *nla_mask) + const struct nlattr *nla_mask, + bool log) { const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; const struct nlattr *encap; @@ -939,7 +958,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, bool encap_valid = false; int err; - err = parse_flow_nlattrs(nla_key, a, &key_attrs); + err = parse_flow_nlattrs(nla_key, a, &key_attrs, log); if (err) return err; @@ -950,7 +969,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { - OVS_NLERR("Invalid Vlan frame.\n"); + OVS_NLERR(log, "Invalid Vlan frame."); return -EINVAL; } @@ -961,22 +980,22 @@ int ovs_nla_get_match(struct sw_flow_match *match, encap_valid = true; if (tci & htons(VLAN_TAG_PRESENT)) { - err = parse_flow_nlattrs(encap, a, &key_attrs); + err = parse_flow_nlattrs(encap, a, &key_attrs, log); if (err) return err; } else if (!tci) { /* Corner case for truncated 802.1Q header. */ if (nla_len(encap)) { - OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n"); + OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute."); return -EINVAL; } } else { - OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n"); + OVS_NLERR(log, "Encap attr is set for non-VLAN frame"); return -EINVAL; } } - err = ovs_key_from_nlattrs(match, key_attrs, a, false); + err = ovs_key_from_nlattrs(match, key_attrs, a, false, log); if (err) return err; @@ -1010,7 +1029,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, nla_mask = newmask; } - err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs); + err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log); if (err) goto free_newmask; @@ -1022,7 +1041,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, __be16 tci = 0; if (!encap_valid) { - OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n"); + OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame."); err = -EINVAL; goto free_newmask; } @@ -1034,12 +1053,13 @@ int ovs_nla_get_match(struct sw_flow_match *match, if (eth_type == htons(0xffff)) { mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); encap = a[OVS_KEY_ATTR_ENCAP]; - err = parse_flow_mask_nlattrs(encap, a, &mask_attrs); + err = parse_flow_mask_nlattrs(encap, a, + &mask_attrs, log); if (err) goto free_newmask; } else { - OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n", - ntohs(eth_type)); + OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).", + ntohs(eth_type)); err = -EINVAL; goto free_newmask; } @@ -1048,18 +1068,19 @@ int ovs_nla_get_match(struct sw_flow_match *match, tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); if (!(tci & htons(VLAN_TAG_PRESENT))) { - OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci)); + OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).", + ntohs(tci)); err = -EINVAL; goto free_newmask; } } - err = ovs_key_from_nlattrs(match, mask_attrs, a, true); + err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log); if (err) goto free_newmask; } - if (!match_validate(match, key_attrs, mask_attrs)) + if (!match_validate(match, key_attrs, mask_attrs, log)) err = -EINVAL; free_newmask: @@ -1072,6 +1093,9 @@ free_newmask: * @key: Receives extracted in_port, priority, tun_key and skb_mark. * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * sequence. + * @log: Boolean to allow kernel error logging. Normally true, but when + * probing for feature compatibility this should be passed in as false to + * suppress unnecessary error logging. * * This parses a series of Netlink attributes that form a flow key, which must * take the same form accepted by flow_from_nlattrs(), but only enough of it to @@ -1080,14 +1104,15 @@ free_newmask: */ int ovs_nla_get_flow_metadata(const struct nlattr *attr, - struct sw_flow_key *key) + struct sw_flow_key *key, + bool log) { const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; struct sw_flow_match match; u64 attrs = 0; int err; - err = parse_flow_nlattrs(attr, a, &attrs); + err = parse_flow_nlattrs(attr, a, &attrs, log); if (err) return -EINVAL; @@ -1096,7 +1121,7 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr, key->phy.in_port = DP_MAX_PORTS; - return metadata_from_nlattrs(&match, &attrs, a, false); + return metadata_from_nlattrs(&match, &attrs, a, false, log); } int ovs_nla_put_flow(const struct sw_flow_key *swkey, @@ -1316,12 +1341,12 @@ nla_put_failure: #define MAX_ACTIONS_BUFSIZE (32 * 1024) -static struct sw_flow_actions *nla_alloc_flow_actions(int size) +static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) { struct sw_flow_actions *sfa; if (size > MAX_ACTIONS_BUFSIZE) { - OVS_NLERR("Flow action size (%u bytes) exceeds maximum", size); + OVS_NLERR(log, "Flow action size %u bytes exceeds max", size); return ERR_PTR(-EINVAL); } @@ -1341,7 +1366,7 @@ void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) } static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, - int attr_len) + int attr_len, bool log) { struct sw_flow_actions *acts; @@ -1361,7 +1386,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = MAX_ACTIONS_BUFSIZE; } - acts = nla_alloc_flow_actions(new_acts_size); + acts = nla_alloc_flow_actions(new_acts_size, log); if (IS_ERR(acts)) return (void *)acts; @@ -1376,11 +1401,11 @@ out: } static struct nlattr *__add_action(struct sw_flow_actions **sfa, - int attrtype, void *data, int len) + int attrtype, void *data, int len, bool log) { struct nlattr *a; - a = reserve_sfa_size(sfa, nla_attr_size(len)); + a = reserve_sfa_size(sfa, nla_attr_size(len), log); if (IS_ERR(a)) return a; @@ -1395,11 +1420,11 @@ static struct nlattr *__add_action(struct sw_flow_actions **sfa, } static int add_action(struct sw_flow_actions **sfa, int attrtype, - void *data, int len) + void *data, int len, bool log) { struct nlattr *a; - a = __add_action(sfa, attrtype, data, len); + a = __add_action(sfa, attrtype, data, len, log); if (IS_ERR(a)) return PTR_ERR(a); @@ -1407,12 +1432,12 @@ static int add_action(struct sw_flow_actions **sfa, int attrtype, } static inline int add_nested_action_start(struct sw_flow_actions **sfa, - int attrtype) + int attrtype, bool log) { int used = (*sfa)->actions_len; int err; - err = add_action(sfa, attrtype, NULL, 0); + err = add_action(sfa, attrtype, NULL, 0, log); if (err) return err; @@ -1431,12 +1456,12 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, static int __ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, - __be16 eth_type, __be16 vlan_tci); + __be16 eth_type, __be16 vlan_tci, bool log); static int validate_and_copy_sample(const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, - __be16 eth_type, __be16 vlan_tci) + __be16 eth_type, __be16 vlan_tci, bool log) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; @@ -1462,19 +1487,19 @@ static int validate_and_copy_sample(const struct nlattr *attr, return -EINVAL; /* validation done, copy sample action. */ - start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log); if (start < 0) return start; err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, - nla_data(probability), sizeof(u32)); + nla_data(probability), sizeof(u32), log); if (err) return err; - st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); + st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log); if (st_acts < 0) return st_acts; err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa, - eth_type, vlan_tci); + eth_type, vlan_tci, log); if (err) return err; @@ -1511,7 +1536,7 @@ void ovs_match_init(struct sw_flow_match *match, } static int validate_and_copy_set_tun(const struct nlattr *attr, - struct sw_flow_actions **sfa) + struct sw_flow_actions **sfa, bool log) { struct sw_flow_match match; struct sw_flow_key key; @@ -1520,7 +1545,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, int err, start; ovs_match_init(&match, &key, NULL); - err = ipv4_tun_from_nlattr(nla_data(attr), &match, false); + err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); if (err) return err; @@ -1549,12 +1574,12 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; }; - start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); if (start < 0) return start; a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, - sizeof(*tun_info) + key.tun_opts_len); + sizeof(*tun_info) + key.tun_opts_len, log); if (IS_ERR(a)) return PTR_ERR(a); @@ -1582,7 +1607,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, struct sw_flow_actions **sfa, - bool *set_tun, __be16 eth_type) + bool *set_tun, __be16 eth_type, bool log) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); @@ -1611,7 +1636,7 @@ static int validate_set(const struct nlattr *a, return -EINVAL; *set_tun = true; - err = validate_and_copy_set_tun(a, sfa); + err = validate_and_copy_set_tun(a, sfa, log); if (err) return err; break; @@ -1704,12 +1729,12 @@ static int validate_userspace(const struct nlattr *attr) } static int copy_action(const struct nlattr *from, - struct sw_flow_actions **sfa) + struct sw_flow_actions **sfa, bool log) { int totlen = NLA_ALIGN(from->nla_len); struct nlattr *to; - to = reserve_sfa_size(sfa, from->nla_len); + to = reserve_sfa_size(sfa, from->nla_len, log); if (IS_ERR(to)) return PTR_ERR(to); @@ -1720,7 +1745,7 @@ static int copy_action(const struct nlattr *from, static int __ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, - __be16 eth_type, __be16 vlan_tci) + __be16 eth_type, __be16 vlan_tci, bool log) { const struct nlattr *a; bool out_tnl_port = false; @@ -1843,7 +1868,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_SET: err = validate_set(a, key, sfa, - &out_tnl_port, eth_type); + &out_tnl_port, eth_type, log); if (err) return err; @@ -1852,18 +1877,18 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_SAMPLE: err = validate_and_copy_sample(a, key, depth, sfa, - eth_type, vlan_tci); + eth_type, vlan_tci, log); if (err) return err; skip_copy = true; break; default: - OVS_NLERR("Unknown tunnel attribute (%d).\n", type); + OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; } if (!skip_copy) { - err = copy_action(a, sfa); + err = copy_action(a, sfa, log); if (err) return err; } @@ -1877,16 +1902,16 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, - struct sw_flow_actions **sfa) + struct sw_flow_actions **sfa, bool log) { int err; - *sfa = nla_alloc_flow_actions(nla_len(attr)); + *sfa = nla_alloc_flow_actions(nla_len(attr), log); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type, - key->eth.tci); + key->eth.tci, log); if (err) kfree(*sfa); diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 90bbe3785504..577f12be3459 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -45,17 +45,17 @@ void ovs_match_init(struct sw_flow_match *match, int ovs_nla_put_flow(const struct sw_flow_key *, const struct sw_flow_key *, struct sk_buff *); -int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *); +int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *, + bool log); -int ovs_nla_get_match(struct sw_flow_match *match, - const struct nlattr *, - const struct nlattr *); +int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key, + const struct nlattr *mask, bool log); int ovs_nla_put_egress_tunnel_key(struct sk_buff *, const struct ovs_tunnel_info *); int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, - struct sw_flow_actions **sfa); + struct sw_flow_actions **sfa, bool log); int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb); -- cgit v1.2.3